diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index dff4483cc5505..70c3f9b0c3c83 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -69,12 +69,11 @@ jobs: # In order to get diff files with: fetch-depth: 0 - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -137,12 +136,11 @@ jobs: # In order to get diff files with: fetch-depth: 0 - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 832826333f090..9dc9d85520c2c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -76,26 +76,44 @@ jobs: id: set-outputs run: | if [ -z "${{ inputs.jobs }}" ]; then - pyspark=true; sparkr=true; tpcds=true; docker=true; pyspark_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` pyspark=`./dev/is-changed.py -m $pyspark_modules` - sparkr=`./dev/is-changed.py -m sparkr` - tpcds=`./dev/is-changed.py -m sql` - docker=`./dev/is-changed.py -m docker-integration-tests` - # 'build' and 'maven-build' are always true for now. - # It does not save significant time and most of PRs trigger the build. 
+ if [[ "${{ github.repository }}" != 'apache/spark' ]]; then + pandas=$pyspark + yarn=`./dev/is-changed.py -m yarn` + kubernetes=`./dev/is-changed.py -m kubernetes` + sparkr=`./dev/is-changed.py -m sparkr` + tpcds=`./dev/is-changed.py -m sql` + docker=`./dev/is-changed.py -m docker-integration-tests` + buf=true + ui=true + docs=true + else + pandas=false + yarn=false + kubernetes=false + sparkr=false + tpcds=false + docker=false + buf=false + ui=false + docs=false + fi + build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,protobuf,yarn,connect,sql,hive"` precondition=" { - \"build\": \"true\", + \"build\": \"$build\", \"pyspark\": \"$pyspark\", + \"pyspark-pandas\": \"$pandas\", \"sparkr\": \"$sparkr\", \"tpcds-1g\": \"$tpcds\", \"docker-integration-tests\": \"$docker\", - \"maven-build\": \"true\", \"lint\" : \"true\", - \"k8s-integration-tests\" : \"true\", - \"buf\" : \"true\", - \"ui\" : \"true\", + \"docs\" : \"$docs\", + \"yarn\" : \"$yarn\", + \"k8s-integration-tests\" : \"$kubernetes\", + \"buf\" : \"$buf\", + \"ui\" : \"$ui\", }" echo $precondition # For debugging # Remove `\n` to avoid "Invalid format" error @@ -123,7 +141,7 @@ jobs: needs: precondition if: fromJson(needs.precondition.outputs.required).build == 'true' runs-on: ubuntu-latest - timeout-minutes: 300 + timeout-minutes: 180 strategy: fail-fast: false matrix: @@ -145,9 +163,8 @@ jobs: mllib-local, mllib, graphx - >- streaming, sql-kafka-0-10, streaming-kafka-0-10, streaming-kinesis-asl, - kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf - - >- - yarn, connect + kubernetes, hadoop-cloud, spark-ganglia-lgpl, protobuf, connect + - yarn # Here, we split Hive and SQL tests into some of slow ones and the rest of them. included-tags: [""] excluded-tags: [""] @@ -185,14 +202,22 @@ jobs: hive: hive2.3 excluded-tags: org.apache.spark.tags.ExtendedSQLTest,org.apache.spark.tags.SlowSQLTest comment: "- other tests" + exclude: + # Always run if yarn == 'true', even infra-image is skip (such as non-master job) + # In practice, the build will run in individual PR, but not against the individual commit + # in Apache Spark repository. + - modules: ${{ fromJson(needs.precondition.outputs.required).yarn != 'true' && 'yarn' }} env: MODULES_TO_TEST: ${{ matrix.modules }} EXCLUDED_TAGS: ${{ matrix.excluded-tags }} INCLUDED_TAGS: ${{ matrix.included-tags }} HADOOP_PROFILE: ${{ matrix.hadoop }} HIVE_PROFILE: ${{ matrix.hive }} + # GitHub Actions' default miniconda to use in pip packaging test. + CONDA_PREFIX: /usr/share/miniconda GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost + NOLINT_ON_COMPILE: true SKIP_UNIDOC: true SKIP_MIMA: true SKIP_PACKAGING: true @@ -212,12 +237,11 @@ jobs: git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
- - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -334,7 +358,7 @@ jobs: if: (!cancelled()) && fromJson(needs.precondition.outputs.required).pyspark == 'true' name: "Build modules: ${{ matrix.modules }}" runs-on: ubuntu-latest - timeout-minutes: 300 + timeout-minutes: 180 container: image: ${{ needs.precondition.outputs.image_url }} strategy: @@ -349,12 +373,12 @@ jobs: pyspark-core, pyspark-errors, pyspark-streaming - >- pyspark-mllib, pyspark-ml, pyspark-ml-connect + - >- + pyspark-connect - >- pyspark-pandas - >- pyspark-pandas-slow - - >- - pyspark-connect - >- pyspark-pandas-connect-part0 - >- @@ -363,11 +387,23 @@ jobs: pyspark-pandas-connect-part2 - >- pyspark-pandas-connect-part3 + exclude: + # Always run if pyspark-pandas == 'true', even infra-image is skip (such as non-master job) + # In practice, the build will run in individual PR, but not against the individual commit + # in Apache Spark repository. + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-slow' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part0' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part1' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part2' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part3' }} env: MODULES_TO_TEST: ${{ matrix.modules }} - PYTHON_TO_TEST: 'python3.9' + PYTHON_TO_TEST: 'python3.11' HADOOP_PROFILE: ${{ inputs.hadoop }} HIVE_PROFILE: hive2.3 + # GitHub Actions' default miniconda to use in pip packaging test. + CONDA_PREFIX: /usr/share/miniconda GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost SKIP_UNIDOC: true @@ -394,12 +430,11 @@ jobs: git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty # Cache local repositories. Note that GitHub Actions cache has a 10G limit. - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -432,19 +467,12 @@ jobs: echo $py $py -m pip list done - - name: Install Conda for pip packaging test - if: contains(matrix.modules, 'pyspark-errors') - run: | - curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh - bash miniconda.sh -b -p $HOME/miniconda - rm miniconda.sh # Run the tests. - name: Run tests env: ${{ fromJSON(inputs.envs) }} shell: 'script -q -e -c "bash {0}"' run: | if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then - export PATH=$PATH:$HOME/miniconda/bin export SKIP_PACKAGING=false echo "Python Packaging Tests Enabled!" 
fi @@ -482,7 +510,7 @@ jobs: if: (!cancelled()) && fromJson(needs.precondition.outputs.required).sparkr == 'true' name: "Build modules: sparkr" runs-on: ubuntu-latest - timeout-minutes: 300 + timeout-minutes: 180 container: image: ${{ needs.precondition.outputs.image_url }} env: @@ -512,12 +540,11 @@ jobs: git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty # Cache local repositories. Note that GitHub Actions cache has a 10G limit. - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -598,17 +625,18 @@ jobs: - name: Python CodeGen check run: ./dev/connect-check-protos.py - # Static analysis, and documentation build + # Static analysis lint: needs: [precondition, infra-image] # always run if lint == 'true', even infra-image is skip (such as non-master job) if: (!cancelled()) && fromJson(needs.precondition.outputs.required).lint == 'true' - name: Linters, licenses, dependencies and documentation generation + name: Linters, licenses, and dependencies runs-on: ubuntu-latest - timeout-minutes: 300 + timeout-minutes: 180 env: LC_ALL: C.UTF-8 LANG: C.UTF-8 + NOLINT_ON_COMPILE: false PYSPARK_DRIVER_PYTHON: python3.9 PYSPARK_PYTHON: python3.9 GITHUB_PREV_SHA: ${{ github.event.before }} @@ -632,12 +660,11 @@ jobs: git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty # Cache local repositories. Note that GitHub Actions cache has a 10G limit. - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -741,7 +768,90 @@ jobs: Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" - name: Install R linter dependencies and SparkR run: ./R/install-dev.sh - # Should delete this section after SPARK 3.5 EOL. 
+ - name: R linter + run: ./dev/lint-r + + # Documentation build + docs: + needs: [precondition, infra-image] + # always run if lint == 'true', even infra-image is skip (such as non-master job) + if: (!cancelled()) && fromJson(needs.precondition.outputs.required).docs == 'true' + name: Documentation generation + runs-on: ubuntu-latest + timeout-minutes: 180 + env: + LC_ALL: C.UTF-8 + LANG: C.UTF-8 + NOLINT_ON_COMPILE: false + PYSPARK_DRIVER_PYTHON: python3.9 + PYSPARK_PYTHON: python3.9 + GITHUB_PREV_SHA: ${{ github.event.before }} + container: + image: ${{ needs.precondition.outputs.image_url }} + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + repository: apache/spark + ref: ${{ inputs.branch }} + - name: Add GITHUB_WORKSPACE to git trust safe.directory + run: | + git config --global --add safe.directory ${GITHUB_WORKSPACE} + - name: Sync the current branch with the latest in Apache Spark + if: github.repository != 'apache/spark' + run: | + echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV + git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD + git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty + # Cache local repositories. Note that GitHub Actions cache has a 10G limit. + - name: Cache SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/*.jar + ~/.sbt + key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build- + - name: Cache Coursier local repository + uses: actions/cache@v4 + with: + path: ~/.cache/coursier + key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} + restore-keys: | + docs-coursier- + - name: Cache Maven local repository + uses: actions/cache@v4 + with: + path: ~/.m2/repository + key: docs-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + docs-maven- + - name: Free up disk space + run: | + if [ -f ./dev/free_disk_space_container ]; then + ./dev/free_disk_space_container + fi + - name: Install Java ${{ inputs.java }} + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: ${{ inputs.java }} + - name: Install Python dependencies for python linter and documentation generation + if: inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5' + run: | + # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 + # See 'ipython_genutils' in SPARK-38517 + # See 'docutils<0.18.0' in SPARK-39421 + python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ + ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \ + 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \ + 'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ + 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' + python3.9 -m pip list - name: Install dependencies for documentation generation for branch-3.4, branch-3.5 if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5' 
run: | @@ -759,13 +869,16 @@ jobs: python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 - name: Install dependencies for documentation generation run: | + # Keep the version of Bundler here in sync with the following locations: + # - dev/create-release/spark-rm/Dockerfile + # - docs/README.md gem install bundler -v 2.4.22 cd docs bundle install - - name: R linter - run: ./dev/lint-r - name: Run documentation build run: | + # We need this link because the jekyll build calls `python`. + ln -s "$(which python3.9)" "/usr/local/bin/python" # Build docs first with SKIP_API to ensure they are buildable without requiring any # language docs to be built beforehand. cd docs; SKIP_API=1 bundle exec jekyll build; cd .. @@ -788,67 +901,6 @@ jobs: path: site.tar.bz2 retention-days: 1 - maven-build: - needs: precondition - if: fromJson(needs.precondition.outputs.required).maven-build == 'true' - name: Java ${{ matrix.java }} build with Maven (${{ matrix.os }}) - strategy: - fail-fast: false - matrix: - include: - - java: 17 - os: ubuntu-latest - - java: 21 - os: ubuntu-latest - - java: 21 - os: macos-14 - runs-on: ${{ matrix.os }} - timeout-minutes: 300 - steps: - - name: Checkout Spark repository - uses: actions/checkout@v4 - with: - fetch-depth: 0 - repository: apache/spark - ref: ${{ inputs.branch }} - - name: Sync the current branch with the latest in Apache Spark - if: github.repository != 'apache/spark' - run: | - git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD - git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven - uses: actions/cache@v4 - with: - path: | - build/apache-maven-* - build/scala-* - build/*.jar - ~/.sbt - key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} - restore-keys: | - build- - - name: Cache Maven local repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - java${{ matrix.java }}-maven- - - name: Install Java ${{ matrix.java }} - uses: actions/setup-java@v4 - with: - distribution: zulu - java-version: ${{ matrix.java }} - - name: Build with Maven - run: | - export MAVEN_OPTS="-Xss64m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" - export MAVEN_CLI_OPTS="--no-transfer-progress" - export JAVA_VERSION=${{ matrix.java }} - # It uses Maven's 'install' intentionally, see https://github.com/apache/spark/pull/26414. 
- ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=${JAVA_VERSION/-ea} install - rm -rf ~/.m2/repository/org/apache/spark - # Any TPC-DS related updates on this job need to be applied to tpcds-1g-gen job of benchmark.yml as well tpcds-1g: needs: precondition @@ -856,7 +908,7 @@ jobs: name: Run TPC-DS queries with SF=1 # Pin to 'Ubuntu 20.04' due to 'databricks/tpcds-kit' compilation runs-on: ubuntu-20.04 - timeout-minutes: 300 + timeout-minutes: 180 env: SPARK_LOCAL_IP: localhost steps: @@ -872,12 +924,11 @@ jobs: git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -958,13 +1009,12 @@ jobs: if: fromJson(needs.precondition.outputs.required).docker-integration-tests == 'true' name: Run Docker integration tests runs-on: ubuntu-latest - timeout-minutes: 300 + timeout-minutes: 180 env: HADOOP_PROFILE: ${{ inputs.hadoop }} HIVE_PROFILE: hive2.3 GITHUB_PREV_SHA: ${{ github.event.before }} SPARK_LOCAL_IP: localhost - ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-free:23.3 SKIP_UNIDOC: true SKIP_MIMA: true SKIP_PACKAGING: true @@ -982,12 +1032,11 @@ jobs: git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -1027,7 +1076,7 @@ jobs: if: fromJson(needs.precondition.outputs.required).k8s-integration-tests == 'true' name: Run Spark on Kubernetes Integration test runs-on: ubuntu-latest - timeout-minutes: 300 + timeout-minutes: 180 steps: - name: Checkout Spark repository uses: actions/checkout@v4 @@ -1042,12 +1091,11 @@ jobs: git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/} git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -1086,6 +1134,11 @@ jobs: minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 & kubectl create clusterrolebinding serviceaccounts-cluster-admin 
--clusterrole=cluster-admin --group=system:serviceaccounts || true kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.2/installer/volcano-development.yaml || true + if [[ "${{ inputs.branch }}" == 'branch-3.5' || "${{ inputs.branch }}" == 'branch-3.4' ]]; then + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml || true + else + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.2/installer/volcano-development.yaml || true + fi eval $(minikube docker-env) build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" - name: Upload Spark on K8S integration tests log files @@ -1100,7 +1153,7 @@ jobs: if: fromJson(needs.precondition.outputs.required).ui == 'true' name: Run Spark UI tests runs-on: ubuntu-latest - timeout-minutes: 300 + timeout-minutes: 180 steps: - uses: actions/checkout@v4 - name: Use Node.js diff --git a/.github/workflows/build_branch34.yml b/.github/workflows/build_branch34.yml index deb43d82c9791..deb6c42407970 100644 --- a/.github/workflows/build_branch34.yml +++ b/.github/workflows/build_branch34.yml @@ -43,9 +43,9 @@ jobs: jobs: >- { "build": "true", - "pyspark": "true", "sparkr": "true", "tpcds-1g": "true", "docker-integration-tests": "true", + "k8s-integration-tests": "true", "lint" : "true" } diff --git a/.github/workflows/cancel_duplicate_workflow_runs.yml b/.github/workflows/build_branch34_python.yml similarity index 58% rename from .github/workflows/cancel_duplicate_workflow_runs.yml rename to .github/workflows/build_branch34_python.yml index d41ca31190d94..c109ba2dc7922 100644 --- a/.github/workflows/cancel_duplicate_workflow_runs.yml +++ b/.github/workflows/build_branch34_python.yml @@ -17,22 +17,29 @@ # under the License. 
# -name: Cancelling Duplicates +name: "Build / Python-only (branch-3.4)" + on: - workflow_run: - workflows: - - 'Build' - types: ['requested'] + schedule: + - cron: '0 9 * * *' jobs: - cancel-duplicate-workflow-runs: - name: "Cancel duplicate workflow runs" - runs-on: ubuntu-latest - steps: - - uses: potiuk/cancel-workflow-runs@4723494a065d162f8e9efd071b98e0126e00f866 # @master - name: "Cancel duplicate workflow runs" - with: - cancelMode: allDuplicates - token: ${{ secrets.GITHUB_TOKEN }} - sourceRunId: ${{ github.event.workflow_run.id }} - skipEventTypes: '["push", "schedule"]' + run-build: + permissions: + packages: write + name: Run + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + java: 8 + branch: branch-3.4 + hadoop: hadoop3 + envs: >- + { + "PYTHON_TO_TEST": "" + } + jobs: >- + { + "pyspark": "true", + "pyspark-pandas": "true" + } diff --git a/.github/workflows/build_branch35.yml b/.github/workflows/build_branch35.yml index 9e6fe13c020e4..2ec080d5722c1 100644 --- a/.github/workflows/build_branch35.yml +++ b/.github/workflows/build_branch35.yml @@ -43,9 +43,9 @@ jobs: jobs: >- { "build": "true", - "pyspark": "true", "sparkr": "true", "tpcds-1g": "true", "docker-integration-tests": "true", + "k8s-integration-tests": "true", "lint" : "true" } diff --git a/.github/workflows/build_branch35_python.yml b/.github/workflows/build_branch35_python.yml new file mode 100644 index 0000000000000..1585534d33ba9 --- /dev/null +++ b/.github/workflows/build_branch35_python.yml @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: "Build / Python-only (branch-3.5)" + +on: + schedule: + - cron: '0 11 * * *' + +jobs: + run-build: + permissions: + packages: write + name: Run + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + java: 8 + branch: branch-3.5 + hadoop: hadoop3 + envs: >- + { + "PYTHON_TO_TEST": "" + } + jobs: >- + { + "pyspark": "true", + "pyspark-pandas": "true" + } diff --git a/.github/workflows/build_java21.yml b/.github/workflows/build_java21.yml index b1ef5a3218356..871e1a9c07ef0 100644 --- a/.github/workflows/build_java21.yml +++ b/.github/workflows/build_java21.yml @@ -46,5 +46,9 @@ jobs: "pyspark": "true", "sparkr": "true", "tpcds-1g": "true", - "docker-integration-tests": "true" + "docker-integration-tests": "true", + "yarn": "true", + "k8s-integration-tests": "true", + "buf": "true", + "ui": "true" } diff --git a/.github/workflows/build_maven_java21_macos14.yml b/.github/workflows/build_maven_java21_macos14.yml index 70b47fcecb260..fb5e609f4eae0 100644 --- a/.github/workflows/build_maven_java21_macos14.yml +++ b/.github/workflows/build_maven_java21_macos14.yml @@ -21,7 +21,7 @@ name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, macos-14)" on: schedule: - - cron: '0 20 * * *' + - cron: '0 20 */2 * *' jobs: run-build: diff --git a/.github/workflows/build_ansi.yml b/.github/workflows/build_non_ansi.yml similarity index 84% rename from .github/workflows/build_ansi.yml rename to .github/workflows/build_non_ansi.yml index d9f587ae203bb..4ac2a589f4f81 100644 --- a/.github/workflows/build_ansi.yml +++ b/.github/workflows/build_non_ansi.yml @@ -17,11 +17,11 @@ # under the License. # -name: "Build / ANSI (master, Hadoop 3, JDK 17, Scala 2.13)" +name: "Build / Non-ANSI (master, Hadoop 3, JDK 17, Scala 2.13)" on: schedule: - - cron: '0 1,13 * * *' + - cron: '0 1 * * *' jobs: run-build: @@ -36,13 +36,15 @@ jobs: hadoop: hadoop3 envs: >- { - "SPARK_ANSI_SQL_MODE": "true", + "SPARK_ANSI_SQL_MODE": "false", } jobs: >- { "build": "true", + "docs": "true", "pyspark": "true", "sparkr": "true", "tpcds-1g": "true", - "docker-integration-tests": "true" + "docker-integration-tests": "true", + "yarn": "true" } diff --git a/.github/workflows/build_python_3.10.yml b/.github/workflows/build_python_3.10.yml new file mode 100644 index 0000000000000..5ae37fbc9120e --- /dev/null +++ b/.github/workflows/build_python_3.10.yml @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# + +name: "Build / Python-only (master, Python 3.10)" + +on: + schedule: + - cron: '0 17 * * *' + +jobs: + run-build: + permissions: + packages: write + name: Run + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + java: 17 + branch: master + hadoop: hadoop3 + envs: >- + { + "PYTHON_TO_TEST": "python3.10" + } + jobs: >- + { + "pyspark": "true", + "pyspark-pandas": "true" + } diff --git a/.github/workflows/build_python_3.12.yml b/.github/workflows/build_python_3.12.yml new file mode 100644 index 0000000000000..e1fd45a7d8838 --- /dev/null +++ b/.github/workflows/build_python_3.12.yml @@ -0,0 +1,45 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +name: "Build / Python-only (master, Python 3.12)" + +on: + schedule: + - cron: '0 19 * * *' + +jobs: + run-build: + permissions: + packages: write + name: Run + uses: ./.github/workflows/build_and_test.yml + if: github.repository == 'apache/spark' + with: + java: 17 + branch: master + hadoop: hadoop3 + envs: >- + { + "PYTHON_TO_TEST": "python3.12" + } + jobs: >- + { + "pyspark": "true", + "pyspark-pandas": "true" + } diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml index 965e839b6b2bc..01d9d272d4366 100644 --- a/.github/workflows/build_python_connect.yml +++ b/.github/workflows/build_python_connect.yml @@ -33,12 +33,11 @@ jobs: steps: - name: Checkout Spark repository uses: actions/checkout@v4 - - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-spark-connect-python-only-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -63,7 +62,7 @@ jobs: architecture: x64 - name: Build Spark run: | - ./build/sbt -Phive test:package + ./build/sbt -Phive Test/package - name: Install pure Python package (pyspark-connect) env: SPARK_TESTING: 1 @@ -71,8 +70,8 @@ jobs: cd python python packaging/connect/setup.py sdist cd dist - pip install pyspark-connect-*.tar.gz - pip install 'six==1.16.0' 'pandas<=2.2.2' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' torch torchvision torcheval deepspeed unittest-xml-reporting + pip install pyspark*connect-*.tar.gz + pip install 'six==1.16.0' 'pandas<=2.2.2' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting - name: Run tests env: SPARK_TESTING: 1 @@ -81,26 +80,46 @@ jobs: # Make less noisy cp conf/log4j2.properties.template conf/log4j2.properties sed -i 's/rootLogger.level = 
info/rootLogger.level = warn/g' conf/log4j2.properties - # Start a Spark Connect server - PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" --jars `find connector/connect/server/target -name spark-connect*SNAPSHOT.jar` - # Make sure running Python workers that contains pyspark.core once. They will be reused. - python -c "from pyspark.sql import SparkSession; _ = SparkSession.builder.remote('sc://localhost').getOrCreate().range(100).repartition(100).mapInPandas(lambda x: x, 'id INT').collect()" + + # Start a Spark Connect server for local + PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ + --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \ + --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`" + + # Remove Py4J and PySpark zipped library to make sure there is no JVM connection - rm python/lib/* - rm -r python/pyspark + mv python/lib lib.back + mv python/pyspark pyspark.back + + # Several catalog-related tests need to run sequentially, e.g., writing a table in a listener. ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect,pyspark-ml-connect # None of the Pandas API on Spark tests depend on each other, so run them in parallel ./python/run-tests --parallelism=4 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3 + + # Stop Spark Connect server.
+ ./sbin/stop-connect-server.sh + mv lib.back python/lib + mv pyspark.back python/pyspark + + # Start a Spark Connect server for local-cluster + PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ + --master "local-cluster[2, 4, 1024]" \ + --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \ + --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`" + + # Remove Py4J and PySpark zipped library to make sure there is no JVM connection + mv python/lib lib.back + mv python/pyspark pyspark.back + + ./python/run-tests --parallelism=1 --python-executables=python3 --testnames "pyspark.resource.tests.test_connect_resources,pyspark.sql.tests.connect.client.test_artifact,pyspark.sql.tests.connect.client.test_artifact_localcluster,pyspark.sql.tests.connect.test_resources" - name: Upload test results to report if: always() uses: actions/upload-artifact@v4 with: name: test-results-spark-connect-python-only path: "**/target/test-reports/*.xml" - - name: Upload unit tests log files - if: failure() + - name: Upload Spark Connect server log file + if: ${{ !success() }} uses: actions/upload-artifact@v4 with: name: unit-tests-log-spark-connect-python-only - path: "**/target/unit-tests.log" + path: logs/*.out diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml new file mode 100644 index 0000000000000..abff471349a22 --- /dev/null +++ b/.github/workflows/build_python_connect35.yml @@ -0,0 +1,113 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License.
+# + +name: Build / Spark Connect Python-only (master-server, 35-client, Python 3.11) + +on: + schedule: + - cron: '0 21 * * *' + +jobs: + # Build: build Spark and run the tests for specified modules using SBT + build: + name: "Build modules: pyspark-connect" + runs-on: ubuntu-latest + timeout-minutes: 100 + if: github.repository == 'apache/spark' + steps: + - name: Checkout Spark repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Cache SBT and Maven + uses: actions/cache@v4 + with: + path: | + build/apache-maven-* + build/*.jar + ~/.sbt + key: build-spark-connect-python-only-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} + restore-keys: | + build-spark-connect-python-only- + - name: Cache Coursier local repository + uses: actions/cache@v4 + with: + path: ~/.cache/coursier + key: coursier-build-spark-connect-python-only-${{ hashFiles('**/pom.xml') }} + restore-keys: | + coursier-build-spark-connect-python-only- + - name: Install Java 17 + uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: 17 + - name: Install Python 3.11 + uses: actions/setup-python@v5 + with: + python-version: '3.11' + architecture: x64 + - name: Build Spark + run: | + ./build/sbt -Phive Test/package + - name: Install Python dependencies + run: | + pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' + + # Add Python deps for Spark Connect. + pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4' 'graphviz==0.20.3' + + # Add torch as a testing dependency for TorchDistributor + pip install 'torch==2.0.1' 'torchvision==0.15.2' torcheval + - name: Run tests + env: + SPARK_TESTING: 1 + SPARK_SKIP_CONNECT_COMPAT_TESTS: 1 + SPARK_CONNECT_TESTING_REMOTE: sc://localhost + run: | + # Make less noisy + cp conf/log4j2.properties.template conf/log4j2.properties + sed -i 's/rootLogger.level = info/rootLogger.level = warn/g' conf/log4j2.properties + + # Start a Spark Connect server for local + PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \ + --driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \ + --jars "`find connector/connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`" + + # Checkout to branch-3.5 to use the tests in branch-3.5. + cd .. + git clone --single-branch --branch branch-3.5 $GITHUB_SERVER_URL/$GITHUB_REPOSITORY spark-3.5 + cd spark-3.5 + + # Several tests related to catalog requires to run them sequencially, e.g., writing a table in a listener. 
+ # Run branch-3.5 tests + ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect + # None of tests are dependent on each other in Pandas API on Spark so run them in parallel + ./python/run-tests --parallelism=4 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect + - name: Upload test results to report + if: always() + uses: actions/upload-artifact@v4 + with: + name: test-results-spark-connect-python-only + path: "**/target/test-reports/*.xml" + - name: Upload Spark Connect server log file + if: ${{ !success() }} + uses: actions/upload-artifact@v4 + with: + name: unit-tests-log-spark-connect-python-only + path: logs/*.out diff --git a/.github/workflows/build_python.yml b/.github/workflows/build_python_pypy3.9.yml similarity index 79% rename from .github/workflows/build_python.yml rename to .github/workflows/build_python_pypy3.9.yml index 8e36227c08372..e05071ef034a0 100644 --- a/.github/workflows/build_python.yml +++ b/.github/workflows/build_python_pypy3.9.yml @@ -17,7 +17,7 @@ # under the License. # -name: "Build / Python-only (master, PyPy 3.8/Python 3.10/Python 3.11/Python 3.12)" +name: "Build / Python-only (master, PyPy 3.9)" on: schedule: @@ -25,10 +25,6 @@ on: jobs: run-build: - strategy: - fail-fast: false - matrix: - pyversion: ["pypy3", "python3.10", "python3.11", "python3.12"] permissions: packages: write name: Run @@ -40,9 +36,10 @@ jobs: hadoop: hadoop3 envs: >- { - "PYTHON_TO_TEST": "${{ matrix.pyversion }}" + "PYTHON_TO_TEST": "pypy3" } jobs: >- { - "pyspark": "true" + "pyspark": "true", + "pyspark-pandas": "true" } diff --git a/.github/workflows/build_rockdb_as_ui_backend.yml b/.github/workflows/build_rockdb_as_ui_backend.yml index e11ec85b8b176..96009c41dbbf9 100644 --- a/.github/workflows/build_rockdb_as_ui_backend.yml +++ b/.github/workflows/build_rockdb_as_ui_backend.yml @@ -43,6 +43,5 @@ jobs: "build": "true", "pyspark": "true", "sparkr": "true", - "tpcds-1g": "true", - "docker-integration-tests": "true" + "yarn": "true" } diff --git a/.github/workflows/build_sparkr_window.yml b/.github/workflows/build_sparkr_window.yml index 6debf0cd12235..e7db2b909f8f5 100644 --- a/.github/workflows/build_sparkr_window.yml +++ b/.github/workflows/build_sparkr_window.yml @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. 
# -name: "Build / SparkR-only (master, 4.3.3, windows-2019)" +name: "Build / SparkR-only (master, 4.4.0, windows-2022)" on: schedule: @@ -25,7 +25,7 @@ on: jobs: build: name: "Build module: sparkr" - runs-on: windows-2019 + runs-on: windows-2022 timeout-minutes: 300 if: github.repository == 'apache/spark' steps: @@ -35,7 +35,7 @@ jobs: repository: cdarlint/winutils - name: Move Hadoop winutil into home directory run: | - Move-Item -Path hadoop-3.3.5 -Destination ~\ + Move-Item -Path hadoop-3.3.6 -Destination ~\ - name: Checkout Spark repository uses: actions/checkout@v4 - name: Cache Maven local repository @@ -50,10 +50,10 @@ jobs: with: distribution: zulu java-version: 17 - - name: Install R 4.3.3 + - name: Install R 4.4.0 uses: r-lib/actions/setup-r@v2 with: - r-version: 4.3.3 + r-version: 4.4.0 - name: Install R dependencies run: | Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')" @@ -79,7 +79,7 @@ jobs: shell: cmd - name: Run SparkR tests run: | - set HADOOP_HOME=%USERPROFILE%\hadoop-3.3.5 + set HADOOP_HOME=%USERPROFILE%\hadoop-3.3.6 set PATH=%HADOOP_HOME%\bin;%PATH% .\bin\spark-submit2.cmd --driver-java-options "-Dlog4j.configurationFile=file:///%CD:\=/%/R/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" R\pkg\tests\run-all.R shell: cmd diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index b01f08a23e470..d23cea926a274 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -142,12 +142,11 @@ jobs: git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty # Cache local repositories. Note that GitHub Actions cache has a 10G limit. 
- - name: Cache Scala, SBT and Maven + - name: Cache SBT and Maven uses: actions/cache@v4 with: path: | build/apache-maven-* - build/scala-* build/*.jar ~/.sbt key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }} @@ -191,18 +190,18 @@ jobs: export ENABLE_KINESIS_TESTS=0 # Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10 export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"` - ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install if [[ "$INCLUDED_TAGS" != "" ]]; then - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae elif [[ "$MODULES_TO_TEST" == "connect" ]]; then ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connector/connect/common,connector/connect/server test -fae elif [[ "$EXCLUDED_TAGS" != "" ]]; then - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then # To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae else - ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae fi - name: Clean up local Maven repository run: | diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml index d09babd372403..1b5bd0ba61288 100644 --- a/.github/workflows/publish_snapshot.yml +++ b/.github/workflows/publish_snapshot.yml @@ 
-17,11 +17,11 @@ # under the License. # -name: Publish Snapshot +name: Publish snapshot on: schedule: - - cron: '0 0,12 * * *' + - cron: '0 0 * * *' workflow_dispatch: inputs: branch: diff --git a/.gitignore b/.gitignore index 174f66c6064fe..787eb6180c35c 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ dev/create-release/*final dev/create-release/*txt dev/pr-deps/ dist/ +docs/_generated/ docs/_site/ docs/api docs/.local_ruby_bundle diff --git a/LICENSE-binary b/LICENSE-binary index 40271c9924bc4..b6971798e5577 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -204,171 +204,167 @@ This project bundles some components that are also licensed under the Apache License Version 2.0: -org.apache.zookeeper:zookeeper -oro:oro -commons-configuration:commons-configuration -commons-digester:commons-digester -com.chuusai:shapeless_2.13 -com.googlecode.javaewah:JavaEWAH -com.twitter:chill-java -com.twitter:chill_2.13 -com.univocity:univocity-parsers -javax.jdo:jdo-api -joda-time:joda-time -net.sf.opencsv:opencsv -org.apache.derby:derby -org.objenesis:objenesis -org.roaringbitmap:RoaringBitmap -org.scalanlp:breeze-macros_2.13 -org.scalanlp:breeze_2.13 -org.typelevel:macro-compat_2.13 -org.yaml:snakeyaml -org.apache.xbean:xbean-asm7-shaded -com.squareup.okhttp3:logging-interceptor -com.squareup.okhttp3:okhttp -com.squareup.okio:okio -org.apache.spark:spark-catalyst_2.13 -org.apache.spark:spark-kvstore_2.13 -org.apache.spark:spark-launcher_2.13 -org.apache.spark:spark-mllib-local_2.13 -org.apache.spark:spark-network-common_2.13 -org.apache.spark:spark-network-shuffle_2.13 -org.apache.spark:spark-sketch_2.13 -org.apache.spark:spark-tags_2.13 -org.apache.spark:spark-unsafe_2.13 -commons-httpclient:commons-httpclient -com.vlkan:flatbuffers -com.ning:compress-lzf -io.airlift:aircompressor -io.dropwizard.metrics:metrics-core -io.dropwizard.metrics:metrics-graphite -io.dropwizard.metrics:metrics-json -io.dropwizard.metrics:metrics-jvm -io.dropwizard.metrics:metrics-jmx -org.iq80.snappy:snappy com.clearspring.analytics:stream -com.jamesmurty.utils:java-xmlbuilder -commons-codec:commons-codec -commons-collections:commons-collections -io.fabric8:kubernetes-client -io.fabric8:kubernetes-model -io.fabric8:kubernetes-model-common -io.netty:netty-all -net.hydromatic:eigenbase-properties -net.sf.supercsv:super-csv -org.apache.arrow:arrow-format -org.apache.arrow:arrow-memory -org.apache.arrow:arrow-vector -org.apache.commons:commons-crypto -org.apache.commons:commons-lang3 -org.apache.hadoop:hadoop-annotations -org.apache.hadoop:hadoop-auth -org.apache.hadoop:hadoop-client -org.apache.hadoop:hadoop-common -org.apache.hadoop:hadoop-hdfs -org.apache.hadoop:hadoop-hdfs-client -org.apache.hadoop:hadoop-mapreduce-client-app -org.apache.hadoop:hadoop-mapreduce-client-common -org.apache.hadoop:hadoop-mapreduce-client-core -org.apache.hadoop:hadoop-mapreduce-client-jobclient -org.apache.hadoop:hadoop-mapreduce-client-shuffle -org.apache.hadoop:hadoop-yarn-api -org.apache.hadoop:hadoop-yarn-client -org.apache.hadoop:hadoop-yarn-common -org.apache.hadoop:hadoop-yarn-server-common -org.apache.hadoop:hadoop-yarn-server-web-proxy -org.apache.httpcomponents:httpclient -org.apache.httpcomponents:httpcore -org.apache.kerby:kerb-admin -org.apache.kerby:kerb-client -org.apache.kerby:kerb-common -org.apache.kerby:kerb-core -org.apache.kerby:kerb-crypto -org.apache.kerby:kerb-identity -org.apache.kerby:kerb-server -org.apache.kerby:kerb-simplekdc -org.apache.kerby:kerb-util -org.apache.kerby:kerby-asn1 
-org.apache.kerby:kerby-config -org.apache.kerby:kerby-pkix -org.apache.kerby:kerby-util -org.apache.kerby:kerby-xdr -org.apache.orc:orc-core -org.apache.orc:orc-mapreduce -org.mortbay.jetty:jetty -org.mortbay.jetty:jetty-util -com.jolbox:bonecp -org.json4s:json4s-ast_2.13 -org.json4s:json4s-core_2.13 -org.json4s:json4s-jackson_2.13 -org.json4s:json4s-scalap_2.13 -com.carrotsearch:hppc com.fasterxml.jackson.core:jackson-annotations com.fasterxml.jackson.core:jackson-core com.fasterxml.jackson.core:jackson-databind com.fasterxml.jackson.dataformat:jackson-dataformat-yaml -com.fasterxml.jackson.jaxrs:jackson-jaxrs-base -com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider -com.fasterxml.jackson.module:jackson-module-jaxb-annotations -com.fasterxml.jackson.module:jackson-module-paranamer +com.fasterxml.jackson.datatype:jackson-datatype-jsr310 com.fasterxml.jackson.module:jackson-module-scala_2.13 -com.github.mifmif:generex +com.github.joshelser:dropwizard-metrics-hadoop-metrics2-reporter com.google.code.findbugs:jsr305 com.google.code.gson:gson +com.google.crypto.tink:tink com.google.flatbuffers:flatbuffers-java com.google.guava:guava -com.google.inject:guice -com.google.inject.extensions:guice-servlet -com.twitter:parquet-hadoop-bundle +com.jamesmurty.utils:java-xmlbuilder +com.ning:compress-lzf +com.squareup.okhttp3:logging-interceptor +com.squareup.okhttp3:okhttp +com.squareup.okio:okio +com.tdunning:json +com.twitter:chill-java +com.twitter:chill_2.13 +com.univocity:univocity-parsers +com.zaxxer.HikariCP commons-cli:commons-cli +commons-codec:commons-codec +commons-collections:commons-collections commons-dbcp:commons-dbcp commons-io:commons-io commons-lang:commons-lang -commons-net:commons-net commons-pool:commons-pool +io.airlift:aircompressor +io.dropwizard.metrics:metrics-core +io.dropwizard.metrics:metrics-graphite +io.dropwizard.metrics:metrics-jmx +io.dropwizard.metrics:metrics-json +io.dropwizard.metrics:metrics-jvm +io.fabric8:kubernetes-client +io.fabric8:kubernetes-client-api +io.fabric8:kubernetes-httpclient-okhttp +io.fabric8:kubernetes-model-admissionregistration +io.fabric8:kubernetes-model-apiextensions +io.fabric8:kubernetes-model-apps +io.fabric8:kubernetes-model-autoscaling +io.fabric8:kubernetes-model-batch +io.fabric8:kubernetes-model-certificates +io.fabric8:kubernetes-model-common +io.fabric8:kubernetes-model-coordination +io.fabric8:kubernetes-model-core +io.fabric8:kubernetes-model-discovery +io.fabric8:kubernetes-model-events +io.fabric8:kubernetes-model-extensions +io.fabric8:kubernetes-model-flowcontrol +io.fabric8:kubernetes-model-gatewayapi +io.fabric8:kubernetes-model-metrics +io.fabric8:kubernetes-model-networking +io.fabric8:kubernetes-model-node +io.fabric8:kubernetes-model-policy +io.fabric8:kubernetes-model-rbac +io.fabric8:kubernetes-model-resource +io.fabric8:kubernetes-model-scheduling +io.fabric8:kubernetes-model-storageclass io.fabric8:zjsonpatch -javax.inject:javax.inject -javax.validation:validation-api -log4j:apache-log4j-extras -log4j:log4j +io.github.java-diff-utils:java-diff-utils +io.netty:netty-all +io.netty:netty-buffer +io.netty:netty-codec +io.netty:netty-codec-http +io.netty:netty-codec-http2 +io.netty:netty-codec-socks +io.netty:netty-common +io.netty:netty-handler +io.netty:netty-handler-proxy +io.netty:netty-resolver +io.netty:netty-tcnative-boringssl-static +io.netty:netty-tcnative-classes +io.netty:netty-transport +io.netty:netty-transport-classes-epoll +io.netty:netty-transport-classes-kqueue 
+io.netty:netty-transport-native-epoll +io.netty:netty-transport-native-kqueue +io.netty:netty-transport-native-unix-common +jakarta.inject:jakarta.inject-api +jakarta.validation:jakarta.validation-api +javax.jdo:jdo-api +joda-time:joda-time +net.java.dev.jna:jna +net.sf.opencsv:opencsv +net.sf.supercsv:super-csv net.sf.jpam:jpam +org.apache.arrow:arrow-format +org.apache.arrow:arrow-memory-core +org.apache.arrow:arrow-memory-netty +org.apache.arrow:arrow-memory-netty-buffer-patch +org.apache.arrow:arrow-vector org.apache.avro:avro org.apache.avro:avro-ipc org.apache.avro:avro-mapred +org.apache.commons:commons-collections4 org.apache.commons:commons-compress +org.apache.commons:commons-crypto +org.apache.commons:commons-lang3 org.apache.commons:commons-math3 +org.apache.commons:commons-text org.apache.curator:curator-client org.apache.curator:curator-framework org.apache.curator:curator-recipes -org.apache.directory.api:api-asn1-api -org.apache.directory.api:api-util -org.apache.directory.server:apacheds-i18n -org.apache.directory.server:apacheds-kerberos-codec -org.apache.htrace:htrace-core +org.apache.derby:derby +org.apache.derby:derbyshared +org.apache.derby:derbytools +org.apache.datasketches:datasketches-java +org.apache.datasketches:datasketches-memory +org.apache.hadoop:hadoop-client-api +org.apache.hadoop:hadoop-client-runtime +org.apache.hive:hive-beeline +org.apache.hive:hive-cli +org.apache.hive:hive-common +org.apache.hive:hive-exec +org.apache.hive:hive-jdbc +org.apache.hive:hive-llap-common +org.apache.hive:hive-metastore +org.apache.hive:hive-serde +org.apache.hive:hive-service-rpc +org.apache.hive:hive-shims-0.23 +org.apache.hive:hive-shims +org.apache.hive:hive-shims-common +org.apache.hive:hive-shims-scheduler +org.apache.hive:hive-storage-api +org.apache.httpcomponents:httpclient +org.apache.httpcomponents:httpcore org.apache.ivy:ivy -=org.apache.parquet:parquet-column +org.apache.logging.log4j:log4j-1.2-api +org.apache.logging.log4j:log4j-api +org.apache.logging.log4j:log4j-core +org.apache.logging.log4j:log4j-layout-template-json +org.apache.logging.log4j:log4j-slf4j-impl +org.apache.orc:orc-core +org.apache.orc:orc-format +org.apache.orc:orc-mapreduce +org.apache.orc:orc-shims +org.apache.parquet:parquet-column org.apache.parquet:parquet-common org.apache.parquet:parquet-encoding -org.apache.parquet:parquet-format +org.apache.parquet:parquet-format-structures org.apache.parquet:parquet-hadoop org.apache.parquet:parquet-jackson org.apache.thrift:libfb303 org.apache.thrift:libthrift +org.apache.ws.xmlschema:xmlschema-core +org.apache.xbean:xbean-asm9-shaded +org.apache.yetus:audience-annotations +org.apache.zookeeper:zookeeper +org.apache.zookeeper:zookeeper-jute org.codehaus.jackson:jackson-core-asl org.codehaus.jackson:jackson-mapper-asl org.datanucleus:datanucleus-api-jdo org.datanucleus:datanucleus-core org.datanucleus:datanucleus-rdbms -org.lz4:lz4-java -org.xerial.snappy:snappy-java -stax:stax-api -xerces:xercesImpl -org.codehaus.jackson:jackson-jaxrs -org.codehaus.jackson:jackson-xc +org.datanucleus:javax.jdo org.eclipse.jetty:jetty-client org.eclipse.jetty:jetty-http org.eclipse.jetty:jetty-io -org.eclipse.jetty:jetty-jndi org.eclipse.jetty:jetty-plus org.eclipse.jetty:jetty-proxy org.eclipse.jetty:jetty-security @@ -376,43 +372,44 @@ org.eclipse.jetty:jetty-server org.eclipse.jetty:jetty-servlet org.eclipse.jetty:jetty-servlets org.eclipse.jetty:jetty-util -org.eclipse.jetty:jetty-webapp -org.eclipse.jetty:jetty-xml 
+org.glassfish.jersey.containers:jersey-container-servlet +org.glassfish.jersey.containers:jersey-container-servlet-core +org.glassfish.jersey.core:jersey-client +org.glassfish.jersey.core:jersey-common +org.glassfish.jersey.core:jersey-server +org.glassfish.jersey.inject:jersey-hk2 +org.json4s:json4s-ast_2.13 +org.json4s:json4s-core_2.13 +org.json4s:json4s-jackson-core_2.13 +org.json4s:json4s-jackson_2.13 +org.json4s:json4s-scalap_2.13 +org.lz4:lz4-java +org.objenesis:objenesis +org.roaringbitmap:RoaringBitmap +org.rocksdb:rocksdbjni org.scala-lang:scala-compiler org.scala-lang:scala-library org.scala-lang:scala-reflect +org.scala-lang.modules:scala-collection-compat_2.13 +org.scala-lang.modules:scala-parallel-collections_2.13 org.scala-lang.modules:scala-parser-combinators_2.13 org.scala-lang.modules:scala-xml_2.13 -com.github.joshelser:dropwizard-metrics-hadoop-metrics2-reporter -com.zaxxer.HikariCP -org.apache.hive:hive-beeline -org.apache.hive:hive-cli -org.apache.hive:hive-common -org.apache.hive:hive-exec -org.apache.hive:hive-jdbc -org.apache.hive:hive-llap-common -org.apache.hive:hive-metastore -org.apache.hive:hive-serde -org.apache.hive:hive-service-rpc -org.apache.hive:hive-shims-0.23 -org.apache.hive:hive-shims -org.apache.hive:hive-common -org.apache.hive:hive-shims-scheduler -org.apache.hive:hive-storage-api -org.apache.hive:hive-vector-code-gen -org.datanucleus:javax.jdo -com.tdunning:json -org.apache.velocity:velocity -org.apache.yetus:audience-annotations -com.google.cloud.bigdataoss:gcs-connector +org.scalanlp:breeze-macros_2.13 +org.scalanlp:breeze_2.13 +org.snakeyaml:snakeyaml-engine +org.xerial.snappy:snappy-java +org.yaml:snakeyaml +oro:oro +stax:stax-api +xerces:xercesImpl core/src/main/java/org/apache/spark/util/collection/TimSort.java core/src/main/resources/org/apache/spark/ui/static/bootstrap* core/src/main/resources/org/apache/spark/ui/static/vis* -docs/js/vendor/bootstrap.js core/src/main/resources/org/apache/spark/ui/static/d3-flamegraph.min.js core/src/main/resources/org/apache/spark/ui/static/d3-flamegraph.css + ------------------------------------------------------------------------------------ This product bundles various third-party components under other open source licenses. This section summarizes those components and their licenses. See licenses-binary/ @@ -421,45 +418,37 @@ for text of these licenses. 
Python Software Foundation License ---------------------------------- - python/pyspark/loose_version.py BSD 2-Clause ------------ - com.github.luben:zstd-jni +com.github.wendykierp:JTransforms javolution:javolution -com.esotericsoftware:kryo-shaded -com.esotericsoftware:minlog -com.esotericsoftware:reflectasm -org.codehaus.janino:commons-compiler -org.codehaus.janino:janino jline:jline org.jodd:jodd-core -com.github.wendykierp:JTransforms pl.edu.icm:JLargeArrays BSD 3-Clause ------------ - +com.esotericsoftware:kryo-shaded +com.esotericsoftware:minlog +com.esotericsoftware:reflectasm com.google.protobuf:protobuf-java -dk.brics.automaton:automaton -org.antlr:antlr-runtime +com.thoughtworks.paranamer:paranamer +net.sf.py4j:py4j +net.sourceforge.f2j:arpack_combined_all org.antlr:ST4 -org.antlr:stringtemplate +org.antlr:antlr-runtime org.antlr:antlr4-runtime -antlr:antlr -com.thoughtworks.paranamer:paranamer +org.codehaus.janino:commons-compiler +org.codehaus.janino:janino org.fusesource.leveldbjni:leveldbjni-all -net.sourceforge.f2j:arpack_combined_all -xmlenc:xmlenc -net.sf.py4j:py4j +org.jline:jline org.jpmml:pmml-model -org.jpmml:pmml-schema org.threeten:threeten-extra -org.jdom:jdom2 python/lib/py4j-*-src.zip python/pyspark/cloudpickle.py @@ -472,95 +461,68 @@ is distributed under the 3-Clause BSD license. MIT License ----------- - -com.microsoft.sqlserver:mssql-jdbc +com.github.scopt:scopt_2.13 +dev.ludovic.netlib:blas +dev.ludovic.netlib:arpack +dev.ludovic.netlib:lapack +net.razorvine:pickle +org.checkerframework:checker-qual +org.typelevel:algebra_2.13:jar +org.typelevel:cats-kernel_2.13 org.typelevel:spire_2.13 org.typelevel:spire-macros_2.13 org.typelevel:spire-platform_2.13 org.typelevel:spire-util_2.13 -org.typelevel:algebra_2.13:jar -org.typelevel:cats-kernel_2.13 -org.typelevel:machinist_2.13 -net.razorvine:pickle org.slf4j:jcl-over-slf4j org.slf4j:jul-to-slf4j org.slf4j:slf4j-api -org.slf4j:slf4j-log4j12 -com.github.scopt:scopt_2.13 -dev.ludovic.netlib:blas -dev.ludovic.netlib:arpack -dev.ludovic.netlib:lapack core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js core/src/main/resources/org/apache/spark/ui/static/*dataTables* core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js core/src/main/resources/org/apache/spark/ui/static/jquery* core/src/main/resources/org/apache/spark/ui/static/sorttable.js -docs/js/vendor/anchor.min.js -docs/js/vendor/jquery* -docs/js/vendor/modernizer* + ISC License ----------- - core/src/main/resources/org/apache/spark/ui/static/d3.min.js Common Development and Distribution License (CDDL) 1.0 ------------------------------------------------------ - javax.activation:activation http://www.oracle.com/technetwork/java/javase/tech/index-jsp-138795.html -javax.xml.stream:stax-api https://jcp.org/en/jsr/detail?id=173 -javax.transaction:javax.transaction-api -javax.xml.bind:jaxb-api +javax.transaction:transaction-api Common Development and Distribution License (CDDL) 1.1 ------------------------------------------------------ - -javax.el:javax.el-api https://javaee.github.io/uel-ri/ -javax.servlet.jsp:jsp-api javax.transaction:jta http://www.oracle.com/technetwork/java/index.html javax.xml.bind:jaxb-api https://github.com/javaee/jaxb-v2 -org.glassfish.hk2:hk2-api https://github.com/javaee/glassfish -org.glassfish.hk2:hk2-locator (same) -org.glassfish.hk2:hk2-utils -org.glassfish.hk2:osgi-resource-locator -org.glassfish.hk2.external:aopalliance-repackaged -org.glassfish.hk2.external:javax.inject 
-org.glassfish.jersey.bundles.repackaged:jersey-guava -org.glassfish.jersey.containers:jersey-container-servlet -org.glassfish.jersey.containers:jersey-container-servlet-core -org.glassfish.jersey.core:jersey-client -org.glassfish.jersey.core:jersey-common -org.glassfish.jersey.core:jersey-server -org.glassfish.jersey.media:jersey-media-jaxb + Eclipse Distribution License (EDL) 1.0 -------------------------------------- - -org.glassfish.jaxb:jaxb-runtime -jakarta.activation:jakarta.activation-api -jakarta.xml.bind:jakarta.xml.bind-api com.sun.istack:istack-commons-runtime - +jakarta.xml.bind:jakarta.xml.bind-api +org.glassfish.jaxb:jaxb-runtime +org.glassfish.jaxb:txw2 Eclipse Public License (EPL) 2.0 -------------------------------- - -jakarta.annotation:jakarta-annotation-api https://projects.eclipse.org/projects/ee4j.ca +jakarta.annotation:jakarta.annotation-api https://projects.eclipse.org/projects/ee4j.ca jakarta.servlet:jakarta.servlet-api https://projects.eclipse.org/projects/ee4j.servlet jakarta.ws.rs:jakarta.ws.rs-api https://github.com/eclipse-ee4j/jaxrs-api -org.glassfish.hk2.external:jakarta.inject -com.github.jnr:jnr-posix +org.glassfish.hk2.external:aopalliance-repackaged +org.glassfish.hk2:hk2-api +org.glassfish.hk2:hk2-locator +org.glassfish.hk2:hk2-utils +org.glassfish.hk2:osgi-resource-locator Public Domain ------------- - -aopalliance:aopalliance -net.iharder:base64 org.tukaani:xz @@ -573,3 +535,7 @@ data/mllib/images/kittens/54893.jpg data/mllib/images/kittens/DP153539.jpg data/mllib/images/kittens/DP802813.jpg data/mllib/images/multi-channel/chr30.4.184.jpg + +Unicode/ICU License +------------------- +com.ibm.icu:icu4j diff --git a/NOTICE-binary b/NOTICE-binary index 5f1c1c617c36f..c4cfe0e9f8b31 100644 --- a/NOTICE-binary +++ b/NOTICE-binary @@ -33,11 +33,12 @@ services. // Version 2.0, in this case for // ------------------------------------------------------------------ -Hive Beeline -Copyright 2016 The Apache Software Foundation +=== NOTICE FOR com.clearspring.analytics:streams === +stream-api +Copyright 2016 AddThis -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). +This product includes software developed by AddThis. +=== END OF NOTICE FOR com.clearspring.analytics:streams === Apache Avro Copyright 2009-2014 The Apache Software Foundation @@ -131,14 +132,6 @@ been derived from the works by JSR-166 EG, Doug Lea, and Jason T. Greene: * http://gee.cs.oswego.edu/cgi-bin/viewcvs.cgi/jsr166/ * http://viewvc.jboss.org/cgi-bin/viewvc.cgi/jbosscache/experimental/jsr166/ -This product contains a modified version of Robert Harder's Public Domain -Base64 Encoder and Decoder, which can be obtained at: - - * LICENSE: - * license/LICENSE.base64.txt (Public Domain) - * HOMEPAGE: - * http://iharder.sourceforge.net/current/java/base64/ - This product contains a modified portion of 'Webbit', an event based WebSocket and HTTP server, which can be obtained at: @@ -338,120 +331,102 @@ which has the following notices: Copyright 2002-2012 Ramnivas Laddad, Juergen Hoeller, Chris Beams The binary distribution of this product bundles binaries of -Jetty 6.1.26, +Jetty 11.0.20, which has the following notices: - * ============================================================== - Jetty Web Container - Copyright 1995-2016 Mort Bay Consulting Pty Ltd. - ============================================================== - - The Jetty Web Container is Copyright Mort Bay Consulting Pty Ltd - unless otherwise noted. 
- - Jetty is dual licensed under both - - * The Apache 2.0 License - http://www.apache.org/licenses/LICENSE-2.0.html - - and - - * The Eclipse Public 1.0 License - http://www.eclipse.org/legal/epl-v10.html - - Jetty may be distributed under either license. - - ------ - Eclipse - - The following artifacts are EPL. - * org.eclipse.jetty.orbit:org.eclipse.jdt.core +========================= +Notices for Eclipse Jetty +========================= +This content is produced and maintained by the Eclipse Jetty project. - The following artifacts are EPL and ASL2. - * org.eclipse.jetty.orbit:javax.security.auth.message +Project home: https://eclipse.dev/jetty/ - The following artifacts are EPL and CDDL 1.0. - * org.eclipse.jetty.orbit:javax.mail.glassfish +Trademarks +---------- +Eclipse Jetty, and Jetty are trademarks of the Eclipse Foundation. - ------ - Oracle +Copyright +--------- +All contributions are the property of the respective authors or of +entities to which copyright has been assigned by the authors (eg. employer). - The following artifacts are CDDL + GPLv2 with classpath exception. - https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html +Declared Project Licenses +------------------------- +This artifacts of this project are made available under the terms of: - * javax.servlet:javax.servlet-api - * javax.annotation:javax.annotation-api - * javax.transaction:javax.transaction-api - * javax.websocket:javax.websocket-api + * the Eclipse Public License v2.0 + https://www.eclipse.org/legal/epl-2.0 + SPDX-License-Identifier: EPL-2.0 - ------ - Oracle OpenJDK + or - If ALPN is used to negotiate HTTP/2 connections, then the following - artifacts may be included in the distribution or downloaded when ALPN - module is selected. + * the Apache License, Version 2.0 + https://www.apache.org/licenses/LICENSE-2.0 + SPDX-License-Identifier: Apache-2.0 - * java.sun.security.ssl +The following dependencies are EPL. + * org.eclipse.jetty.orbit:org.eclipse.jdt.core - These artifacts replace/modify OpenJDK classes. The modififications - are hosted at github and both modified and original are under GPL v2 with - classpath exceptions. - http://openjdk.java.net/legal/gplv2+ce.html +The following dependencies are EPL and ASL2. + * org.eclipse.jetty.orbit:javax.security.auth.message - ------ - OW2 +The following dependencies are EPL and CDDL 1.0. + * org.eclipse.jetty.orbit:javax.mail.glassfish - The following artifacts are licensed by the OW2 Foundation according to the - terms of http://asm.ow2.org/license.html +The following dependencies are CDDL + GPLv2 with classpath exception. +https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html - org.ow2.asm:asm-commons - org.ow2.asm:asm + * jakarta.servlet:jakarta.servlet-api + * javax.annotation:javax.annotation-api + * javax.transaction:javax.transaction-api + * javax.websocket:javax.websocket-api - ------ - Apache +The following dependencies are licensed by the OW2 Foundation according to the +terms of http://asm.ow2.org/license.html - The following artifacts are ASL2 licensed. + * org.ow2.asm:asm-commons + * org.ow2.asm:asm - org.apache.taglibs:taglibs-standard-spec - org.apache.taglibs:taglibs-standard-impl +The following dependencies are ASL2 licensed. - ------ - MortBay + * org.apache.taglibs:taglibs-standard-spec + * org.apache.taglibs:taglibs-standard-impl - The following artifacts are ASL2 licensed. Based on selected classes from - following Apache Tomcat jars, all ASL2 licensed. +The following dependencies are ASL2 licensed. 
Based on selected classes from +following Apache Tomcat jars, all ASL2 licensed. - org.mortbay.jasper:apache-jsp - org.apache.tomcat:tomcat-jasper - org.apache.tomcat:tomcat-juli - org.apache.tomcat:tomcat-jsp-api - org.apache.tomcat:tomcat-el-api - org.apache.tomcat:tomcat-jasper-el - org.apache.tomcat:tomcat-api - org.apache.tomcat:tomcat-util-scan - org.apache.tomcat:tomcat-util + * org.mortbay.jasper:apache-jsp + * org.apache.tomcat:tomcat-jasper + * org.apache.tomcat:tomcat-juli + * org.apache.tomcat:tomcat-jsp-api + * org.apache.tomcat:tomcat-el-api + * org.apache.tomcat:tomcat-jasper-el + * org.apache.tomcat:tomcat-api + * org.apache.tomcat:tomcat-util-scan + * org.apache.tomcat:tomcat-util + * org.mortbay.jasper:apache-el + * org.apache.tomcat:tomcat-jasper-el + * org.apache.tomcat:tomcat-el-api - org.mortbay.jasper:apache-el - org.apache.tomcat:tomcat-jasper-el - org.apache.tomcat:tomcat-el-api +The following artifacts are CDDL + GPLv2 with classpath exception. +https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html - ------ - Mortbay + * org.eclipse.jetty.toolchain:jetty-schemas - The following artifacts are CDDL + GPLv2 with classpath exception. +Cryptography +------------ +Content may contain encryption software. The country in which you are currently +may have restrictions on the import, possession, and use, and/or re-export to +another country, of encryption software. BEFORE using any encryption software, +please check the country's laws, regulations and policies concerning the import, +possession, or use, and re-export of encryption software, to see if this is +permitted. - https://glassfish.dev.java.net/nonav/public/CDDL+GPL.html - - org.eclipse.jetty.toolchain:jetty-schemas - - ------ - Assorted - - The UnixCrypt.java code implements the one way cryptography used by - Unix systems for simple password protection. Copyright 1996 Aki Yoshida, - modified April 2001 by Iris Van den Broeke, Daniel Deville. - Permission to use, copy, modify and distribute UnixCrypt - for non-commercial or commercial purposes and without fee is - granted provided that the copyright notice appears in all copies./ +The UnixCrypt.java code implements the one way cryptography used by +Unix systems for simple password protection. Copyright 1996 Aki Yoshida, +modified April 2001 by Iris Van den Broeke, Daniel Deville. +Permission to use, copy, modify and distribute UnixCrypt +for non-commercial or commercial purposes and without fee is +granted provided that the copyright notice appears in all copies. The binary distribution of this product bundles binaries of Snappy for Java 1.0.4.1, @@ -506,36 +481,9 @@ Copyright 2001-2006 The Apache Software Foundation Apache Commons BeanUtils Copyright 2000-2008 The Apache Software Foundation -ApacheDS Protocol Kerberos Codec -Copyright 2003-2013 The Apache Software Foundation - -ApacheDS I18n -Copyright 2003-2013 The Apache Software Foundation - -Apache Directory API ASN.1 API -Copyright 2003-2013 The Apache Software Foundation - -Apache Directory LDAP API Utilities -Copyright 2003-2013 The Apache Software Foundation - Curator Client Copyright 2011-2015 The Apache Software Foundation -htrace-core -Copyright 2015 The Apache Software Foundation - - ========================================================================= - == NOTICE file corresponding to section 4(d) of the Apache License, == - == Version 2.0, in this case for the Apache Xerces Java distribution. 
== - ========================================================================= - - Portions of this software were originally based on the following: - - software copyright (c) 1999, IBM Corporation., http://www.ibm.com. - - software copyright (c) 1999, Sun Microsystems., http://www.sun.com. - - voluntary contributions made by Paul Eng on behalf of the - Apache Software Foundation that were originally developed at iClick, Inc., - software copyright (c) 1999. - # Jackson JSON processor Jackson is a high-performance, Free/Open Source JSON processing library. @@ -656,21 +604,12 @@ Copyright 2001-2009 The Apache Software Foundation Apache Commons Daemon Copyright 1999-2019 The Apache Software Foundation -Google Guice - Extensions - Servlet -Copyright 2006-2011 Google, Inc. - Apache Commons IO Copyright 2002-2012 The Apache Software Foundation -Google Guice - Core Library -Copyright 2006-2011 Google, Inc. - Apache Parquet Hadoop Bundle (Incubating) Copyright 2015 The Apache Software Foundation -Hive Query Language -Copyright 2016 The Apache Software Foundation - Apache Extras Companion for log4j 1.2. Copyright 2007 The Apache Software Foundation @@ -884,18 +823,6 @@ Some data files (under analysis/icu/src/data) are derived from Unicode data such as the Unicode Character Database. See http://unicode.org/copyright.html for more details. -Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is -BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/ - -The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were -automatically generated with the moman/finenight FSA library, created by -Jean-Philippe Barrette-LaPierre. This library is available under an MIT license, -see http://sites.google.com/site/rrettesite/moman and -http://bitbucket.org/jpbarrette/moman/overview/ - -The class org.apache.lucene.util.WeakIdentityMap was derived from -the Apache CXF project and is Apache License 2.0. - The Google Code Prettify is Apache License 2.0. See http://code.google.com/p/google-code-prettify/ @@ -1378,13 +1305,6 @@ Copyright (C) 2010 The Android Open Source Project This product includes software developed by The Android Open Source Project -Apache Velocity - -Copyright (C) 2000-2007 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - Apache Yetus - Audience Annotations Copyright 2015-2017 The Apache Software Foundation @@ -1404,102 +1324,6 @@ This product includes software developed at The Apache Software Foundation (http://www.apache.org/). -Kerby-kerb Admin -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby-kerb Client -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby-kerb Common -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby-kerb core -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby-kerb Crypto -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). 
- - -Kerby-kerb Identity -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby-kerb Server -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerb Simple Kdc -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby-kerb Util -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby ASN1 Project -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby Config -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby PKIX Project -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby Util -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). - - -Kerby XDR Project -Copyright 2014-2017 The Apache Software Foundation - -This product includes software developed at -The Apache Software Foundation (http://www.apache.org/). Token provider diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 2523104268d36..f7dd261c10fd2 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -21,7 +21,7 @@ Suggests: testthat, e1071, survival, - arrow (>= 1.0.0) + arrow (>= 10.0.0) Collate: 'schema.R' 'generics.R' diff --git a/assembly/pom.xml b/assembly/pom.xml index 6c31ec745b5bd..58e7ae5bb0c7f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -136,10 +136,6 @@ spark-yarn_${scala.binary.version} ${project.version} - - org.apache.hadoop - hadoop-yarn-server-web-proxy - diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd index 800ec0c02c22f..8703f5a86f109 100755 --- a/bin/spark-class2.cmd +++ b/bin/spark-class2.cmd @@ -61,14 +61,24 @@ if not "x%JAVA_HOME%"=="x" ( ) ) +rem SPARK-23015: We create a temporary text file when launching Spark. +rem This file must be given a unique name or else we risk a race condition when launching multiple instances close together. +rem The best way to create a unique file name is to add a GUID to the file name. Use Powershell to generate the GUID. +where powershell.exe >nul 2>&1 +if %errorlevel%==0 ( + FOR /F %%a IN ('POWERSHELL -COMMAND "$([guid]::NewGuid().ToString())"') DO (set RANDOM_SUFFIX=%%a) +) else ( + rem If Powershell is not installed, try to create a random file name suffix using the Windows %RANDOM%. + rem %RANDOM% is seeded with 1-second granularity so it is highly likely that two Spark instances + rem launched within the same second will fail to start. + rem Note that Powershell is automatically installed on all Windows OS from Windows 7/Windows Server 2008 R2 and onward. + set RANDOM_SUFFIX=%RANDOM% +) + rem The launcher library prints the command to be executed in a single line suitable for being rem executed by the batch interpreter. So read all the output of the launcher into a variable. 
-:gen -set LAUNCHER_OUTPUT=%temp%\spark-class-launcher-output-%RANDOM%.txt -rem SPARK-28302: %RANDOM% would return the same number if we call it instantly after last call, -rem so we should make it sure to generate unique file to avoid process collision of writing into -rem the same file concurrently. -if exist %LAUNCHER_OUTPUT% goto :gen +set LAUNCHER_OUTPUT=%temp%\spark-class-launcher-output-%RANDOM_SUFFIX%.txt + rem unset SHELL to indicate non-bash environment to launcher/Main set SHELL= "%RUNNER%" -Xmx128m -cp "%LAUNCH_CLASSPATH%" org.apache.spark.launcher.Main %* > %LAUNCHER_OUTPUT% diff --git a/build/mvn b/build/mvn index 3179099304c7a..3735461562e54 100755 --- a/build/mvn +++ b/build/mvn @@ -146,30 +146,6 @@ install_mvn() { fi } -# Determine the Scala version from the root pom.xml file, set the Scala URL, -# and, with that, download the specific version of Scala necessary under -# the build/ folder -install_scala() { - # determine the Scala version used in Spark - local scala_binary_version=`grep "scala.binary.version" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'` - local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | grep ${scala_binary_version} | head -n1 | awk -F '[<>]' '{print $3}'` - local scala_bin="${_DIR}/scala-${scala_version}/bin/scala" - local TYPESAFE_MIRROR=${TYPESAFE_MIRROR:-https://downloads.lightbend.com} - local SCALA_TARBALL="scala-${scala_version}.tgz" - - install_app \ - "${TYPESAFE_MIRROR}" \ - "scala/${scala_version}/${SCALA_TARBALL}" \ - "" \ - "" \ - ${SCALA_TARBALL} \ - "scala-${scala_version}/bin/scala" - - SCALA_COMPILER="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-compiler.jar" - SCALA_LIBRARY="$(cd "$(dirname "${scala_bin}")/../lib" && pwd)/scala-library.jar" -} - -install_scala install_mvn # Reset the current working directory diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 3820d1b8e395c..046648e9c2aec 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -40,6 +40,12 @@ spark-tags_${scala.binary.version} + + org.apache.spark + spark-common-utils_${scala.binary.version} + ${project.version} + + com.google.guava guava diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java index b830e6afc6172..69757fdc65d68 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java @@ -29,8 +29,9 @@ import com.google.common.base.Preconditions; import com.google.common.base.Throwables; import org.iq80.leveldb.DBIterator; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; class LevelDBIterator implements KVStoreIterator { @@ -302,7 +303,7 @@ static int compare(byte[] a, byte[] b) { } static class ResourceCleaner implements Runnable { - private static final Logger LOG = LoggerFactory.getLogger(ResourceCleaner.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(ResourceCleaner.class); private final DBIterator dbIterator; diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java index daedd56890a68..72c3690d1a187 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java +++ 
b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java @@ -32,8 +32,10 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +// checkstyle.off: RegexpSinglelineJava import org.slf4j.Logger; import org.slf4j.LoggerFactory; +// checkstyle.on: RegexpSinglelineJava import static org.junit.jupiter.api.Assertions.*; public abstract class DBIteratorSuite { diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java index 3158c18f9e1d3..ff6db8fc34c96 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java @@ -34,7 +34,9 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +// checkstyle.off: RegexpSinglelineJava import org.slf4j.LoggerFactory; +// checkstyle.on: RegexpSinglelineJava import static org.junit.jupiter.api.Assertions.*; /** diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java index e18a3c3b1c288..c1b8009e97e66 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java @@ -34,7 +34,9 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; +// checkstyle.off: RegexpSinglelineJava import org.slf4j.LoggerFactory; +// checkstyle.on: RegexpSinglelineJava import static org.junit.jupiter.api.Assertions.*; /** diff --git a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java index 9f3b9c59256b9..e8ce6840e3fc3 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java +++ b/common/network-common/src/main/java/org/apache/spark/network/TransportContext.java @@ -34,9 +34,9 @@ import io.netty.handler.stream.ChunkedWriteHandler; import io.netty.handler.timeout.IdleStateHandler; import io.netty.handler.codec.MessageToMessageEncoder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportClientBootstrap; import org.apache.spark.network.client.TransportClientFactory; @@ -73,7 +73,7 @@ * processes to send messages back to the client on an existing channel. 
*/ public class TransportContext implements Closeable { - private static final Logger logger = LoggerFactory.getLogger(TransportContext.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(TransportContext.class); private static final NettyLogger nettyLogger = new NettyLogger(); private final TransportConf conf; diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java index 40825e06b82fd..4c144a73a9299 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -35,9 +35,11 @@ import io.netty.util.concurrent.GenericFutureListener; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NioManagedBuffer; import org.apache.spark.network.protocol.*; @@ -71,7 +73,7 @@ * Concurrency: thread safe and can be called from multiple threads. */ public class TransportClient implements Closeable { - private static final Logger logger = LoggerFactory.getLogger(TransportClient.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(TransportClient.class); private final Channel channel; private final TransportResponseHandler handler; @@ -364,11 +366,13 @@ public void operationComplete(Future future) throws Exception { getRemoteAddress(channel), timeTaken); } } else { - String errorMsg = String.format("Failed to send RPC %s to %s: %s", requestId, - getRemoteAddress(channel), future.cause()); - logger.error(errorMsg, future.cause()); + logger.error("Failed to send RPC {} to {}", future.cause(), + MDC.of(LogKeys.REQUEST_ID$.MODULE$, requestId), + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); channel.close(); try { + String errorMsg = String.format("Failed to send RPC %s to %s: %s", requestId, + getRemoteAddress(channel), future.cause()); handleFailure(errorMsg, future.cause()); } catch (Exception e) { logger.error("Uncaught exception in RPC response callback handler!", e); diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java index fd48020caac7f..e1f19f956cc0a 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java @@ -42,9 +42,11 @@ import io.netty.handler.ssl.SslHandler; import io.netty.util.concurrent.Future; import io.netty.util.concurrent.GenericFutureListener; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.TransportContext; import org.apache.spark.network.server.TransportChannelHandler; import org.apache.spark.network.util.*; @@ -77,7 +79,8 @@ private 
static class ClientPool { } } - private static final Logger logger = LoggerFactory.getLogger(TransportClientFactory.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(TransportClientFactory.class); private final TransportContext context; private final TransportConf conf; @@ -169,8 +172,10 @@ public TransportClient createClient(String remoteHost, int remotePort, boolean f // this code was able to update things. TransportChannelHandler handler = cachedClient.getChannel().pipeline() .get(TransportChannelHandler.class); - synchronized (handler) { - handler.getResponseHandler().updateTimeOfLastRequest(); + if (handler != null) { + synchronized (handler) { + handler.getResponseHandler().updateTimeOfLastRequest(); + } } if (cachedClient.isActive()) { @@ -188,7 +193,9 @@ public TransportClient createClient(String remoteHost, int remotePort, boolean f final String resolvMsg = resolvedAddress.isUnresolved() ? "failed" : "succeed"; if (hostResolveTimeMs > 2000) { logger.warn("DNS resolution {} for {} took {} ms", - resolvMsg, resolvedAddress, hostResolveTimeMs); + MDC.of(LogKeys.STATUS$.MODULE$, resolvMsg), + MDC.of(LogKeys.HOST_PORT$.MODULE$, resolvedAddress), + MDC.of(LogKeys.TIME$.MODULE$, hostResolveTimeMs)); } else { logger.trace("DNS resolution {} for {} took {} ms", resolvMsg, resolvedAddress, hostResolveTimeMs); @@ -202,7 +209,8 @@ public TransportClient createClient(String remoteHost, int remotePort, boolean f logger.trace("Returning cached connection to {}: {}", resolvedAddress, cachedClient); return cachedClient; } else { - logger.info("Found inactive connection to {}, creating a new one.", resolvedAddress); + logger.info("Found inactive connection to {}, creating a new one.", + MDC.of(LogKeys.HOST_PORT$.MODULE$, resolvedAddress)); } } // If this connection should fast fail when last connection failed in last fast fail time @@ -305,8 +313,8 @@ public void operationComplete(final Future handshakeFuture) { if (handshakeFuture.isSuccess()) { logger.debug("{} successfully completed TLS handshake to ", address); } else { - logger.info( - "failed to complete TLS handshake to " + address, handshakeFuture.cause()); + logger.info("failed to complete TLS handshake to {}", handshakeFuture.cause(), + MDC.of(LogKeys.HOST_PORT$.MODULE$, address)); cf.channel().close(); } } @@ -331,14 +339,17 @@ public void operationComplete(final Future handshakeFuture) { } } catch (Exception e) { // catch non-RuntimeExceptions too as bootstrap may be written in Scala long bootstrapTimeMs = (System.nanoTime() - preBootstrap) / 1000000; - logger.error("Exception while bootstrapping client after " + bootstrapTimeMs + " ms", e); + logger.error("Exception while bootstrapping client after {} ms", e, + MDC.of(LogKeys.BOOTSTRAP_TIME$.MODULE$, bootstrapTimeMs)); client.close(); throw Throwables.propagate(e); } long postBootstrap = System.nanoTime(); logger.info("Successfully created connection to {} after {} ms ({} ms spent in bootstraps)", - address, (postBootstrap - preConnect) / 1000000, (postBootstrap - preBootstrap) / 1000000); + MDC.of(LogKeys.HOST_PORT$.MODULE$, address), + MDC.of(LogKeys.ELAPSED_TIME$.MODULE$, (postBootstrap - preConnect) / 1000000), + MDC.of(LogKeys.BOOTSTRAP_TIME$.MODULE$, (postBootstrap - preBootstrap) / 1000000)); return client; } diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java index 
9041678435106..be4cf4a58abeb 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java @@ -28,9 +28,11 @@ import io.netty.channel.Channel; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.protocol.ChunkFetchFailure; import org.apache.spark.network.protocol.ChunkFetchSuccess; import org.apache.spark.network.protocol.MergedBlockMetaSuccess; @@ -51,7 +53,8 @@ * Concurrency: thread safe and can be called from multiple threads. */ public class TransportResponseHandler extends MessageHandler { - private static final Logger logger = LoggerFactory.getLogger(TransportResponseHandler.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(TransportResponseHandler.class); private final Channel channel; @@ -143,7 +146,8 @@ public void channelInactive() { if (hasOutstandingRequests()) { String remoteAddress = getRemoteAddress(channel); logger.error("Still have {} requests outstanding when connection from {} is closed", - numOutstandingRequests(), remoteAddress); + MDC.of(LogKeys.COUNT$.MODULE$, numOutstandingRequests()), + MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress)); failOutstandingRequests(new IOException("Connection from " + remoteAddress + " closed")); } } @@ -153,7 +157,8 @@ public void exceptionCaught(Throwable cause) { if (hasOutstandingRequests()) { String remoteAddress = getRemoteAddress(channel); logger.error("Still have {} requests outstanding when connection from {} is closed", - numOutstandingRequests(), remoteAddress); + MDC.of(LogKeys.COUNT$.MODULE$, numOutstandingRequests()), + MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress)); failOutstandingRequests(cause); } } @@ -164,7 +169,8 @@ public void handle(ResponseMessage message) throws Exception { ChunkReceivedCallback listener = outstandingFetches.get(resp.streamChunkId); if (listener == null) { logger.warn("Ignoring response for block {} from {} since it is not outstanding", - resp.streamChunkId, getRemoteAddress(channel)); + MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, resp.streamChunkId), + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); resp.body().release(); } else { outstandingFetches.remove(resp.streamChunkId); @@ -175,7 +181,9 @@ public void handle(ResponseMessage message) throws Exception { ChunkReceivedCallback listener = outstandingFetches.get(resp.streamChunkId); if (listener == null) { logger.warn("Ignoring response for block {} from {} ({}) since it is not outstanding", - resp.streamChunkId, getRemoteAddress(channel), resp.errorString); + MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, resp.streamChunkId), + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)), + MDC.of(LogKeys.ERROR$.MODULE$, resp.errorString)); } else { outstandingFetches.remove(resp.streamChunkId); listener.onFailure(resp.streamChunkId.chunkIndex(), new ChunkFetchFailureException( @@ -185,7 +193,9 @@ public void handle(ResponseMessage message) throws Exception { RpcResponseCallback listener = (RpcResponseCallback) outstandingRpcs.get(resp.requestId); if (listener == null) { logger.warn("Ignoring response for RPC {} from {} ({} bytes) 
since it is not outstanding", - resp.requestId, getRemoteAddress(channel), resp.body().size()); + MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId), + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)), + MDC.of(LogKeys.RESPONSE_BODY_SIZE$.MODULE$, resp.body().size())); resp.body().release(); } else { outstandingRpcs.remove(resp.requestId); @@ -199,7 +209,9 @@ public void handle(ResponseMessage message) throws Exception { BaseResponseCallback listener = outstandingRpcs.get(resp.requestId); if (listener == null) { logger.warn("Ignoring response for RPC {} from {} ({}) since it is not outstanding", - resp.requestId, getRemoteAddress(channel), resp.errorString); + MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId), + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)), + MDC.of(LogKeys.ERROR$.MODULE$, resp.errorString)); } else { outstandingRpcs.remove(resp.requestId); listener.onFailure(new RuntimeException(resp.errorString)); @@ -209,9 +221,11 @@ public void handle(ResponseMessage message) throws Exception { MergedBlockMetaResponseCallback listener = (MergedBlockMetaResponseCallback) outstandingRpcs.get(resp.requestId); if (listener == null) { - logger.warn( - "Ignoring response for MergedBlockMetaRequest {} from {} ({} bytes) since it is not" - + " outstanding", resp.requestId, getRemoteAddress(channel), resp.body().size()); + logger.warn("Ignoring response for MergedBlockMetaRequest {} from {} ({} bytes) since " + + "it is not outstanding", + MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId), + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)), + MDC.of(LogKeys.RESPONSE_BODY_SIZE$.MODULE$, resp.body().size())); } else { outstandingRpcs.remove(resp.requestId); listener.onSuccess(resp.getNumChunks(), resp.body()); @@ -255,7 +269,8 @@ public void handle(ResponseMessage message) throws Exception { logger.warn("Error in stream failure handler.", ioe); } } else { - logger.warn("Stream failure with unknown callback: {}", resp.error); + logger.warn("Stream failure with unknown callback: {}", + MDC.of(LogKeys.ERROR$.MODULE$, resp.error)); } } else { throw new IllegalStateException("Unknown response type: " + message.type()); diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java index b55541017c9d2..08e2c084fe67b 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java @@ -26,9 +26,9 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportClientBootstrap; import org.apache.spark.network.sasl.SaslClientBootstrap; @@ -47,7 +47,7 @@ */ public class AuthClientBootstrap implements TransportClientBootstrap { - private static final Logger LOG = LoggerFactory.getLogger(AuthClientBootstrap.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(AuthClientBootstrap.class); private final TransportConf conf; private final String appId; diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java 
b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java index cb68cfb5a0e88..8449a774a404a 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java @@ -45,6 +45,8 @@ class AuthEngine implements Closeable { public static final byte[] INPUT_IV_INFO = "inputIv".getBytes(UTF_8); public static final byte[] OUTPUT_IV_INFO = "outputIv".getBytes(UTF_8); private static final String MAC_ALGORITHM = "HMACSHA256"; + private static final String LEGACY_CIPHER_ALGORITHM = "AES/CTR/NoPadding"; + private static final String CIPHER_ALGORITHM = "AES/GCM/NoPadding"; private static final int AES_GCM_KEY_SIZE_BYTES = 16; private static final byte[] EMPTY_TRANSCRIPT = new byte[0]; private static final int UNSAFE_SKIP_HKDF_VERSION = 1; @@ -227,12 +229,19 @@ private TransportCipher generateTransportCipher( OUTPUT_IV_INFO, // This is the HKDF info field used to differentiate IV values AES_GCM_KEY_SIZE_BYTES); SecretKeySpec sessionKey = new SecretKeySpec(derivedKey, "AES"); - return new TransportCipher( - cryptoConf, - conf.cipherTransformation(), - sessionKey, - isClient ? clientIv : serverIv, // If it's the client, use the client IV first - isClient ? serverIv : clientIv); + if (LEGACY_CIPHER_ALGORITHM.equalsIgnoreCase(conf.cipherTransformation())) { + return new CtrTransportCipher( + cryptoConf, + sessionKey, + isClient ? clientIv : serverIv, // If it's the client, use the client IV first + isClient ? serverIv : clientIv); + } else if (CIPHER_ALGORITHM.equalsIgnoreCase(conf.cipherTransformation())) { + return new GcmTransportCipher(sessionKey); + } else { + throw new IllegalArgumentException( + String.format("Unsupported cipher mode: %s. %s and %s are supported.", + conf.cipherTransformation(), CIPHER_ALGORITHM, LEGACY_CIPHER_ALGORITHM)); + } } private byte[] getTranscript(AuthMessage... encryptedPublicKeys) { diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java index 9a7ce8b7b31d6..65367743e24f9 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java @@ -25,9 +25,11 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.sasl.SecretKeyHolder; @@ -46,7 +48,7 @@ * authenticated. A connection may be authenticated at most once. */ class AuthRpcHandler extends AbstractAuthRpcHandler { - private static final Logger LOG = LoggerFactory.getLogger(AuthRpcHandler.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(AuthRpcHandler.class); /** Transport configuration. 
*/ private final TransportConf conf; @@ -91,7 +93,7 @@ protected boolean doAuthChallenge( } catch (RuntimeException e) { if (conf.saslFallback()) { LOG.warn("Failed to parse new auth challenge, reverting to SASL for client {}.", - channel.remoteAddress()); + MDC.of(LogKeys.HOST_PORT$.MODULE$, channel.remoteAddress())); saslHandler = new SaslRpcHandler(conf, channel, null, secretKeyHolder); message.position(position); message.limit(limit); diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java new file mode 100644 index 0000000000000..85b893751b39c --- /dev/null +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java @@ -0,0 +1,381 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.crypto; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; +import java.security.GeneralSecurityException; +import java.util.Properties; +import javax.crypto.spec.SecretKeySpec; +import javax.crypto.spec.IvParameterSpec; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.*; +import org.apache.commons.crypto.stream.CryptoInputStream; +import org.apache.commons.crypto.stream.CryptoOutputStream; + +import org.apache.spark.network.util.AbstractFileRegion; +import org.apache.spark.network.util.ByteArrayReadableChannel; +import org.apache.spark.network.util.ByteArrayWritableChannel; + +/** + * Cipher for encryption and decryption. + */ +public class CtrTransportCipher implements TransportCipher { + @VisibleForTesting + static final String ENCRYPTION_HANDLER_NAME = "CtrTransportEncryption"; + private static final String DECRYPTION_HANDLER_NAME = "CtrTransportDecryption"; + @VisibleForTesting + static final int STREAM_BUFFER_SIZE = 1024 * 32; + + private final Properties conf; + private static final String CIPHER_ALGORITHM = "AES/CTR/NoPadding"; + private final SecretKeySpec key; + private final byte[] inIv; + private final byte[] outIv; + + public CtrTransportCipher( + Properties conf, + SecretKeySpec key, + byte[] inIv, + byte[] outIv) { + this.conf = conf; + this.key = key; + this.inIv = inIv; + this.outIv = outIv; + } + + /* + * This method is for testing purposes only. 
+ */ + @VisibleForTesting + public String getKeyId() throws GeneralSecurityException { + return TransportCipherUtil.getKeyId(key); + } + + @VisibleForTesting + SecretKeySpec getKey() { + return key; + } + + /** The IV for the input channel (i.e. output channel of the remote side). */ + public byte[] getInputIv() { + return inIv; + } + + /** The IV for the output channel (i.e. input channel of the remote side). */ + public byte[] getOutputIv() { + return outIv; + } + + @VisibleForTesting + CryptoOutputStream createOutputStream(WritableByteChannel ch) throws IOException { + return new CryptoOutputStream(CIPHER_ALGORITHM, conf, ch, key, new IvParameterSpec(outIv)); + } + + @VisibleForTesting + CryptoInputStream createInputStream(ReadableByteChannel ch) throws IOException { + return new CryptoInputStream(CIPHER_ALGORITHM, conf, ch, key, new IvParameterSpec(inIv)); + } + + /** + * Add handlers to channel. + * + * @param ch the channel for adding handlers + * @throws IOException + */ + public void addToChannel(Channel ch) throws IOException { + ch.pipeline() + .addFirst(ENCRYPTION_HANDLER_NAME, new EncryptionHandler(this)) + .addFirst(DECRYPTION_HANDLER_NAME, new DecryptionHandler(this)); + } + + @VisibleForTesting + static class EncryptionHandler extends ChannelOutboundHandlerAdapter { + private final ByteArrayWritableChannel byteEncChannel; + private final CryptoOutputStream cos; + private final ByteArrayWritableChannel byteRawChannel; + private boolean isCipherValid; + + EncryptionHandler(CtrTransportCipher cipher) throws IOException { + byteEncChannel = new ByteArrayWritableChannel(STREAM_BUFFER_SIZE); + cos = cipher.createOutputStream(byteEncChannel); + byteRawChannel = new ByteArrayWritableChannel(STREAM_BUFFER_SIZE); + isCipherValid = true; + } + + @Override + public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) + throws Exception { + ctx.write(createEncryptedMessage(msg), promise); + } + + @VisibleForTesting + EncryptedMessage createEncryptedMessage(Object msg) { + return new EncryptedMessage(this, cos, msg, byteEncChannel, byteRawChannel); + } + + @Override + public void close(ChannelHandlerContext ctx, ChannelPromise promise) throws Exception { + try { + if (isCipherValid) { + cos.close(); + } + } finally { + super.close(ctx, promise); + } + } + + /** + * SPARK-25535. Workaround for CRYPTO-141. Avoid further interaction with the underlying cipher + * after an error occurs. + */ + void reportError() { + this.isCipherValid = false; + } + + boolean isCipherValid() { + return isCipherValid; + } + } + + private static class DecryptionHandler extends ChannelInboundHandlerAdapter { + private final CryptoInputStream cis; + private final ByteArrayReadableChannel byteChannel; + private boolean isCipherValid; + + DecryptionHandler(CtrTransportCipher cipher) throws IOException { + byteChannel = new ByteArrayReadableChannel(); + cis = cipher.createInputStream(byteChannel); + isCipherValid = true; + } + + @Override + public void channelRead(ChannelHandlerContext ctx, Object data) throws Exception { + ByteBuf buffer = (ByteBuf) data; + + try { + if (!isCipherValid) { + throw new IOException("Cipher is in invalid state."); + } + byte[] decryptedData = new byte[buffer.readableBytes()]; + byteChannel.feedData(buffer); + + int offset = 0; + while (offset < decryptedData.length) { + // SPARK-25535: workaround for CRYPTO-141. 
+ try { + offset += cis.read(decryptedData, offset, decryptedData.length - offset); + } catch (InternalError ie) { + isCipherValid = false; + throw ie; + } + } + + ctx.fireChannelRead(Unpooled.wrappedBuffer(decryptedData, 0, decryptedData.length)); + } finally { + buffer.release(); + } + } + + @Override + public void handlerRemoved(ChannelHandlerContext ctx) throws Exception { + // We do the closing of the stream / channel in handlerRemoved(...) as + // this method will be called in all cases: + // + // - when the Channel becomes inactive + // - when the handler is removed from the ChannelPipeline + try { + if (isCipherValid) { + cis.close(); + } + } finally { + super.handlerRemoved(ctx); + } + } + } + + @VisibleForTesting + static class EncryptedMessage extends AbstractFileRegion { + private final boolean isByteBuf; + private final ByteBuf buf; + private final FileRegion region; + private final CryptoOutputStream cos; + private final EncryptionHandler handler; + private final long count; + private long transferred; + + // Due to streaming issue CRYPTO-125: https://issues.apache.org/jira/browse/CRYPTO-125, it has + // to utilize two helper ByteArrayWritableChannel for streaming. One is used to receive raw data + // from upper handler, another is used to store encrypted data. + private final ByteArrayWritableChannel byteEncChannel; + private final ByteArrayWritableChannel byteRawChannel; + + private ByteBuffer currentEncrypted; + + EncryptedMessage( + EncryptionHandler handler, + CryptoOutputStream cos, + Object msg, + ByteArrayWritableChannel byteEncChannel, + ByteArrayWritableChannel byteRawChannel) { + Preconditions.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion, + "Unrecognized message type: %s", msg.getClass().getName()); + this.handler = handler; + this.isByteBuf = msg instanceof ByteBuf; + this.buf = isByteBuf ? (ByteBuf) msg : null; + this.region = isByteBuf ? null : (FileRegion) msg; + this.transferred = 0; + this.cos = cos; + this.byteEncChannel = byteEncChannel; + this.byteRawChannel = byteRawChannel; + this.count = isByteBuf ? buf.readableBytes() : region.count(); + } + + @Override + public long count() { + return count; + } + + @Override + public long position() { + return 0; + } + + @Override + public long transferred() { + return transferred; + } + + @Override + public EncryptedMessage touch(Object o) { + super.touch(o); + if (region != null) { + region.touch(o); + } + if (buf != null) { + buf.touch(o); + } + return this; + } + + @Override + public EncryptedMessage retain(int increment) { + super.retain(increment); + if (region != null) { + region.retain(increment); + } + if (buf != null) { + buf.retain(increment); + } + return this; + } + + @Override + public boolean release(int decrement) { + if (region != null) { + region.release(decrement); + } + if (buf != null) { + buf.release(decrement); + } + return super.release(decrement); + } + + @Override + public long transferTo(WritableByteChannel target, long position) throws IOException { + Preconditions.checkArgument(position == transferred(), "Invalid position."); + + if (transferred == count) { + return 0; + } + + long totalBytesWritten = 0L; + do { + if (currentEncrypted == null) { + encryptMore(); + } + + long remaining = currentEncrypted.remaining(); + if (remaining == 0) { + // Just for safety to avoid endless loop. It usually won't happen, but since the + // underlying `region.transferTo` is allowed to transfer 0 bytes, we should handle it for + // safety. 
+ currentEncrypted = null; + byteEncChannel.reset(); + return totalBytesWritten; + } + + long bytesWritten = target.write(currentEncrypted); + totalBytesWritten += bytesWritten; + transferred += bytesWritten; + if (bytesWritten < remaining) { + // break as the underlying buffer in "target" is full + break; + } + currentEncrypted = null; + byteEncChannel.reset(); + } while (transferred < count); + + return totalBytesWritten; + } + + private void encryptMore() throws IOException { + if (!handler.isCipherValid()) { + throw new IOException("Cipher is in invalid state."); + } + byteRawChannel.reset(); + + if (isByteBuf) { + int copied = byteRawChannel.write(buf.nioBuffer()); + buf.skipBytes(copied); + } else { + region.transferTo(byteRawChannel, region.transferred()); + } + + try { + cos.write(byteRawChannel.getData(), 0, byteRawChannel.length()); + cos.flush(); + } catch (InternalError ie) { + handler.reportError(); + throw ie; + } + + currentEncrypted = ByteBuffer.wrap(byteEncChannel.getData(), + 0, byteEncChannel.length()); + } + + @Override + protected void deallocate() { + byteRawChannel.reset(); + byteEncChannel.reset(); + if (region != null) { + region.release(); + } + if (buf != null) { + buf.release(); + } + } + } + +} diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java new file mode 100644 index 0000000000000..c3540838bef09 --- /dev/null +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java @@ -0,0 +1,410 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.crypto; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.primitives.Longs; +import com.google.crypto.tink.subtle.*; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.*; +import io.netty.util.ReferenceCounted; +import org.apache.spark.network.util.AbstractFileRegion; +import org.apache.spark.network.util.ByteBufferWriteableChannel; + +import javax.crypto.spec.SecretKeySpec; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.security.GeneralSecurityException; +import java.security.InvalidAlgorithmParameterException; + +public class GcmTransportCipher implements TransportCipher { + private static final String HKDF_ALG = "HmacSha256"; + private static final int LENGTH_HEADER_BYTES = 8; + @VisibleForTesting + static final int CIPHERTEXT_BUFFER_SIZE = 32 * 1024; // 32KB + private final SecretKeySpec aesKey; + + public GcmTransportCipher(SecretKeySpec aesKey) { + this.aesKey = aesKey; + } + + AesGcmHkdfStreaming getAesGcmHkdfStreaming() throws InvalidAlgorithmParameterException { + return new AesGcmHkdfStreaming( + aesKey.getEncoded(), + HKDF_ALG, + aesKey.getEncoded().length, + CIPHERTEXT_BUFFER_SIZE, + 0); + } + + /* + * This method is for testing purposes only. + */ + @VisibleForTesting + public String getKeyId() throws GeneralSecurityException { + return TransportCipherUtil.getKeyId(aesKey); + } + + @VisibleForTesting + EncryptionHandler getEncryptionHandler() throws GeneralSecurityException { + return new EncryptionHandler(); + } + + @VisibleForTesting + DecryptionHandler getDecryptionHandler() throws GeneralSecurityException { + return new DecryptionHandler(); + } + + public void addToChannel(Channel ch) throws GeneralSecurityException { + ch.pipeline() + .addFirst("GcmTransportEncryption", getEncryptionHandler()) + .addFirst("GcmTransportDecryption", getDecryptionHandler()); + } + + @VisibleForTesting + class EncryptionHandler extends ChannelOutboundHandlerAdapter { + private final ByteBuffer plaintextBuffer; + private final ByteBuffer ciphertextBuffer; + private final AesGcmHkdfStreaming aesGcmHkdfStreaming; + + EncryptionHandler() throws InvalidAlgorithmParameterException { + aesGcmHkdfStreaming = getAesGcmHkdfStreaming(); + plaintextBuffer = ByteBuffer.allocate(aesGcmHkdfStreaming.getPlaintextSegmentSize()); + ciphertextBuffer = ByteBuffer.allocate(aesGcmHkdfStreaming.getCiphertextSegmentSize()); + } + + @Override + public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) + throws Exception { + GcmEncryptedMessage encryptedMessage = new GcmEncryptedMessage( + aesGcmHkdfStreaming, + msg, + plaintextBuffer, + ciphertextBuffer); + ctx.write(encryptedMessage, promise); + } + } + + static class GcmEncryptedMessage extends AbstractFileRegion { + private final Object plaintextMessage; + private final ByteBuffer plaintextBuffer; + private final ByteBuffer ciphertextBuffer; + private final ByteBuffer headerByteBuffer; + private final long bytesToRead; + private long bytesRead = 0; + private final StreamSegmentEncrypter encrypter; + private long transferred = 0; + private final long encryptedCount; + + GcmEncryptedMessage(AesGcmHkdfStreaming aesGcmHkdfStreaming, + Object plaintextMessage, + ByteBuffer plaintextBuffer, + ByteBuffer ciphertextBuffer) throws GeneralSecurityException { + Preconditions.checkArgument( + plaintextMessage instanceof 
ByteBuf || plaintextMessage instanceof FileRegion, + "Unrecognized message type: %s", plaintextMessage.getClass().getName()); + this.plaintextMessage = plaintextMessage; + this.plaintextBuffer = plaintextBuffer; + this.ciphertextBuffer = ciphertextBuffer; + // If the ciphertext buffer cannot be fully written to the target, transferTo may + // return with it containing some unwritten data. On the initial call we explicitly + // set its limit to 0 to indicate the first call to transferTo. + this.ciphertextBuffer.limit(0); + + this.bytesToRead = getReadableBytes(); + this.encryptedCount = + LENGTH_HEADER_BYTES + aesGcmHkdfStreaming.expectedCiphertextSize(bytesToRead); + byte[] lengthAad = Longs.toByteArray(encryptedCount); + this.encrypter = aesGcmHkdfStreaming.newStreamSegmentEncrypter(lengthAad); + this.headerByteBuffer = createHeaderByteBuffer(); + } + + // The format of the output is: + // [8 byte length][Internal IV and header][Ciphertext][Auth Tag] + private ByteBuffer createHeaderByteBuffer() { + ByteBuffer encrypterHeader = encrypter.getHeader(); + return ByteBuffer + .allocate(encrypterHeader.remaining() + LENGTH_HEADER_BYTES) + .putLong(encryptedCount) + .put(encrypterHeader) + .flip(); + } + + @Override + public long position() { + return 0; + } + + @Override + public long transferred() { + return transferred; + } + + @Override + public long count() { + return encryptedCount; + } + + @Override + public GcmEncryptedMessage touch(Object o) { + super.touch(o); + if (plaintextMessage instanceof ByteBuf byteBuf) { + byteBuf.touch(o); + } else if (plaintextMessage instanceof FileRegion fileRegion) { + fileRegion.touch(o); + } + return this; + } + + @Override + public GcmEncryptedMessage retain(int increment) { + super.retain(increment); + if (plaintextMessage instanceof ByteBuf byteBuf) { + byteBuf.retain(increment); + } else if (plaintextMessage instanceof FileRegion fileRegion) { + fileRegion.retain(increment); + } + return this; + } + + @Override + public boolean release(int decrement) { + if (plaintextMessage instanceof ByteBuf byteBuf) { + byteBuf.release(decrement); + } else if (plaintextMessage instanceof FileRegion fileRegion) { + fileRegion.release(decrement); + } + return super.release(decrement); + } + + @Override + public long transferTo(WritableByteChannel target, long position) throws IOException { + int transferredThisCall = 0; + // If the header is not empty, try to write it out to the target. + if (headerByteBuffer.hasRemaining()) { + int written = target.write(headerByteBuffer); + transferredThisCall += written; + this.transferred += written; + if (headerByteBuffer.hasRemaining()) { + return written; + } + } + // If the ciphertext buffer is not empty, try to write it to the target. 
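+ // This is ciphertext left over from a previous call that the target could not fully accept.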
+ if (ciphertextBuffer.hasRemaining()) { + int written = target.write(ciphertextBuffer); + transferredThisCall += written; + this.transferred += written; + if (ciphertextBuffer.hasRemaining()) { + return transferredThisCall; + } + } + while (bytesRead < bytesToRead) { + long readableBytes = getReadableBytes(); + int readLimit = + (int) Math.min(readableBytes, plaintextBuffer.remaining()); + if (plaintextMessage instanceof ByteBuf byteBuf) { + Preconditions.checkState(0 == plaintextBuffer.position()); + plaintextBuffer.limit(readLimit); + byteBuf.readBytes(plaintextBuffer); + Preconditions.checkState(readLimit == plaintextBuffer.position()); + } else if (plaintextMessage instanceof FileRegion fileRegion) { + ByteBufferWriteableChannel plaintextChannel = + new ByteBufferWriteableChannel(plaintextBuffer); + long plaintextRead = + fileRegion.transferTo(plaintextChannel, fileRegion.transferred()); + if (plaintextRead < readLimit) { + // If we do not read a full plaintext buffer or all the available + // readable bytes, return what was transferred this call. + return transferredThisCall; + } + } + boolean lastSegment = getReadableBytes() == 0; + plaintextBuffer.flip(); + bytesRead += plaintextBuffer.remaining(); + ciphertextBuffer.clear(); + try { + encrypter.encryptSegment(plaintextBuffer, lastSegment, ciphertextBuffer); + } catch (GeneralSecurityException e) { + throw new IllegalStateException("GeneralSecurityException from encrypter", e); + } + plaintextBuffer.clear(); + ciphertextBuffer.flip(); + int written = target.write(ciphertextBuffer); + transferredThisCall += written; + this.transferred += written; + if (ciphertextBuffer.hasRemaining()) { + // In this case, upon calling transferTo again, it will try to write the + // remaining ciphertext buffer in the conditional before this loop. 
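+ // The caller is expected to invoke transferTo again once the target can accept more bytes.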
+ return transferredThisCall; + } + } + return transferredThisCall; + } + + private long getReadableBytes() { + if (plaintextMessage instanceof ByteBuf byteBuf) { + return byteBuf.readableBytes(); + } else if (plaintextMessage instanceof FileRegion fileRegion) { + return fileRegion.count() - fileRegion.transferred(); + } else { + throw new IllegalArgumentException("Unsupported message type: " + + plaintextMessage.getClass().getName()); + } + } + + @Override + protected void deallocate() { + if (plaintextMessage instanceof ReferenceCounted referenceCounted) { + referenceCounted.release(); + } + plaintextBuffer.clear(); + ciphertextBuffer.clear(); + } + } + + @VisibleForTesting + class DecryptionHandler extends ChannelInboundHandlerAdapter { + private final ByteBuffer expectedLengthBuffer; + private final ByteBuffer headerBuffer; + private final ByteBuffer ciphertextBuffer; + private final AesGcmHkdfStreaming aesGcmHkdfStreaming; + private final StreamSegmentDecrypter decrypter; + private final int plaintextSegmentSize; + private boolean decrypterInit = false; + private boolean completed = false; + private int segmentNumber = 0; + private long expectedLength = -1; + private long ciphertextRead = 0; + + DecryptionHandler() throws GeneralSecurityException { + aesGcmHkdfStreaming = getAesGcmHkdfStreaming(); + expectedLengthBuffer = ByteBuffer.allocate(LENGTH_HEADER_BYTES); + headerBuffer = ByteBuffer.allocate(aesGcmHkdfStreaming.getHeaderLength()); + ciphertextBuffer = + ByteBuffer.allocate(aesGcmHkdfStreaming.getCiphertextSegmentSize()); + decrypter = aesGcmHkdfStreaming.newStreamSegmentDecrypter(); + plaintextSegmentSize = aesGcmHkdfStreaming.getPlaintextSegmentSize(); + } + + private boolean initalizeExpectedLength(ByteBuf ciphertextNettyBuf) { + if (expectedLength < 0) { + ciphertextNettyBuf.readBytes(expectedLengthBuffer); + if (expectedLengthBuffer.hasRemaining()) { + // We did not read enough bytes to initialize the expected length. + return false; + } + expectedLengthBuffer.flip(); + expectedLength = expectedLengthBuffer.getLong(); + if (expectedLength < 0) { + throw new IllegalStateException("Invalid expected ciphertext length."); + } + ciphertextRead += LENGTH_HEADER_BYTES; + } + return true; + } + + private boolean initalizeDecrypter(ByteBuf ciphertextNettyBuf) + throws GeneralSecurityException { + // Check if the ciphertext header has been read. This contains + // the IV and other internal metadata. + if (!decrypterInit) { + ciphertextNettyBuf.readBytes(headerBuffer); + if (headerBuffer.hasRemaining()) { + // We did not read enough bytes to initialize the header. + return false; + } + headerBuffer.flip(); + byte[] lengthAad = Longs.toByteArray(expectedLength); + decrypter.init(headerBuffer, lengthAad); + decrypterInit = true; + ciphertextRead += aesGcmHkdfStreaming.getHeaderLength(); + if (expectedLength == ciphertextRead) { + // If the expected length is just the header, the ciphertext is 0 length. 
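+ // Mark the stream complete; there are no ciphertext segments left to decrypt.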
+ completed = true; + } + } + return true; + } + + @Override + public void channelRead(ChannelHandlerContext ctx, Object ciphertextMessage) + throws GeneralSecurityException { + Preconditions.checkArgument(ciphertextMessage instanceof ByteBuf, + "Unrecognized message type: %s", + ciphertextMessage.getClass().getName()); + ByteBuf ciphertextNettyBuf = (ByteBuf) ciphertextMessage; + // The format of the output is: + // [8 byte length][Internal IV and header][Ciphertext][Auth Tag] + try { + if (!initalizeExpectedLength(ciphertextNettyBuf)) { + // We have not read enough bytes to initialize the expected length. + return; + } + if (!initalizeDecrypter(ciphertextNettyBuf)) { + // We have not read enough bytes to initialize a header, needed to + // initialize a decrypter. + return; + } + int nettyBufReadableBytes = ciphertextNettyBuf.readableBytes(); + while (nettyBufReadableBytes > 0 && !completed) { + // Read the ciphertext into the local buffer + int readableBytes = Integer.min( + nettyBufReadableBytes, + ciphertextBuffer.remaining()); + int expectedRemaining = (int) (expectedLength - ciphertextRead); + int bytesToRead = Integer.min(readableBytes, expectedRemaining); + // The smallest ciphertext size is 16 bytes for the auth tag + ciphertextBuffer.limit(ciphertextBuffer.position() + bytesToRead); + ciphertextNettyBuf.readBytes(ciphertextBuffer); + ciphertextRead += bytesToRead; + // Check if this is the last segment + if (ciphertextRead == expectedLength) { + completed = true; + } else if (ciphertextRead > expectedLength) { + throw new IllegalStateException("Read more ciphertext than expected."); + } + // If the ciphertext buffer is full, or this is the last segment, + // then decrypt it and fire a read. + if (ciphertextBuffer.limit() == ciphertextBuffer.capacity() || completed) { + ByteBuffer plaintextBuffer = ByteBuffer.allocate(plaintextSegmentSize); + ciphertextBuffer.flip(); + decrypter.decryptSegment( + ciphertextBuffer, + segmentNumber, + completed, + plaintextBuffer); + segmentNumber++; + // Clear the ciphertext buffer because it's been read + ciphertextBuffer.clear(); + plaintextBuffer.flip(); + ctx.fireChannelRead(Unpooled.wrappedBuffer(plaintextBuffer)); + } else { + // Set the ciphertext buffer up to read the next chunk + ciphertextBuffer.limit(ciphertextBuffer.capacity()); + } + nettyBufReadableBytes = ciphertextNettyBuf.readableBytes(); + } + } finally { + ciphertextNettyBuf.release(); + } + } + } +} diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java index b507f911fe11a..355c552720185 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java @@ -17,362 +17,32 @@ package org.apache.spark.network.crypto; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.ReadableByteChannel; -import java.nio.channels.WritableByteChannel; -import java.util.Properties; -import javax.crypto.spec.SecretKeySpec; -import javax.crypto.spec.IvParameterSpec; - import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import io.netty.buffer.ByteBuf; -import io.netty.buffer.Unpooled; -import io.netty.channel.*; -import org.apache.commons.crypto.stream.CryptoInputStream; -import org.apache.commons.crypto.stream.CryptoOutputStream; - -import 
org.apache.spark.network.util.AbstractFileRegion; -import org.apache.spark.network.util.ByteArrayReadableChannel; -import org.apache.spark.network.util.ByteArrayWritableChannel; - -/** - * Cipher for encryption and decryption. - */ -public class TransportCipher { - @VisibleForTesting - static final String ENCRYPTION_HANDLER_NAME = "TransportEncryption"; - private static final String DECRYPTION_HANDLER_NAME = "TransportDecryption"; - @VisibleForTesting - static final int STREAM_BUFFER_SIZE = 1024 * 32; - - private final Properties conf; - private final String cipher; - private final SecretKeySpec key; - private final byte[] inIv; - private final byte[] outIv; - - public TransportCipher( - Properties conf, - String cipher, - SecretKeySpec key, - byte[] inIv, - byte[] outIv) { - this.conf = conf; - this.cipher = cipher; - this.key = key; - this.inIv = inIv; - this.outIv = outIv; - } - - public String getCipherTransformation() { - return cipher; - } - - @VisibleForTesting - SecretKeySpec getKey() { - return key; - } - - /** The IV for the input channel (i.e. output channel of the remote side). */ - public byte[] getInputIv() { - return inIv; - } - - /** The IV for the output channel (i.e. input channel of the remote side). */ - public byte[] getOutputIv() { - return outIv; - } - - @VisibleForTesting - CryptoOutputStream createOutputStream(WritableByteChannel ch) throws IOException { - return new CryptoOutputStream(cipher, conf, ch, key, new IvParameterSpec(outIv)); - } - - @VisibleForTesting - CryptoInputStream createInputStream(ReadableByteChannel ch) throws IOException { - return new CryptoInputStream(cipher, conf, ch, key, new IvParameterSpec(inIv)); - } - - /** - * Add handlers to channel. - * - * @param ch the channel for adding handlers - * @throws IOException - */ - public void addToChannel(Channel ch) throws IOException { - ch.pipeline() - .addFirst(ENCRYPTION_HANDLER_NAME, new EncryptionHandler(this)) - .addFirst(DECRYPTION_HANDLER_NAME, new DecryptionHandler(this)); - } - - @VisibleForTesting - static class EncryptionHandler extends ChannelOutboundHandlerAdapter { - private final ByteArrayWritableChannel byteEncChannel; - private final CryptoOutputStream cos; - private final ByteArrayWritableChannel byteRawChannel; - private boolean isCipherValid; - - EncryptionHandler(TransportCipher cipher) throws IOException { - byteEncChannel = new ByteArrayWritableChannel(STREAM_BUFFER_SIZE); - cos = cipher.createOutputStream(byteEncChannel); - byteRawChannel = new ByteArrayWritableChannel(STREAM_BUFFER_SIZE); - isCipherValid = true; - } +import com.google.crypto.tink.subtle.Hex; +import com.google.crypto.tink.subtle.Hkdf; +import io.netty.channel.Channel; - @Override - public void write(ChannelHandlerContext ctx, Object msg, ChannelPromise promise) - throws Exception { - ctx.write(createEncryptedMessage(msg), promise); - } - - @VisibleForTesting - EncryptedMessage createEncryptedMessage(Object msg) { - return new EncryptedMessage(this, cos, msg, byteEncChannel, byteRawChannel); - } +import javax.crypto.spec.SecretKeySpec; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.security.GeneralSecurityException; - @Override - public void close(ChannelHandlerContext ctx, ChannelPromise promise) throws Exception { - try { - if (isCipherValid) { - cos.close(); - } - } finally { - super.close(ctx, promise); - } - } +interface TransportCipher { + String getKeyId() throws GeneralSecurityException; + void addToChannel(Channel channel) throws IOException, 
GeneralSecurityException; +} - /** - * SPARK-25535. Workaround for CRYPTO-141. Avoid further interaction with the underlying cipher - * after an error occurs. +class TransportCipherUtil { + /* + * This method is used for testing to verify key derivation. */ - void reportError() { - this.isCipherValid = false; - } - - boolean isCipherValid() { - return isCipherValid; - } - } - - private static class DecryptionHandler extends ChannelInboundHandlerAdapter { - private final CryptoInputStream cis; - private final ByteArrayReadableChannel byteChannel; - private boolean isCipherValid; - - DecryptionHandler(TransportCipher cipher) throws IOException { - byteChannel = new ByteArrayReadableChannel(); - cis = cipher.createInputStream(byteChannel); - isCipherValid = true; - } - - @Override - public void channelRead(ChannelHandlerContext ctx, Object data) throws Exception { - ByteBuf buffer = (ByteBuf) data; - - try { - if (!isCipherValid) { - throw new IOException("Cipher is in invalid state."); - } - byte[] decryptedData = new byte[buffer.readableBytes()]; - byteChannel.feedData(buffer); - - int offset = 0; - while (offset < decryptedData.length) { - // SPARK-25535: workaround for CRYPTO-141. - try { - offset += cis.read(decryptedData, offset, decryptedData.length - offset); - } catch (InternalError ie) { - isCipherValid = false; - throw ie; - } - } - - ctx.fireChannelRead(Unpooled.wrappedBuffer(decryptedData, 0, decryptedData.length)); - } finally { - buffer.release(); - } - } - - @Override - public void handlerRemoved(ChannelHandlerContext ctx) throws Exception { - // We do the closing of the stream / channel in handlerRemoved(...) as - // this method will be called in all cases: - // - // - when the Channel becomes inactive - // - when the handler is removed from the ChannelPipeline - try { - if (isCipherValid) { - cis.close(); - } - } finally { - super.handlerRemoved(ctx); - } - } - } - - @VisibleForTesting - static class EncryptedMessage extends AbstractFileRegion { - private final boolean isByteBuf; - private final ByteBuf buf; - private final FileRegion region; - private final CryptoOutputStream cos; - private final EncryptionHandler handler; - private final long count; - private long transferred; - - // Due to streaming issue CRYPTO-125: https://issues.apache.org/jira/browse/CRYPTO-125, it has - // to utilize two helper ByteArrayWritableChannel for streaming. One is used to receive raw data - // from upper handler, another is used to store encrypted data. - private final ByteArrayWritableChannel byteEncChannel; - private final ByteArrayWritableChannel byteRawChannel; - - private ByteBuffer currentEncrypted; - - EncryptedMessage( - EncryptionHandler handler, - CryptoOutputStream cos, - Object msg, - ByteArrayWritableChannel byteEncChannel, - ByteArrayWritableChannel byteRawChannel) { - Preconditions.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion, - "Unrecognized message type: %s", msg.getClass().getName()); - this.handler = handler; - this.isByteBuf = msg instanceof ByteBuf; - this.buf = isByteBuf ? (ByteBuf) msg : null; - this.region = isByteBuf ? null : (FileRegion) msg; - this.transferred = 0; - this.cos = cos; - this.byteEncChannel = byteEncChannel; - this.byteRawChannel = byteRawChannel; - this.count = isByteBuf ? 
buf.readableBytes() : region.count(); - } - - @Override - public long count() { - return count; - } - - @Override - public long position() { - return 0; - } - - @Override - public long transferred() { - return transferred; - } - - @Override - public EncryptedMessage touch(Object o) { - super.touch(o); - if (region != null) { - region.touch(o); - } - if (buf != null) { - buf.touch(o); - } - return this; - } - - @Override - public EncryptedMessage retain(int increment) { - super.retain(increment); - if (region != null) { - region.retain(increment); - } - if (buf != null) { - buf.retain(increment); - } - return this; - } - - @Override - public boolean release(int decrement) { - if (region != null) { - region.release(decrement); - } - if (buf != null) { - buf.release(decrement); - } - return super.release(decrement); - } - - @Override - public long transferTo(WritableByteChannel target, long position) throws IOException { - Preconditions.checkArgument(position == transferred(), "Invalid position."); - - if (transferred == count) { - return 0; - } - - long totalBytesWritten = 0L; - do { - if (currentEncrypted == null) { - encryptMore(); - } - - long remaining = currentEncrypted.remaining(); - if (remaining == 0) { - // Just for safety to avoid endless loop. It usually won't happen, but since the - // underlying `region.transferTo` is allowed to transfer 0 bytes, we should handle it for - // safety. - currentEncrypted = null; - byteEncChannel.reset(); - return totalBytesWritten; - } - - long bytesWritten = target.write(currentEncrypted); - totalBytesWritten += bytesWritten; - transferred += bytesWritten; - if (bytesWritten < remaining) { - // break as the underlying buffer in "target" is full - break; - } - currentEncrypted = null; - byteEncChannel.reset(); - } while (transferred < count); - - return totalBytesWritten; - } - - private void encryptMore() throws IOException { - if (!handler.isCipherValid()) { - throw new IOException("Cipher is in invalid state."); - } - byteRawChannel.reset(); - - if (isByteBuf) { - int copied = byteRawChannel.write(buf.nioBuffer()); - buf.skipBytes(copied); - } else { - region.transferTo(byteRawChannel, region.transferred()); - } - - try { - cos.write(byteRawChannel.getData(), 0, byteRawChannel.length()); - cos.flush(); - } catch (InternalError ie) { - handler.reportError(); - throw ie; - } - - currentEncrypted = ByteBuffer.wrap(byteEncChannel.getData(), - 0, byteEncChannel.length()); - } - - @Override - protected void deallocate() { - byteRawChannel.reset(); - byteEncChannel.reset(); - if (region != null) { - region.release(); - } - if (buf != null) { - buf.release(); - } + @VisibleForTesting + static String getKeyId(SecretKeySpec key) throws GeneralSecurityException { + byte[] keyIdBytes = Hkdf.computeHkdf("HmacSha256", + key.getEncoded(), + null, + "keyID".getBytes(StandardCharsets.UTF_8), + 32); + return Hex.encode(keyIdBytes); } - } - } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java index d9f83ce8bac35..321ac13881c2a 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java @@ -1,4 +1,3 @@ - /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with @@ -15,6 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.apache.spark.network.protocol; import java.io.EOFException; diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageDecoder.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageDecoder.java index 29369f6c20600..a9b700a7800e0 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageDecoder.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageDecoder.java @@ -23,8 +23,9 @@ import io.netty.channel.ChannelHandler; import io.netty.channel.ChannelHandlerContext; import io.netty.handler.codec.MessageToMessageDecoder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; /** * Decoder used by the client side to encode server-to-client responses. @@ -33,7 +34,7 @@ @ChannelHandler.Sharable public final class MessageDecoder extends MessageToMessageDecoder { - private static final Logger logger = LoggerFactory.getLogger(MessageDecoder.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(MessageDecoder.class); public static final MessageDecoder INSTANCE = new MessageDecoder(); diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java index 00de47dc9fc2d..ab20fb908eb42 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java @@ -23,8 +23,11 @@ import io.netty.channel.ChannelHandler; import io.netty.channel.ChannelHandlerContext; import io.netty.handler.codec.MessageToMessageEncoder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.MDC; /** * Encoder used by the server side to encode server-to-client responses. @@ -33,7 +36,7 @@ @ChannelHandler.Sharable public final class MessageEncoder extends MessageToMessageEncoder { - private static final Logger logger = LoggerFactory.getLogger(MessageEncoder.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(MessageEncoder.class); public static final MessageEncoder INSTANCE = new MessageEncoder(); @@ -62,8 +65,9 @@ public void encode(ChannelHandlerContext ctx, Message in, List out) thro if (in instanceof AbstractResponseMessage resp) { // Re-encode this message as a failure response. String error = e.getMessage() != null ? 
e.getMessage() : "null"; - logger.error(String.format("Error processing %s for client %s", - in, ctx.channel().remoteAddress()), e); + logger.error("Error processing {} for client {}", e, + MDC.of(LogKeys.MESSAGE$.MODULE$, in), + MDC.of(LogKeys.HOST_PORT$.MODULE$, ctx.channel().remoteAddress())); encode(ctx, resp.createFailureResponse(error), out); } else { throw e; diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java index 3177971a95d57..abe6ccca7bfd6 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java @@ -26,8 +26,10 @@ import io.netty.handler.codec.MessageToMessageEncoder; import io.netty.handler.stream.ChunkedStream; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * Encoder used by the server side to encode secure (SSL) server-to-client responses. @@ -36,7 +38,7 @@ @ChannelHandler.Sharable public final class SslMessageEncoder extends MessageToMessageEncoder { - private static final Logger logger = LoggerFactory.getLogger(SslMessageEncoder.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(SslMessageEncoder.class); private SslMessageEncoder() {} @@ -68,8 +70,9 @@ public void encode(ChannelHandlerContext ctx, Message in, List out) thro if (in instanceof AbstractResponseMessage resp) { // Re-encode this message as a failure response. String error = e.getMessage() != null ? e.getMessage() : "null"; - logger.error(String.format("Error processing %s for client %s", - in, ctx.channel().remoteAddress()), e); + logger.error("Error processing {} for client {}", e, + MDC.of(LogKeys.MESSAGE$.MODULE$, in), + MDC.of(LogKeys.HOST_PORT$.MODULE$, ctx.channel().remoteAddress())); encode(ctx, resp.createFailureResponse(error), out); } else { throw e; diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslClientBootstrap.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslClientBootstrap.java index 7f29af3688eec..0a355d28c3668 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslClientBootstrap.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslClientBootstrap.java @@ -26,9 +26,9 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportClientBootstrap; import org.apache.spark.network.util.JavaUtils; @@ -39,7 +39,7 @@ * server should be setup with a {@link SaslRpcHandler} with matching keys for the given appId. 
*/ public class SaslClientBootstrap implements TransportClientBootstrap { - private static final Logger logger = LoggerFactory.getLogger(SaslClientBootstrap.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(SaslClientBootstrap.class); private final TransportConf conf; private final String appId; diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslRpcHandler.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslRpcHandler.java index cc9e88fcf98e7..b5fffe583ec63 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslRpcHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslRpcHandler.java @@ -24,9 +24,9 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.server.AbstractAuthRpcHandler; @@ -43,7 +43,7 @@ * which are individual RPCs. */ public class SaslRpcHandler extends AbstractAuthRpcHandler { - private static final Logger logger = LoggerFactory.getLogger(SaslRpcHandler.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(SaslRpcHandler.class); /** Transport configuration. */ private final TransportConf conf; diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslClient.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslClient.java index 524ff0a310655..3600c1045dbf4 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslClient.java @@ -31,8 +31,9 @@ import com.google.common.base.Throwables; import com.google.common.collect.ImmutableMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import static org.apache.spark.network.sasl.SparkSaslServer.*; @@ -42,7 +43,7 @@ * firstToken, which is then followed by a set of challenges and responses. 
*/ public class SparkSaslClient implements SaslEncryptionBackend { - private static final Logger logger = LoggerFactory.getLogger(SparkSaslClient.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(SparkSaslClient.class); private final String secretKeyId; private final SecretKeyHolder secretKeyHolder; diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java index 26e5718cb4a70..b897650afe832 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java @@ -36,8 +36,9 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.handler.codec.base64.Base64; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; /** * A SASL Server for Spark which simply keeps track of the state of a single SASL session, from the @@ -45,7 +46,7 @@ * connections on some socket.) */ public class SparkSaslServer implements SaslEncryptionBackend { - private static final Logger logger = LoggerFactory.getLogger(SparkSaslServer.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(SparkSaslServer.class); /** * This is passed as the server name when creating the sasl client/server. diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java index e49141c7b9679..cc0bed7ed5b6d 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java @@ -25,9 +25,11 @@ import io.netty.channel.ChannelFutureListener; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.SimpleChannelInboundHandler; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.MDC; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.protocol.ChunkFetchFailure; @@ -49,7 +51,8 @@ * registering executors, or waiting for response for an OpenBlocks messages. 
*/ public class ChunkFetchRequestHandler extends SimpleChannelInboundHandler { - private static final Logger logger = LoggerFactory.getLogger(ChunkFetchRequestHandler.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ChunkFetchRequestHandler.class); private final TransportClient client; private final StreamManager streamManager; @@ -70,7 +73,8 @@ public ChunkFetchRequestHandler( @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { - logger.warn("Exception in connection from " + getRemoteAddress(ctx.channel()), cause); + logger.warn("Exception in connection from {}", cause, + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(ctx.channel()))); ctx.close(); } @@ -92,7 +96,8 @@ public void processFetchRequest( long chunksBeingTransferred = streamManager.chunksBeingTransferred(); if (chunksBeingTransferred >= maxChunksBeingTransferred) { logger.warn("The number of chunks being transferred {} is above {}, close the connection.", - chunksBeingTransferred, maxChunksBeingTransferred); + MDC.of(LogKeys.NUM_CHUNKS$.MODULE$, chunksBeingTransferred), + MDC.of(LogKeys.MAX_NUM_CHUNKS$.MODULE$, maxChunksBeingTransferred)); channel.close(); return; } @@ -105,8 +110,9 @@ public void processFetchRequest( throw new IllegalStateException("Chunk was not found"); } } catch (Exception e) { - logger.error(String.format("Error opening block %s for request from %s", - msg.streamChunkId, getRemoteAddress(channel)), e); + logger.error("Error opening block {} for request from {}", e, + MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, msg.streamChunkId), + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); respond(channel, new ChunkFetchFailure(msg.streamChunkId, Throwables.getStackTraceAsString(e))); return; @@ -145,8 +151,10 @@ private ChannelFuture respond( if (future.isSuccess()) { logger.trace("Sent result {} to client {}", result, remoteAddress); } else { - logger.error(String.format("Error sending result %s to %s; closing connection", - result, remoteAddress), future.cause()); + logger.error("Error sending result {} to {}; closing connection", + future.cause(), + MDC.of(LogKeys.RESULT$.MODULE$, result), + MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress)); channel.close(); } }); diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java index ace409eb3f48d..f322293782dee 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java @@ -28,9 +28,9 @@ import io.netty.channel.Channel; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.TransportClient; @@ -39,7 +39,8 @@ * individually fetched as chunks by the client. Each registered buffer is one chunk. 
*/ public class OneForOneStreamManager extends StreamManager { - private static final Logger logger = LoggerFactory.getLogger(OneForOneStreamManager.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(OneForOneStreamManager.class); private final AtomicLong nextStreamId; private final ConcurrentHashMap streams; diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/RpcHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/RpcHandler.java index 0b894277561fd..a7c38917d17f6 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/RpcHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/RpcHandler.java @@ -19,9 +19,8 @@ import java.nio.ByteBuffer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.client.MergedBlockMetaResponseCallback; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.StreamCallbackWithID; @@ -123,7 +122,7 @@ public void exceptionCaught(Throwable cause, TransportClient client) { } private static class OneWayRpcCallback implements RpcResponseCallback { - private static final Logger logger = LoggerFactory.getLogger(OneWayRpcCallback.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(OneWayRpcCallback.class); @Override public void onSuccess(ByteBuffer response) { diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java index a504e8c20a7f7..283f0f0a431fd 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java @@ -22,9 +22,11 @@ import io.netty.handler.timeout.IdleState; import io.netty.handler.timeout.IdleStateEvent; import org.apache.spark.network.TransportContext; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.client.TransportResponseHandler; import org.apache.spark.network.protocol.ChunkFetchRequest; @@ -51,7 +53,8 @@ * timeout if the client is continuously sending but getting no responses, for simplicity. 
*/ public class TransportChannelHandler extends SimpleChannelInboundHandler { - private static final Logger logger = LoggerFactory.getLogger(TransportChannelHandler.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(TransportChannelHandler.class); private final TransportClient client; private final TransportResponseHandler responseHandler; @@ -84,8 +87,8 @@ public TransportClient getClient() { @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { - logger.warn("Exception in connection from " + getRemoteAddress(ctx.channel()), - cause); + logger.warn("Exception in connection from {}", cause, + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(ctx.channel()))); requestHandler.exceptionCaught(cause); responseHandler.exceptionCaught(cause); ctx.close(); @@ -165,7 +168,9 @@ public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exc logger.error("Connection to {} has been quiet for {} ms while there are outstanding " + "requests. Assuming connection is dead; please adjust" + " spark.{}.io.connectionTimeout if this is wrong.", - address, requestTimeoutNs / 1000 / 1000, transportContext.getConf().getModuleName()); + MDC.of(LogKeys.HOST_PORT$.MODULE$, address), + MDC.of(LogKeys.TIMEOUT$.MODULE$, requestTimeoutNs / 1000 / 1000), + MDC.of(LogKeys.MODULE_NAME$.MODULE$, transportContext.getConf().getModuleName())); client.timeOut(); ctx.close(); } else if (closeIdleConnections) { diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java index c5e6da4cf6c7d..687c3040ed083 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java @@ -25,9 +25,10 @@ import io.netty.channel.Channel; import io.netty.channel.ChannelFuture; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NioManagedBuffer; import org.apache.spark.network.client.*; @@ -45,7 +46,8 @@ */ public class TransportRequestHandler extends MessageHandler { - private static final Logger logger = LoggerFactory.getLogger(TransportRequestHandler.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(TransportRequestHandler.class); /** The Netty channel that this handler is associated with. 
*/ private final Channel channel; @@ -130,7 +132,8 @@ private void processStreamRequest(final StreamRequest req) { long chunksBeingTransferred = streamManager.chunksBeingTransferred(); if (chunksBeingTransferred >= maxChunksBeingTransferred) { logger.warn("The number of chunks being transferred {} is above {}, close the connection.", - chunksBeingTransferred, maxChunksBeingTransferred); + MDC.of(LogKeys.NUM_CHUNKS$.MODULE$, chunksBeingTransferred), + MDC.of(LogKeys.MAX_NUM_CHUNKS$.MODULE$, maxChunksBeingTransferred)); channel.close(); return; } @@ -139,8 +142,9 @@ private void processStreamRequest(final StreamRequest req) { try { buf = streamManager.openStream(req.streamId); } catch (Exception e) { - logger.error(String.format( - "Error opening stream %s for request from %s", req.streamId, getRemoteAddress(channel)), e); + logger.error("Error opening stream {} for request from {}", e, + MDC.of(LogKeys.STREAM_ID$.MODULE$, req.streamId), + MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); respond(new StreamFailure(req.streamId, Throwables.getStackTraceAsString(e))); return; } @@ -172,7 +176,8 @@ public void onFailure(Throwable e) { } }); } catch (Exception e) { - logger.error("Error while invoking RpcHandler#receive() on RPC id " + req.requestId, e); + logger.error("Error while invoking RpcHandler#receive() on RPC id {}", e, + MDC.of(LogKeys.REQUEST_ID$.MODULE$, req.requestId)); respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); } finally { req.body().release(); @@ -257,7 +262,8 @@ public String getID() { respond(new RpcResponse(req.requestId, new NioManagedBuffer(blockPushNonFatalFailure.getResponse()))); } else { - logger.error("Error while invoking RpcHandler#receive() on RPC id " + req.requestId, e); + logger.error("Error while invoking RpcHandler#receive() on RPC id {}", e, + MDC.of(LogKeys.REQUEST_ID$.MODULE$, req.requestId)); respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); } // We choose to totally fail the channel, rather than trying to recover as we do in other @@ -298,7 +304,9 @@ public void onFailure(Throwable e) { }); } catch (Exception e) { logger.error("Error while invoking receiveMergeBlockMetaReq() for appId {} shuffleId {} " - + "reduceId {}", req.appId, req.shuffleId, req.appId, e); + + "reduceId {}", e, MDC.of(LogKeys.APP_ID$.MODULE$, req.appId), + MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, req.shuffleId), + MDC.of(LogKeys.REDUCE_ID$.MODULE$, req.reduceId)); respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); } } @@ -313,8 +321,9 @@ private ChannelFuture respond(Encodable result) { if (future.isSuccess()) { logger.trace("Sent result {} to client {}", result, remoteAddress); } else { - logger.error(String.format("Error sending result %s to %s; closing connection", - result, remoteAddress), future.cause()); + logger.error("Error sending result {} to {}; closing connection", future.cause(), + MDC.of(LogKeys.RESULT$.MODULE$, result), + MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress)); channel.close(); } }); diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java index 6f2e4b8a502a2..d1a19652f5649 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java @@ -34,9 +34,9 @@ import io.netty.channel.EventLoopGroup; import 
io.netty.channel.socket.SocketChannel; import org.apache.commons.lang3.SystemUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.TransportContext; import org.apache.spark.network.util.*; @@ -44,7 +44,7 @@ * Server for the efficient, low-level streaming service. */ public class TransportServer implements Closeable { - private static final Logger logger = LoggerFactory.getLogger(TransportServer.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(TransportServer.class); private final TransportContext context; private final TransportConf conf; diff --git a/common/network-common/src/main/java/org/apache/spark/network/ssl/ReloadingX509TrustManager.java b/common/network-common/src/main/java/org/apache/spark/network/ssl/ReloadingX509TrustManager.java index 18618e7d5c8be..09609d0ac8ad9 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/ssl/ReloadingX509TrustManager.java +++ b/common/network-common/src/main/java/org/apache/spark/network/ssl/ReloadingX509TrustManager.java @@ -30,9 +30,8 @@ import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; /** * A {@link TrustManager} implementation that reloads its configuration when @@ -46,7 +45,8 @@ public final class ReloadingX509TrustManager implements X509TrustManager, Runnable { - private static final Logger logger = LoggerFactory.getLogger(ReloadingX509TrustManager.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ReloadingX509TrustManager.class); private final String type; private final File file; @@ -211,13 +211,13 @@ public void run() { this.reloadCount += 1; } catch (Exception ex) { logger.warn( - "Could not load truststore (keep using existing one) : " + ex.toString(), + "Could not load truststore (keep using existing one) : ", ex ); } } } catch (IOException ex) { - logger.warn("Could not check whether truststore needs reloading: " + ex.toString(), ex); + logger.warn("Could not check whether truststore needs reloading: ", ex); } needsReloadCheckCounts++; } diff --git a/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java b/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java index 82951d2130112..a2e42e3eb39f6 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java +++ b/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java @@ -49,13 +49,12 @@ import io.netty.handler.ssl.SslContextBuilder; import io.netty.handler.ssl.SslProvider; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.util.JavaUtils; public class SSLFactory { - private static final Logger logger = LoggerFactory.getLogger(SSLFactory.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(SSLFactory.class); /** * For a configuration specifying keystore/truststore files @@ -136,7 +135,7 @@ public void destroy() { try { manager.destroy(); } catch (InterruptedException ex) { - logger.info("Interrupted while destroying trust manager: " + ex.toString(), ex); + logger.info("Interrupted while destroying trust manager: ", ex); } 
} } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/ByteBufferWriteableChannel.java b/common/network-common/src/main/java/org/apache/spark/network/util/ByteBufferWriteableChannel.java new file mode 100644 index 0000000000000..b20240cfcaa6d --- /dev/null +++ b/common/network-common/src/main/java/org/apache/spark/network/util/ByteBufferWriteableChannel.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.util; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; +import java.nio.channels.WritableByteChannel; + +public class ByteBufferWriteableChannel implements WritableByteChannel { + private final ByteBuffer destination; + private boolean open; + + public ByteBufferWriteableChannel(ByteBuffer destination) { + this.destination = destination; + this.open = true; + } + + @Override + public int write(ByteBuffer src) throws IOException { + if (!isOpen()) { + throw new ClosedChannelException(); + } + int bytesToWrite = Math.min(src.remaining(), destination.remaining()); + // Destination buffer is full + if (bytesToWrite == 0) { + return 0; + } + ByteBuffer temp = src.slice().limit(bytesToWrite); + destination.put(temp); + src.position(src.position() + bytesToWrite); + return bytesToWrite; + } + + @Override + public boolean isOpen() { + return open; + } + + @Override + public void close() { + open = false; + } +} diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/DBProvider.java b/common/network-common/src/main/java/org/apache/spark/network/util/DBProvider.java index 5a25bdda23355..94a64b3f4037c 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/DBProvider.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/DBProvider.java @@ -21,9 +21,9 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.shuffledb.DB; import org.apache.spark.network.shuffledb.DBBackend; import org.apache.spark.network.shuffledb.LevelDB; @@ -31,7 +31,7 @@ import org.apache.spark.network.shuffledb.StoreVersion; public class DBProvider { - private static final Logger logger = LoggerFactory.getLogger(DBProvider.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(DBProvider.class); public static DB initDB( DBBackend dbBackend, File dbFile, diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java 
b/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java index aa8be0c663bc2..391931961a474 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java @@ -26,16 +26,18 @@ import org.fusesource.leveldbjni.internal.NativeDB; import org.iq80.leveldb.DB; import org.iq80.leveldb.Options; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.shuffledb.StoreVersion; /** * LevelDB utility class available in the network package. */ public class LevelDBProvider { - private static final Logger logger = LoggerFactory.getLogger(LevelDBProvider.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(LevelDBProvider.class); public static DB initLevelDB(File dbFile, StoreVersion version, ObjectMapper mapper) throws IOException { @@ -48,7 +50,7 @@ public static DB initLevelDB(File dbFile, StoreVersion version, ObjectMapper map tmpDb = JniDBFactory.factory.open(dbFile, options); } catch (NativeDB.DBException e) { if (e.isNotFound() || e.getMessage().contains(" does not exist ")) { - logger.info("Creating state database at " + dbFile); + logger.info("Creating state database at {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile)); options.createIfMissing(true); try { tmpDb = JniDBFactory.factory.open(dbFile, options); @@ -58,17 +60,17 @@ public static DB initLevelDB(File dbFile, StoreVersion version, ObjectMapper map } else { // the leveldb file seems to be corrupt somehow. Lets just blow it away and create a new // one, so we can keep processing new apps - logger.error("error opening leveldb file {}. Creating new file, will not be able to " + - "recover state for existing applications", dbFile, e); + logger.error("error opening leveldb file {}. 
Creating new file, will not be able to " + + "recover state for existing applications", e, MDC.of(LogKeys.PATH$.MODULE$, dbFile)); if (dbFile.isDirectory()) { for (File f : dbFile.listFiles()) { if (!f.delete()) { - logger.warn("error deleting {}", f.getPath()); + logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, f.getPath())); } } } if (!dbFile.delete()) { - logger.warn("error deleting {}", dbFile.getPath()); + logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile.getPath())); } options.createIfMissing(true); try { @@ -99,7 +101,7 @@ static DB initLevelDB(File file) throws IOException { } private static class LevelDBLogger implements org.iq80.leveldb.Logger { - private static final Logger LOG = LoggerFactory.getLogger(LevelDBLogger.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(LevelDBLogger.class); @Override public void log(String message) { diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/NettyLogger.java b/common/network-common/src/main/java/org/apache/spark/network/util/NettyLogger.java index cb66784e41918..a7063151fae89 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/NettyLogger.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/NettyLogger.java @@ -25,11 +25,12 @@ import io.netty.channel.ChannelHandlerContext; import io.netty.handler.logging.LoggingHandler; import io.netty.handler.logging.LogLevel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; public class NettyLogger { - private static final Logger logger = LoggerFactory.getLogger(NettyLogger.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(NettyLogger.class); /** A Netty LoggingHandler which does not dump the message contents. 
*/ private static class NoContentLoggingHandler extends LoggingHandler { diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java b/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java index f3b7b48355a06..1753c124c9935 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java @@ -24,9 +24,11 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.annotations.VisibleForTesting; import org.rocksdb.*; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.shuffledb.StoreVersion; /** @@ -38,7 +40,7 @@ public class RocksDBProvider { org.rocksdb.RocksDB.loadLibrary(); } - private static final Logger logger = LoggerFactory.getLogger(RocksDBProvider.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(RocksDBProvider.class); public static RocksDB initRockDB(File dbFile, StoreVersion version, ObjectMapper mapper) throws IOException { @@ -65,7 +67,7 @@ public static RocksDB initRockDB(File dbFile, StoreVersion version, ObjectMapper tmpDb = RocksDB.open(dbOptions, dbFile.toString()); } catch (RocksDBException e) { if (e.getStatus().getCode() == Status.Code.NotFound) { - logger.info("Creating state database at " + dbFile); + logger.info("Creating state database at {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile)); dbOptions.setCreateIfMissing(true); try { tmpDb = RocksDB.open(dbOptions, dbFile.toString()); @@ -76,16 +78,16 @@ public static RocksDB initRockDB(File dbFile, StoreVersion version, ObjectMapper // the RocksDB file seems to be corrupt somehow. Let's just blow it away and create // a new one, so we can keep processing new apps logger.error("error opening rocksdb file {}. 
Creating new file, will not be able to " + - "recover state for existing applications", dbFile, e); + "recover state for existing applications", e, MDC.of(LogKeys.PATH$.MODULE$, dbFile)); if (dbFile.isDirectory()) { for (File f : Objects.requireNonNull(dbFile.listFiles())) { if (!f.delete()) { - logger.warn("error deleting {}", f.getPath()); + logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, f.getPath())); } } } if (!dbFile.delete()) { - logger.warn("error deleting {}", dbFile.getPath()); + logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile.getPath())); } dbOptions.setCreateIfMissing(true); try { @@ -133,10 +135,10 @@ static RocksDB initRocksDB(File file) throws IOException { } private static class RocksDBLogger extends org.rocksdb.Logger { - private static final Logger LOG = LoggerFactory.getLogger(RocksDBLogger.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(RocksDBLogger.class); RocksDBLogger(Options options) { - super(options); + super(options.infoLogLevel()); } @Override diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java index e9846be20c9b0..628de9e780337 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java @@ -18,75 +18,76 @@ package org.apache.spark.network.crypto; import java.nio.ByteBuffer; -import java.nio.channels.WritableByteChannel; import java.security.GeneralSecurityException; -import java.util.Collections; -import java.util.Random; +import java.util.Map; +import com.google.common.collect.ImmutableMap; import com.google.crypto.tink.subtle.Hex; -import io.netty.buffer.ByteBuf; -import io.netty.buffer.Unpooled; -import io.netty.channel.FileRegion; -import org.apache.spark.network.util.ByteArrayWritableChannel; -import org.apache.spark.network.util.ConfigProvider; -import org.apache.spark.network.util.MapConfigProvider; -import org.apache.spark.network.util.TransportConf; +import org.apache.spark.network.util.*; + import static org.junit.jupiter.api.Assertions.*; -import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; -import static org.mockito.Mockito.*; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; - -public class AuthEngineSuite { - private static final String clientPrivate = - "efe6b68b3fce92158e3637f6ef9d937e75558928dd4b401de04b43d300a73186"; - private static final String clientChallengeHex = - "fb00000005617070496400000010890b6e960f48e998777267a7e4e623220000003c48ad7dc7ec9466da9" + - "3bda9f11488dc9404050e02c661d87d67c782444944c6e369b27e0a416c30845a2d9e64271511ca98b41d" + - "65f8c426e18ff380f6"; - private static final String serverResponseHex = - "fb00000005617070496400000010708451c9dd2792c97c1ca66e6df449ef0000003c64fe899ecdaf458d4" + - "e25e9d5c5a380b8e6d1a184692fac065ed84f8592c18e9629f9c636809dca2ffc041f20346eb53db78738" + - "08ecad08b46b5ee3ff"; - private static final String derivedKey = "2d6e7a9048c8265c33a8f3747bfcc84c"; +abstract class AuthEngineSuite { + static final String clientPrivate = + "efe6b68b3fce92158e3637f6ef9d937e75558928dd4b401de04b43d300a73186"; + static final String clientChallengeHex = + "fb00000005617070496400000010890b6e960f48e998777267a7e4e623220000003c48ad7dc7ec9466da9" + + 
"3bda9f11488dc9404050e02c661d87d67c782444944c6e369b27e0a416c30845a2d9e64271511ca98b41d" + + "65f8c426e18ff380f6"; + static final String serverResponseHex = + "fb00000005617070496400000010708451c9dd2792c97c1ca66e6df449ef0000003c64fe899ecdaf458d4" + + "e25e9d5c5a380b8e6d1a184692fac065ed84f8592c18e9629f9c636809dca2ffc041f20346eb53db78738" + + "08ecad08b46b5ee3ff"; + static final String derivedKeyId = + "de04fd52d71040ed9d260579dacfdf4f5695f991ce8ddb1dde05a7335880906e"; // This key would have been derived for version 1.0 protocol that did not run a final HKDF round. - private static final String unsafeDerivedKey = - "31963f15a320d5c90333f7ecf5cf3a31c7eaf151de07fef8494663a9f47cfd31"; - - private static final String inputIv = "fc6a5dc8b90a9dad8f54f08b51a59ed2"; - private static final String outputIv = "a72709baf00785cad6329ce09f631f71"; - private static TransportConf conf; - - @BeforeAll - public static void setUp() { - ConfigProvider v2Provider = new MapConfigProvider(Collections.singletonMap( - "spark.network.crypto.authEngineVersion", "2")); - conf = new TransportConf("rpc", v2Provider); + static final String unsafeDerivedKey = + "31963f15a320d5c90333f7ecf5cf3a31c7eaf151de07fef8494663a9f47cfd31"; + static TransportConf conf; + + static TransportConf getConf(int authEngineVerison, boolean useCtr) { + String authEngineVersion = (authEngineVerison == 1) ? "1" : "2"; + String mode = useCtr ? "AES/CTR/NoPadding" : "AES/GCM/NoPadding"; + Map confMap = ImmutableMap.of( + "spark.network.crypto.enabled", "true", + "spark.network.crypto.authEngineVersion", authEngineVersion, + "spark.network.crypto.cipher", mode + ); + ConfigProvider v2Provider = new MapConfigProvider(confMap); + return new TransportConf("rpc", v2Provider); } @Test public void testAuthEngine() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); AuthEngine server = new AuthEngine("appId", "secret", conf)) { AuthMessage clientChallenge = client.challenge(); AuthMessage serverResponse = server.response(clientChallenge); client.deriveSessionCipher(clientChallenge, serverResponse); - TransportCipher serverCipher = server.sessionCipher(); TransportCipher clientCipher = client.sessionCipher(); + assertEquals(clientCipher.getKeyId(), serverCipher.getKeyId()); + } + } - assertArrayEquals(serverCipher.getInputIv(), clientCipher.getOutputIv()); - assertArrayEquals(serverCipher.getOutputIv(), clientCipher.getInputIv()); - assertEquals(serverCipher.getKey(), clientCipher.getKey()); + @Test + public void testFixedChallengeResponse() throws Exception { + try (AuthEngine client = new AuthEngine("appId", "secret", conf)) { + byte[] clientPrivateKey = Hex.decode(clientPrivate); + client.setClientPrivateKey(clientPrivateKey); + AuthMessage clientChallenge = + AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(clientChallengeHex))); + AuthMessage serverResponse = + AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(serverResponseHex))); + // Verify that the client will accept an old transcript. 
+ client.deriveSessionCipher(clientChallenge, serverResponse); + assertEquals(client.sessionCipher().getKeyId(), derivedKeyId); } } @Test public void testCorruptChallengeAppId() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); AuthEngine server = new AuthEngine("appId", "secret", conf)) { AuthMessage clientChallenge = client.challenge(); @@ -98,7 +99,6 @@ public void testCorruptChallengeAppId() throws Exception { @Test public void testCorruptChallengeSalt() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); AuthEngine server = new AuthEngine("appId", "secret", conf)) { AuthMessage clientChallenge = client.challenge(); @@ -109,7 +109,6 @@ public void testCorruptChallengeSalt() throws Exception { @Test public void testCorruptChallengeCiphertext() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); AuthEngine server = new AuthEngine("appId", "secret", conf)) { AuthMessage clientChallenge = client.challenge(); @@ -120,7 +119,6 @@ public void testCorruptChallengeCiphertext() throws Exception { @Test public void testCorruptResponseAppId() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); AuthEngine server = new AuthEngine("appId", "secret", conf)) { AuthMessage clientChallenge = client.challenge(); @@ -134,20 +132,18 @@ public void testCorruptResponseAppId() throws Exception { @Test public void testCorruptResponseSalt() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); AuthEngine server = new AuthEngine("appId", "secret", conf)) { AuthMessage clientChallenge = client.challenge(); AuthMessage serverResponse = server.response(clientChallenge); serverResponse.salt()[0] ^= 1; assertThrows(GeneralSecurityException.class, - () -> client.deriveSessionCipher(clientChallenge, serverResponse)); + () -> client.deriveSessionCipher(clientChallenge, serverResponse)); } } @Test public void testCorruptServerCiphertext() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); AuthEngine server = new AuthEngine("appId", "secret", conf)) { AuthMessage clientChallenge = client.challenge(); @@ -169,45 +165,6 @@ public void testFixedChallenge() throws Exception { } } - @Test - public void testFixedChallengeResponse() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf)) { - byte[] clientPrivateKey = Hex.decode(clientPrivate); - client.setClientPrivateKey(clientPrivateKey); - AuthMessage clientChallenge = - AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(clientChallengeHex))); - AuthMessage serverResponse = - AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(serverResponseHex))); - // Verify that the client will accept an old transcript. 
- client.deriveSessionCipher(clientChallenge, serverResponse); - TransportCipher clientCipher = client.sessionCipher(); - assertEquals(Hex.encode(clientCipher.getKey().getEncoded()), derivedKey); - assertEquals(Hex.encode(clientCipher.getInputIv()), inputIv); - assertEquals(Hex.encode(clientCipher.getOutputIv()), outputIv); - } - } - - @Test - public void testFixedChallengeResponseUnsafeVersion() throws Exception { - ConfigProvider v1Provider = new MapConfigProvider(Collections.singletonMap( - "spark.network.crypto.authEngineVersion", "1")); - TransportConf v1Conf = new TransportConf("rpc", v1Provider); - try (AuthEngine client = new AuthEngine("appId", "secret", v1Conf)) { - byte[] clientPrivateKey = Hex.decode(clientPrivate); - client.setClientPrivateKey(clientPrivateKey); - AuthMessage clientChallenge = - AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(clientChallengeHex))); - AuthMessage serverResponse = - AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(serverResponseHex))); - // Verify that the client will accept an old transcript. - client.deriveSessionCipher(clientChallenge, serverResponse); - TransportCipher clientCipher = client.sessionCipher(); - assertEquals(Hex.encode(clientCipher.getKey().getEncoded()), unsafeDerivedKey); - assertEquals(Hex.encode(clientCipher.getInputIv()), inputIv); - assertEquals(Hex.encode(clientCipher.getOutputIv()), outputIv); - } - } - @Test public void testMismatchedSecret() throws Exception { try (AuthEngine client = new AuthEngine("appId", "secret", conf); @@ -216,70 +173,4 @@ public void testMismatchedSecret() throws Exception { assertThrows(GeneralSecurityException.class, () -> server.response(clientChallenge)); } } - - @Test - public void testEncryptedMessage() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); - AuthEngine server = new AuthEngine("appId", "secret", conf)) { - AuthMessage clientChallenge = client.challenge(); - AuthMessage serverResponse = server.response(clientChallenge); - client.deriveSessionCipher(clientChallenge, serverResponse); - - TransportCipher cipher = server.sessionCipher(); - TransportCipher.EncryptionHandler handler = new TransportCipher.EncryptionHandler(cipher); - - byte[] data = new byte[TransportCipher.STREAM_BUFFER_SIZE + 1]; - new Random().nextBytes(data); - ByteBuf buf = Unpooled.wrappedBuffer(data); - - ByteArrayWritableChannel channel = new ByteArrayWritableChannel(data.length); - TransportCipher.EncryptedMessage emsg = handler.createEncryptedMessage(buf); - while (emsg.transferred() < emsg.count()) { - emsg.transferTo(channel, emsg.transferred()); - } - assertEquals(data.length, channel.length()); - } - } - - @Test - public void testEncryptedMessageWhenTransferringZeroBytes() throws Exception { - try (AuthEngine client = new AuthEngine("appId", "secret", conf); - AuthEngine server = new AuthEngine("appId", "secret", conf)) { - AuthMessage clientChallenge = client.challenge(); - AuthMessage serverResponse = server.response(clientChallenge); - client.deriveSessionCipher(clientChallenge, serverResponse); - - TransportCipher cipher = server.sessionCipher(); - TransportCipher.EncryptionHandler handler = new TransportCipher.EncryptionHandler(cipher); - - int testDataLength = 4; - FileRegion region = mock(FileRegion.class); - when(region.count()).thenReturn((long) testDataLength); - // Make `region.transferTo` do nothing in first call and transfer 4 bytes in the second one. 
- when(region.transferTo(any(), anyLong())).thenAnswer(new Answer() { - - private boolean firstTime = true; - - @Override - public Long answer(InvocationOnMock invocationOnMock) throws Throwable { - if (firstTime) { - firstTime = false; - return 0L; - } else { - WritableByteChannel channel = invocationOnMock.getArgument(0); - channel.write(ByteBuffer.wrap(new byte[testDataLength])); - return (long) testDataLength; - } - } - }); - - TransportCipher.EncryptedMessage emsg = handler.createEncryptedMessage(region); - ByteArrayWritableChannel channel = new ByteArrayWritableChannel(testDataLength); - // "transferTo" should act correctly when the underlying FileRegion transfers 0 bytes. - assertEquals(0L, emsg.transferTo(channel, emsg.transferred())); - assertEquals(testDataLength, emsg.transferTo(channel, emsg.transferred())); - assertEquals(emsg.transferred(), emsg.count()); - assertEquals(4, channel.length()); - } - } } diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java index 90f6c874a6c84..cb5929f7c65b4 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java @@ -49,7 +49,7 @@ public class AuthIntegrationSuite { private AuthTestCtx ctx; @AfterEach - public void cleanUp() throws Exception { + public void cleanUp() { if (ctx != null) { ctx.close(); } @@ -57,8 +57,8 @@ public void cleanUp() throws Exception { } @Test - public void testNewAuth() throws Exception { - ctx = new AuthTestCtx(); + public void testNewCtrAuth() throws Exception { + ctx = new AuthTestCtx(new DummyRpcHandler(), "AES/CTR/NoPadding"); ctx.createServer("secret"); ctx.createClient("secret"); @@ -68,8 +68,28 @@ public void testNewAuth() throws Exception { } @Test - public void testAuthFailure() throws Exception { - ctx = new AuthTestCtx(); + public void testNewGcmAuth() throws Exception { + ctx = new AuthTestCtx(new DummyRpcHandler(), "AES/GCM/NoPadding"); + ctx.createServer("secret"); + ctx.createClient("secret"); + ByteBuffer reply = ctx.client.sendRpcSync(JavaUtils.stringToBytes("Ping"), 5000); + assertEquals("Pong", JavaUtils.bytesToString(reply)); + assertNull(ctx.authRpcHandler.saslHandler); + } + + @Test + public void testCtrAuthFailure() throws Exception { + ctx = new AuthTestCtx(new DummyRpcHandler(), "AES/CTR/NoPadding"); + ctx.createServer("server"); + + assertThrows(Exception.class, () -> ctx.createClient("client")); + assertFalse(ctx.authRpcHandler.isAuthenticated()); + assertFalse(ctx.serverChannel.isActive()); + } + + @Test + public void testGcmAuthFailure() throws Exception { + ctx = new AuthTestCtx(new DummyRpcHandler(), "AES/GCM/NoPadding"); ctx.createServer("server"); assertThrows(Exception.class, () -> ctx.createClient("client")); @@ -100,7 +120,7 @@ public void testSaslClientFallback() throws Exception { } @Test - public void testAuthReplay() throws Exception { + public void testCtrAuthReplay() throws Exception { // This test covers the case where an attacker replays a challenge message sniffed from the // network, but doesn't know the actual secret. The server should close the connection as // soon as a message is sent after authentication is performed. 
This is emulated by removing @@ -110,16 +130,16 @@ public void testAuthReplay() throws Exception { ctx.createClient("secret"); assertNotNull(ctx.client.getChannel().pipeline() - .remove(TransportCipher.ENCRYPTION_HANDLER_NAME)); + .remove(CtrTransportCipher.ENCRYPTION_HANDLER_NAME)); assertThrows(Exception.class, () -> ctx.client.sendRpcSync(JavaUtils.stringToBytes("Ping"), 5000)); assertTrue(ctx.authRpcHandler.isAuthenticated()); } @Test - public void testLargeMessageEncryption() throws Exception { + public void testLargeCtrMessageEncryption() throws Exception { // Use a big length to create a message that cannot be put into the encryption buffer completely - final int testErrorMessageLength = TransportCipher.STREAM_BUFFER_SIZE; + final int testErrorMessageLength = CtrTransportCipher.STREAM_BUFFER_SIZE; ctx = new AuthTestCtx(new RpcHandler() { @Override public void receive( @@ -157,6 +177,23 @@ public void testValidMergedBlockMetaReqHandler() throws Exception { assertNotNull(ctx.authRpcHandler.getMergedBlockMetaReqHandler()); } + private static class DummyRpcHandler extends RpcHandler { + @Override + public void receive( + TransportClient client, + ByteBuffer message, + RpcResponseCallback callback) { + String messageString = JavaUtils.bytesToString(message); + assertEquals("Ping", messageString); + callback.onSuccess(JavaUtils.stringToBytes("Pong")); + } + + @Override + public StreamManager getStreamManager() { + return null; + } + } + private static class AuthTestCtx { private final String appId = "testAppId"; @@ -169,25 +206,17 @@ private static class AuthTestCtx { volatile AuthRpcHandler authRpcHandler; AuthTestCtx() throws Exception { - this(new RpcHandler() { - @Override - public void receive( - TransportClient client, - ByteBuffer message, - RpcResponseCallback callback) { - assertEquals("Ping", JavaUtils.bytesToString(message)); - callback.onSuccess(JavaUtils.stringToBytes("Pong")); - } - - @Override - public StreamManager getStreamManager() { - return null; - } - }); + this(new DummyRpcHandler()); } AuthTestCtx(RpcHandler rpcHandler) throws Exception { - Map testConf = ImmutableMap.of("spark.network.crypto.enabled", "true"); + this(rpcHandler, "AES/CTR/NoPadding"); + } + + AuthTestCtx(RpcHandler rpcHandler, String mode) throws Exception { + Map testConf = ImmutableMap.of( + "spark.network.crypto.enabled", "true", + "spark.network.crypto.cipher", mode); this.conf = new TransportConf("rpc", new MapConfigProvider(testConf)); this.ctx = new TransportContext(conf, rpcHandler); } diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/CtrAuthEngineSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/CtrAuthEngineSuite.java new file mode 100644 index 0000000000000..c353ee392ff4f --- /dev/null +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/CtrAuthEngineSuite.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.crypto; + +import com.google.crypto.tink.subtle.Hex; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.FileRegion; +import org.apache.spark.network.util.ByteArrayWritableChannel; +import org.apache.spark.network.util.TransportConf; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; + +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.Random; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +public class CtrAuthEngineSuite extends AuthEngineSuite { + private static final String inputIv = "fc6a5dc8b90a9dad8f54f08b51a59ed2"; + private static final String outputIv = "a72709baf00785cad6329ce09f631f71"; + + @BeforeAll + public static void setUp() { + conf = getConf(2, true); + } + + @Test + public void testAuthEngine() throws Exception { + try (AuthEngine client = new AuthEngine("appId", "secret", conf); + AuthEngine server = new AuthEngine("appId", "secret", conf)) { + AuthMessage clientChallenge = client.challenge(); + AuthMessage serverResponse = server.response(clientChallenge); + client.deriveSessionCipher(clientChallenge, serverResponse); + + TransportCipher serverCipher = server.sessionCipher(); + TransportCipher clientCipher = client.sessionCipher(); + assert(clientCipher instanceof CtrTransportCipher); + assert(serverCipher instanceof CtrTransportCipher); + CtrTransportCipher ctrClient = (CtrTransportCipher) clientCipher; + CtrTransportCipher ctrServer = (CtrTransportCipher) serverCipher; + assertArrayEquals(ctrServer.getInputIv(), ctrClient.getOutputIv()); + assertArrayEquals(ctrServer.getOutputIv(), ctrClient.getInputIv()); + assertEquals(ctrServer.getKey(), ctrClient.getKey()); + } + } + + @Test + public void testCtrFixedChallengeIvResponse() throws Exception { + try (AuthEngine client = new AuthEngine("appId", "secret", conf)) { + byte[] clientPrivateKey = Hex.decode(clientPrivate); + client.setClientPrivateKey(clientPrivateKey); + AuthMessage clientChallenge = + AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(clientChallengeHex))); + AuthMessage serverResponse = + AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(serverResponseHex))); + // Verify that the client will accept an old transcript. 
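+      // The inputIv/outputIv constants above, together with derivedKeyId from the base suite, are
+      // fixed expected values for this v2 AES/CTR transcript: if the key or IV derivation ever
+      // changed, these assertions would fail even though a freshly negotiated client/server pair
+      // might still agree with each other.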
+ client.deriveSessionCipher(clientChallenge, serverResponse); + TransportCipher clientCipher = client.sessionCipher(); + assertEquals(clientCipher.getKeyId(), derivedKeyId); + assert(clientCipher instanceof CtrTransportCipher); + CtrTransportCipher ctrTransportCipher = (CtrTransportCipher) clientCipher; + assertEquals(Hex.encode(ctrTransportCipher.getInputIv()), inputIv); + assertEquals(Hex.encode(ctrTransportCipher.getOutputIv()), outputIv); + } + } + + @Test + public void testFixedChallengeResponseUnsafeVersion() throws Exception { + TransportConf v1Conf = getConf(1, true); + try (AuthEngine client = new AuthEngine("appId", "secret", v1Conf)) { + byte[] clientPrivateKey = Hex.decode(clientPrivate); + client.setClientPrivateKey(clientPrivateKey); + AuthMessage clientChallenge = + AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(clientChallengeHex))); + AuthMessage serverResponse = + AuthMessage.decodeMessage(ByteBuffer.wrap(Hex.decode(serverResponseHex))); + // Verify that the client will accept an old transcript. + client.deriveSessionCipher(clientChallenge, serverResponse); + TransportCipher clientCipher = client.sessionCipher(); + assert(clientCipher instanceof CtrTransportCipher); + CtrTransportCipher ctrTransportCipher = (CtrTransportCipher) clientCipher; + assertEquals(Hex.encode(ctrTransportCipher.getKey().getEncoded()), unsafeDerivedKey); + assertEquals(Hex.encode(ctrTransportCipher.getInputIv()), inputIv); + assertEquals(Hex.encode(ctrTransportCipher.getOutputIv()), outputIv); + } + } + + @Test + public void testCtrEncryptedMessage() throws Exception { + try (AuthEngine client = new AuthEngine("appId", "secret", conf); + AuthEngine server = new AuthEngine("appId", "secret", conf)) { + AuthMessage clientChallenge = client.challenge(); + AuthMessage serverResponse = server.response(clientChallenge); + client.deriveSessionCipher(clientChallenge, serverResponse); + + TransportCipher clientCipher = server.sessionCipher(); + assert(clientCipher instanceof CtrTransportCipher); + CtrTransportCipher ctrTransportCipher = (CtrTransportCipher) clientCipher; + CtrTransportCipher.EncryptionHandler handler = + new CtrTransportCipher.EncryptionHandler(ctrTransportCipher); + + byte[] data = new byte[CtrTransportCipher.STREAM_BUFFER_SIZE + 1]; + new Random().nextBytes(data); + ByteBuf buf = Unpooled.wrappedBuffer(data); + + ByteArrayWritableChannel channel = new ByteArrayWritableChannel(data.length); + CtrTransportCipher.EncryptedMessage emsg = handler.createEncryptedMessage(buf); + while (emsg.transferred() < emsg.count()) { + emsg.transferTo(channel, emsg.transferred()); + } + assertEquals(data.length, channel.length()); + } + } + + @Test + public void testCtrEncryptedMessageWhenTransferringZeroBytes() throws Exception { + try (AuthEngine client = new AuthEngine("appId", "secret", conf); + AuthEngine server = new AuthEngine("appId", "secret", conf)) { + AuthMessage clientChallenge = client.challenge(); + AuthMessage serverResponse = server.response(clientChallenge); + client.deriveSessionCipher(clientChallenge, serverResponse); + TransportCipher clientCipher = server.sessionCipher(); + assert(clientCipher instanceof CtrTransportCipher); + CtrTransportCipher ctrTransportCipher = (CtrTransportCipher) clientCipher; + CtrTransportCipher.EncryptionHandler handler = + new CtrTransportCipher.EncryptionHandler(ctrTransportCipher); + int testDataLength = 4; + FileRegion region = mock(FileRegion.class); + when(region.count()).thenReturn((long) testDataLength); + // Make `region.transferTo` do nothing 
in first call and transfer 4 bytes in the second one. + when(region.transferTo(any(), anyLong())).thenAnswer(new Answer() { + + private boolean firstTime = true; + + @Override + public Long answer(InvocationOnMock invocationOnMock) throws Throwable { + if (firstTime) { + firstTime = false; + return 0L; + } else { + WritableByteChannel channel = invocationOnMock.getArgument(0); + channel.write(ByteBuffer.wrap(new byte[testDataLength])); + return (long) testDataLength; + } + } + }); + + CtrTransportCipher.EncryptedMessage emsg = handler.createEncryptedMessage(region); + ByteArrayWritableChannel channel = new ByteArrayWritableChannel(testDataLength); + // "transferTo" should act correctly when the underlying FileRegion transfers 0 bytes. + assertEquals(0L, emsg.transferTo(channel, emsg.transferred())); + assertEquals(testDataLength, emsg.transferTo(channel, emsg.transferred())); + assertEquals(emsg.transferred(), emsg.count()); + assertEquals(4, channel.length()); + } + } +} diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/GcmAuthEngineSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/GcmAuthEngineSuite.java new file mode 100644 index 0000000000000..20efb8d57dcbf --- /dev/null +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/GcmAuthEngineSuite.java @@ -0,0 +1,339 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.network.crypto; + +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.channel.ChannelHandlerContext; +import io.netty.channel.ChannelPromise; +import org.apache.spark.network.util.*; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; +import org.mockito.ArgumentCaptor; + +import javax.crypto.AEADBadTagException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.WritableByteChannel; +import java.util.Arrays; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.Mockito.*; + +public class GcmAuthEngineSuite extends AuthEngineSuite { + + @BeforeAll + public static void setUp() { + // Uses GCM mode + conf = getConf(2, false); + } + + @Test + public void testGcmEncryptedMessage() throws Exception { + TransportConf gcmConf = getConf(2, false); + try (AuthEngine client = new AuthEngine("appId", "secret", gcmConf); + AuthEngine server = new AuthEngine("appId", "secret", gcmConf)) { + AuthMessage clientChallenge = client.challenge(); + AuthMessage serverResponse = server.response(clientChallenge); + client.deriveSessionCipher(clientChallenge, serverResponse); + TransportCipher clientCipher = server.sessionCipher(); + // Verify that it derives a GcmTransportCipher + assert (clientCipher instanceof GcmTransportCipher); + GcmTransportCipher gcmTransportCipher = (GcmTransportCipher) clientCipher; + GcmTransportCipher.EncryptionHandler encryptionHandler = + gcmTransportCipher.getEncryptionHandler(); + GcmTransportCipher.DecryptionHandler decryptionHandler = + gcmTransportCipher.getDecryptionHandler(); + // Allocating 1.5x the buffer size to test multiple segments and a fractional segment. + int plaintextSegmentSize = GcmTransportCipher.CIPHERTEXT_BUFFER_SIZE - 16; + byte[] data = new byte[plaintextSegmentSize + (plaintextSegmentSize / 2)]; + // Just writing some bytes. 
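For context, the GCM tests below drain the captured GcmEncryptedMessage into a ByteBuffer through the ByteBufferWriteableChannel class added earlier in this diff. A standalone sketch of just that channel's behaviour, independent of the cipher classes (buffer sizes are arbitrary):

    import java.nio.ByteBuffer;
    import org.apache.spark.network.util.ByteBufferWriteableChannel;

    public class ByteBufferChannelExample {
      public static void main(String[] args) throws Exception {
        ByteBuffer destination = ByteBuffer.allocate(8);
        ByteBufferWriteableChannel channel = new ByteBufferWriteableChannel(destination);
        ByteBuffer src = ByteBuffer.wrap(new byte[] {1, 2, 3, 4});
        int written = channel.write(src); // copies min(src.remaining(), destination.remaining())
        assert written == 4 && src.remaining() == 0;
        channel.close();                  // further writes would throw ClosedChannelException
        destination.flip();               // destination now holds the 4 written bytes
      }
    }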
+ data[0] = 'a'; + data[data.length / 2] = 'b'; + data[data.length - 10] = 'c'; + ByteBuf buf = Unpooled.wrappedBuffer(data); + + // Mock the context and capture the arguments passed to it + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + ChannelPromise promise = mock(ChannelPromise.class); + ArgumentCaptor captorWrappedEncrypted = + ArgumentCaptor.forClass(GcmTransportCipher.GcmEncryptedMessage.class); + encryptionHandler.write(ctx, buf, promise); + verify(ctx).write(captorWrappedEncrypted.capture(), eq(promise)); + + // Get the encrypted value and pass it to the decryption handler + GcmTransportCipher.GcmEncryptedMessage encrypted = + captorWrappedEncrypted.getValue(); + ByteBuffer ciphertextBuffer = + ByteBuffer.allocate((int) encrypted.count()); + ByteBufferWriteableChannel channel = + new ByteBufferWriteableChannel(ciphertextBuffer); + encrypted.transferTo(channel, 0); + ciphertextBuffer.flip(); + ByteBuf ciphertext = Unpooled.wrappedBuffer(ciphertextBuffer); + + // Capture the decrypted values and verify them + ArgumentCaptor captorPlaintext = ArgumentCaptor.forClass(ByteBuf.class); + decryptionHandler.channelRead(ctx, ciphertext); + verify(ctx, times(2)) + .fireChannelRead(captorPlaintext.capture()); + ByteBuf lastPlaintextSegment = captorPlaintext.getValue(); + assertEquals(plaintextSegmentSize/2, + lastPlaintextSegment.readableBytes()); + assertEquals('c', + lastPlaintextSegment.getByte((plaintextSegmentSize/2) - 10)); + } + } + + static class FakeRegion extends AbstractFileRegion { + private final ByteBuffer[] source; + private int sourcePosition; + private final long count; + + FakeRegion(ByteBuffer... source) { + this.source = source; + sourcePosition = 0; + count = remaining(); + } + + private long remaining() { + long remaining = 0; + for (ByteBuffer buffer : source) { + remaining += buffer.remaining(); + } + return remaining; + } + + @Override + public long position() { + return 0; + } + + @Override + public long transferred() { + return count - remaining(); + } + + @Override + public long count() { + return count; + } + + @Override + public long transferTo(WritableByteChannel target, long position) throws IOException { + if (sourcePosition < source.length) { + ByteBuffer currentBuffer = source[sourcePosition]; + long written = target.write(currentBuffer); + if (!currentBuffer.hasRemaining()) { + sourcePosition++; + } + return written; + } else { + return 0; + } + } + + @Override + protected void deallocate() { + } + } + + private static ByteBuffer getTestByteBuf(int size, byte fill) { + byte[] data = new byte[size]; + Arrays.fill(data, fill); + return ByteBuffer.wrap(data); + } + + @Test + public void testGcmEncryptedMessageFileRegion() throws Exception { + TransportConf gcmConf = getConf(2, false); + try (AuthEngine client = new AuthEngine("appId", "secret", gcmConf); + AuthEngine server = new AuthEngine("appId", "secret", gcmConf)) { + AuthMessage clientChallenge = client.challenge(); + AuthMessage serverResponse = server.response(clientChallenge); + client.deriveSessionCipher(clientChallenge, serverResponse); + TransportCipher clientCipher = server.sessionCipher(); + // Verify that it derives a GcmTransportCipher + assert (clientCipher instanceof GcmTransportCipher); + GcmTransportCipher gcmTransportCipher = (GcmTransportCipher) clientCipher; + GcmTransportCipher.EncryptionHandler encryptionHandler = + gcmTransportCipher.getEncryptionHandler(); + GcmTransportCipher.DecryptionHandler decryptionHandler = + gcmTransportCipher.getDecryptionHandler(); + // 
Allocating 1.5x the buffer size to test multiple segments and a fractional segment. + int plaintextSegmentSize = GcmTransportCipher.CIPHERTEXT_BUFFER_SIZE - 16; + int halfSegmentSize = plaintextSegmentSize / 2; + int totalSize = plaintextSegmentSize + halfSegmentSize; + + // Set up some fragmented segments to test + ByteBuffer halfSegment = getTestByteBuf(halfSegmentSize, (byte) 'a'); + int smallFragmentSize = 128; + ByteBuffer smallFragment = getTestByteBuf(smallFragmentSize, (byte) 'b'); + int remainderSize = totalSize - halfSegmentSize - smallFragmentSize; + ByteBuffer remainder = getTestByteBuf(remainderSize, (byte) 'c'); + FakeRegion fakeRegion = new FakeRegion(halfSegment, smallFragment, remainder); + assertEquals(totalSize, fakeRegion.count()); + + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + ChannelPromise promise = mock(ChannelPromise.class); + ArgumentCaptor captorWrappedEncrypted = + ArgumentCaptor.forClass(GcmTransportCipher.GcmEncryptedMessage.class); + encryptionHandler.write(ctx, fakeRegion, promise); + verify(ctx).write(captorWrappedEncrypted.capture(), eq(promise)); + + // Get the encrypted value and pass it to the decryption handler + GcmTransportCipher.GcmEncryptedMessage encrypted = + captorWrappedEncrypted.getValue(); + ByteBuffer ciphertextBuffer = + ByteBuffer.allocate((int) encrypted.count()); + ByteBufferWriteableChannel channel = + new ByteBufferWriteableChannel(ciphertextBuffer); + + // We'll simulate the FileRegion only transferring half a segment. + // The encrypted message should buffer the partial segment plaintext. + long ciphertextTransferred = 0; + while (ciphertextTransferred < encrypted.count()) { + long chunkTransferred = encrypted.transferTo(channel, 0); + ciphertextTransferred += chunkTransferred; + } + assertEquals(encrypted.count(), ciphertextTransferred); + + ciphertextBuffer.flip(); + ByteBuf ciphertext = Unpooled.wrappedBuffer(ciphertextBuffer); + + // Capture the decrypted values and verify them + ArgumentCaptor captorPlaintext = ArgumentCaptor.forClass(ByteBuf.class); + decryptionHandler.channelRead(ctx, ciphertext); + verify(ctx, times(2)).fireChannelRead(captorPlaintext.capture()); + ByteBuf plaintext = captorPlaintext.getValue(); + // We expect this to be the last partial plaintext segment + int expectedLength = totalSize % plaintextSegmentSize; + assertEquals(expectedLength, plaintext.readableBytes()); + // This will be the "remainder" segment that is filled to 'c' + assertEquals('c', plaintext.getByte(0)); + } + } + + + @Test + public void testGcmUnalignedDecryption() throws Exception { + TransportConf gcmConf = getConf(2, false); + try (AuthEngine client = new AuthEngine("appId", "secret", gcmConf); + AuthEngine server = new AuthEngine("appId", "secret", gcmConf)) { + AuthMessage clientChallenge = client.challenge(); + AuthMessage serverResponse = server.response(clientChallenge); + client.deriveSessionCipher(clientChallenge, serverResponse); + TransportCipher clientCipher = server.sessionCipher(); + // Verify that it derives a GcmTransportCipher + assert (clientCipher instanceof GcmTransportCipher); + GcmTransportCipher gcmTransportCipher = (GcmTransportCipher) clientCipher; + GcmTransportCipher.EncryptionHandler encryptionHandler = + gcmTransportCipher.getEncryptionHandler(); + GcmTransportCipher.DecryptionHandler decryptionHandler = + gcmTransportCipher.getDecryptionHandler(); + // Allocating 1.5x the buffer size to test multiple segments and a fractional segment. 
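+      // Sizing sanity check (assuming a 16-byte GCM tag per segment, which is why the plaintext
+      // segment is CIPHERTEXT_BUFFER_SIZE - 16): 1.5 segments of plaintext encrypt to two
+      // ciphertext segments, so the decryption handler fires channelRead twice and the last
+      // plaintext chunk carries plaintextSegmentSize / 2 bytes -- which is what the assertions
+      // at the end of this test check.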
+ int plaintextSegmentSize = GcmTransportCipher.CIPHERTEXT_BUFFER_SIZE - 16; + int plaintextSize = plaintextSegmentSize + (plaintextSegmentSize / 2); + byte[] data = new byte[plaintextSize]; + Arrays.fill(data, (byte) 'x'); + ByteBuf buf = Unpooled.wrappedBuffer(data); + + // Mock the context and capture the arguments passed to it + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + ChannelPromise promise = mock(ChannelPromise.class); + ArgumentCaptor captorWrappedEncrypted = + ArgumentCaptor.forClass(GcmTransportCipher.GcmEncryptedMessage.class); + encryptionHandler.write(ctx, buf, promise); + verify(ctx).write(captorWrappedEncrypted.capture(), eq(promise)); + + // Get the encrypted value and pass it to the decryption handler + GcmTransportCipher.GcmEncryptedMessage encrypted = + captorWrappedEncrypted.getValue(); + ByteBuffer ciphertextBuffer = + ByteBuffer.allocate((int) encrypted.count()); + ByteBufferWriteableChannel channel = + new ByteBufferWriteableChannel(ciphertextBuffer); + encrypted.transferTo(channel, 0); + ciphertextBuffer.flip(); + ByteBuf ciphertext = Unpooled.wrappedBuffer(ciphertextBuffer); + + // Split up the ciphertext into some different sized chunks + int firstChunkSize = plaintextSize / 2; + ByteBuf mockCiphertext = spy(ciphertext); + when(mockCiphertext.readableBytes()) + .thenReturn(firstChunkSize, firstChunkSize).thenCallRealMethod(); + + // Capture the decrypted values and verify them + ArgumentCaptor captorPlaintext = ArgumentCaptor.forClass(ByteBuf.class); + decryptionHandler.channelRead(ctx, mockCiphertext); + verify(ctx, times(2)).fireChannelRead(captorPlaintext.capture()); + ByteBuf lastPlaintextSegment = captorPlaintext.getValue(); + assertEquals(plaintextSegmentSize/2, + lastPlaintextSegment.readableBytes()); + assertEquals('x', + lastPlaintextSegment.getByte((plaintextSegmentSize/2) - 10)); + } + } + + @Test + public void testCorruptGcmEncryptedMessage() throws Exception { + TransportConf gcmConf = getConf(2, false); + + try (AuthEngine client = new AuthEngine("appId", "secret", gcmConf); + AuthEngine server = new AuthEngine("appId", "secret", gcmConf)) { + AuthMessage clientChallenge = client.challenge(); + AuthMessage serverResponse = server.response(clientChallenge); + client.deriveSessionCipher(clientChallenge, serverResponse); + + TransportCipher clientCipher = server.sessionCipher(); + assert (clientCipher instanceof GcmTransportCipher); + + GcmTransportCipher gcmTransportCipher = (GcmTransportCipher) clientCipher; + GcmTransportCipher.EncryptionHandler encryptionHandler = + gcmTransportCipher.getEncryptionHandler(); + GcmTransportCipher.DecryptionHandler decryptionHandler = + gcmTransportCipher.getDecryptionHandler(); + byte[] zeroData = new byte[1024 * 32]; + // Just writing some bytes. 
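The corruption test below flips one byte of the GCM ciphertext and expects AEADBadTagException from the decryption handler. The same property can be demonstrated with plain JCE, independent of GcmTransportCipher; this is only an illustrative sketch of AEAD tamper detection, not Spark's code path:

    import java.security.SecureRandom;
    import javax.crypto.AEADBadTagException;
    import javax.crypto.Cipher;
    import javax.crypto.KeyGenerator;
    import javax.crypto.SecretKey;
    import javax.crypto.spec.GCMParameterSpec;

    public class GcmTamperDemo {
      public static void main(String[] args) throws Exception {
        SecretKey key = KeyGenerator.getInstance("AES").generateKey();
        byte[] iv = new byte[12];
        new SecureRandom().nextBytes(iv);

        Cipher enc = Cipher.getInstance("AES/GCM/NoPadding");
        enc.init(Cipher.ENCRYPT_MODE, key, new GCMParameterSpec(128, iv));
        byte[] ciphertext = enc.doFinal(new byte[32]);

        ciphertext[0] ^= 1; // flip one bit of the ciphertext

        Cipher dec = Cipher.getInstance("AES/GCM/NoPadding");
        dec.init(Cipher.DECRYPT_MODE, key, new GCMParameterSpec(128, iv));
        try {
          dec.doFinal(ciphertext);              // authentication fails
        } catch (AEADBadTagException expected) {
          System.out.println("tamper detected: " + expected);
        }
      }
    }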
+ ByteBuf buf = Unpooled.wrappedBuffer(zeroData); + + // Mock the context and capture the arguments passed to it + ChannelHandlerContext ctx = mock(ChannelHandlerContext.class); + ChannelPromise promise = mock(ChannelPromise.class); + ArgumentCaptor captorWrappedEncrypted = + ArgumentCaptor.forClass(GcmTransportCipher.GcmEncryptedMessage.class); + encryptionHandler.write(ctx, buf, promise); + verify(ctx).write(captorWrappedEncrypted.capture(), eq(promise)); + + GcmTransportCipher.GcmEncryptedMessage encrypted = + captorWrappedEncrypted.getValue(); + ByteBuffer ciphertextBuffer = + ByteBuffer.allocate((int) encrypted.count()); + ByteBufferWriteableChannel channel = + new ByteBufferWriteableChannel(ciphertextBuffer); + encrypted.transferTo(channel, 0); + ciphertextBuffer.flip(); + ByteBuf ciphertext = Unpooled.wrappedBuffer(ciphertextBuffer); + + byte b = ciphertext.getByte(100); + // Inverting the bits of the 100th bit + ciphertext.setByte(100, ~b & 0xFF); + assertThrows(AEADBadTagException.class, () -> decryptionHandler.channelRead(ctx, ciphertext)); + } + } +} diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java index da62d3b2de31a..8977f29034fe0 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/TransportCipherSuite.java @@ -41,10 +41,10 @@ public class TransportCipherSuite { @Test - public void testBufferNotLeaksOnInternalError() throws IOException { + public void testCtrBufferNotLeaksOnInternalError() throws IOException { String algorithm = "TestAlgorithm"; TransportConf conf = new TransportConf("Test", MapConfigProvider.EMPTY); - TransportCipher cipher = new TransportCipher(conf.cryptoConf(), conf.cipherTransformation(), + CtrTransportCipher cipher = new CtrTransportCipher(conf.cryptoConf(), new SecretKeySpec(new byte[256], algorithm), new byte[0], new byte[0]) { @Override diff --git a/common/network-common/src/test/java/org/apache/spark/network/protocol/EncryptedMessageWithHeaderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/protocol/EncryptedMessageWithHeaderSuite.java index 7478fa1db7113..2865d411bf673 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/protocol/EncryptedMessageWithHeaderSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/protocol/EncryptedMessageWithHeaderSuite.java @@ -116,7 +116,7 @@ public void testChunkedStream() throws Exception { // Validate we read data correctly assertEquals(bodyResult.readableBytes(), chunkSize); - assert(bodyResult.readableBytes() < (randomData.length - readIndex)); + assertTrue(bodyResult.readableBytes() < (randomData.length - readIndex)); while (bodyResult.readableBytes() > 0) { assertEquals(bodyResult.readByte(), randomData[readIndex++]); } diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java index aa3891450a933..025be80c5ce4b 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/util/TransportFrameDecoderSuite.java @@ -27,8 +27,10 @@ import io.netty.channel.ChannelHandlerContext; import 
org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; +// checkstyle.off: RegexpSinglelineJava import org.slf4j.Logger; import org.slf4j.LoggerFactory; +// checkstyle.on: RegexpSinglelineJava import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.*; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java b/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java index 7253101f41df6..d67f2a3099d35 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java @@ -20,8 +20,10 @@ import java.nio.ByteBuffer; import java.util.concurrent.ConcurrentHashMap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.util.JavaUtils; @@ -29,7 +31,8 @@ * A class that manages shuffle secret used by the external shuffle service. */ public class ShuffleSecretManager implements SecretKeyHolder { - private static final Logger logger = LoggerFactory.getLogger(ShuffleSecretManager.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ShuffleSecretManager.class); private final ConcurrentHashMap shuffleSecretMap; @@ -51,7 +54,8 @@ public void registerApp(String appId, String shuffleSecret) { // Otherwise we have to specifically look at the application attempt in addition // to the applicationId since the secrets change between application attempts on yarn. shuffleSecretMap.put(appId, shuffleSecret); - logger.info("Registered shuffle secret for application {}", appId); + logger.info("Registered shuffle secret for application {}", + MDC.of(LogKeys.APP_ID$.MODULE$, appId)); } /** @@ -67,7 +71,8 @@ public void registerApp(String appId, ByteBuffer shuffleSecret) { */ public void unregisterApp(String appId) { shuffleSecretMap.remove(appId); - logger.info("Unregistered shuffle secret for application {}", appId); + logger.info("Unregistered shuffle secret for application {}", + MDC.of(LogKeys.APP_ID$.MODULE$, appId)); } /** diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java index 32222e910df06..dcb0a52b0d66c 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java @@ -26,9 +26,11 @@ import java.util.concurrent.CompletableFuture; import com.codahale.metrics.MetricSet; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; @@ -42,7 +44,7 @@ * or external service. 
*/ public abstract class BlockStoreClient implements Closeable { - protected final Logger logger = LoggerFactory.getLogger(this.getClass()); + protected final SparkLogger logger = SparkLoggerFactory.getLogger(this.getClass()); protected volatile TransportClientFactory clientFactory; protected String appId; @@ -170,16 +172,16 @@ public void onSuccess(ByteBuffer response) { hostLocalDirsCompletable.complete( ((LocalDirsForExecutors) msgObj).getLocalDirsByExec()); } catch (Throwable t) { - logger.warn("Error while trying to get the host local dirs for " + - Arrays.toString(getLocalDirsMessage.execIds), t.getCause()); + logger.warn("Error while trying to get the host local dirs for {}", t.getCause(), + MDC.of(LogKeys.EXECUTOR_IDS$.MODULE$, Arrays.toString(getLocalDirsMessage.execIds))); hostLocalDirsCompletable.completeExceptionally(t); } } @Override public void onFailure(Throwable t) { - logger.warn("Error while trying to get the host local dirs for " + - Arrays.toString(getLocalDirsMessage.execIds), t.getCause()); + logger.warn("Error while trying to get the host local dirs for {}", t.getCause(), + MDC.of(LogKeys.EXECUTOR_IDS$.MODULE$, Arrays.toString(getLocalDirsMessage.execIds))); hostLocalDirsCompletable.completeExceptionally(t); } }); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java index 137572da108a4..5d33bfb345a9e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java @@ -37,9 +37,11 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Sets; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.MergedBlockMetaResponseCallback; import org.apache.spark.network.client.RpcResponseCallback; @@ -65,7 +67,8 @@ */ public class ExternalBlockHandler extends RpcHandler implements RpcHandler.MergedBlockMetaReqHandler { - private static final Logger logger = LoggerFactory.getLogger(ExternalBlockHandler.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ExternalBlockHandler.class); private static final String SHUFFLE_MERGER_IDENTIFIER = "shuffle-push-merger"; private static final String SHUFFLE_BLOCK_ID = "shuffle"; private static final String SHUFFLE_CHUNK_ID = "shuffleChunk"; @@ -221,7 +224,9 @@ protected void handleMessage( } else if (msgObj instanceof RemoveShuffleMerge msg) { checkAuth(client, msg.appId); logger.info("Removing shuffle merge data for application {} shuffle {} shuffleMerge {}", - msg.appId, msg.shuffleId, msg.shuffleMergeId); + MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId), + MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId)); mergeManager.removeShuffleMerge(msg); } else if (msgObj instanceof DiagnoseCorruption msg) { checkAuth(client, msg.appId); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java 
b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java index 1451d5712812d..97723f77723d4 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java @@ -29,6 +29,8 @@ import com.codahale.metrics.MetricSet; import com.google.common.collect.Lists; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.TransportContext; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.MergedBlockMetaResponseCallback; @@ -103,7 +105,8 @@ private void setComparableAppAttemptId(String appAttemptId) { this.comparableAppAttemptId = Integer.parseInt(appAttemptId); } catch (NumberFormatException e) { logger.warn("Push based shuffle requires comparable application attemptId, " + - "but the appAttemptId {} cannot be parsed to Integer", appAttemptId, e); + "but the appAttemptId {} cannot be parsed to Integer", e, + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId)); } } @@ -217,8 +220,9 @@ public void onFailure(Throwable e) { } }); } catch (Exception e) { - logger.error("Exception while sending finalizeShuffleMerge request to {}:{}", - host, port, e); + logger.error("Exception while sending finalizeShuffleMerge request to {}:{}", e, + MDC.of(LogKeys.HOST$.MODULE$, host), + MDC.of(LogKeys.PORT$.MODULE$, port)); listener.onShuffleMergeFailure(e); } } @@ -316,16 +320,19 @@ public void onSuccess(ByteBuffer response) { BlockTransferMessage msgObj = BlockTransferMessage.Decoder.fromByteBuffer(response); numRemovedBlocksFuture.complete(((BlocksRemoved) msgObj).numRemovedBlocks); } catch (Throwable t) { - logger.warn("Error trying to remove blocks " + Arrays.toString(blockIds) + - " via external shuffle service from executor: " + execId, t); + logger.warn("Error trying to remove blocks {} via external shuffle service from " + + "executor: {}", t, + MDC.of(LogKeys.BLOCK_IDS$.MODULE$, Arrays.toString(blockIds)), + MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, execId)); numRemovedBlocksFuture.complete(0); } } @Override public void onFailure(Throwable e) { - logger.warn("Error trying to remove blocks " + Arrays.toString(blockIds) + - " via external shuffle service from executor: " + execId, e); + logger.warn("Error trying to remove blocks {} via external shuffle service from " + + "executor: {}", e, MDC.of(LogKeys.BLOCK_IDS$.MODULE$, Arrays.toString(blockIds)), + MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, execId)); numRemovedBlocksFuture.complete(0); } }); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index 429e5f03b9eaf..e43eedd8b25eb 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -38,9 +38,11 @@ import com.google.common.cache.LoadingCache; import com.google.common.cache.Weigher; import com.google.common.collect.Maps; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import 
org.apache.spark.network.buffer.FileSegmentManagedBuffer; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.shuffle.checksum.Cause; @@ -62,7 +64,8 @@ * from Spark's IndexShuffleBlockResolver. */ public class ExternalShuffleBlockResolver { - private static final Logger logger = LoggerFactory.getLogger(ExternalShuffleBlockResolver.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ExternalShuffleBlockResolver.class); private static final ObjectMapper mapper = new ObjectMapper(); @@ -131,7 +134,8 @@ public ShuffleIndexInformation load(String filePath) throws IOException { db = DBProvider.initDB(dbBackend, this.registeredExecutorFile, CURRENT_VERSION, mapper); if (db != null) { logger.info("Use {} as the implementation of {}", - dbBackend, Constants.SHUFFLE_SERVICE_DB_BACKEND); + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME$.MODULE$, dbBackend), + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY$.MODULE$, Constants.SHUFFLE_SERVICE_DB_BACKEND)); executors = reloadRegisteredExecutors(db); } else { executors = Maps.newConcurrentMap(); @@ -149,7 +153,9 @@ public void registerExecutor( String execId, ExecutorShuffleInfo executorInfo) { AppExecId fullId = new AppExecId(appId, execId); - logger.info("Registered executor {} with {}", fullId, executorInfo); + logger.info("Registered executor {} with {}", + MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId), + MDC.of(LogKeys.EXECUTOR_SHUFFLE_INFO$.MODULE$, executorInfo)); try { if (db != null && AppsWithRecoveryDisabled.isRecoveryEnabledForApp(appId)) { byte[] key = dbAppExecKey(fullId); @@ -214,7 +220,9 @@ public ManagedBuffer getRddBlockData( * this method. */ public void applicationRemoved(String appId, boolean cleanupLocalDirs) { - logger.info("Application {} removed, cleanupLocalDirs = {}", appId, cleanupLocalDirs); + logger.info("Application {} removed, cleanupLocalDirs = {}", + MDC.of(LogKeys.APP_ID$.MODULE$, appId), + MDC.of(LogKeys.CLEANUP_LOCAL_DIRS$.MODULE$, cleanupLocalDirs)); Iterator> it = executors.entrySet().iterator(); while (it.hasNext()) { Map.Entry entry = it.next(); @@ -228,12 +236,15 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) { try { db.delete(dbAppExecKey(fullId)); } catch (IOException e) { - logger.error("Error deleting {} from executor state db", appId, e); + logger.error("Error deleting {} from executor state db", e, + MDC.of(LogKeys.APP_ID$.MODULE$, appId)); } } if (cleanupLocalDirs) { - logger.info("Cleaning up executor {}'s {} local dirs", fullId, executor.localDirs.length); + logger.info("Cleaning up executor {}'s {} local dirs", + MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId), + MDC.of(LogKeys.NUM_LOCAL_DIRS$.MODULE$, executor.localDirs.length)); // Execute the actual deletion in a different thread, as it may take some time. directoryCleaner.execute(() -> deleteExecutorDirs(executor.localDirs)); @@ -248,15 +259,18 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) { */ public void executorRemoved(String executorId, String appId) { logger.info("Clean up non-shuffle and non-RDD files associated with the finished executor {}", - executorId); + MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, executorId)); AppExecId fullId = new AppExecId(appId, executorId); final ExecutorShuffleInfo executor = executors.get(fullId); if (executor == null) { // Executor not registered, skip clean up of the local directories. 
- logger.info("Executor is not registered (appId={}, execId={})", appId, executorId); + logger.info("Executor is not registered (appId={}, execId={})", + MDC.of(LogKeys.APP_ID$.MODULE$, appId), + MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, executorId)); } else { logger.info("Cleaning up non-shuffle and non-RDD files in executor {}'s {} local dirs", - fullId, executor.localDirs.length); + MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId), + MDC.of(LogKeys.NUM_LOCAL_DIRS$.MODULE$, executor.localDirs.length)); // Execute the actual deletion in a different thread, as it may take some time. directoryCleaner.execute(() -> deleteNonShuffleServiceServedFiles(executor.localDirs)); @@ -273,7 +287,8 @@ private void deleteExecutorDirs(String[] dirs) { JavaUtils.deleteRecursively(new File(localDir)); logger.debug("Successfully cleaned up directory: {}", localDir); } catch (Exception e) { - logger.error("Failed to delete directory: " + localDir, e); + logger.error("Failed to delete directory: {}", e, + MDC.of(LogKeys.PATH$.MODULE$, localDir)); } } } @@ -295,8 +310,8 @@ private void deleteNonShuffleServiceServedFiles(String[] dirs) { logger.debug("Successfully cleaned up files not served by shuffle service in directory: {}", localDir); } catch (Exception e) { - logger.error("Failed to delete files not served by shuffle service in directory: " - + localDir, e); + logger.error("Failed to delete files not served by shuffle service in directory: {}", e, + MDC.of(LogKeys.PATH$.MODULE$, localDir)); } } } @@ -368,7 +383,8 @@ public int removeBlocks(String appId, String execId, String[] blockIds) { if (file.delete()) { numRemovedBlocks++; } else { - logger.warn("Failed to delete block: " + file.getAbsolutePath()); + logger.warn("Failed to delete block: {}", + MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath())); } } return numRemovedBlocks; @@ -472,7 +488,8 @@ static ConcurrentMap reloadRegisteredExecutors(D break; } AppExecId id = parseDbAppExecKey(key); - logger.info("Reloading registered executors: " + id.toString()); + logger.info("Reloading registered executors: {}", + MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, id)); ExecutorShuffleInfo shuffleInfo = mapper.readValue(e.getValue(), ExecutorShuffleInfo.class); registeredExecutors.put(id, shuffleInfo); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java index b93db3f570b86..c5c6ab313e193 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockFetcher.java @@ -26,9 +26,9 @@ import com.google.common.primitives.Ints; import com.google.common.primitives.Longs; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.ChunkReceivedCallback; import org.apache.spark.network.client.RpcResponseCallback; @@ -53,7 +53,8 @@ * {@link org.apache.spark.network.server.OneForOneStreamManager} on the server side. 
*/ public class OneForOneBlockFetcher { - private static final Logger logger = LoggerFactory.getLogger(OneForOneBlockFetcher.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(OneForOneBlockFetcher.class); private static final String SHUFFLE_BLOCK_PREFIX = "shuffle_"; private static final String SHUFFLE_CHUNK_PREFIX = "shuffleChunk_"; private static final String SHUFFLE_BLOCK_SPLIT = "shuffle"; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java index 8885dc9f2e2c5..d90ca1a88a267 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java @@ -22,9 +22,9 @@ import java.util.Map; import com.google.common.base.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NioManagedBuffer; import org.apache.spark.network.client.RpcResponseCallback; @@ -44,7 +44,8 @@ * @since 3.1.0 */ public class OneForOneBlockPusher { - private static final Logger logger = LoggerFactory.getLogger(OneForOneBlockPusher.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(OneForOneBlockPusher.class); private static final ErrorHandler PUSH_ERROR_HANDLER = new ErrorHandler.BlockPushErrorHandler(); public static final String SHUFFLE_PUSH_BLOCK_PREFIX = "shufflePush"; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java index 5f9576843b476..02a38eac5b409 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java @@ -64,9 +64,11 @@ import com.google.common.primitives.Longs; import org.roaringbitmap.RoaringBitmap; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.buffer.FileSegmentManagedBuffer; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.StreamCallbackWithID; @@ -96,7 +98,8 @@ public class RemoteBlockPushResolver implements MergedShuffleFileManager { private static final Cleaner CLEANER = Cleaner.create(); - private static final Logger logger = LoggerFactory.getLogger(RemoteBlockPushResolver.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(RemoteBlockPushResolver.class); public static final String MERGED_SHUFFLE_FILE_NAME_PREFIX = "shuffleMerged"; public static final String SHUFFLE_META_DELIMITER = ":"; @@ -184,7 +187,8 @@ public ShuffleIndexInformation load(String filePath) throws IOException { db = DBProvider.initDB(dbBackend, this.recoveryFile, CURRENT_VERSION, mapper); if (db != null) { logger.info("Use {} as the implementation of {}", - dbBackend, Constants.SHUFFLE_SERVICE_DB_BACKEND); + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME$.MODULE$, 
dbBackend), + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY$.MODULE$, Constants.SHUFFLE_SERVICE_DB_BACKEND)); reloadAndCleanUpAppShuffleInfo(db); } this.pushMergeMetrics = new PushMergeMetrics(); @@ -229,8 +233,11 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( shuffles.compute(shuffleId, (id, mergePartitionsInfo) -> { if (mergePartitionsInfo == null) { logger.info("{} attempt {} shuffle {} shuffleMerge {}: creating a new shuffle " + - "merge metadata", appShuffleInfo.appId, appShuffleInfo.attemptId, shuffleId, - shuffleMergeId); + "merge metadata", + MDC.of(LogKeys.APP_ID$.MODULE$, appShuffleInfo.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId), + MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId)); return new AppShuffleMergePartitionsInfo(shuffleMergeId, false); } else { int latestShuffleMergeId = mergePartitionsInfo.shuffleMergeId; @@ -248,8 +255,11 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( new AppAttemptShuffleMergeId(appShuffleInfo.appId, appShuffleInfo.attemptId, shuffleId, latestShuffleMergeId); logger.info("{}: creating a new shuffle merge metadata since received " + - "shuffleMergeId {} is higher than latest shuffleMergeId {}", - currrentAppAttemptShuffleMergeId, shuffleMergeId, latestShuffleMergeId); + "shuffleMergeId {} is higher than latest shuffleMergeId {}", + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, + currrentAppAttemptShuffleMergeId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId), + MDC.of(LogKeys.LATEST_SHUFFLE_MERGE_ID$.MODULE$, latestShuffleMergeId)); submitCleanupTask(() -> closeAndDeleteOutdatedPartitions(currrentAppAttemptShuffleMergeId, mergePartitionsInfo.shuffleMergePartitions)); @@ -282,9 +292,14 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( dataFile, indexFile, metaFile); } catch (IOException e) { logger.error("{} attempt {} shuffle {} shuffleMerge {}: cannot create merged shuffle " + - "partition with data file {}, index file {}, and meta file {}", appShuffleInfo.appId, - appShuffleInfo.attemptId, shuffleId, shuffleMergeId, dataFile.getAbsolutePath(), - indexFile.getAbsolutePath(), metaFile.getAbsolutePath()); + "partition with data file {}, index file {}, and meta file {}", + MDC.of(LogKeys.APP_ID$.MODULE$, appShuffleInfo.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId), + MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId), + MDC.of(LogKeys.DATA_FILE$.MODULE$, dataFile.getAbsolutePath()), + MDC.of(LogKeys.INDEX_FILE$.MODULE$, indexFile.getAbsolutePath()), + MDC.of(LogKeys.META_FILE$.MODULE$, metaFile.getAbsolutePath())); throw new RuntimeException( String.format("Cannot initialize merged shuffle partition for appId %s shuffleId %s " + "shuffleMergeId %s reduceId %s", appShuffleInfo.appId, shuffleId, shuffleMergeId, @@ -395,7 +410,9 @@ private void removeOldApplicationAttemptsFromDb(AppShuffleInfo info) { @Override public void applicationRemoved(String appId, boolean cleanupLocalDirs) { - logger.info("Application {} removed, cleanupLocalDirs = {}", appId, cleanupLocalDirs); + logger.info("Application {} removed, cleanupLocalDirs = {}", + MDC.of(LogKeys.APP_ID$.MODULE$, appId), + MDC.of(LogKeys.CLEANUP_LOCAL_DIRS$.MODULE$, cleanupLocalDirs)); // Cleanup the DB within critical section to gain the consistency between // DB and in-memory hashmap. 
AtomicReference ref = new AtomicReference<>(null); @@ -505,8 +522,8 @@ void removeAppAttemptPathInfoFromDB(String appId, int attemptId) { byte[] key = getDbAppAttemptPathsKey(appAttemptId); db.delete(key); } catch (Exception e) { - logger.error("Failed to remove the application attempt {} local path in DB", - appAttemptId, e); + logger.error("Failed to remove the application attempt {} local path in DB", e, + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId)); } } } @@ -576,7 +593,10 @@ void deleteMergedFiles( } } logger.info("Delete {} data files, {} index files, {} meta files for {}", - dataFilesDeleteCnt, indexFilesDeleteCnt, metaFilesDeleteCnt, appAttemptShuffleMergeId); + MDC.of(LogKeys.NUM_DATA_FILES$.MODULE$, dataFilesDeleteCnt), + MDC.of(LogKeys.NUM_INDEX_FILES$.MODULE$, indexFilesDeleteCnt), + MDC.of(LogKeys.NUM_META_FILES$.MODULE$, metaFilesDeleteCnt), + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId)); } /** @@ -588,8 +608,8 @@ void removeAppShufflePartitionInfoFromDB(AppAttemptShuffleMergeId appAttemptShuf try { db.delete(getDbAppAttemptShufflePartitionKey(appAttemptShuffleMergeId)); } catch (Exception e) { - logger.error("Error deleting {} from application shuffle merged partition info in DB", - appAttemptShuffleMergeId, e); + logger.error("Error deleting {} from application shuffle merged partition info in DB", e, + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId)); } } } @@ -608,7 +628,8 @@ void deleteExecutorDirs(AppShuffleInfo appShuffleInfo) { logger.debug("Successfully cleaned up directory: {}", localDir); } } catch (Exception e) { - logger.error("Failed to delete directory: {}", localDir, e); + logger.error("Failed to delete directory: {}", e, + MDC.of(LogKeys.PATH$.MODULE$, localDir)); } } } @@ -738,7 +759,10 @@ public ByteBuffer getCompletionResponse() { @Override public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalize shuffle merge", - msg.appId, msg.appAttemptId, msg.shuffleId, msg.shuffleMergeId); + MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId), + MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId)); AppShuffleInfo appShuffleInfo = validateAndGetAppShuffleInfo(msg.appId); if (appShuffleInfo.attemptId != msg.appAttemptId) { // If finalizeShuffleMerge from a former application attempt, it is considered late, @@ -821,9 +845,13 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { } } catch (IOException ioe) { logger.warn("{} attempt {} shuffle {} shuffleMerge {}: exception while " + - "finalizing shuffle partition {}. Exception message: {}", msg.appId, - msg.appAttemptId, msg.shuffleId, msg.shuffleMergeId, partition.reduceId, - ioe.getMessage()); + "finalizing shuffle partition {}. 
Exception message: {}", + MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId), + MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId), + MDC.of(LogKeys.REDUCE_ID$.MODULE$, partition.reduceId), + MDC.of(LogKeys.EXCEPTION$.MODULE$, ioe.getMessage())); } finally { partition.cleanable.clean(); } @@ -835,7 +863,10 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { appShuffleInfo.shuffles.get(msg.shuffleId).setReduceIds(Ints.toArray(reduceIds)); } logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalization of shuffle merge completed", - msg.appId, msg.appAttemptId, msg.shuffleId, msg.shuffleMergeId); + MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId), + MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId)); return mergeStatuses; } @@ -903,7 +934,8 @@ public void registerExecutor(String appId, ExecutorShuffleInfo executorInfo) { if (originalAppShuffleInfo.get() != null) { AppShuffleInfo appShuffleInfo = originalAppShuffleInfo.get(); logger.warn("Cleanup shuffle info and merged shuffle files for {}_{} as new " + - "application attempt registered", appId, appShuffleInfo.attemptId); + "application attempt registered", MDC.of(LogKeys.APP_ID$.MODULE$, appId), + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId)); // Clean up all the merge shuffle related information in the DB for the former attempt submitCleanupTask( () -> closeAndDeletePartitionsIfNeeded(appShuffleInfo, true) @@ -959,11 +991,13 @@ private void shutdownMergedShuffleCleanerNow() { try { List unfinishedTasks = mergedShuffleCleaner.shutdownNow(); logger.warn("There are still {} tasks not completed in mergedShuffleCleaner " + - "after {} seconds.", unfinishedTasks.size(), cleanerShutdownTimeout); + "after {} ms.", + MDC.of(LogKeys.COUNT$.MODULE$, unfinishedTasks.size()), + MDC.of(LogKeys.TIMEOUT$.MODULE$, cleanerShutdownTimeout * 1000L)); // Wait a while for tasks to respond to being cancelled if (!mergedShuffleCleaner.awaitTermination(cleanerShutdownTimeout, TimeUnit.SECONDS)) { - logger.warn("mergedShuffleCleaner did not terminate in {} seconds.", - cleanerShutdownTimeout); + logger.warn("mergedShuffleCleaner did not terminate in {} ms.", + MDC.of(LogKeys.TIMEOUT$.MODULE$, cleanerShutdownTimeout * 1000L)); } } catch (InterruptedException ignored) { Thread.currentThread().interrupt(); @@ -982,7 +1016,8 @@ private void writeAppPathsInfoToDb(String appId, int attemptId, AppPathsInfo app byte[] value = valueStr.getBytes(StandardCharsets.UTF_8); db.put(key, value); } catch (Exception e) { - logger.error("Error saving registered app paths info for {}", appAttemptId, e); + logger.error("Error saving registered app paths info for {}", e, + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId)); } } } @@ -999,7 +1034,8 @@ private void writeAppAttemptShuffleMergeInfoToDB( byte[] dbKey = getDbAppAttemptShufflePartitionKey(appAttemptShuffleMergeId); db.put(dbKey, new byte[0]); } catch (Exception e) { - logger.error("Error saving active app shuffle partition {}", appAttemptShuffleMergeId, e); + logger.error("Error saving active app shuffle partition {}", e, + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId)); } } } @@ -1100,7 +1136,8 @@ List reloadActiveAppAttemptsPathInfo(DB db) throws IOException { // Add the former outdated DB key to deletion 
list dbKeysToBeRemoved.add(getDbAppAttemptPathsKey(existingAppAttemptId)); } catch (IOException e) { - logger.error("Failed to get the DB key for {}", existingAppAttemptId, e); + logger.error("Failed to get the DB key for {}", e, + MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, existingAppAttemptId)); } } return new AppShuffleInfo( @@ -1149,8 +1186,8 @@ List reloadFinalizedAppAttemptsShuffleMergeInfo(DB db) throws IOExceptio dbKeysToBeRemoved.add( getDbAppAttemptShufflePartitionKey(appAttemptShuffleMergeId)); } catch (Exception e) { - logger.error("Error getting the DB key for {}", - appAttemptShuffleMergeId, e); + logger.error("Error getting the DB key for {}", e, MDC.of( + LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId)); } } return new AppShuffleMergePartitionsInfo(partitionId.shuffleMergeId, true); @@ -1178,7 +1215,8 @@ void removeOutdatedKeyValuesInDB(List dbKeysToBeRemoved) { try { db.delete(key); } catch (Exception e) { - logger.error("Error deleting dangling key {} in DB", key, e); + logger.error("Error deleting dangling key {} in DB", e, + MDC.of(LogKeys.KEY$.MODULE$, key)); } } ); @@ -1560,7 +1598,8 @@ public void onComplete(String streamId) throws IOException { @Override public void onFailure(String streamId, Throwable throwable) throws IOException { if (ERROR_HANDLER.shouldLogError(throwable)) { - logger.error("Encountered issue when merging {}", streamId, throwable); + logger.error("Encountered issue when merging {}", throwable, + MDC.of(LogKeys.STREAM_ID$.MODULE$, streamId)); } else { logger.debug("Encountered issue when merging {}", streamId, throwable); } @@ -1821,7 +1860,8 @@ void updateChunkInfo(long chunkOffset, int mapIndex) throws IOException { indexMetaUpdateFailed = false; } catch (IOException ioe) { logger.warn("{} reduceId {} update to index/meta failed", - appAttemptShuffleMergeId, reduceId); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); indexMetaUpdateFailed = true; // Any exception here is propagated to the caller and the caller can decide whether to // abort or not. 
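One detail worth calling out from the RemoteBlockPushResolver hunks just above: only the error/warn/info calls are rewritten to MDC-tagged arguments, while the debug call in the same onFailure handler keeps the plain slf4j-style parameter list. A short sketch of that split, with placeholder names:

import org.apache.spark.internal.LogKeys;
import org.apache.spark.internal.MDC;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;

public class LevelSplitSketch {  // placeholder name, not part of the patch
  private static final SparkLogger logger =
    SparkLoggerFactory.getLogger(LevelSplitSketch.class);

  void onMergeFailure(String streamId, Throwable t, boolean shouldLogError) {
    if (shouldLogError) {
      // Structured form: throwable first, then the MDC binding for the placeholder.
      logger.error("Encountered issue when merging {}", t,
        MDC.of(LogKeys.STREAM_ID$.MODULE$, streamId));
    } else {
      // The debug-level call is left untouched by the patch: positional args, throwable last.
      logger.debug("Encountered issue when merging {}", streamId, t);
    }
  }
}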
@@ -1873,7 +1913,8 @@ private void finalizePartition() throws IOException { private void deleteAllFiles() { if (!dataFile.delete()) { logger.info("Error deleting data file for {} reduceId {}", - appAttemptShuffleMergeId, reduceId); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); } metaFile.delete(); indexFile.delete(); @@ -1942,19 +1983,22 @@ private void closeAllFiles( } } catch (IOException ioe) { logger.warn("Error closing data channel for {} reduceId {}", - appAttemptShuffleMergeId, reduceId); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); } try { metaFile.close(); } catch (IOException ioe) { logger.warn("Error closing meta file for {} reduceId {}", - appAttemptShuffleMergeId, reduceId); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); } try { indexFile.close(); } catch (IOException ioe) { logger.warn("Error closing index file for {} reduceId {}", - appAttemptShuffleMergeId, reduceId); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); } } } @@ -1999,7 +2043,9 @@ private AppPathsInfo( this.subDirsPerLocalDir = subDirsPerLocalDir; if (logger.isInfoEnabled()) { logger.info("Updated active local dirs {} and sub dirs {} for application {}", - Arrays.toString(activeLocalDirs),subDirsPerLocalDir, appId); + MDC.of(LogKeys.PATHS$.MODULE$, Arrays.toString(activeLocalDirs)), + MDC.of(LogKeys.NUM_SUB_DIRS$.MODULE$, subDirsPerLocalDir), + MDC.of(LogKeys.APP_ID$.MODULE$, appId)); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java index c628b201b2027..31c454f63a92e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java @@ -28,9 +28,11 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Sets; import com.google.common.util.concurrent.Uninterruptibles; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.sasl.SaslTimeoutException; import org.apache.spark.network.util.NettyUtils; @@ -68,7 +70,8 @@ void createAndStart(String[] blockIds, BlockTransferListener listener) private static final ExecutorService executorService = Executors.newCachedThreadPool( NettyUtils.createThreadFactory("Block Transfer Retry")); - private static final Logger logger = LoggerFactory.getLogger(RetryingBlockTransferor.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(RetryingBlockTransferor.class); /** Used to initiate new Block transfer on our remaining blocks. 
*/ private final BlockTransferStarter transferStarter; @@ -177,10 +180,16 @@ private void transferAllOutstanding() { try { transferStarter.createAndStart(blockIdsToTransfer, myListener); } catch (Exception e) { - logger.error(String.format("Exception while beginning %s of %s outstanding blocks %s", - listener.getTransferType(), blockIdsToTransfer.length, - numRetries > 0 ? "(after " + numRetries + " retries)" : ""), e); - + if (numRetries > 0) { + logger.error("Exception while beginning {} of {} outstanding blocks (after {} retries)", e, + MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()), + MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, blockIdsToTransfer.length), + MDC.of(LogKeys.NUM_RETRY$.MODULE$, numRetries)); + } else { + logger.error("Exception while beginning {} of {} outstanding blocks", e, + MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()), + MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, blockIdsToTransfer.length)); + } if (shouldRetry(e) && initiateRetry(e)) { // successfully initiated a retry return; @@ -207,8 +216,11 @@ synchronized boolean initiateRetry(Throwable e) { currentListener = new RetryingBlockTransferListener(); logger.info("Retrying {} ({}/{}) for {} outstanding blocks after {} ms", - listener.getTransferType(), retryCount, maxRetries, outstandingBlocksIds.size(), - retryWaitTime); + MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()), + MDC.of(LogKeys.NUM_RETRY$.MODULE$, retryCount), + MDC.of(LogKeys.MAX_ATTEMPTS$.MODULE$, maxRetries), + MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, outstandingBlocksIds.size()), + MDC.of(LogKeys.RETRY_WAIT_TIME$.MODULE$, retryWaitTime)); try { executorService.execute(() -> { @@ -298,9 +310,10 @@ private void handleBlockTransferFailure(String blockId, Throwable exception) { } } else { if (errorHandler.shouldLogError(exception)) { - logger.error( - String.format("Failed to %s block %s, and will not retry (%s retries)", - listener.getTransferType(), blockId, retryCount), exception); + logger.error("Failed to {} block {}, and will not retry ({} retries)", exception, + MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()), + MDC.of(LogKeys.BLOCK_ID$.MODULE$, blockId), + MDC.of(LogKeys.NUM_RETRY$.MODULE$,retryCount)); } else { logger.debug( String.format("Failed to %s block %s, and will not retry (%s retries)", diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleTransportContext.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleTransportContext.java index a0794113a080d..705d47aab3b50 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleTransportContext.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ShuffleTransportContext.java @@ -29,9 +29,8 @@ import io.netty.channel.socket.SocketChannel; import io.netty.handler.codec.MessageToMessageDecoder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.TransportContext; import org.apache.spark.network.protocol.Message; import org.apache.spark.network.protocol.MessageDecoder; @@ -52,7 +51,8 @@ * are processed in the separate handlers. 
* */ public class ShuffleTransportContext extends TransportContext { - private static final Logger logger = LoggerFactory.getLogger(ShuffleTransportContext.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ShuffleTransportContext.class); private static final ShuffleMessageDecoder SHUFFLE_DECODER = new ShuffleMessageDecoder(MessageDecoder.INSTANCE); private final EventLoopGroup finalizeWorkers; @@ -158,7 +158,7 @@ record RpcRequestInternal(BlockTransferMessage.Type messageType, RpcRequest rpcR } static class FinalizedHandler extends SimpleChannelInboundHandler { - private static final Logger logger = LoggerFactory.getLogger(FinalizedHandler.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(FinalizedHandler.class); public static final String HANDLER_NAME = "finalizeHandler"; private final TransportRequestHandler transportRequestHandler; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java index 1feac49752c8f..f9c0c60c2f2c6 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java @@ -25,9 +25,11 @@ import java.util.zip.Checksum; import com.google.common.io.ByteStreams; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.annotation.Private; import org.apache.spark.network.buffer.ManagedBuffer; @@ -36,8 +38,8 @@ */ @Private public class ShuffleChecksumHelper { - private static final Logger logger = - LoggerFactory.getLogger(ShuffleChecksumHelper.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ShuffleChecksumHelper.class); public static final int CHECKSUM_CALCULATION_BUFFER = 8192; public static final Checksum[] EMPTY_CHECKSUM = new Checksum[0]; @@ -149,7 +151,8 @@ public static Cause diagnoseCorruption( cause = Cause.UNSUPPORTED_CHECKSUM_ALGORITHM; } catch (FileNotFoundException e) { // Even if checksum is enabled, a checksum file may not exist if error throws during writing. 
- logger.warn("Checksum file " + checksumFile.getName() + " doesn't exit"); + logger.warn("Checksum file {} doesn't exit", + MDC.of(LogKeys.PATH$.MODULE$, checksumFile.getName())); cause = Cause.UNKNOWN_ISSUE; } catch (Exception e) { logger.warn("Unable to diagnose shuffle block corruption", e); @@ -162,7 +165,9 @@ public static Cause diagnoseCorruption( checksumByReader, checksumByWriter, checksumByReCalculation); } else { logger.info("Shuffle corruption diagnosis took {} ms, checksum file {}, cause {}", - duration, checksumFile.getAbsolutePath(), cause); + MDC.of(LogKeys.TIME$.MODULE$, duration), + MDC.of(LogKeys.PATH$.MODULE$, checksumFile.getAbsolutePath()), + MDC.of(LogKeys.REASON$.MODULE$, cause)); } return cause; } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java index fbde165fb39c9..edd5e1961a501 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java @@ -47,8 +47,10 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.roaringbitmap.RoaringBitmap; +// checkstyle.off: RegexpSinglelineJava import org.slf4j.Logger; import org.slf4j.LoggerFactory; +// checkstyle.on: RegexpSinglelineJava import static org.junit.jupiter.api.Assertions.*; diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java index 3725973ae7333..84c8b1b3353f2 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java @@ -288,7 +288,7 @@ public void testRetryOnSaslTimeout() throws IOException, InterruptedException { verify(listener, timeout(5000)).onBlockTransferSuccess("b0", block0); verify(listener).getTransferType(); verifyNoMoreInteractions(listener); - assert(_retryingBlockTransferor.getRetryCount() == 0); + assertEquals(0, _retryingBlockTransferor.getRetryCount()); } @Test @@ -310,7 +310,7 @@ public void testRepeatedSaslRetryFailures() throws IOException, InterruptedExcep verify(listener, timeout(5000)).onBlockTransferFailure("b0", saslTimeoutException); verify(listener, times(3)).getTransferType(); verifyNoMoreInteractions(listener); - assert(_retryingBlockTransferor.getRetryCount() == MAX_RETRIES); + assertEquals(MAX_RETRIES, _retryingBlockTransferor.getRetryCount()); } @Test @@ -339,7 +339,7 @@ public void testBlockTransferFailureAfterSasl() throws IOException, InterruptedE // This should be equal to 1 because after the SASL exception is retried, // retryCount should be set back to 0. Then after that b1 encounters an // exception that is retried. 
- assert(_retryingBlockTransferor.getRetryCount() == 1); + assertEquals(1, _retryingBlockTransferor.getRetryCount()); } @Test @@ -368,7 +368,7 @@ public void testIOExceptionFailsConnectionEvenWithSaslException() verify(listener, timeout(5000)).onBlockTransferFailure("b0", saslExceptionFinal); verify(listener, atLeastOnce()).getTransferType(); verifyNoMoreInteractions(listener); - assert(_retryingBlockTransferor.getRetryCount() == MAX_RETRIES); + assertEquals(MAX_RETRIES, _retryingBlockTransferor.getRetryCount()); } @Test diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java index 4068224665597..eeb936773aaad 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java @@ -28,8 +28,10 @@ import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; import org.apache.spark.network.util.JavaUtils; import org.junit.jupiter.api.Assertions; +// checkstyle.off: RegexpSinglelineJava import org.slf4j.Logger; import org.slf4j.LoggerFactory; +// checkstyle.on: RegexpSinglelineJava /** * Manages some sort-shuffle data, including the creation diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index 4cfbd8e96ac61..e0af3c5ae2468 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -53,9 +53,11 @@ import org.apache.spark.network.shuffledb.StoreVersion; import org.apache.spark.network.util.DBProvider; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.network.TransportContext; import org.apache.spark.network.crypto.AuthServerBootstrap; import org.apache.spark.network.sasl.ShuffleSecretManager; @@ -100,8 +102,9 @@ * This {@code classpath} configuration is only supported on YARN versions >= 2.9.0. 
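The RetryingBlockTransferorSuite hunks above also replace bare Java assert statements with JUnit's assertEquals. That is more than a style change: assert is skipped entirely unless the JVM runs with -ea, and on failure it reports nothing about the values involved, whereas assertEquals always executes and prints the expected and actual retry counts. A minimal sketch (the class and method are placeholders, not part of the patch):

import static org.junit.jupiter.api.Assertions.assertEquals;

public class RetryCountAssertionSketch {  // placeholder
  void checkRetryCount(int actualRetryCount) {
    // assert actualRetryCount == 0;    // silently skipped without -ea, opaque on failure
    assertEquals(0, actualRetryCount);  // always runs, reports expected vs. actual
  }
}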
*/ public class YarnShuffleService extends AuxiliaryService { - private static final Logger defaultLogger = LoggerFactory.getLogger(YarnShuffleService.class); - private Logger logger = defaultLogger; + private static final SparkLogger defaultSparkLogger = + SparkLoggerFactory.getLogger(YarnShuffleService.class); + private SparkLogger logger = defaultSparkLogger; // Port on which the shuffle server listens for fetch requests private static final String SPARK_SHUFFLE_SERVICE_PORT_KEY = "spark.shuffle.service.port"; @@ -237,14 +240,14 @@ protected void serviceInit(Configuration externalConf) throws Exception { .getResource(SHUFFLE_SERVICE_CONF_OVERLAY_RESOURCE_NAME); if (confOverlayUrl != null) { logger.info("Initializing Spark YARN shuffle service with configuration overlay from {}", - confOverlayUrl); + MDC.of(LogKeys.SHUFFLE_SERVICE_CONF_OVERLAY_URL$.MODULE$, confOverlayUrl)); _conf.addResource(confOverlayUrl); } String logsNamespace = _conf.get(SPARK_SHUFFLE_SERVICE_LOGS_NAMESPACE_KEY, ""); if (!logsNamespace.isEmpty()) { String className = YarnShuffleService.class.getName(); - logger = LoggerFactory.getLogger(className + "." + logsNamespace); + logger = SparkLoggerFactory.getLogger(className + "." + logsNamespace); } super.serviceInit(_conf); @@ -262,7 +265,8 @@ protected void serviceInit(Configuration externalConf) throws Exception { DBBackend.ROCKSDB.name()); dbBackend = DBBackend.byName(dbBackendName); logger.info("Use {} as the implementation of {}", - dbBackend, Constants.SHUFFLE_SERVICE_DB_BACKEND); + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME$.MODULE$, dbBackend), + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY$.MODULE$, Constants.SHUFFLE_SERVICE_DB_BACKEND)); } try { @@ -326,11 +330,12 @@ protected void serviceInit(Configuration externalConf) throws Exception { "PushBasedShuffleMergeManager", "Metrics on the push-based shuffle merge manager", mergeManagerMetrics); logger.info("Registered metrics with Hadoop's DefaultMetricsSystem using namespace '{}'", - metricsNamespace); + MDC.of(LogKeys.SHUFFLE_SERVICE_METRICS_NAMESPACE$.MODULE$, metricsNamespace)); - logger.info("Started YARN shuffle service for Spark on port {}. " + - "Authentication is {}. Registered executor file is {}", port, authEnabledString, - registeredExecutorFile); + logger.info("Started YARN shuffle service for Spark on port {}. Authentication is {}. 
" + + "Registered executor file is {}", MDC.of(LogKeys.PORT$.MODULE$, port), + MDC.of(LogKeys.AUTH_ENABLED$.MODULE$, authEnabledString), + MDC.of(LogKeys.REGISTERED_EXECUTOR_FILE$.MODULE$, registeredExecutorFile)); } catch (Exception e) { if (stopOnFailure) { throw e; @@ -363,7 +368,8 @@ static MergedShuffleFileManager newMergedShuffleFileManagerInstance( return mergeManagerSubClazz.getConstructor(TransportConf.class, File.class) .newInstance(conf, mergeManagerFile); } catch (Exception e) { - defaultLogger.error("Unable to create an instance of {}", mergeManagerImplClassName); + defaultSparkLogger.error("Unable to create an instance of {}", + MDC.of(LogKeys.CLASS_NAME$.MODULE$, mergeManagerImplClassName)); return new NoOpMergedShuffleFileManager(conf, mergeManagerFile); } } @@ -426,7 +432,8 @@ public void initializeApplication(ApplicationInitializationContext context) { Object metadataStorageVal = metaInfo.get(SPARK_SHUFFLE_SERVER_RECOVERY_DISABLED); if (metadataStorageVal != null && (Boolean) metadataStorageVal) { AppsWithRecoveryDisabled.disableRecoveryOfApp(appId); - logger.info("Disabling metadata persistence for application {}", appId); + logger.info("Disabling metadata persistence for application {}", + MDC.of(LogKeys.APP_ID$.MODULE$, appId)); } } catch (IOException ioe) { logger.warn("Unable to parse application data for service: " + payload); @@ -449,7 +456,8 @@ public void initializeApplication(ApplicationInitializationContext context) { secretManager.registerApp(appId, shuffleSecret); } } catch (Exception e) { - logger.error("Exception when initializing application {}", appId, e); + logger.error("Exception when initializing application {}", e, + MDC.of(LogKeys.APP_ID$.MODULE$, appId)); } } @@ -463,14 +471,16 @@ public void stopApplication(ApplicationTerminationContext context) { try { db.delete(dbAppKey(fullId)); } catch (IOException e) { - logger.error("Error deleting {} from executor state db", appId, e); + logger.error("Error deleting {} from executor state db", e, + MDC.of(LogKeys.APP_ID$.MODULE$, appId)); } } secretManager.unregisterApp(appId); } blockHandler.applicationRemoved(appId, false /* clean up local dirs */); } catch (Exception e) { - logger.error("Exception when stopping application {}", appId, e); + logger.error("Exception when stopping application {}", e, + MDC.of(LogKeys.APP_ID$.MODULE$, appId)); } finally { AppsWithRecoveryDisabled.removeApp(appId); } @@ -479,13 +489,13 @@ public void stopApplication(ApplicationTerminationContext context) { @Override public void initializeContainer(ContainerInitializationContext context) { ContainerId containerId = context.getContainerId(); - logger.info("Initializing container {}", containerId); + logger.info("Initializing container {}", MDC.of(LogKeys.CONTAINER_ID$.MODULE$, containerId)); } @Override public void stopContainer(ContainerTerminationContext context) { ContainerId containerId = context.getContainerId(); - logger.info("Stopping container {}", containerId); + logger.info("Stopping container {}", MDC.of(LogKeys.CONTAINER_ID$.MODULE$, containerId)); } /** @@ -566,8 +576,9 @@ protected File initRecoveryDb(String dbName) { fs.rename(copyFrom, newLoc); } catch (Exception e) { // Fail to move recovery file to new path, just continue on with new DB location - logger.error("Failed to move recovery file {} to the path {}", - dbName, _recoveryPath.toString(), e); + logger.error("Failed to move recovery file {} to the path {}", e, + MDC.of(LogKeys.SHUFFLE_MERGE_RECOVERY_FILE$.MODULE$, dbName), + MDC.of(LogKeys.PATH$.MODULE$, 
_recoveryPath.toString())); } } return new File(newLoc.toUri().getPath()); diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java new file mode 100644 index 0000000000000..272a8aa128141 --- /dev/null +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java @@ -0,0 +1,814 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.util; + +import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.text.BreakIterator; +import com.ibm.icu.text.StringSearch; +import com.ibm.icu.util.ULocale; + +import org.apache.spark.unsafe.UTF8StringBuilder; +import org.apache.spark.unsafe.types.UTF8String; + +import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET; +import static org.apache.spark.unsafe.Platform.copyMemory; + +import java.util.HashMap; +import java.util.Map; + +/** + * Utility class for collation-aware UTF8String operations. + */ +public class CollationAwareUTF8String { + + /** + * The constant value to indicate that the match is not found when searching for a pattern + * string in a target string. + */ + private static final int MATCH_NOT_FOUND = -1; + + /** + * Returns whether the target string starts with the specified prefix, starting from the + * specified position (0-based index referring to character position in UTF8String), with respect + * to the UTF8_LCASE collation. The method assumes that the prefix is already lowercased + * prior to method call to avoid the overhead of calling .toLowerCase() multiple times on the + * same prefix string. + * + * @param target the string to be searched in + * @param lowercasePattern the string to be searched for + * @param startPos the start position for searching (in the target string) + * @return whether the target string starts with the specified prefix in UTF8_LCASE + */ + public static boolean lowercaseMatchFrom( + final UTF8String target, + final UTF8String lowercasePattern, + int startPos) { + return lowercaseMatchLengthFrom(target, lowercasePattern, startPos) != MATCH_NOT_FOUND; + } + + /** + * Returns the length of the substring of the target string that starts with the specified + * prefix, starting from the specified position (0-based index referring to character position + * in UTF8String), with respect to the UTF8_LCASE collation. The method assumes that the + * prefix is already lowercased. The method only considers the part of target string that + * starts from the specified (inclusive) position (that is, the method does not look at UTF8 + * characters of the target string at or after position `endPos`). 
If the prefix is not found, + * MATCH_NOT_FOUND is returned. + * + * @param target the string to be searched in + * @param lowercasePattern the string to be searched for + * @param startPos the start position for searching (in the target string) + * @return length of the target substring that starts with the specified prefix in lowercase + */ + private static int lowercaseMatchLengthFrom( + final UTF8String target, + final UTF8String lowercasePattern, + int startPos) { + assert startPos >= 0; + for (int len = 0; len <= target.numChars() - startPos; ++len) { + if (target.substring(startPos, startPos + len).toLowerCase().equals(lowercasePattern)) { + return len; + } + } + return MATCH_NOT_FOUND; + } + + /** + * Returns the position of the first occurrence of the pattern string in the target string, + * starting from the specified position (0-based index referring to character position in + * UTF8String), with respect to the UTF8_LCASE collation. The method assumes that the + * pattern string is already lowercased prior to call. If the pattern is not found, + * MATCH_NOT_FOUND is returned. + * + * @param target the string to be searched in + * @param lowercasePattern the string to be searched for + * @param startPos the start position for searching (in the target string) + * @return the position of the first occurrence of pattern in target + */ + private static int lowercaseFind( + final UTF8String target, + final UTF8String lowercasePattern, + int startPos) { + assert startPos >= 0; + for (int i = startPos; i <= target.numChars(); ++i) { + if (lowercaseMatchFrom(target, lowercasePattern, i)) { + return i; + } + } + return MATCH_NOT_FOUND; + } + + /** + * Returns whether the target string ends with the specified suffix, ending at the specified + * position (0-based index referring to character position in UTF8String), with respect to the + * UTF8_LCASE collation. The method assumes that the suffix is already lowercased prior + * to method call to avoid the overhead of calling .toLowerCase() multiple times on the same + * suffix string. + * + * @param target the string to be searched in + * @param lowercasePattern the string to be searched for + * @param endPos the end position for searching (in the target string) + * @return whether the target string ends with the specified suffix in lowercase + */ + public static boolean lowercaseMatchUntil( + final UTF8String target, + final UTF8String lowercasePattern, + int endPos) { + return lowercaseMatchLengthUntil(target, lowercasePattern, endPos) != MATCH_NOT_FOUND; + } + + /** + * Returns the length of the substring of the target string that ends with the specified + * suffix, ending at the specified position (0-based index referring to character position in + * UTF8String), with respect to the UTF8_LCASE collation. The method assumes that the + * suffix is already lowercased. The method only considers the part of target string that ends + * at the specified (non-inclusive) position (that is, the method does not look at UTF8 + * characters of the target string at or after position `endPos`). If the suffix is not found, + * MATCH_NOT_FOUND is returned. 
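To make the matching semantics above concrete, here is a small illustrative call to the public lowercaseMatchFrom helper. The values are made up for the example; the only assumptions are what the javadoc and implementation above already state, namely that the pattern must be lowercased by the caller and that matching starts at a character index.

import org.apache.spark.sql.catalyst.util.CollationAwareUTF8String;
import org.apache.spark.unsafe.types.UTF8String;

public class LowercaseMatchExample {  // illustrative only, not part of the patch
  public static void main(String[] args) {
    UTF8String target = UTF8String.fromString("Hello World");
    UTF8String pattern = UTF8String.fromString("world");  // already lowercased by the caller
    // "World" starting at character position 6 lowercases to "world", so this prints true.
    System.out.println(CollationAwareUTF8String.lowercaseMatchFrom(target, pattern, 6));
  }
}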
+ * + * @param target the string to be searched in + * @param lowercasePattern the string to be searched for + * @param endPos the end position for searching (in the target string) + * @return length of the target substring that ends with the specified suffix in lowercase + */ + private static int lowercaseMatchLengthUntil( + final UTF8String target, + final UTF8String lowercasePattern, + int endPos) { + assert endPos <= target.numChars(); + for (int len = 0; len <= endPos; ++len) { + if (target.substring(endPos - len, endPos).toLowerCase().equals(lowercasePattern)) { + return len; + } + } + return MATCH_NOT_FOUND; + } + + /** + * Returns the position of the last occurrence of the pattern string in the target string, + * ending at the specified position (0-based index referring to character position in + * UTF8String), with respect to the UTF8_LCASE collation. The method assumes that the + * pattern string is already lowercased prior to call. If the pattern is not found, + * MATCH_NOT_FOUND is returned. + * + * @param target the string to be searched in + * @param lowercasePattern the string to be searched for + * @param endPos the end position for searching (in the target string) + * @return the position of the last occurrence of pattern in target + */ + private static int lowercaseRFind( + final UTF8String target, + final UTF8String lowercasePattern, + int endPos) { + assert endPos <= target.numChars(); + for (int i = endPos; i >= 0; --i) { + if (lowercaseMatchUntil(target, lowercasePattern, i)) { + return i; + } + } + return MATCH_NOT_FOUND; + } + + /** + * Lowercase UTF8String comparison used for UTF8_LCASE collation. While the default + * UTF8String comparison is equivalent to a.toLowerCase().binaryCompare(b.toLowerCase()), this + * method uses code points to compare the strings in a case-insensitive manner using ICU rules, + * as well as handling special rules for one-to-many case mappings (see: lowerCaseCodePoints). + * + * @param left The first UTF8String to compare. + * @param right The second UTF8String to compare. + * @return An integer representing the comparison result. + */ + public static int compareLowerCase(final UTF8String left, final UTF8String right) { + // Only if both strings are ASCII, we can use faster comparison (no string allocations). + if (left.isFullAscii() && right.isFullAscii()) { + return compareLowerCaseAscii(left, right); + } + return compareLowerCaseSlow(left, right); + } + + /** + * Fast version of the `compareLowerCase` method, used when both arguments are ASCII strings. + * + * @param left The first ASCII UTF8String to compare. + * @param right The second ASCII UTF8String to compare. + * @return An integer representing the comparison result. + */ + private static int compareLowerCaseAscii(final UTF8String left, final UTF8String right) { + int leftBytes = left.numBytes(), rightBytes = right.numBytes(); + for (int curr = 0; curr < leftBytes && curr < rightBytes; curr++) { + int lowerLeftByte = Character.toLowerCase(left.getByte(curr)); + int lowerRightByte = Character.toLowerCase(right.getByte(curr)); + if (lowerLeftByte != lowerRightByte) { + return lowerLeftByte - lowerRightByte; + } + } + return leftBytes - rightBytes; + } + + /** + * Slow version of the `compareLowerCase` method, used when both arguments are non-ASCII strings. + * + * @param left The first non-ASCII UTF8String to compare. + * @param right The second non-ASCII UTF8String to compare. + * @return An integer representing the comparison result. 
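The ASCII fast path in compareLowerCase can be illustrated with a tiny example; both inputs below are ASCII, so the byte-wise comparison applies and no lowercased copies are allocated. Names and values are illustrative only.

import org.apache.spark.sql.catalyst.util.CollationAwareUTF8String;
import org.apache.spark.unsafe.types.UTF8String;

public class CompareLowerCaseExample {  // illustrative only, not part of the patch
  public static void main(String[] args) {
    // 'a' == 'a', 'b' == 'b', then 'c' < 'd', so the result is negative.
    int cmp = CollationAwareUTF8String.compareLowerCase(
      UTF8String.fromString("ABC"), UTF8String.fromString("abd"));
    System.out.println(cmp < 0);  // true
  }
}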
+ */ + private static int compareLowerCaseSlow(final UTF8String left, final UTF8String right) { + return lowerCaseCodePoints(left).binaryCompare(lowerCaseCodePoints(right)); + } + + /* + * Performs string replacement for ICU collations by searching for instances of the search + * string in the `src` string, with respect to the specified collation, and then replacing + * them with the replace string. The method returns a new UTF8String with all instances of the + * search string replaced using the replace string. Similar to UTF8String.findInSet behavior + * used for UTF8_BINARY, the method returns the `src` string if the `search` string is empty. + * + * @param src the string to be searched in + * @param search the string to be searched for + * @param replace the string to be used as replacement + * @param collationId the collation ID to use for string search + * @return the position of the first occurrence of `match` in `set` + */ + public static UTF8String replace(final UTF8String src, final UTF8String search, + final UTF8String replace, final int collationId) { + // This collation aware implementation is based on existing implementation on UTF8String + if (src.numBytes() == 0 || search.numBytes() == 0) { + return src; + } + + StringSearch stringSearch = CollationFactory.getStringSearch(src, search, collationId); + + // Find the first occurrence of the search string. + int end = stringSearch.next(); + if (end == StringSearch.DONE) { + // Search string was not found, so string is unchanged. + return src; + } + + // Initialize byte positions + int c = 0; + int byteStart = 0; // position in byte + int byteEnd = 0; // position in byte + while (byteEnd < src.numBytes() && c < end) { + byteEnd += UTF8String.numBytesForFirstByte(src.getByte(byteEnd)); + c += 1; + } + + // At least one match was found. Estimate space needed for result. + // The 16x multiplier here is chosen to match commons-lang3's implementation. + int increase = Math.max(0, Math.abs(replace.numBytes() - search.numBytes())) * 16; + final UTF8StringBuilder buf = new UTF8StringBuilder(src.numBytes() + increase); + while (end != StringSearch.DONE) { + buf.appendBytes(src.getBaseObject(), src.getBaseOffset() + byteStart, byteEnd - byteStart); + buf.append(replace); + + // Move byteStart to the beginning of the current match + byteStart = byteEnd; + int cs = c; + // Move cs to the end of the current match + // This is necessary because the search string may contain 'multi-character' characters + while (byteStart < src.numBytes() && cs < c + stringSearch.getMatchLength()) { + byteStart += UTF8String.numBytesForFirstByte(src.getByte(byteStart)); + cs += 1; + } + // Go to next match + end = stringSearch.next(); + // Update byte positions + while (byteEnd < src.numBytes() && c < end) { + byteEnd += UTF8String.numBytesForFirstByte(src.getByte(byteEnd)); + c += 1; + } + } + buf.appendBytes(src.getBaseObject(), src.getBaseOffset() + byteStart, + src.numBytes() - byteStart); + return buf.build(); + } + + /* + * Performs string replacement for UTF8_LCASE collation by searching for instances of the search + * string in the src string, with respect to lowercased string versions, and then replacing + * them with the replace string. The method returns a new UTF8String with all instances of the + * search string replaced using the replace string. Similar to UTF8String.findInSet behavior + * used for UTF8_BINARY, the method returns the `src` string if the `search` string is empty. 
+ * + * @param src the string to be searched in + * @param search the string to be searched for + * @param replace the string to be used as replacement + * @param collationId the collation ID to use for string search + * @return the position of the first occurrence of `match` in `set` + */ + public static UTF8String lowercaseReplace(final UTF8String src, final UTF8String search, + final UTF8String replace) { + if (src.numBytes() == 0 || search.numBytes() == 0) { + return src; + } + + // TODO(SPARK-48725): Use lowerCaseCodePoints instead of UTF8String.toLowerCase. + UTF8String lowercaseSearch = search.toLowerCase(); + + int start = 0; + int end = lowercaseFind(src, lowercaseSearch, start); + if (end == -1) { + // Search string was not found, so string is unchanged. + return src; + } + + // At least one match was found. Estimate space needed for result. + // The 16x multiplier here is chosen to match commons-lang3's implementation. + int increase = Math.max(0, replace.numBytes() - search.numBytes()) * 16; + final UTF8StringBuilder buf = new UTF8StringBuilder(src.numBytes() + increase); + while (end != -1) { + buf.append(src.substring(start, end)); + buf.append(replace); + // Update character positions + start = end + lowercaseMatchLengthFrom(src, lowercaseSearch, end); + end = lowercaseFind(src, lowercaseSearch, start); + } + buf.append(src.substring(start, src.numChars())); + return buf.build(); + } + + /** + * Convert the input string to uppercase using the ICU root locale rules. + * + * @param target the input string + * @return the uppercase string + */ + public static UTF8String toUpperCase(final UTF8String target) { + if (target.isFullAscii()) return target.toUpperCaseAscii(); + return toUpperCaseSlow(target); + } + + private static UTF8String toUpperCaseSlow(final UTF8String target) { + // Note: In order to achieve the desired behavior, we use the ICU UCharacter class to + // convert the string to uppercase, which only accepts a Java strings as input. + // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid` + return UTF8String.fromString(UCharacter.toUpperCase(target.toString())); + } + + /** + * Convert the input string to uppercase using the specified ICU collation rules. + * + * @param target the input string + * @return the uppercase string + */ + public static UTF8String toUpperCase(final UTF8String target, final int collationId) { + if (target.isFullAscii()) return target.toUpperCaseAscii(); + return toUpperCaseSlow(target, collationId); + } + + private static UTF8String toUpperCaseSlow(final UTF8String target, final int collationId) { + // Note: In order to achieve the desired behavior, we use the ICU UCharacter class to + // convert the string to uppercase, which only accepts a Java strings as input. + ULocale locale = CollationFactory.fetchCollation(collationId) + .collator.getLocale(ULocale.ACTUAL_LOCALE); + // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid` + return UTF8String.fromString(UCharacter.toUpperCase(locale, target.toString())); + } + + /** + * Convert the input string to lowercase using the ICU root locale rules. 
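A quick worked example of lowercaseReplace, based only on the implementation above: lowercaseFind locates matches against the lowercased search string, and the matched span of the original string is swapped for the replacement. The inputs are made up for illustration.

import org.apache.spark.sql.catalyst.util.CollationAwareUTF8String;
import org.apache.spark.unsafe.types.UTF8String;

public class LowercaseReplaceExample {  // illustrative only, not part of the patch
  public static void main(String[] args) {
    // "WORLD" is lowercased to "world" internally, which matches "World" at position 6,
    // so the result is "Hello Spark".
    UTF8String out = CollationAwareUTF8String.lowercaseReplace(
      UTF8String.fromString("Hello World"),
      UTF8String.fromString("WORLD"),
      UTF8String.fromString("Spark"));
    System.out.println(out);
  }
}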
* + * @param target the input string + * @return the lowercase string + */ + public static UTF8String toLowerCase(final UTF8String target) { + if (target.isFullAscii()) return target.toLowerCaseAscii(); + return toLowerCaseSlow(target); + } + + private static UTF8String toLowerCaseSlow(final UTF8String target) { + // Note: In order to achieve the desired behavior, we use the ICU UCharacter class to + // convert the string to lowercase, which only accepts Java strings as input. + // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid` + return UTF8String.fromString(UCharacter.toLowerCase(target.toString())); + } + + /** + * Convert the input string to lowercase using the specified ICU collation rules. + * + * @param target the input string + * @return the lowercase string + */ + public static UTF8String toLowerCase(final UTF8String target, final int collationId) { + if (target.isFullAscii()) return target.toLowerCaseAscii(); + return toLowerCaseSlow(target, collationId); + } + + private static UTF8String toLowerCaseSlow(final UTF8String target, final int collationId) { + // Note: In order to achieve the desired behavior, we use the ICU UCharacter class to + // convert the string to lowercase, which only accepts Java strings as input. + ULocale locale = CollationFactory.fetchCollation(collationId) + .collator.getLocale(ULocale.ACTUAL_LOCALE); + // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid` + return UTF8String.fromString(UCharacter.toLowerCase(locale, target.toString())); + } + + /** + * Converts a single code point to lowercase using ICU rules, with special handling for + * one-to-many case mappings (i.e. characters that map to multiple characters in lowercase) and + * context-sensitive case mappings (i.e. characters that would map to different characters + * depending on their position in the string relative to other characters), which are handled + * here in a context-insensitive manner. + * + * @param codePoint The code point to convert to lowercase. + * @param sb The StringBuilder to append the lowercase character to. + */ + private static void lowercaseCodePoint(final int codePoint, final StringBuilder sb) { + if (codePoint == 0x0130) { + // Latin capital letter I with dot above is mapped to 2 lowercase characters. + sb.appendCodePoint(0x0069); + sb.appendCodePoint(0x0307); + } + else if (codePoint == 0x03C2) { + // Greek final and non-final small letter sigma are mapped to the same lowercase character. + sb.appendCodePoint(0x03C3); + } + else { + // All other characters should follow context-unaware ICU single-code point case mapping. + sb.appendCodePoint(UCharacter.toLowerCase(codePoint)); + } + } + + /** + * Converts an entire string to lowercase using ICU rules, code point by code point, with + * special handling for one-to-many case mappings (i.e. characters that map to multiple + * characters in lowercase). This method also discards context-sensitive case mapping + * information by relying on the context-unaware handling in the `lowercaseCodePoint` method. + * + * @param target The target string to convert to lowercase. + * @return The string converted to lowercase in a context-unaware manner.
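+ * For example (illustrative only): "İ" (U+0130) maps to "i" followed by a combining dot above + * (U+0069 U+0307), and final sigma "ς" (U+03C2) maps to "σ" (U+03C3), matching non-final sigma.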
*/ + public static UTF8String lowerCaseCodePoints(final UTF8String target) { + if (target.isFullAscii()) return target.toLowerCaseAscii(); + return lowerCaseCodePointsSlow(target); + } + + private static UTF8String lowerCaseCodePointsSlow(final UTF8String target) { + // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid` + String targetString = target.toString(); + StringBuilder sb = new StringBuilder(); + // Advance by whole code points so that supplementary characters are lowercased correctly. + for (int i = 0; i < targetString.length(); ) { + int codePoint = targetString.codePointAt(i); + lowercaseCodePoint(codePoint, sb); + i += Character.charCount(codePoint); + } + return UTF8String.fromString(sb.toString()); + } + + /** + * Convert the input string to titlecase using the ICU root locale rules. + */ + public static UTF8String toTitleCase(final UTF8String target) { + // Note: In order to achieve the desired behavior, we use the ICU UCharacter class to + // convert the string to titlecase, which only accepts Java strings as input. + // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid` + return UTF8String.fromString(UCharacter.toTitleCase(target.toString(), + BreakIterator.getWordInstance())); + } + + /** + * Convert the input string to titlecase using the specified ICU collation rules. + */ + public static UTF8String toTitleCase(final UTF8String target, final int collationId) { + ULocale locale = CollationFactory.fetchCollation(collationId) + .collator.getLocale(ULocale.ACTUAL_LOCALE); + // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid` + return UTF8String.fromString(UCharacter.toTitleCase(locale, target.toString(), + BreakIterator.getWordInstance(locale))); + } + + /** + * Returns the position of the first occurrence of the match string in the set string, + * counting ASCII commas as delimiters. The match string is compared in a collation-aware manner, + * with respect to the specified collation ID. Similar to the UTF8String.findInSet behavior used + * for UTF8_BINARY collation, the method returns 0 if the match string contains a comma. + * + * @param match the string to be searched for + * @param set the string to be searched in + * @param collationId the collation ID to use for string comparison + * @return the position of the first occurrence of `match` in `set` + */ + public static int findInSet(final UTF8String match, final UTF8String set, int collationId) { + // If the "word" string contains a comma, FindInSet should return 0. + if (match.contains(UTF8String.fromString(","))) { + return 0; + } + // Otherwise, search for commas in "set" and compare each substring with "word". + int byteIndex = 0, charIndex = 0, wordCount = 1, lastComma = -1; + while (byteIndex < set.numBytes()) { + byte nextByte = set.getByte(byteIndex); + if (nextByte == (byte) ',') { + if (set.substring(lastComma + 1, charIndex).semanticEquals(match, collationId)) { + return wordCount; + } + lastComma = charIndex; + ++wordCount; + } + byteIndex += UTF8String.numBytesForFirstByte(nextByte); + ++charIndex; + } + if (set.substring(lastComma + 1, set.numBytes()).semanticEquals(match, collationId)) { + return wordCount; + } + // If no match is found, return 0. + return 0; + } + + /** + * Returns the position of the first occurrence of the pattern string in the target string, + * starting from the specified position (0-based index referring to character position in + * UTF8String), with respect to the UTF8_LCASE collation. If the pattern is not found, + * MATCH_NOT_FOUND is returned.
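+ * For example (illustrative only): searching for "WORLD" in "Hello World" from position 0 + * returns 6, since UTF8_LCASE matching is case-insensitive.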
+ * + * @param target the string to be searched in + * @param pattern the string to be searched for + * @param start the start position for searching (in the target string) + * @return the position of the first occurrence of pattern in target + */ + public static int lowercaseIndexOf(final UTF8String target, final UTF8String pattern, + final int start) { + if (pattern.numChars() == 0) return target.indexOfEmpty(start); + return lowercaseFind(target, pattern.toLowerCase(), start); + } + + public static int indexOf(final UTF8String target, final UTF8String pattern, + final int start, final int collationId) { + if (pattern.numBytes() == 0) return target.indexOfEmpty(start); + if (target.numBytes() == 0) return MATCH_NOT_FOUND; + + StringSearch stringSearch = CollationFactory.getStringSearch(target, pattern, collationId); + stringSearch.setIndex(start); + + return stringSearch.next(); + } + + public static int find(UTF8String target, UTF8String pattern, int start, + int collationId) { + assert (pattern.numBytes() > 0); + + StringSearch stringSearch = CollationFactory.getStringSearch(target, pattern, collationId); + // Set search start position (start from character at start position) + stringSearch.setIndex(target.bytePosToChar(start)); + + // Return either the byte position or -1 if not found + return target.charPosToByte(stringSearch.next()); + } + + public static UTF8String subStringIndex(final UTF8String string, final UTF8String delimiter, + int count, final int collationId) { + if (delimiter.numBytes() == 0 || count == 0 || string.numBytes() == 0) { + return UTF8String.EMPTY_UTF8; + } + if (count > 0) { + int idx = -1; + while (count > 0) { + idx = find(string, delimiter, idx + 1, collationId); + if (idx >= 0) { + count --; + } else { + // can not find enough delim + return string; + } + } + if (idx == 0) { + return UTF8String.EMPTY_UTF8; + } + byte[] bytes = new byte[idx]; + copyMemory(string.getBaseObject(), string.getBaseOffset(), bytes, BYTE_ARRAY_OFFSET, idx); + return UTF8String.fromBytes(bytes); + + } else { + count = -count; + + StringSearch stringSearch = CollationFactory + .getStringSearch(string, delimiter, collationId); + + int start = string.numChars() - 1; + int lastMatchLength = 0; + int prevStart = -1; + while (count > 0) { + stringSearch.reset(); + prevStart = -1; + int matchStart = stringSearch.next(); + lastMatchLength = stringSearch.getMatchLength(); + while (matchStart <= start) { + if (matchStart != StringSearch.DONE) { + // Found a match, update the start position + prevStart = matchStart; + matchStart = stringSearch.next(); + } else { + break; + } + } + + if (prevStart == -1) { + // can not find enough delim + return string; + } else { + start = prevStart - 1; + count--; + } + } + + int resultStart = prevStart + lastMatchLength; + if (resultStart == string.numChars()) { + return UTF8String.EMPTY_UTF8; + } + + return string.substring(resultStart, string.numChars()); + } + } + + public static UTF8String lowercaseSubStringIndex(final UTF8String string, + final UTF8String delimiter, int count) { + if (delimiter.numBytes() == 0 || count == 0) { + return UTF8String.EMPTY_UTF8; + } + + UTF8String lowercaseDelimiter = delimiter.toLowerCase(); + + if (count > 0) { + // Search left to right (note: the start code point is inclusive). + int matchLength = -1; + while (count > 0) { + matchLength = lowercaseFind(string, lowercaseDelimiter, matchLength + 1); + if (matchLength > MATCH_NOT_FOUND) --count; // Found a delimiter. 
+ else return string; // Cannot find enough delimiters in the string. + } + return string.substring(0, matchLength); + } else { + // Search right to left (note: the end code point is exclusive). + int matchLength = string.numChars() + 1; + count = -count; + while (count > 0) { + matchLength = lowercaseRFind(string, lowercaseDelimiter, matchLength - 1); + if (matchLength > MATCH_NOT_FOUND) --count; // Found a delimiter. + else return string; // Cannot find enough delimiters in the string. + } + return string.substring(matchLength, string.numChars()); + } + } + + public static Map getCollationAwareDict(UTF8String string, + Map dict, int collationId) { + // TODO(SPARK-48715): All UTF8String -> String conversions should use `makeValid` + String srcStr = string.toString(); + + Map collationAwareDict = new HashMap<>(); + for (String key : dict.keySet()) { + StringSearch stringSearch = + CollationFactory.getStringSearch(string, UTF8String.fromString(key), collationId); + + int pos = 0; + while ((pos = stringSearch.next()) != StringSearch.DONE) { + int codePoint = srcStr.codePointAt(pos); + int charCount = Character.charCount(codePoint); + String newKey = srcStr.substring(pos, pos + charCount); + + boolean exists = false; + for (String existingKey : collationAwareDict.keySet()) { + if (stringSearch.getCollator().compare(existingKey, newKey) == 0) { + collationAwareDict.put(newKey, collationAwareDict.get(existingKey)); + exists = true; + break; + } + } + + if (!exists) { + collationAwareDict.put(newKey, dict.get(key)); + } + } + } + + return collationAwareDict; + } + + public static UTF8String lowercaseTrim( + final UTF8String srcString, + final UTF8String trimString) { + // Matching UTF8String behavior for null `trimString`. + if (trimString == null) { + return null; + } + + UTF8String leftTrimmed = lowercaseTrimLeft(srcString, trimString); + return lowercaseTrimRight(leftTrimmed, trimString); + } + + public static UTF8String lowercaseTrimLeft( + final UTF8String srcString, + final UTF8String trimString) { + // Matching UTF8String behavior for null `trimString`. + if (trimString == null) { + return null; + } + + // The searching byte position in the srcString. + int searchIdx = 0; + // The byte position of a first non-matching character in the srcString. + int trimByteIdx = 0; + // Number of bytes in srcString. + int numBytes = srcString.numBytes(); + // Convert trimString to lowercase, so it can be searched properly. + UTF8String lowercaseTrimString = trimString.toLowerCase(); + + while (searchIdx < numBytes) { + UTF8String searchChar = srcString.copyUTF8String( + searchIdx, + searchIdx + UTF8String.numBytesForFirstByte(srcString.getByte(searchIdx)) - 1); + int searchCharBytes = searchChar.numBytes(); + + // Try to find the matching for the searchChar in the trimString. + if (lowercaseTrimString.find(searchChar.toLowerCase(), 0) >= 0) { + trimByteIdx += searchCharBytes; + searchIdx += searchCharBytes; + } else { + // No matching, exit the search. + break; + } + } + + if (searchIdx == 0) { + // Nothing trimmed - return original string (not converted to lowercase). + return srcString; + } + if (trimByteIdx >= numBytes) { + // Everything trimmed. + return UTF8String.EMPTY_UTF8; + } + return srcString.copyUTF8String(trimByteIdx, numBytes - 1); + } + + public static UTF8String lowercaseTrimRight( + final UTF8String srcString, + final UTF8String trimString) { + // Matching UTF8String behavior for null `trimString`. 
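+ // Note: the implementation below first records the byte position and length of every character + // in srcString, and then walks characters from the right, trimming while each (lowercased) + // character is found in the lowercased trim string.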
+ if (trimString == null) { + return null; + } + + // Number of bytes iterated from the srcString. + int byteIdx = 0; + // Number of characters iterated from the srcString. + int numChars = 0; + // Number of bytes in srcString. + int numBytes = srcString.numBytes(); + // Array of character lengths for the srcString. + int[] stringCharLen = new int[numBytes]; + // Array of the first byte position for each character in the srcString. + int[] stringCharPos = new int[numBytes]; + // Convert trimString to lowercase, so it can be searched properly. + UTF8String lowercaseTrimString = trimString.toLowerCase(); + + // Build the position and length array. + while (byteIdx < numBytes) { + stringCharPos[numChars] = byteIdx; + stringCharLen[numChars] = UTF8String.numBytesForFirstByte(srcString.getByte(byteIdx)); + byteIdx += stringCharLen[numChars]; + numChars++; + } + + // Index trimByteIdx points to the first non-matching byte position from the right side of + // the source string. + int trimByteIdx = numBytes - 1; + + while (numChars > 0) { + UTF8String searchChar = srcString.copyUTF8String( + stringCharPos[numChars - 1], + stringCharPos[numChars - 1] + stringCharLen[numChars - 1] - 1); + + if (lowercaseTrimString.find(searchChar.toLowerCase(), 0) >= 0) { + trimByteIdx -= stringCharLen[numChars - 1]; + numChars--; + } else { + break; + } + } + + if (trimByteIdx == numBytes - 1) { + // Nothing trimmed. + return srcString; + } + if (trimByteIdx < 0) { + // Everything trimmed. + return UTF8String.EMPTY_UTF8; + } + return srcString.copyUTF8String(0, trimByteIdx); + } + + // TODO: Add more collation-aware UTF8String operations here. + +} diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java index 9786c559da44b..f13f66e384e0f 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java @@ -19,12 +19,15 @@ import java.text.CharacterIterator; import java.text.StringCharacterIterator; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; import java.util.function.BiFunction; import java.util.function.ToLongFunction; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.StringSearch; import com.ibm.icu.util.ULocale; +import com.ibm.icu.text.CollationKey; import com.ibm.icu.text.Collator; import org.apache.spark.SparkException; @@ -35,11 +38,62 @@ * Provides functionality to the UTF8String object which respects defined collation settings. */ public final class CollationFactory { + + /** + * Identifier for a single collation.
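+ * For example (illustrative only), a full identifier string may look like + * "icu.UNICODE_CI.153.120.0.0" (provider.name.version), while the version-less form is + * "icu.UNICODE_CI".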
+ */ + public static class CollationIdentifier { + private final String provider; + private final String name; + private final String version; + + public CollationIdentifier(String provider, String collationName, String version) { + this.provider = provider; + this.name = collationName; + this.version = version; + } + + public static CollationIdentifier fromString(String identifier) { + long numDots = identifier.chars().filter(ch -> ch == '.').count(); + assert(numDots > 0); + + if (numDots == 1) { + String[] parts = identifier.split("\\.", 2); + return new CollationIdentifier(parts[0], parts[1], null); + } + + String[] parts = identifier.split("\\.", 3); + return new CollationIdentifier(parts[0], parts[1], parts[2]); + } + + /** + * Returns the identifier's string value without the version. + * This is used for the table schema as the schema doesn't care about the version, + * only the statistics do. + */ + public String toStringWithoutVersion() { + return String.format("%s.%s", provider, name); + } + + public String getProvider() { + return provider; + } + + public String getName() { + return name; + } + + public Optional getVersion() { + return Optional.ofNullable(version); + } + } + /** * Entry encapsulating all information about a collation. */ public static class Collation { public final String collationName; + public final String provider; public final Collator collator; public final Comparator comparator; @@ -81,13 +135,14 @@ public static class Collation { /** * Support for Lowercase Equality implies that it is possible to check equality on * byte by byte level, but only after calling "UTF8String.toLowerCase" on both arguments. - * This allows custom collation support for UTF8_BINARY_LCASE collation in various Spark + * This allows custom collation support for UTF8_LCASE collation in various Spark * expressions, as this particular collation is not supported by the external ICU library. */ public final boolean supportsLowercaseEquality; public Collation( String collationName, + String provider, Collator collator, Comparator comparator, String version, @@ -96,6 +151,7 @@ public Collation( boolean supportsBinaryOrdering, boolean supportsLowercaseEquality) { this.collationName = collationName; + this.provider = provider; this.collator = collator; this.comparator = comparator; this.version = version; @@ -109,6 +165,8 @@ public Collation( // No Collation can simultaneously support binary equality and lowercase equality assert(!supportsBinaryEquality || !supportsLowercaseEquality); + assert(SUPPORTED_PROVIDERS.contains(provider)); + if (supportsBinaryEquality) { this.equalsFunction = UTF8String::equals; } else { @@ -117,76 +175,576 @@ public Collation( } /** - * Constructor with comparators that are inherited from the given collator. + * Collation ID is defined as 32-bit integer. We specify binary layouts for different classes of + * collations. Classes of collations are differentiated by most significant 3 bits (bit 31, 30 + * and 29), bit 31 being most significant and bit 0 being least significant. + * --- + * General collation ID binary layout: + * bit 31: 1 for INDETERMINATE (requires all other bits to be 1 as well), 0 otherwise. + * bit 30: 0 for predefined, 1 for user-defined. + * Following bits are specified for predefined collations: + * bit 29: 0 for UTF8_BINARY, 1 for ICU collations. + * bit 28-24: Reserved. + * bit 23-22: Reserved for version. + * bit 21-18: Reserved for space trimming. + * bit 17-0: Depend on collation family. 
+ * --- + * INDETERMINATE collation ID binary layout: + * bit 31-0: 1 + * INDETERMINATE collation ID is equal to -1. + * --- + * User-defined collation ID binary layout: + * bit 31: 0 + * bit 30: 1 + * bit 29-0: Undefined, reserved for future use. + * --- + * UTF8_BINARY collation ID binary layout: + * bit 31-24: Zeroes. + * bit 23-22: Zeroes, reserved for version. + * bit 21-18: Zeroes, reserved for space trimming. + * bit 17-3: Zeroes. + * bit 2: 0, reserved for accent sensitivity. + * bit 1: 0, reserved for uppercase and case-insensitive. + * bit 0: 0 = case-sensitive, 1 = lowercase. + * --- + * ICU collation ID binary layout: + * bit 31-30: Zeroes. + * bit 29: 1 + * bit 28-24: Zeroes. + * bit 23-22: Zeroes, reserved for version. + * bit 21-18: Zeroes, reserved for space trimming. + * bit 17: 0 = case-sensitive, 1 = case-insensitive. + * bit 16: 0 = accent-sensitive, 1 = accent-insensitive. + * bit 15-14: Zeroes, reserved for punctuation sensitivity. + * bit 13-12: Zeroes, reserved for first letter preference. + * bit 11-0: Locale ID as specified in `ICULocaleToId` mapping. + * --- + * Some illustrative examples of collation name to ID mapping: + * - UTF8_BINARY -> 0 + * - UTF8_LCASE -> 1 + * - UNICODE -> 0x20000000 + * - UNICODE_AI -> 0x20010000 + * - UNICODE_CI -> 0x20020000 + * - UNICODE_CI_AI -> 0x20030000 + * - af -> 0x20000001 + * - af_CI_AI -> 0x20030001 */ - public Collation( - String collationName, - Collator collator, - String version, - boolean supportsBinaryEquality, - boolean supportsBinaryOrdering, - boolean supportsLowercaseEquality) { - this( - collationName, - collator, - (s1, s2) -> collator.compare(s1.toString(), s2.toString()), - version, - s -> (long)collator.getCollationKey(s.toString()).hashCode(), - supportsBinaryEquality, - supportsBinaryOrdering, - supportsLowercaseEquality); + private abstract static class CollationSpec { + + /** + * Bit 30 in collation ID having value 0 for predefined and 1 for user-defined collation. + */ + private enum DefinitionOrigin { + PREDEFINED, USER_DEFINED + } + + /** + * Bit 29 in collation ID having value 0 for UTF8_BINARY family and 1 for ICU family of + * collations. + */ + protected enum ImplementationProvider { + UTF8_BINARY, ICU + } + + /** + * Offset in binary collation ID layout. + */ + private static final int DEFINITION_ORIGIN_OFFSET = 30; + + /** + * Bitmask corresponding to width in bits in binary collation ID layout. + */ + private static final int DEFINITION_ORIGIN_MASK = 0b1; + + /** + * Offset in binary collation ID layout. + */ + protected static final int IMPLEMENTATION_PROVIDER_OFFSET = 29; + + /** + * Bitmask corresponding to width in bits in binary collation ID layout. + */ + protected static final int IMPLEMENTATION_PROVIDER_MASK = 0b1; + + private static final int INDETERMINATE_COLLATION_ID = -1; + + /** + * Thread-safe cache mapping collation IDs to corresponding `Collation` instances. + * We add entries to this cache lazily as new `Collation` instances are requested. + */ + private static final Map collationMap = new ConcurrentHashMap<>(); + + /** + * Utility function to retrieve `ImplementationProvider` enum instance from collation ID. + */ + private static ImplementationProvider getImplementationProvider(int collationId) { + return ImplementationProvider.values()[SpecifierUtils.getSpecValue(collationId, + IMPLEMENTATION_PROVIDER_OFFSET, IMPLEMENTATION_PROVIDER_MASK)]; + } + + /** + * Utility function to retrieve `DefinitionOrigin` enum instance from collation ID. 
+ */ + private static DefinitionOrigin getDefinitionOrigin(int collationId) { + return DefinitionOrigin.values()[SpecifierUtils.getSpecValue(collationId, + DEFINITION_ORIGIN_OFFSET, DEFINITION_ORIGIN_MASK)]; + } + + /** + * Main entry point for retrieving `Collation` instance from collation ID. + */ + private static Collation fetchCollation(int collationId) { + // User-defined collations and INDETERMINATE collations cannot produce a `Collation` + // instance. + assert (collationId >= 0 && getDefinitionOrigin(collationId) + == DefinitionOrigin.PREDEFINED); + if (collationId == UTF8_BINARY_COLLATION_ID) { + // Skip cache. + return CollationSpecUTF8.UTF8_BINARY_COLLATION; + } else if (collationMap.containsKey(collationId)) { + // Already in cache. + return collationMap.get(collationId); + } else { + // Build `Collation` instance and put into cache. + CollationSpec spec; + ImplementationProvider implementationProvider = getImplementationProvider(collationId); + if (implementationProvider == ImplementationProvider.UTF8_BINARY) { + spec = CollationSpecUTF8.fromCollationId(collationId); + } else { + spec = CollationSpecICU.fromCollationId(collationId); + } + Collation collation = spec.buildCollation(); + collationMap.put(collationId, collation); + return collation; + } + } + + /** + * Method for constructing errors thrown on providing invalid collation name. + */ + protected static SparkException collationInvalidNameException(String collationName) { + Map params = new HashMap<>(); + final int maxSuggestions = 3; + params.put("collationName", collationName); + params.put("proposals", getClosestSuggestionsOnInvalidName(collationName, maxSuggestions)); + return new SparkException("COLLATION_INVALID_NAME", + SparkException.constructMessageParams(params), null); + } + + private static int collationNameToId(String collationName) throws SparkException { + // Collation names provided by user are treated as case-insensitive. + String collationNameUpper = collationName.toUpperCase(); + if (collationNameUpper.startsWith("UTF8_")) { + return CollationSpecUTF8.collationNameToId(collationName, collationNameUpper); + } else { + return CollationSpecICU.collationNameToId(collationName, collationNameUpper); + } + } + + protected abstract Collation buildCollation(); } - } - private static final Collation[] collationTable = new Collation[4]; - private static final HashMap collationNameToIdMap = new HashMap<>(); - - public static final int UTF8_BINARY_COLLATION_ID = 0; - public static final int UTF8_BINARY_LCASE_COLLATION_ID = 1; - - static { - // Binary comparison. This is the default collation. - // No custom comparators will be used for this collation. - // Instead, we rely on byte for byte comparison. - collationTable[0] = new Collation( - "UTF8_BINARY", - null, - UTF8String::binaryCompare, - "1.0", - s -> (long)s.hashCode(), - true, - true, - false); - - // Case-insensitive UTF8 binary collation. - // TODO: Do in place comparisons instead of creating new strings. - collationTable[1] = new Collation( - "UTF8_BINARY_LCASE", - null, - UTF8String::compareLowerCase, - "1.0", - (s) -> (long)s.toLowerCase().hashCode(), - false, - false, - true); - - // UNICODE case sensitive comparison (ROOT locale, in ICU). - collationTable[2] = new Collation( - "UNICODE", Collator.getInstance(ULocale.ROOT), "153.120.0.0", true, false, false); - collationTable[2].collator.setStrength(Collator.TERTIARY); - collationTable[2].collator.freeze(); - - // UNICODE case-insensitive comparison (ROOT locale, in ICU + Secondary strength). 
- collationTable[3] = new Collation( - "UNICODE_CI", Collator.getInstance(ULocale.ROOT), "153.120.0.0", false, false, false); - collationTable[3].collator.setStrength(Collator.SECONDARY); - collationTable[3].collator.freeze(); - - for (int i = 0; i < collationTable.length; i++) { - collationNameToIdMap.put(collationTable[i].collationName, i); + private static class CollationSpecUTF8 extends CollationSpec { + + /** + * Bit 0 in collation ID having value 0 for plain UTF8_BINARY and 1 for UTF8_LCASE + * collation. + */ + private enum CaseSensitivity { + UNSPECIFIED, LCASE + } + + /** + * Offset in binary collation ID layout. + */ + private static final int CASE_SENSITIVITY_OFFSET = 0; + + /** + * Bitmask corresponding to width in bits in binary collation ID layout. + */ + private static final int CASE_SENSITIVITY_MASK = 0b1; + + private static final int UTF8_BINARY_COLLATION_ID = + new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED).collationId; + private static final int UTF8_LCASE_COLLATION_ID = + new CollationSpecUTF8(CaseSensitivity.LCASE).collationId; + protected static Collation UTF8_BINARY_COLLATION = + new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED).buildCollation(); + protected static Collation UTF8_LCASE_COLLATION = + new CollationSpecUTF8(CaseSensitivity.LCASE).buildCollation(); + + private final int collationId; + + private CollationSpecUTF8(CaseSensitivity caseSensitivity) { + this.collationId = + SpecifierUtils.setSpecValue(0, CASE_SENSITIVITY_OFFSET, caseSensitivity); + } + + private static int collationNameToId(String originalName, String collationName) + throws SparkException { + if (UTF8_BINARY_COLLATION.collationName.equals(collationName)) { + return UTF8_BINARY_COLLATION_ID; + } else if (UTF8_LCASE_COLLATION.collationName.equals(collationName)) { + return UTF8_LCASE_COLLATION_ID; + } else { + // Throw exception with original (before case conversion) collation name. + throw collationInvalidNameException(originalName); + } + } + + private static CollationSpecUTF8 fromCollationId(int collationId) { + // Extract case sensitivity from collation ID. + int caseConversionOrdinal = SpecifierUtils.getSpecValue(collationId, + CASE_SENSITIVITY_OFFSET, CASE_SENSITIVITY_MASK); + // Verify only case sensitivity bits were set settable in UTF8_BINARY family of collations. + assert (SpecifierUtils.removeSpec(collationId, + CASE_SENSITIVITY_OFFSET, CASE_SENSITIVITY_MASK) == 0); + return new CollationSpecUTF8(CaseSensitivity.values()[caseConversionOrdinal]); + } + + @Override + protected Collation buildCollation() { + if (collationId == UTF8_BINARY_COLLATION_ID) { + return new Collation( + "UTF8_BINARY", + PROVIDER_SPARK, + null, + UTF8String::binaryCompare, + "1.0", + s -> (long) s.hashCode(), + /* supportsBinaryEquality = */ true, + /* supportsBinaryOrdering = */ true, + /* supportsLowercaseEquality = */ false); + } else { + return new Collation( + "UTF8_LCASE", + PROVIDER_SPARK, + null, + CollationAwareUTF8String::compareLowerCase, + "1.0", + s -> (long) CollationAwareUTF8String.lowerCaseCodePoints(s).hashCode(), + /* supportsBinaryEquality = */ false, + /* supportsBinaryOrdering = */ false, + /* supportsLowercaseEquality = */ true); + } + } + } + + private static class CollationSpecICU extends CollationSpec { + + /** + * Bit 17 in collation ID having value 0 for case-sensitive and 1 for case-insensitive + * collation. + */ + private enum CaseSensitivity { + CS, CI + } + + /** + * Bit 16 in collation ID having value 0 for accent-sensitive and 1 for accent-insensitive + * collation. 
+ */ + private enum AccentSensitivity { + AS, AI + } + + /** + * Offset in binary collation ID layout. + */ + private static final int CASE_SENSITIVITY_OFFSET = 17; + + /** + * Bitmask corresponding to width in bits in binary collation ID layout. + */ + private static final int CASE_SENSITIVITY_MASK = 0b1; + + /** + * Offset in binary collation ID layout. + */ + private static final int ACCENT_SENSITIVITY_OFFSET = 16; + + /** + * Bitmask corresponding to width in bits in binary collation ID layout. + */ + private static final int ACCENT_SENSITIVITY_MASK = 0b1; + + /** + * Array of locale names, each locale ID corresponds to the index in this array. + */ + private static final String[] ICULocaleNames; + + /** + * Mapping of locale names to corresponding `ULocale` instance. + */ + private static final Map ICULocaleMap = new HashMap<>(); + + /** + * Used to parse user input collation names which are converted to uppercase. + */ + private static final Map ICULocaleMapUppercase = new HashMap<>(); + + /** + * Reverse mapping of `ICULocaleNames`. + */ + private static final Map ICULocaleToId = new HashMap<>(); + + /** + * ICU library Collator version passed to `Collation` instance. + */ + private static final String ICU_COLLATOR_VERSION = "153.120.0.0"; + + static { + ICULocaleMap.put("UNICODE", ULocale.ROOT); + // ICU-implemented `ULocale`s which have corresponding `Collator` installed. + ULocale[] locales = Collator.getAvailableULocales(); + // Build locale names in format: language["_" optional script]["_" optional country code]. + // Examples: en, en_USA, sr_Cyrl_SRB + for (ULocale locale : locales) { + // Skip variants. + if (locale.getVariant().isEmpty()) { + String language = locale.getLanguage(); + // Require non-empty language as first component of locale name. + assert (!language.isEmpty()); + StringBuilder builder = new StringBuilder(language); + // Script tag. + String script = locale.getScript(); + if (!script.isEmpty()) { + builder.append('_'); + builder.append(script); + } + // 3-letter country code. + String country = locale.getISO3Country(); + if (!country.isEmpty()) { + builder.append('_'); + builder.append(country); + } + String localeName = builder.toString(); + // Verify locale names are unique. + assert (!ICULocaleMap.containsKey(localeName)); + ICULocaleMap.put(localeName, locale); + } + } + // Construct uppercase-normalized locale name mapping. + for (String localeName : ICULocaleMap.keySet()) { + String localeUppercase = localeName.toUpperCase(); + // Locale names are unique case-insensitively. + assert (!ICULocaleMapUppercase.containsKey(localeUppercase)); + ICULocaleMapUppercase.put(localeUppercase, localeName); + } + // Construct locale name to ID mapping. Locale ID is defined as index in `ICULocaleNames`. + ICULocaleNames = ICULocaleMap.keySet().toArray(new String[0]); + Arrays.sort(ICULocaleNames); + // Maximum number of locale IDs as defined by binary layout. 
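+ // Bits 11-0 of the collation ID hold the locale ID, so the binary layout allows for at most + // 1 << 12 = 4096 distinct ICU locales.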
+ assert (ICULocaleNames.length <= (1 << 12)); + for (int i = 0; i < ICULocaleNames.length; ++i) { + ICULocaleToId.put(ICULocaleNames[i], i); + } + } + + private static final int UNICODE_COLLATION_ID = + new CollationSpecICU("UNICODE", CaseSensitivity.CS, AccentSensitivity.AS).collationId; + private static final int UNICODE_CI_COLLATION_ID = + new CollationSpecICU("UNICODE", CaseSensitivity.CI, AccentSensitivity.AS).collationId; + + private final CaseSensitivity caseSensitivity; + private final AccentSensitivity accentSensitivity; + private final String locale; + private final int collationId; + + private CollationSpecICU(String locale, CaseSensitivity caseSensitivity, + AccentSensitivity accentSensitivity) { + this.locale = locale; + this.caseSensitivity = caseSensitivity; + this.accentSensitivity = accentSensitivity; + // Construct collation ID from locale, case-sensitivity and accent-sensitivity specifiers. + int collationId = ICULocaleToId.get(locale); + // Mandatory ICU implementation provider. + collationId = SpecifierUtils.setSpecValue(collationId, IMPLEMENTATION_PROVIDER_OFFSET, + ImplementationProvider.ICU); + collationId = SpecifierUtils.setSpecValue(collationId, CASE_SENSITIVITY_OFFSET, + caseSensitivity); + collationId = SpecifierUtils.setSpecValue(collationId, ACCENT_SENSITIVITY_OFFSET, + accentSensitivity); + this.collationId = collationId; + } + + private static int collationNameToId( + String originalName, String collationName) throws SparkException { + // Search for the longest locale match because specifiers are designed to be different from + // script tag and country code, meaning the only valid locale name match can be the longest + // one. + int lastPos = -1; + for (int i = 1; i <= collationName.length(); i++) { + String localeName = collationName.substring(0, i); + if (ICULocaleMapUppercase.containsKey(localeName)) { + lastPos = i; + } + } + if (lastPos == -1) { + throw collationInvalidNameException(originalName); + } else { + String locale = collationName.substring(0, lastPos); + int collationId = ICULocaleToId.get(ICULocaleMapUppercase.get(locale)); + + // Try all combinations of AS/AI and CS/CI. + CaseSensitivity caseSensitivity; + AccentSensitivity accentSensitivity; + if (collationName.equals(locale) || + collationName.equals(locale + "_AS") || + collationName.equals(locale + "_CS") || + collationName.equals(locale + "_AS_CS") || + collationName.equals(locale + "_CS_AS") + ) { + caseSensitivity = CaseSensitivity.CS; + accentSensitivity = AccentSensitivity.AS; + } else if (collationName.equals(locale + "_CI") || + collationName.equals(locale + "_AS_CI") || + collationName.equals(locale + "_CI_AS")) { + caseSensitivity = CaseSensitivity.CI; + accentSensitivity = AccentSensitivity.AS; + } else if (collationName.equals(locale + "_AI") || + collationName.equals(locale + "_CS_AI") || + collationName.equals(locale + "_AI_CS")) { + caseSensitivity = CaseSensitivity.CS; + accentSensitivity = AccentSensitivity.AI; + } else if (collationName.equals(locale + "_AI_CI") || + collationName.equals(locale + "_CI_AI")) { + caseSensitivity = CaseSensitivity.CI; + accentSensitivity = AccentSensitivity.AI; + } else { + throw collationInvalidNameException(originalName); + } + + // Build collation ID from computed specifiers. 
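+ // For example, per the illustrative mapping in the collation ID layout above, "UNICODE_CI_AI" + // combines locale ID 0 (UNICODE) with the ICU, CI and AI bits, yielding 0x20030000.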
+ collationId = SpecifierUtils.setSpecValue(collationId, + IMPLEMENTATION_PROVIDER_OFFSET, ImplementationProvider.ICU); + collationId = SpecifierUtils.setSpecValue(collationId, + CASE_SENSITIVITY_OFFSET, caseSensitivity); + collationId = SpecifierUtils.setSpecValue(collationId, + ACCENT_SENSITIVITY_OFFSET, accentSensitivity); + return collationId; + } + } + + private static CollationSpecICU fromCollationId(int collationId) { + // Parse specifiers from collation ID. + int caseSensitivityOrdinal = SpecifierUtils.getSpecValue(collationId, + CASE_SENSITIVITY_OFFSET, CASE_SENSITIVITY_MASK); + int accentSensitivityOrdinal = SpecifierUtils.getSpecValue(collationId, + ACCENT_SENSITIVITY_OFFSET, ACCENT_SENSITIVITY_MASK); + collationId = SpecifierUtils.removeSpec(collationId, + IMPLEMENTATION_PROVIDER_OFFSET, IMPLEMENTATION_PROVIDER_MASK); + collationId = SpecifierUtils.removeSpec(collationId, + CASE_SENSITIVITY_OFFSET, CASE_SENSITIVITY_MASK); + collationId = SpecifierUtils.removeSpec(collationId, + ACCENT_SENSITIVITY_OFFSET, ACCENT_SENSITIVITY_MASK); + // Locale ID remains after removing all other specifiers. + int localeId = collationId; + // Verify locale ID is valid against `ICULocaleNames` array. + assert(localeId >= 0 && localeId < ICULocaleNames.length); + CaseSensitivity caseSensitivity = CaseSensitivity.values()[caseSensitivityOrdinal]; + AccentSensitivity accentSensitivity = AccentSensitivity.values()[accentSensitivityOrdinal]; + String locale = ICULocaleNames[localeId]; + return new CollationSpecICU(locale, caseSensitivity, accentSensitivity); + } + + @Override + protected Collation buildCollation() { + ULocale.Builder builder = new ULocale.Builder(); + builder.setLocale(ICULocaleMap.get(locale)); + // Compute unicode locale keyword for all combinations of case/accent sensitivity. + if (caseSensitivity == CaseSensitivity.CS && + accentSensitivity == AccentSensitivity.AS) { + builder.setUnicodeLocaleKeyword("ks", "level3"); + } else if (caseSensitivity == CaseSensitivity.CS && + accentSensitivity == AccentSensitivity.AI) { + builder + .setUnicodeLocaleKeyword("ks", "level1") + .setUnicodeLocaleKeyword("kc", "true"); + } else if (caseSensitivity == CaseSensitivity.CI && + accentSensitivity == AccentSensitivity.AS) { + builder.setUnicodeLocaleKeyword("ks", "level2"); + } else if (caseSensitivity == CaseSensitivity.CI && + accentSensitivity == AccentSensitivity.AI) { + builder.setUnicodeLocaleKeyword("ks", "level1"); + } + ULocale resultLocale = builder.build(); + Collator collator = Collator.getInstance(resultLocale); + // Freeze ICU collator to ensure thread safety. + collator.freeze(); + return new Collation( + collationName(), + PROVIDER_ICU, + collator, + (s1, s2) -> collator.compare(s1.toString(), s2.toString()), + ICU_COLLATOR_VERSION, + s -> (long) collator.getCollationKey(s.toString()).hashCode(), + /* supportsBinaryEquality = */ false, + /* supportsBinaryOrdering = */ false, + /* supportsLowercaseEquality = */ false); + } + + /** + * Compute normalized collation name. Components of collation name are given in order: + * - Locale name + * - Optional case sensitivity when non-default preceded by underscore + * - Optional accent sensitivity when non-default preceded by underscore + * Examples: en, en_USA_CI_AI, sr_Cyrl_SRB_AI. 
+ */ + private String collationName() { + StringBuilder builder = new StringBuilder(); + builder.append(locale); + if (caseSensitivity != CaseSensitivity.CS) { + builder.append('_'); + builder.append(caseSensitivity.toString()); + } + if (accentSensitivity != AccentSensitivity.AS) { + builder.append('_'); + builder.append(accentSensitivity.toString()); + } + return builder.toString(); + } + } + + /** + * Utility class for manipulating conversions between collation IDs and specifier enums/locale + * IDs. Scope bitwise operations here to avoid confusion. + */ + private static class SpecifierUtils { + private static int getSpecValue(int collationId, int offset, int mask) { + return (collationId >> offset) & mask; + } + + private static int removeSpec(int collationId, int offset, int mask) { + return collationId & ~(mask << offset); + } + + private static int setSpecValue(int collationId, int offset, Enum spec) { + return collationId | (spec.ordinal() << offset); + } + } + + /** Returns the collation identifier. */ + public CollationIdentifier identifier() { + return new CollationIdentifier(provider, collationName, version); } } + public static final String PROVIDER_SPARK = "spark"; + public static final String PROVIDER_ICU = "icu"; + public static final List SUPPORTED_PROVIDERS = List.of(PROVIDER_SPARK, PROVIDER_ICU); + + public static final int UTF8_BINARY_COLLATION_ID = + Collation.CollationSpecUTF8.UTF8_BINARY_COLLATION_ID; + public static final int UTF8_LCASE_COLLATION_ID = + Collation.CollationSpecUTF8.UTF8_LCASE_COLLATION_ID; + public static final int UNICODE_COLLATION_ID = + Collation.CollationSpecICU.UNICODE_COLLATION_ID; + public static final int UNICODE_CI_COLLATION_ID = + Collation.CollationSpecICU.UNICODE_CI_COLLATION_ID; + public static final int INDETERMINATE_COLLATION_ID = + Collation.CollationSpec.INDETERMINATE_COLLATION_ID; + /** * Returns a StringSearch object for the given pattern and target strings, under collation * rules corresponding to the given collationId. The external ICU library StringSearch object can @@ -196,33 +754,27 @@ public static StringSearch getStringSearch( final UTF8String targetUTF8String, final UTF8String patternUTF8String, final int collationId) { - String pattern = patternUTF8String.toString(); - CharacterIterator target = new StringCharacterIterator(targetUTF8String.toString()); - Collator collator = CollationFactory.fetchCollation(collationId).collator; - return new StringSearch(pattern, target, (RuleBasedCollator) collator); - } - - /** - * Returns if the given collationName is valid one. - */ - public static boolean isValidCollation(String collationName) { - return collationNameToIdMap.containsKey(collationName.toUpperCase()); + return getStringSearch(targetUTF8String.toString(), patternUTF8String.toString(), collationId); } /** - * Returns closest valid name to collationName + * Returns a StringSearch object for the given pattern and target strings, under collation + * rules corresponding to the given collationId. The external ICU library StringSearch object can + * be used to find occurrences of the pattern in the target string, while respecting collation. 
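+ * For example, callers typically iterate matches by calling stringSearch.next() until it + * returns StringSearch.DONE, as done in CollationAwareUTF8String.replace above.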
*/ - public static String getClosestCollation(String collationName) { - Collation suggestion = Collections.min(List.of(collationTable), Comparator.comparingInt( - c -> UTF8String.fromString(c.collationName).levenshteinDistance( - UTF8String.fromString(collationName.toUpperCase())))); - return suggestion.collationName; + public static StringSearch getStringSearch( + final String targetString, + final String patternString, + final int collationId) { + CharacterIterator target = new StringCharacterIterator(targetString); + Collator collator = CollationFactory.fetchCollation(collationId).collator; + return new StringSearch(patternString, target, (RuleBasedCollator) collator); } /** * Returns a collation-unaware StringSearch object for the given pattern and target strings. * While this object does not respect collation, it can be used to find occurrences of the pattern - * in the target string for UTF8_BINARY or UTF8_BINARY_LCASE (if arguments are lowercased). + * in the target string for UTF8_BINARY or UTF8_LCASE (if arguments are lowercased). */ public static StringSearch getStringSearch( final UTF8String targetUTF8String, @@ -231,32 +783,139 @@ public static StringSearch getStringSearch( } /** - * Returns the collation id for the given collation name. + * Returns the collation ID for the given collation name. */ public static int collationNameToId(String collationName) throws SparkException { - String normalizedName = collationName.toUpperCase(); - if (collationNameToIdMap.containsKey(normalizedName)) { - return collationNameToIdMap.get(normalizedName); - } else { - Collation suggestion = Collections.min(List.of(collationTable), Comparator.comparingInt( - c -> UTF8String.fromString(c.collationName).levenshteinDistance( - UTF8String.fromString(normalizedName)))); + return Collation.CollationSpec.collationNameToId(collationName); + } - Map params = new HashMap<>(); - params.put("collationName", collationName); - params.put("proposal", suggestion.collationName); + public static void assertValidProvider(String provider) throws SparkException { + if (!SUPPORTED_PROVIDERS.contains(provider.toLowerCase())) { + Map params = Map.of( + "provider", provider, + "supportedProviders", String.join(", ", SUPPORTED_PROVIDERS) + ); throw new SparkException( - "COLLATION_INVALID_NAME", SparkException.constructMessageParams(params), null); + "COLLATION_INVALID_PROVIDER", SparkException.constructMessageParams(params), null); } } public static Collation fetchCollation(int collationId) { - return collationTable[collationId]; + return Collation.CollationSpec.fetchCollation(collationId); } public static Collation fetchCollation(String collationName) throws SparkException { - int collationId = collationNameToId(collationName); - return collationTable[collationId]; + return fetchCollation(collationNameToId(collationName)); + } + + public static String[] getICULocaleNames() { + return Collation.CollationSpecICU.ICULocaleNames; + } + + public static UTF8String getCollationKey(UTF8String input, int collationId) { + Collation collation = fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return input; + } else if (collation.supportsLowercaseEquality) { + return input.toLowerCase(); + } else { + CollationKey collationKey = collation.collator.getCollationKey(input.toString()); + return UTF8String.fromBytes(collationKey.toByteArray()); + } + } + + public static byte[] getCollationKeyBytes(UTF8String input, int collationId) { + Collation collation = fetchCollation(collationId); + if 
(collation.supportsBinaryEquality) { + return input.getBytes(); + } else if (collation.supportsLowercaseEquality) { + return input.toLowerCase().getBytes(); + } else { + return collation.collator.getCollationKey(input.toString()).toByteArray(); + } + } + + /** + * Returns same string if collation name is valid or the closest suggestion if it is invalid. + */ + public static String getClosestSuggestionsOnInvalidName( + String collationName, int maxSuggestions) { + String[] validRootNames; + String[] validModifiers; + if (collationName.startsWith("UTF8_")) { + validRootNames = new String[]{ + Collation.CollationSpecUTF8.UTF8_BINARY_COLLATION.collationName, + Collation.CollationSpecUTF8.UTF8_LCASE_COLLATION.collationName + }; + validModifiers = new String[0]; + } else { + validRootNames = getICULocaleNames(); + validModifiers = new String[]{"_CI", "_AI", "_CS", "_AS"}; + } + + // Split modifiers and locale name. + final int MODIFIER_LENGTH = 3; + String localeName = collationName.toUpperCase(); + List modifiers = new ArrayList<>(); + while (Arrays.stream(validModifiers).anyMatch(localeName::endsWith)) { + modifiers.add(localeName.substring(localeName.length() - MODIFIER_LENGTH)); + localeName = localeName.substring(0, localeName.length() - MODIFIER_LENGTH); + } + + // Suggest version with unique modifiers. + Collections.reverse(modifiers); + modifiers = modifiers.stream().distinct().toList(); + + // Remove conflicting settings. + if (modifiers.contains("_CI") && modifiers.contains(("_CS"))) { + modifiers = modifiers.stream().filter(m -> !m.equals("_CI")).toList(); + } + + if (modifiers.contains("_AI") && modifiers.contains(("_AS"))) { + modifiers = modifiers.stream().filter(m -> !m.equals("_AI")).toList(); + } + + final String finalLocaleName = localeName; + Comparator distanceComparator = (c1, c2) -> { + int distance1 = UTF8String.fromString(c1.toUpperCase()) + .levenshteinDistance(UTF8String.fromString(finalLocaleName)); + int distance2 = UTF8String.fromString(c2.toUpperCase()) + .levenshteinDistance(UTF8String.fromString(finalLocaleName)); + return Integer.compare(distance1, distance2); + }; + + String[] rootNamesByDistance = Arrays.copyOf(validRootNames, validRootNames.length); + Arrays.sort(rootNamesByDistance, distanceComparator); + Function isCollationNameValid = name -> { + try { + collationNameToId(name); + return true; + } catch (SparkException e) { + return false; + } + }; + + final int suggestionThreshold = 3; + final ArrayList suggestions = new ArrayList<>(maxSuggestions); + for (int i = 0; i < maxSuggestions; i++) { + // Add at least one suggestion. + // Add others if distance from the original is lower than threshold. 
+ String suggestion = rootNamesByDistance[i] + String.join("", modifiers); + assert(isCollationNameValid.apply(suggestion)); + if (suggestions.isEmpty()) { + suggestions.add(suggestion); + } else { + int distance = UTF8String.fromString(suggestion.toUpperCase()) + .levenshteinDistance(UTF8String.fromString(collationName.toUpperCase())); + if (distance < suggestionThreshold) { + suggestions.add(suggestion); + } else { + break; + } + } + } + + return String.join(", ", suggestions); } } diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java index fe1952921b7fb..fa4a40b74ab24 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java @@ -20,6 +20,11 @@ import org.apache.spark.unsafe.types.UTF8String; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + /** * Static entry point for collation-aware expressions (StringExpressions, RegexpExpressions, and * other expressions that require custom collation support), as well as private utility methods for @@ -31,6 +36,62 @@ public final class CollationSupport { * Collation-aware string expressions. */ + public static class StringSplitSQL { + public static UTF8String[] exec(final UTF8String s, final UTF8String d, final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(s, d); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(s, d); + } else { + return execICU(s, d, collationId); + } + } + public static String genCode(final String s, final String d, final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringSplitSQL.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s)", s, d); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s, %s)", s, d); + } else { + return String.format(expr + "ICU(%s, %s, %d)", s, d, collationId); + } + } + public static UTF8String[] execBinary(final UTF8String string, final UTF8String delimiter) { + return string.splitSQL(delimiter, -1); + } + public static UTF8String[] execLowercase(final UTF8String string, final UTF8String delimiter) { + if (delimiter.numBytes() == 0) return new UTF8String[] { string }; + if (string.numBytes() == 0) return new UTF8String[] { UTF8String.EMPTY_UTF8 }; + Pattern pattern = Pattern.compile(Pattern.quote(delimiter.toString()), + CollationSupport.lowercaseRegexFlags); + String[] splits = pattern.split(string.toString(), -1); + UTF8String[] res = new UTF8String[splits.length]; + for (int i = 0; i < res.length; i++) { + res[i] = UTF8String.fromString(splits[i]); + } + return res; + } + public static UTF8String[] execICU(final UTF8String string, final UTF8String delimiter, + final int collationId) { + if (delimiter.numBytes() == 0) return new UTF8String[] { string }; + if (string.numBytes() == 0) return new UTF8String[] { UTF8String.EMPTY_UTF8 }; + List strings = new ArrayList<>(); + String target = string.toString(), pattern = delimiter.toString(); + StringSearch stringSearch = CollationFactory.getStringSearch(target, pattern, collationId); + int start = 0, end; + while ((end 
= stringSearch.next()) != StringSearch.DONE) { + strings.add(UTF8String.fromString(target.substring(start, end))); + start = end + stringSearch.getMatchLength(); + } + if (start <= target.length()) { + strings.add(UTF8String.fromString(target.substring(start))); + } + return strings.toArray(new UTF8String[0]); + } + } + public static class Contains { public static boolean exec(final UTF8String l, final UTF8String r, final int collationId) { CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); @@ -57,7 +118,7 @@ public static boolean execBinary(final UTF8String l, final UTF8String r) { return l.contains(r); } public static boolean execLowercase(final UTF8String l, final UTF8String r) { - return l.toLowerCase().contains(r.toLowerCase()); + return CollationAwareUTF8String.lowercaseIndexOf(l, r, 0) >= 0; } public static boolean execICU(final UTF8String l, final UTF8String r, final int collationId) { @@ -95,11 +156,14 @@ public static boolean execBinary(final UTF8String l, final UTF8String r) { return l.startsWith(r); } public static boolean execLowercase(final UTF8String l, final UTF8String r) { - return l.toLowerCase().startsWith(r.toLowerCase()); + return CollationAwareUTF8String.lowercaseMatchFrom(l, r.toLowerCase(), 0); } public static boolean execICU(final UTF8String l, final UTF8String r, final int collationId) { - return CollationAwareUTF8String.matchAt(l, r, 0, collationId); + if (r.numBytes() == 0) return true; + if (l.numBytes() == 0) return false; + StringSearch stringSearch = CollationFactory.getStringSearch(l, r, collationId); + return stringSearch.first() == 0; } } @@ -129,46 +193,582 @@ public static boolean execBinary(final UTF8String l, final UTF8String r) { return l.endsWith(r); } public static boolean execLowercase(final UTF8String l, final UTF8String r) { - return l.toLowerCase().endsWith(r.toLowerCase()); + return CollationAwareUTF8String.lowercaseMatchUntil(l, r.toLowerCase(), l.numChars()); } public static boolean execICU(final UTF8String l, final UTF8String r, final int collationId) { - return CollationAwareUTF8String.matchAt(l, r, l.numBytes() - r.numBytes(), collationId); + if (r.numBytes() == 0) return true; + if (l.numBytes() == 0) return false; + StringSearch stringSearch = CollationFactory.getStringSearch(l, r, collationId); + int endIndex = stringSearch.getTarget().getEndIndex(); + return stringSearch.last() == endIndex - stringSearch.getMatchLength(); } } - // TODO: Add more collation-aware string expressions. + public static class Upper { + public static UTF8String exec(final UTF8String v, final int collationId, boolean useICU) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return useICU ? execBinaryICU(v) : execBinary(v); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(v); + } else { + return execICU(v, collationId); + } + } + public static String genCode(final String v, final int collationId, boolean useICU) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.Upper.exec"; + if (collation.supportsBinaryEquality) { + String funcName = useICU ? 
"BinaryICU" : "Binary"; + return String.format(expr + "%s(%s)", funcName, v); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s)", v); + } else { + return String.format(expr + "ICU(%s, %d)", v, collationId); + } + } + public static UTF8String execBinary(final UTF8String v) { + return v.toUpperCase(); + } + public static UTF8String execBinaryICU(final UTF8String v) { + return CollationAwareUTF8String.toUpperCase(v); + } + public static UTF8String execLowercase(final UTF8String v) { + return CollationAwareUTF8String.toUpperCase(v); + } + public static UTF8String execICU(final UTF8String v, final int collationId) { + return CollationAwareUTF8String.toUpperCase(v, collationId); + } + } - /** - * Collation-aware regexp expressions. - */ + public static class Lower { + public static UTF8String exec(final UTF8String v, final int collationId, boolean useICU) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return useICU ? execBinaryICU(v) : execBinary(v); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(v); + } else { + return execICU(v, collationId); + } + } + public static String genCode(final String v, final int collationId, boolean useICU) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.Lower.exec"; + if (collation.supportsBinaryEquality) { + String funcName = useICU ? "BinaryICU" : "Binary"; + return String.format(expr + "%s(%s)", funcName, v); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s)", v); + } else { + return String.format(expr + "ICU(%s, %d)", v, collationId); + } + } + public static UTF8String execBinary(final UTF8String v) { + return v.toLowerCase(); + } + public static UTF8String execBinaryICU(final UTF8String v) { + return CollationAwareUTF8String.toLowerCase(v); + } + public static UTF8String execLowercase(final UTF8String v) { + return CollationAwareUTF8String.toLowerCase(v); + } + public static UTF8String execICU(final UTF8String v, final int collationId) { + return CollationAwareUTF8String.toLowerCase(v, collationId); + } + } - // TODO: Add more collation-aware regexp expressions. + public static class InitCap { + public static UTF8String exec(final UTF8String v, final int collationId, boolean useICU) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return useICU ? execBinaryICU(v) : execBinary(v); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(v); + } else { + return execICU(v, collationId); + } + } - /** - * Other collation-aware expressions. - */ + public static String genCode(final String v, final int collationId, boolean useICU) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.InitCap.exec"; + if (collation.supportsBinaryEquality) { + String funcName = useICU ? 
"BinaryICU" : "Binary"; + return String.format(expr + "%s(%s)", funcName, v); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s)", v); + } else { + return String.format(expr + "ICU(%s, %d)", v, collationId); + } + } + public static UTF8String execBinary(final UTF8String v) { + return v.toLowerCase().toTitleCase(); + } + public static UTF8String execBinaryICU(final UTF8String v) { + return CollationAwareUTF8String.toLowerCase(v).toTitleCaseICU(); + } + public static UTF8String execLowercase(final UTF8String v) { + return CollationAwareUTF8String.toTitleCase(v); + } + public static UTF8String execICU(final UTF8String v, final int collationId) { + return CollationAwareUTF8String.toTitleCase(v, collationId); + } + } - // TODO: Add other collation-aware expressions. + public static class FindInSet { + public static int exec(final UTF8String word, final UTF8String set, final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(word, set); + } else { + return execCollationAware(word, set, collationId); + } + } + public static String genCode(final String word, final String set, final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.FindInSet.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s)", word, set); + } else { + return String.format(expr + "execCollationAware(%s, %s, %d)", word, set, collationId); + } + } + public static int execBinary(final UTF8String word, final UTF8String set) { + return set.findInSet(word); + } + public static int execCollationAware(final UTF8String word, final UTF8String set, + final int collationId) { + return CollationAwareUTF8String.findInSet(word, set, collationId); + } + } - /** - * Utility class for collation-aware UTF8String operations. 
- */ + public static class StringInstr { + public static int exec(final UTF8String string, final UTF8String substring, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(string, substring); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(string, substring); + } else { + return execICU(string, substring, collationId); + } + } + public static String genCode(final String string, final String substring, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringInstr.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s)", string, substring); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s, %s)", string, substring); + } else { + return String.format(expr + "ICU(%s, %s, %d)", string, substring, collationId); + } + } + public static int execBinary(final UTF8String string, final UTF8String substring) { + return string.indexOf(substring, 0); + } + public static int execLowercase(final UTF8String string, final UTF8String substring) { + return CollationAwareUTF8String.lowercaseIndexOf(string, substring, 0); + } + public static int execICU(final UTF8String string, final UTF8String substring, + final int collationId) { + return CollationAwareUTF8String.indexOf(string, substring, 0, collationId); + } + } + + public static class StringReplace { + public static UTF8String exec(final UTF8String src, final UTF8String search, + final UTF8String replace, final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(src, search, replace); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(src, search, replace); + } else { + return execICU(src, search, replace, collationId); + } + } + public static String genCode(final String src, final String search, final String replace, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringReplace.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s, %s)", src, search, replace); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s, %s, %s)", src, search, replace); + } else { + return String.format(expr + "ICU(%s, %s, %s, %d)", src, search, replace, collationId); + } + } + public static UTF8String execBinary(final UTF8String src, final UTF8String search, + final UTF8String replace) { + return src.replace(search, replace); + } + public static UTF8String execLowercase(final UTF8String src, final UTF8String search, + final UTF8String replace) { + return CollationAwareUTF8String.lowercaseReplace(src, search, replace); + } + public static UTF8String execICU(final UTF8String src, final UTF8String search, + final UTF8String replace, final int collationId) { + return CollationAwareUTF8String.replace(src, search, replace, collationId); + } + } + + public static class StringLocate { + public static int exec(final UTF8String string, final UTF8String substring, final int start, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return 
execBinary(string, substring, start); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(string, substring, start); + } else { + return execICU(string, substring, start, collationId); + } + } + public static String genCode(final String string, final String substring, final int start, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringLocate.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s, %d)", string, substring, start); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s, %s, %d)", string, substring, start); + } else { + return String.format(expr + "ICU(%s, %s, %d, %d)", string, substring, start, collationId); + } + } + public static int execBinary(final UTF8String string, final UTF8String substring, + final int start) { + return string.indexOf(substring, start); + } + public static int execLowercase(final UTF8String string, final UTF8String substring, + final int start) { + return CollationAwareUTF8String.lowercaseIndexOf(string, substring, start); + } + public static int execICU(final UTF8String string, final UTF8String substring, final int start, + final int collationId) { + return CollationAwareUTF8String.indexOf(string, substring, start, collationId); + } + } + + public static class SubstringIndex { + public static UTF8String exec(final UTF8String string, final UTF8String delimiter, + final int count, final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(string, delimiter, count); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(string, delimiter, count); + } else { + return execICU(string, delimiter, count, collationId); + } + } + public static String genCode(final String string, final String delimiter, + final int count, final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.SubstringIndex.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s, %d)", string, delimiter, count); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s, %s, %d)", string, delimiter, count); + } else { + return String.format(expr + "ICU(%s, %s, %d, %d)", string, delimiter, count, collationId); + } + } + public static UTF8String execBinary(final UTF8String string, final UTF8String delimiter, + final int count) { + return string.subStringIndex(delimiter, count); + } + public static UTF8String execLowercase(final UTF8String string, final UTF8String delimiter, + final int count) { + return CollationAwareUTF8String.lowercaseSubStringIndex(string, delimiter, count); + } + public static UTF8String execICU(final UTF8String string, final UTF8String delimiter, + final int count, final int collationId) { + return CollationAwareUTF8String.subStringIndex(string, delimiter, count, + collationId); + } + } + + public static class StringTranslate { + public static UTF8String exec(final UTF8String source, Map dict, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(source, dict); + } else if (collation.supportsLowercaseEquality) { + return execLowercase(source, dict); + } else 
{ + return execICU(source, dict, collationId); + } + } + public static String genCode(final String source, final String dict, final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringTranslate.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s)", source, dict); + } else if (collation.supportsLowercaseEquality) { + return String.format(expr + "Lowercase(%s, %s)", source, dict); + } else { + return String.format(expr + "ICU(%s, %s, %d)", source, dict, collationId); + } + } + public static UTF8String execBinary(final UTF8String source, Map dict) { + return source.translate(dict); + } + public static UTF8String execLowercase(final UTF8String source, Map dict) { + String srcStr = source.toString(); + StringBuilder sb = new StringBuilder(); + int charCount = 0; + for (int k = 0; k < srcStr.length(); k += charCount) { + int codePoint = srcStr.codePointAt(k); + charCount = Character.charCount(codePoint); + String subStr = srcStr.substring(k, k + charCount); + String translated = dict.get(subStr.toLowerCase()); + if (null == translated) { + sb.append(subStr); + } else if (!"\0".equals(translated)) { + sb.append(translated); + } + } + return UTF8String.fromString(sb.toString()); + } + public static UTF8String execICU(final UTF8String source, Map dict, + final int collationId) { + return source.translate(CollationAwareUTF8String.getCollationAwareDict( + source, dict, collationId)); + } + } + + public static class StringTrim { + public static UTF8String exec( + final UTF8String srcString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(srcString); + } else { + return execLowercase(srcString); + } + } + public static UTF8String exec( + final UTF8String srcString, + final UTF8String trimString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(srcString, trimString); + } else { + return execLowercase(srcString, trimString); + } + } + public static String genCode( + final String srcString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringTrim.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s)", srcString); + } else { + return String.format(expr + "Lowercase(%s)", srcString); + } + } + public static String genCode( + final String srcString, + final String trimString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringTrim.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s)", srcString, trimString); + } else { + return String.format(expr + "Lowercase(%s, %s)", srcString, trimString); + } + } + public static UTF8String execBinary( + final UTF8String srcString) { + return srcString.trim(); + } + public static UTF8String execBinary( + final UTF8String srcString, + final UTF8String trimString) { + return srcString.trim(trimString); + } + public static UTF8String execLowercase( + final UTF8String srcString) { + return srcString.trim(); + } + public static UTF8String execLowercase( + final UTF8String srcString, + final UTF8String trimString) { + return 
CollationAwareUTF8String.lowercaseTrim(srcString, trimString); + } + } - private static class CollationAwareUTF8String { + public static class StringTrimLeft { + public static UTF8String exec( + final UTF8String srcString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(srcString); + } else { + return execLowercase(srcString); + } + } + public static UTF8String exec( + final UTF8String srcString, + final UTF8String trimString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(srcString, trimString); + } else { + return execLowercase(srcString, trimString); + } + } + public static String genCode( + final String srcString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringTrimLeft.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s)", srcString); + } else { + return String.format(expr + "Lowercase(%s)", srcString); + } + } + public static String genCode( + final String srcString, + final String trimString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringTrimLeft.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s)", srcString, trimString); + } else { + return String.format(expr + "Lowercase(%s, %s)", srcString, trimString); + } + } + public static UTF8String execBinary( + final UTF8String srcString) { + return srcString.trimLeft(); + } + public static UTF8String execBinary( + final UTF8String srcString, + final UTF8String trimString) { + return srcString.trimLeft(trimString); + } + public static UTF8String execLowercase( + final UTF8String srcString) { + return srcString.trimLeft(); + } + public static UTF8String execLowercase( + final UTF8String srcString, + final UTF8String trimString) { + return CollationAwareUTF8String.lowercaseTrimLeft(srcString, trimString); + } + } - private static boolean matchAt(final UTF8String target, final UTF8String pattern, - final int pos, final int collationId) { - if (pattern.numChars() + pos > target.numChars() || pos < 0) { - return false; + public static class StringTrimRight { + public static UTF8String exec( + final UTF8String srcString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(srcString); + } else { + return execLowercase(srcString); + } + } + public static UTF8String exec( + final UTF8String srcString, + final UTF8String trimString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + if (collation.supportsBinaryEquality) { + return execBinary(srcString, trimString); + } else { + return execLowercase(srcString, trimString); } - if (pattern.numBytes() == 0 || target.numBytes() == 0) { - return pattern.numBytes() == 0; + } + public static String genCode( + final String srcString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringTrimRight.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + 
"Binary(%s)", srcString); + } else { + return String.format(expr + "Lowercase(%s)", srcString); } - return CollationFactory.getStringSearch(target.substring( - pos, pos + pattern.numChars()), pattern, collationId).last() == 0; } + public static String genCode( + final String srcString, + final String trimString, + final int collationId) { + CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId); + String expr = "CollationSupport.StringTrimRight.exec"; + if (collation.supportsBinaryEquality) { + return String.format(expr + "Binary(%s, %s)", srcString, trimString); + } else { + return String.format(expr + "Lowercase(%s, %s)", srcString, trimString); + } + } + public static UTF8String execBinary( + final UTF8String srcString) { + return srcString.trimRight(); + } + public static UTF8String execBinary( + final UTF8String srcString, + final UTF8String trimString) { + return srcString.trimRight(trimString); + } + public static UTF8String execLowercase( + final UTF8String srcString) { + return srcString.trimRight(); + } + public static UTF8String execLowercase( + final UTF8String srcString, + final UTF8String trimString) { + return CollationAwareUTF8String.lowercaseTrimRight(srcString, trimString); + } + } + + // TODO: Add more collation-aware string expressions. + + /** + * Collation-aware regexp expressions. + */ + + public static boolean supportsLowercaseRegex(final int collationId) { + // for regex, only Unicode case-insensitive matching is possible, + // so UTF8_LCASE is treated as UNICODE_CI in this context + return CollationFactory.fetchCollation(collationId).supportsLowercaseEquality; + } + private static final int lowercaseRegexFlags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE; + public static int collationAwareRegexFlags(final int collationId) { + return supportsLowercaseRegex(collationId) ? lowercaseRegexFlags : 0; } + private static final UTF8String lowercaseRegexPrefix = UTF8String.fromString("(?ui)"); + public static UTF8String lowercaseRegex(final UTF8String regex) { + return UTF8String.concat(lowercaseRegexPrefix, regex); + } + public static UTF8String collationAwareRegex(final UTF8String regex, final int collationId) { + return supportsLowercaseRegex(collationId) ? lowercaseRegex(regex) : regex; + } + + /** + * Other collation-aware expressions. + */ + + // TODO: Add other collation-aware expressions. 
+ } diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 2009f1d20442c..a2372d28a6c41 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -21,7 +21,10 @@ import java.io.*; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; +import java.util.function.Function; +import java.util.Iterator; import java.util.Map; import java.util.regex.Pattern; @@ -29,6 +32,7 @@ import com.esotericsoftware.kryo.KryoSerializable; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; +import com.ibm.icu.lang.UCharacter; import org.apache.spark.sql.catalyst.util.CollationFactory; import org.apache.spark.unsafe.Platform; @@ -56,6 +60,7 @@ public final class UTF8String implements Comparable, Externalizable, private Object base; private long offset; private int numBytes; + private volatile int numChars = -1; public Object getBaseObject() { return base; } public long getBaseOffset() { return offset; } @@ -100,6 +105,8 @@ public final class UTF8String implements Comparable, Externalizable, private static final UTF8String COMMA_UTF8 = UTF8String.fromString(","); public static final UTF8String EMPTY_UTF8 = UTF8String.fromString(""); + public static final UTF8String ZERO_UTF8 = UTF8String.fromString("0"); + /** * Creates an UTF8String from byte array, which should be encoded in UTF-8. @@ -114,6 +121,14 @@ public static UTF8String fromBytes(byte[] bytes) { } } + private static UTF8String fromBytes(ArrayList bytes) { + byte[] byteArray = new byte[bytes.size()]; + for (int i = 0; i < bytes.size(); i++) { + byteArray[i] = bytes.get(i); + } + return fromBytes(byteArray); + } + /** * Creates an UTF8String from byte array, which should be encoded in UTF-8. * @@ -224,7 +239,7 @@ public void writeTo(OutputStream out) throws IOException { * Returns the number of bytes for a code point with the first byte as `b` * @param b The first byte of a code point */ - private static int numBytesForFirstByte(final byte b) { + public static int numBytesForFirstByte(final byte b) { final int offset = b & 0xFF; byte numBytes = bytesOfCodePointInUTF8[offset]; return (numBytes == 0) ? 1: numBytes; // Skip the first byte disallowed in UTF-8 @@ -241,6 +256,16 @@ public int numBytes() { * Returns the number of code points in it. */ public int numChars() { + if (numChars == -1) numChars = getNumChars(); + return numChars; + } + + /** + * Private helper method to calculate the number of code points in the UTF-8 string. Counting + * the code points is a linear time operation, as we need to scan the entire UTF-8 string. + * Hence, this method should generally only be called once for non-empty UTF-8 strings. + */ + private int getNumChars() { int len = 0; for (int i = 0; i < numBytes; i += numBytesForFirstByte(getByte(i))) { len += 1; @@ -270,6 +295,228 @@ public byte[] getBytes() { } } + /** + * Utility methods and constants for UTF-8 string validation. 
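+ * For example, a continuation byte must lie in [0x80, 0xBF], while the second byte of a sequence starting with 0xE0 is further restricted to [0xA0, 0xBF] (Unicode 15, Table 3-7).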
+ */ + + private static boolean isValidContinuationByte(byte b) { + return b >= (byte) 0x80 && b <= (byte) 0xBF; + } + + private static boolean isValidSecondByte(byte b, byte firstByte) { + return switch (firstByte) { + case (byte) 0xE0 -> b >= (byte) 0xA0 && b <= (byte) 0xBF; + case (byte) 0xED -> b >= (byte) 0x80 && b <= (byte) 0x9F; + case (byte) 0xF0 -> b >= (byte) 0x90 && b <= (byte) 0xBF; + case (byte) 0xF4 -> b >= (byte) 0x80 && b <= (byte) 0x8F; + default -> isValidContinuationByte(b); + }; + } + + private static final byte[] UNICODE_REPLACEMENT_CHARACTER = + new byte[] { (byte) 0xEF, (byte) 0xBF, (byte) 0xBD }; + + private static void appendReplacementCharacter(ArrayList bytes) { + for (byte b : UTF8String.UNICODE_REPLACEMENT_CHARACTER) bytes.add(b); + } + + /** + * Returns a validated version of the current UTF-8 string by replacing invalid UTF-8 sequences + * with the Unicode replacement character (U+FFFD), as per the rules defined in the Unicode + * standard 15, Section 3.9, Paragraph D86, Table 3-7. This behaviour is consistent with the + * behaviour of `UnicodeString` in ICU4C. + * + * @return A new UTF8String that is a valid UTF8 string. + */ + public UTF8String makeValid() { + ArrayList bytes = new ArrayList<>(); + int byteIndex = 0; + while (byteIndex < numBytes) { + // Read the first byte. + byte firstByte = getByte(byteIndex); + int expectedLen = bytesOfCodePointInUTF8[firstByte & 0xFF]; + int codePointLen = Math.min(expectedLen, numBytes - byteIndex); + // 0B UTF-8 sequence (invalid first byte). + if (codePointLen == 0) { + appendReplacementCharacter(bytes); + ++byteIndex; + continue; + } + // 1B UTF-8 sequence (ASCII or truncated). + if (codePointLen == 1) { + if (firstByte >= 0) bytes.add(firstByte); + else appendReplacementCharacter(bytes); + ++byteIndex; + continue; + } + // Read the second byte. + byte secondByte = getByte(byteIndex + 1); + if (!isValidSecondByte(secondByte, firstByte)) { + appendReplacementCharacter(bytes); + ++byteIndex; + continue; + } + // Read remaining continuation bytes. + int continuationBytes = 2; + for (; continuationBytes < codePointLen; ++continuationBytes) { + byte nextByte = getByte(byteIndex + continuationBytes); + if (!isValidContinuationByte(nextByte)) { + break; + } + } + // Invalid UTF-8 sequence (not enough continuation bytes). + if (continuationBytes < expectedLen) { + appendReplacementCharacter(bytes); + byteIndex += continuationBytes; + continue; + } + // Valid UTF-8 sequence. + for (int i = 0; i < codePointLen; ++i) { + bytes.add(getByte(byteIndex + i)); + } + byteIndex += codePointLen; + } + return UTF8String.fromBytes(bytes); + } + + /** + * Checks if the current UTF8String is valid. + * + * @return If string represents a valid UTF8 string. + */ + public boolean isValid() { + int byteIndex = 0; + while (byteIndex < numBytes) { + // Read the first byte. + byte firstByte = getByte(byteIndex); + int expectedLen = bytesOfCodePointInUTF8[firstByte & 0xFF]; + int codePointLen = Math.min(expectedLen, numBytes - byteIndex); + // 0B UTF-8 sequence (invalid first byte). + if (codePointLen == 0) return false; + // 1B UTF-8 sequence (ASCII or truncated). + if (codePointLen == 1) { + if (firstByte >= 0) { + ++byteIndex; + continue; + } + else return false; + } + // Read the second byte. + byte secondByte = getByte(byteIndex + 1); + if (!isValidSecondByte(secondByte, firstByte)) return false; + // Read remaining continuation bytes. 
+ int continuationBytes = 2; + for (; continuationBytes < codePointLen; ++continuationBytes) { + byte nextByte = getByte(byteIndex + continuationBytes); + if (!isValidContinuationByte(nextByte)) return false; + } + // Invalid UTF-8 sequence (not enough continuation bytes). + if (continuationBytes < expectedLen) return false; + // Valid UTF-8 sequence. + byteIndex += codePointLen; + } + return true; + } + + /** + * Code point iteration over a UTF8String can be done using one of two modes: + * 1. CODE_POINT_ITERATOR_ASSUME_VALID: The caller ensures that the UTF8String is valid and does + * not contain any invalid UTF-8 byte sequences. In this case, the code point iterator will + * return the code points in the current string one by one, as integers. If an invalid code + * point is found within the string during iteration, an exception will be thrown. This mode + * is more dangerous, but faster - since no scan is needed prior to beginning iteration. + * 2. CODE_POINT_ITERATOR_MAKE_VALID: The caller does not ensure that the UTF8String is valid, + * but instead expects the code point iterator to first check whether the current UTF8String + * is valid, then perform the invalid byte sequence replacement using `makeValid`, and finally + * begin the code point iteration over the resulting valid UTF8String. However, the original + * UTF8String stays unchanged. This mode is safer, but slower - due to initial validation. + * The default mode is CODE_POINT_ITERATOR_ASSUME_VALID. + */ + public enum CodePointIteratorType { + CODE_POINT_ITERATOR_ASSUME_VALID, // USE ONLY WITH VALID STRINGS + CODE_POINT_ITERATOR_MAKE_VALID + } + + /** + * Returns a code point iterator for this UTF8String. + */ + public Iterator codePointIterator() { + return codePointIterator(CodePointIteratorType.CODE_POINT_ITERATOR_ASSUME_VALID); + } + + public Iterator codePointIterator(CodePointIteratorType iteratorMode) { + if (iteratorMode == CodePointIteratorType.CODE_POINT_ITERATOR_MAKE_VALID && !isValid()) { + return makeValid().codePointIterator(); + } + return new CodePointIterator(); + } + + /** + * Code point iterator implementation for the UTF8String class. The iterator will return code + * points in the current string one by one, as integers. However, the code point iterator is only + * guaranteed to work if the current UTF8String does not contain any invalid UTF-8 byte sequences. + * If the current string contains any invalid UTF-8 byte sequences, exceptions will be thrown. + */ + private class CodePointIterator implements Iterator { + // Byte index used to iterate over the current UTF8String. + private int byteIndex = 0; + + @Override + public boolean hasNext() { + return byteIndex < numBytes; + } + + @Override + public Integer next() { + if (!hasNext()) { + throw new IndexOutOfBoundsException(); + } + int codePoint = codePointFrom(byteIndex); + byteIndex += numBytesForFirstByte(getByte(byteIndex)); + return codePoint; + } + } + + /** + * Reverse version of the code point iterator for this UTF8String, returns code points in the + * current string one by one, as integers, in reverse order. The logic is similar to the above. 
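+ * For example, reverse iteration over "aü" yields the code point of 'ü' (U+00FC) first and then 'a', stepping backwards over the UTF-8 continuation byte of 'ü'.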
+ */ + + public Iterator reverseCodePointIterator() { + return reverseCodePointIterator(CodePointIteratorType.CODE_POINT_ITERATOR_ASSUME_VALID); + } + + public Iterator reverseCodePointIterator(CodePointIteratorType iteratorMode) { + if (iteratorMode == CodePointIteratorType.CODE_POINT_ITERATOR_MAKE_VALID && !isValid()) { + return makeValid().reverseCodePointIterator(); + } + return new ReverseCodePointIterator(); + } + + private class ReverseCodePointIterator implements Iterator { + private int byteIndex = numBytes - 1; + + @Override + public boolean hasNext() { + return byteIndex >= 0; + } + + @Override + public Integer next() { + if (!hasNext()) { + throw new IndexOutOfBoundsException(); + } + while (byteIndex > 0 && isContinuationByte(getByte(byteIndex))) { + --byteIndex; + } + return codePointFrom(byteIndex--); + } + + private boolean isContinuationByte(byte b) { + return (b & 0xC0) == 0x80; + } + } + /** * Returns a substring of this. * @param start the position of first code point @@ -342,10 +589,53 @@ public boolean contains(final UTF8String substring) { } /** - * Returns the byte at position `i`. + * Returns the byte at (byte) position `byteIndex`. If byte index is invalid, returns 0. */ - private byte getByte(int i) { - return Platform.getByte(base, offset + i); + public byte getByte(int byteIndex) { + return Platform.getByte(base, offset + byteIndex); + } + + /** + * Returns the code point at (char) position `charIndex`. If char index is invalid, throws + * exception. Note that this method is not efficient as it needs to traverse the UTF-8 string. + * If `byteIndex` of the first byte in the code point is known, use `codePointFrom` instead. + */ + public int getChar(int charIndex) { + if (charIndex < 0 || charIndex >= numChars()) { + throw new IndexOutOfBoundsException(); + } + int charCount = 0, byteCount = 0; + while (charCount < charIndex) { + byteCount += numBytesForFirstByte(getByte(byteCount)); + charCount += 1; + } + return codePointFrom(byteCount); + } + + /** + * Returns the code point starting from the byte at position `byteIndex`. + * If byte index is invalid, throws exception. + */ + public int codePointFrom(int byteIndex) { + if (byteIndex < 0 || byteIndex >= numBytes) { + throw new IndexOutOfBoundsException(); + } + byte b = getByte(byteIndex); + int numBytes = numBytesForFirstByte(b); + return switch (numBytes) { + case 1 -> + b & 0x7F; + case 2 -> + ((b & 0x1F) << 6) | (getByte(byteIndex + 1) & 0x3F); + case 3 -> + ((b & 0x0F) << 12) | ((getByte(byteIndex + 1) & 0x3F) << 6) | + (getByte(byteIndex + 2) & 0x3F); + case 4 -> + ((b & 0x07) << 18) | ((getByte(byteIndex + 1) & 0x3F) << 12) | + ((getByte(byteIndex + 2) & 0x3F) << 6) | (getByte(byteIndex + 3) & 0x3F); + default -> + throw new IllegalStateException("Error in UTF-8 code point"); + }; } public boolean matchAt(final UTF8String s, int pos) { @@ -364,56 +654,34 @@ public boolean endsWith(final UTF8String suffix) { } /** - * Returns the upper case of this string + * Method for ASCII character conversion using a functional interface for chars. 
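+ * For example, convertAscii(Character::toUpperCase) maps every byte through the given converter; callers are expected to verify isFullAscii() first, since the conversion is applied byte by byte.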
*/ - public UTF8String toUpperCase() { - if (numBytes == 0) { - return EMPTY_UTF8; - } - // Optimization - do char level uppercase conversion in case of chars in ASCII range - for (int i = 0; i < numBytes; i++) { - if (getByte(i) < 0) { - // non-ASCII - return toUpperCaseSlow(); - } - } + + private UTF8String convertAscii(Function charConverter) { byte[] bytes = new byte[numBytes]; for (int i = 0; i < numBytes; i++) { - bytes[i] = (byte) Character.toUpperCase(getByte(i)); + bytes[i] = (byte) charConverter.apply((char) getByte(i)).charValue(); } return fromBytes(bytes); } - private UTF8String toUpperCaseSlow() { - return fromString(toString().toUpperCase()); - } - /** - * Optimized lowercase comparison for UTF8_BINARY_LCASE collation - * a.compareLowerCase(b) is equivalent to a.toLowerCase().binaryCompare(b.toLowerCase()) + * Returns the upper case of this string */ - public int compareLowerCase(UTF8String other) { - int curr; - for (curr = 0; curr < numBytes && curr < other.numBytes; curr++) { - byte left, right; - if ((left = getByte(curr)) < 0 || (right = other.getByte(curr)) < 0) { - return compareLowerCaseSuffixSlow(other, curr); - } - int lowerLeft = Character.toLowerCase(left); - int lowerRight = Character.toLowerCase(right); - if (lowerLeft != lowerRight) { - return lowerLeft - lowerRight; - } + public UTF8String toUpperCase() { + if (numBytes == 0) { + return EMPTY_UTF8; } - return numBytes - other.numBytes; + + return isFullAscii() ? toUpperCaseAscii() : toUpperCaseSlow(); + } + + public UTF8String toUpperCaseAscii() { + return convertAscii(Character::toUpperCase); } - private int compareLowerCaseSuffixSlow(UTF8String other, int pref) { - UTF8String suffixLeft = UTF8String.fromAddress(base, offset + pref, - numBytes - pref); - UTF8String suffixRight = UTF8String.fromAddress(other.base, other.offset + pref, - other.numBytes - pref); - return suffixLeft.toLowerCaseSlow().binaryCompare(suffixRight.toLowerCaseSlow()); + private UTF8String toUpperCaseSlow() { + return fromString(toString().toUpperCase()); } /** @@ -423,43 +691,57 @@ public UTF8String toLowerCase() { if (numBytes == 0) { return EMPTY_UTF8; } - // Optimization - do char level lowercase conversion in case of chars in ASCII range - for (int i = 0; i < numBytes; i++) { + + return isFullAscii() ? toLowerCaseAscii() : toLowerCaseSlow(); + } + + public boolean isFullAscii() { + for (var i = 0; i < numBytes; i++) { if (getByte(i) < 0) { - // non-ASCII - return toLowerCaseSlow(); + return false; } } - byte[] bytes = new byte[numBytes]; - for (int i = 0; i < numBytes; i++) { - bytes[i] = (byte) Character.toLowerCase(getByte(i)); - } - return fromBytes(bytes); + return true; } private UTF8String toLowerCaseSlow() { return fromString(toString().toLowerCase()); } + public UTF8String toLowerCaseAscii() { + return convertAscii(Character::toLowerCase); + } + /** - * Returns the title case of this string, that could be used as title. + * Returns the title case of this string, that could be used as title. There are essentially two + * different version of this method - one using the JVM case mapping rules, and the other using + * the ICU case mapping rules. ASCII implementation is the same for both, but please refer to the + * respective methods for the slow (non-ASCII) implementation for more details on the differences. 
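+ * For example, both variants map "spark sql" to "Spark Sql"; results can only differ for non-ASCII characters whose JVM and ICU title-case mappings disagree.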
*/ public UTF8String toTitleCase() { if (numBytes == 0) { return EMPTY_UTF8; } - // Optimization - in case of ASCII chars we can skip copying the data to and from StringBuilder - byte prev = ' ', curr; - for (int i = 0; i < numBytes; i++) { - curr = getByte(i); - if (prev == ' ' && curr < 0) { - // non-ASCII - return toTitleCaseSlow(); - } - prev = curr; + + return isFullAscii() ? toTitleCaseAscii() : toTitleCaseSlow(); + } + + public UTF8String toTitleCaseICU() { + if (numBytes == 0) { + return EMPTY_UTF8; } + + return isFullAscii() ? toTitleCaseAscii() : toTitleCaseSlowICU(); + } + + /* + * Fast path to return the title case of this string, given that all characters are ASCII. + * This implementation essentially works for all collations currently supported in Spark. + * This method is more efficient, because it skips copying the data to and from StringBuilder. + */ + private UTF8String toTitleCaseAscii() { byte[] bytes = new byte[numBytes]; - prev = ' '; + byte prev = ' ', curr; for (int i = 0; i < numBytes; i++) { curr = getByte(i); if (prev == ' ') { @@ -472,6 +754,11 @@ public UTF8String toTitleCase() { return fromBytes(bytes); } + /* + * Slow path to return the title case of this string, according to JVM case mapping rules. + * This is considered the "old" behaviour for UTF8_BINARY collation, and is not recommended. + * To use this, set the spark.sql.ICU_CASE_MAPPINGS_ENABLED configuration to `false`. + */ private UTF8String toTitleCaseSlow() { StringBuilder sb = new StringBuilder(); String s = toString(); @@ -485,6 +772,24 @@ private UTF8String toTitleCaseSlow() { return fromString(sb.toString()); } + /* + * Slow path to return the title case of this string, according to ICU case mapping rules. + * This is considered the "new" behaviour for UTF8_BINARY collation, and is recommended. + * This is used by default, since spark.sql.ICU_CASE_MAPPINGS_ENABLED is set to `true`. + */ + private UTF8String toTitleCaseSlowICU() { + StringBuilder sb = new StringBuilder(); + String s = toString(); + sb.append(s); + sb.setCharAt(0, (char) UCharacter.toTitleCase(sb.charAt(0))); + for (int i = 1; i < s.length(); i++) { + if (sb.charAt(i - 1) == ' ') { + sb.setCharAt(i, (char) UCharacter.toTitleCase(sb.charAt(i))); + } + } + return fromString(sb.toString()); + } + /* * Returns the index of the string `match` in this String. This string has to be a comma separated * list. If `match` contains a comma 0 will be returned. If the `match` isn't part of this String, @@ -521,7 +826,7 @@ public int findInSet(UTF8String match) { * @param end the end position of the current UTF8String in bytes. * @return a new UTF8String in the position of [start, end] of current UTF8String bytes. */ - private UTF8String copyUTF8String(int start, int end) { + public UTF8String copyUTF8String(int start, int end) { int len = end - start + 1; byte[] newBytes = new byte[len]; copyMemory(base, offset + start, newBytes, BYTE_ARRAY_OFFSET, len); @@ -766,6 +1071,17 @@ public UTF8String repeat(int times) { return UTF8String.fromBytes(newBytes); } + /** + * Returns the (default) position of the first occurrence of an empty substr in the current + * string from the specified position (0-based index). 
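+ * For example, indexOf(EMPTY_UTF8, 2) on "abc" delegates here and currently returns 0 rather than 2 (see SPARK-48284).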
+ * + * @param start the start position of the current string for searching + * @return the position of the first occurrence of the empty substr (now, always 0) + */ + public int indexOfEmpty(int start) { + return 0; // TODO: Fix this behaviour (SPARK-48284) + } + /** * Returns the position of the first occurrence of substr in * current string from the specified position (0-based index). @@ -776,7 +1092,7 @@ public UTF8String repeat(int times) { */ public int indexOf(UTF8String v, int start) { if (v.numBytes() == 0) { - return 0; + return indexOfEmpty(start); } // locate to the start position. @@ -801,10 +1117,34 @@ public int indexOf(UTF8String v, int start) { return -1; } + public int charPosToByte(int charPos) { + if (charPos < 0) { + return -1; + } + + int i = 0; + int c = 0; + while (i < numBytes && c < charPos) { + i += numBytesForFirstByte(getByte(i)); + c += 1; + } + return i; + } + + public int bytePosToChar(int bytePos) { + int i = 0; + int c = 0; + while (i < numBytes && i < bytePos) { + i += numBytesForFirstByte(getByte(i)); + c += 1; + } + return c; + } + /** * Find the `str` from left to right. */ - private int find(UTF8String str, int start) { + public int find(UTF8String str, int start) { assert (str.numBytes > 0); while (start <= numBytes - str.numBytes) { if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) { @@ -818,7 +1158,7 @@ private int find(UTF8String str, int start) { /** * Find the `str` from right to left. */ - private int rfind(UTF8String str, int start) { + public int rfind(UTF8String str, int start) { assert (str.numBytes > 0); while (start >= 0) { if (ByteArrayMethods.arrayEquals(base, offset + start, str.base, str.offset, str.numBytes)) { @@ -1718,4 +2058,21 @@ public void read(Kryo kryo, Input in) { in.read((byte[]) base); } + /** + * Convert a long value to its binary format stripping leading zeros. + */ + public static UTF8String toBinaryString(long val) { + int zeros = Long.numberOfLeadingZeros(val); + if (zeros == Long.SIZE) { + return UTF8String.ZERO_UTF8; + } else { + int length = Long.SIZE - zeros; + byte[] bytes = new byte[length]; + do { + bytes[--length] = (byte) ((val & 0x1) == 1 ? '1': '0'); + val >>>= 1; + } while (length > 0); + return fromBytes(bytes); + } + } } diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java index 0a1ee279316f1..aacc4507861ad 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CalendarIntervalSuite.java @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.spark.unsafe.types; diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java index 099a13a025e7e..d084ef098248f 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java @@ -17,15 +17,179 @@ package org.apache.spark.unsafe.types; import org.apache.spark.SparkException; +import org.apache.spark.sql.catalyst.util.CollationAwareUTF8String; import org.apache.spark.sql.catalyst.util.CollationFactory; import org.apache.spark.sql.catalyst.util.CollationSupport; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; - +// checkstyle.off: AvoidEscapedUnicodeCharacters public class CollationSupportSuite { + /** + * A list containing some of the supported collations in Spark. Use this list to iterate over + * all the important collation groups (binary, lowercase, icu) for complete unit test coverage. + * Note: this list may come in handy when the Spark function result is the same regardless of + * the specified collations (as often seen in some pass-through Spark expressions). + */ + private final String[] testSupportedCollations = + {"UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI"}; + + /** + * Collation-aware UTF8String comparison. 
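+ * For example, "a" and "A" compare as equal under UTF8_LCASE and UNICODE_CI, but not under UTF8_BINARY or UNICODE.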
+ */ + + private void assertStringCompare(String s1, String s2, String collationName, int expected) + throws SparkException { + UTF8String l = UTF8String.fromString(s1); + UTF8String r = UTF8String.fromString(s2); + int compare = CollationFactory.fetchCollation(collationName).comparator.compare(l, r); + assertEquals(Integer.signum(expected), Integer.signum(compare)); + } + + @Test + public void testCompare() throws SparkException { + for (String collationName: testSupportedCollations) { + // Edge cases + assertStringCompare("", "", collationName, 0); + assertStringCompare("a", "", collationName, 1); + assertStringCompare("", "a", collationName, -1); + // Basic tests + assertStringCompare("a", "a", collationName, 0); + assertStringCompare("a", "b", collationName, -1); + assertStringCompare("b", "a", collationName, 1); + assertStringCompare("A", "A", collationName, 0); + assertStringCompare("A", "B", collationName, -1); + assertStringCompare("B", "A", collationName, 1); + assertStringCompare("aa", "a", collationName, 1); + assertStringCompare("b", "bb", collationName, -1); + assertStringCompare("abc", "a", collationName, 1); + assertStringCompare("abc", "b", collationName, -1); + assertStringCompare("abc", "ab", collationName, 1); + assertStringCompare("abc", "abc", collationName, 0); + // ASCII strings + assertStringCompare("aaaa", "aaa", collationName, 1); + assertStringCompare("hello", "world", collationName, -1); + assertStringCompare("Spark", "Spark", collationName, 0); + // Non-ASCII strings + assertStringCompare("ü", "ü", collationName, 0); + assertStringCompare("ü", "", collationName, 1); + assertStringCompare("", "ü", collationName, -1); + assertStringCompare("äü", "äü", collationName, 0); + assertStringCompare("äxx", "äx", collationName, 1); + assertStringCompare("a", "ä", collationName, -1); + } + // Non-ASCII strings + assertStringCompare("äü", "bü", "UTF8_BINARY", 1); + assertStringCompare("bxx", "bü", "UTF8_BINARY", -1); + assertStringCompare("äü", "bü", "UTF8_LCASE", 1); + assertStringCompare("bxx", "bü", "UTF8_LCASE", -1); + assertStringCompare("äü", "bü", "UNICODE", -1); + assertStringCompare("bxx", "bü", "UNICODE", 1); + assertStringCompare("äü", "bü", "UNICODE_CI", -1); + assertStringCompare("bxx", "bü", "UNICODE_CI", 1); + // Case variation + assertStringCompare("AbCd", "aBcD", "UTF8_BINARY", -1); + assertStringCompare("ABCD", "abcd", "UTF8_LCASE", 0); + assertStringCompare("AbcD", "aBCd", "UNICODE", 1); + assertStringCompare("abcd", "ABCD", "UNICODE_CI", 0); + // Accent variation + assertStringCompare("aBćD", "ABĆD", "UTF8_BINARY", 1); + assertStringCompare("AbCδ", "ABCΔ", "UTF8_LCASE", 0); + assertStringCompare("äBCd", "ÄBCD", "UNICODE", -1); + assertStringCompare("Ab́cD", "AB́CD", "UNICODE_CI", 0); + // Case-variable character length + assertStringCompare("i\u0307", "İ", "UTF8_BINARY", -1); + assertStringCompare("İ", "i\u0307", "UTF8_BINARY", 1); + assertStringCompare("i\u0307", "İ", "UTF8_LCASE", 0); + assertStringCompare("İ", "i\u0307", "UTF8_LCASE", 0); + assertStringCompare("i\u0307", "İ", "UNICODE", -1); + assertStringCompare("İ", "i\u0307", "UNICODE", 1); + assertStringCompare("i\u0307", "İ", "UNICODE_CI", 0); + assertStringCompare("İ", "i\u0307", "UNICODE_CI", 0); + assertStringCompare("i\u0307İ", "i\u0307İ", "UTF8_LCASE", 0); + assertStringCompare("i\u0307İ", "İi\u0307", "UTF8_LCASE", 0); + assertStringCompare("İi\u0307", "i\u0307İ", "UTF8_LCASE", 0); + assertStringCompare("İi\u0307", "İi\u0307", "UTF8_LCASE", 0); + assertStringCompare("i\u0307İ", "i\u0307İ", 
"UNICODE_CI", 0); + assertStringCompare("i\u0307İ", "İi\u0307", "UNICODE_CI", 0); + assertStringCompare("İi\u0307", "i\u0307İ", "UNICODE_CI", 0); + assertStringCompare("İi\u0307", "İi\u0307", "UNICODE_CI", 0); + // Conditional case mapping + assertStringCompare("ς", "σ", "UTF8_BINARY", -1); + assertStringCompare("ς", "Σ", "UTF8_BINARY", 1); + assertStringCompare("σ", "Σ", "UTF8_BINARY", 1); + assertStringCompare("ς", "σ", "UTF8_LCASE", 0); + assertStringCompare("ς", "Σ", "UTF8_LCASE", 0); + assertStringCompare("σ", "Σ", "UTF8_LCASE", 0); + assertStringCompare("ς", "σ", "UNICODE", 1); + assertStringCompare("ς", "Σ", "UNICODE", 1); + assertStringCompare("σ", "Σ", "UNICODE", -1); + assertStringCompare("ς", "σ", "UNICODE_CI", 0); + assertStringCompare("ς", "Σ", "UNICODE_CI", 0); + assertStringCompare("σ", "Σ", "UNICODE_CI", 0); + // Maximum code point. + int maxCodePoint = Character.MAX_CODE_POINT; + String maxCodePointStr = new String(Character.toChars(maxCodePoint)); + for (int i = 0; i < maxCodePoint && Character.isValidCodePoint(i); ++i) { + assertStringCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_BINARY", -1); + assertStringCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_LCASE", -1); + } + // Minimum code point. + int minCodePoint = Character.MIN_CODE_POINT; + String minCodePointStr = new String(Character.toChars(minCodePoint)); + for (int i = minCodePoint + 1; i <= maxCodePoint && Character.isValidCodePoint(i); ++i) { + assertStringCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_BINARY", 1); + assertStringCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_LCASE", 1); + } + } + + private void assertLowerCaseCodePoints(UTF8String target, UTF8String expected, + Boolean useCodePoints) { + if (useCodePoints) { + assertEquals(expected, CollationAwareUTF8String.lowerCaseCodePoints(target)); + } else { + assertEquals(expected, target.toLowerCase()); + } + } + + @Test + public void testLowerCaseCodePoints() { + // Edge cases + assertLowerCaseCodePoints(UTF8String.fromString(""), UTF8String.fromString(""), false); + assertLowerCaseCodePoints(UTF8String.fromString(""), UTF8String.fromString(""), true); + // Basic tests + assertLowerCaseCodePoints(UTF8String.fromString("abcd"), UTF8String.fromString("abcd"), false); + assertLowerCaseCodePoints(UTF8String.fromString("AbCd"), UTF8String.fromString("abcd"), false); + assertLowerCaseCodePoints(UTF8String.fromString("abcd"), UTF8String.fromString("abcd"), true); + assertLowerCaseCodePoints(UTF8String.fromString("aBcD"), UTF8String.fromString("abcd"), true); + // Accent variation + assertLowerCaseCodePoints(UTF8String.fromString("AbĆd"), UTF8String.fromString("abćd"), false); + assertLowerCaseCodePoints(UTF8String.fromString("aBcΔ"), UTF8String.fromString("abcδ"), true); + // Case-variable character length + assertLowerCaseCodePoints( + UTF8String.fromString("İoDiNe"), UTF8String.fromString("i̇odine"), false); + assertLowerCaseCodePoints( + UTF8String.fromString("Abi̇o12"), UTF8String.fromString("abi̇o12"), false); + assertLowerCaseCodePoints( + UTF8String.fromString("İodInE"), UTF8String.fromString("i̇odine"), true); + assertLowerCaseCodePoints( + UTF8String.fromString("aBi̇o12"), UTF8String.fromString("abi̇o12"), true); + // Conditional case mapping + assertLowerCaseCodePoints( + UTF8String.fromString("ΘΑΛΑΣΣΙΝΟΣ"), UTF8String.fromString("θαλασσινος"), false); + assertLowerCaseCodePoints( + UTF8String.fromString("ΘΑΛΑΣΣΙΝΟΣ"), UTF8String.fromString("θαλασσινοσ"), true); + // Surrogate 
pairs are treated as invalid UTF8 sequences + assertLowerCaseCodePoints(UTF8String.fromBytes(new byte[] + {(byte) 0xED, (byte) 0xA0, (byte) 0x80, (byte) 0xED, (byte) 0xB0, (byte) 0x80}), + UTF8String.fromString("\uFFFD\uFFFD"), false); + assertLowerCaseCodePoints(UTF8String.fromBytes(new byte[] + {(byte) 0xED, (byte) 0xA0, (byte) 0x80, (byte) 0xED, (byte) 0xB0, (byte) 0x80}), + UTF8String.fromString("\uFFFD\uFFFD"), true); + } + /** * Collation-aware string expressions. */ @@ -47,9 +211,9 @@ public void testContains() throws SparkException { assertContains("", "", "UNICODE", true); assertContains("c", "", "UNICODE", true); assertContains("", "c", "UNICODE", false); - assertContains("", "", "UTF8_BINARY_LCASE", true); - assertContains("c", "", "UTF8_BINARY_LCASE", true); - assertContains("", "c", "UTF8_BINARY_LCASE", false); + assertContains("", "", "UTF8_LCASE", true); + assertContains("c", "", "UTF8_LCASE", true); + assertContains("", "c", "UTF8_LCASE", false); assertContains("", "", "UNICODE_CI", true); assertContains("c", "", "UNICODE_CI", true); assertContains("", "c", "UNICODE_CI", false); @@ -60,9 +224,9 @@ public void testContains() throws SparkException { assertContains("abcde", "abcde", "UNICODE", true); assertContains("abcde", "aBcDe", "UNICODE", false); assertContains("abcde", "fghij", "UNICODE", false); - assertContains("abcde", "C", "UTF8_BINARY_LCASE", true); - assertContains("abcde", "AbCdE", "UTF8_BINARY_LCASE", true); - assertContains("abcde", "X", "UTF8_BINARY_LCASE", false); + assertContains("abcde", "C", "UTF8_LCASE", true); + assertContains("abcde", "AbCdE", "UTF8_LCASE", true); + assertContains("abcde", "X", "UTF8_LCASE", false); assertContains("abcde", "c", "UNICODE_CI", true); assertContains("abcde", "bCD", "UNICODE_CI", true); assertContains("abcde", "123", "UNICODE_CI", false); @@ -71,8 +235,8 @@ public void testContains() throws SparkException { assertContains("aBcDe", "BcD", "UTF8_BINARY", true); assertContains("aBcDe", "abcde", "UNICODE", false); assertContains("aBcDe", "aBcDe", "UNICODE", true); - assertContains("aBcDe", "bcd", "UTF8_BINARY_LCASE", true); - assertContains("aBcDe", "BCD", "UTF8_BINARY_LCASE", true); + assertContains("aBcDe", "bcd", "UTF8_LCASE", true); + assertContains("aBcDe", "BCD", "UTF8_LCASE", true); assertContains("aBcDe", "abcde", "UNICODE_CI", true); assertContains("aBcDe", "AbCdE", "UNICODE_CI", true); // Accent variation @@ -80,8 +244,8 @@ public void testContains() throws SparkException { assertContains("aBcDe", "BćD", "UTF8_BINARY", false); assertContains("aBcDe", "abćde", "UNICODE", false); assertContains("aBcDe", "aBćDe", "UNICODE", false); - assertContains("aBcDe", "bćd", "UTF8_BINARY_LCASE", false); - assertContains("aBcDe", "BĆD", "UTF8_BINARY_LCASE", false); + assertContains("aBcDe", "bćd", "UTF8_LCASE", false); + assertContains("aBcDe", "BĆD", "UTF8_LCASE", false); assertContains("aBcDe", "abćde", "UNICODE_CI", false); assertContains("aBcDe", "AbĆdE", "UNICODE_CI", false); // Variable byte length characters @@ -93,14 +257,48 @@ public void testContains() throws SparkException { assertContains("ab世De", "AB世dE", "UNICODE", false); assertContains("äbćδe", "äbćδe", "UNICODE", true); assertContains("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertContains("ab世De", "b世D", "UTF8_BINARY_LCASE", true); - assertContains("ab世De", "B世d", "UTF8_BINARY_LCASE", true); - assertContains("äbćδe", "bćδ", "UTF8_BINARY_LCASE", true); - assertContains("äbćδe", "BcΔ", "UTF8_BINARY_LCASE", false); + assertContains("ab世De", "b世D", "UTF8_LCASE", true); + 
assertContains("ab世De", "B世d", "UTF8_LCASE", true); + assertContains("äbćδe", "bćδ", "UTF8_LCASE", true); + assertContains("äbćδe", "BcΔ", "UTF8_LCASE", false); assertContains("ab世De", "ab世De", "UNICODE_CI", true); assertContains("ab世De", "AB世dE", "UNICODE_CI", true); assertContains("äbćδe", "ÄbćδE", "UNICODE_CI", true); assertContains("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); + // Characters with the same binary lowercase representation + assertContains("The Kelvin.", "Kelvin", "UTF8_LCASE", true); + assertContains("The Kelvin.", "Kelvin", "UTF8_LCASE", true); + assertContains("The KKelvin.", "KKelvin", "UTF8_LCASE", true); + assertContains("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); + assertContains("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); + assertContains("The KKelvin.", "KKelvin,", "UTF8_LCASE", false); + // Case-variable character length + assertContains("i̇", "i", "UNICODE_CI", false); + assertContains("i̇", "\u0307", "UNICODE_CI", false); + assertContains("i̇", "İ", "UNICODE_CI", true); + assertContains("İ", "i", "UNICODE_CI", false); + assertContains("adi̇os", "io", "UNICODE_CI", false); + assertContains("adi̇os", "Io", "UNICODE_CI", false); + assertContains("adi̇os", "i̇o", "UNICODE_CI", true); + assertContains("adi̇os", "İo", "UNICODE_CI", true); + assertContains("adİos", "io", "UNICODE_CI", false); + assertContains("adİos", "Io", "UNICODE_CI", false); + assertContains("adİos", "i̇o", "UNICODE_CI", true); + assertContains("adİos", "İo", "UNICODE_CI", true); + assertContains("i̇", "i", "UTF8_LCASE", true); // != UNICODE_CI + assertContains("İ", "\u0307", "UTF8_LCASE", false); + assertContains("İ", "i", "UTF8_LCASE", false); + assertContains("i̇", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI + assertContains("i̇", "İ", "UTF8_LCASE", true); + assertContains("İ", "i", "UTF8_LCASE", false); + assertContains("adi̇os", "io", "UTF8_LCASE", false); + assertContains("adi̇os", "Io", "UTF8_LCASE", false); + assertContains("adi̇os", "i̇o", "UTF8_LCASE", true); + assertContains("adi̇os", "İo", "UTF8_LCASE", true); + assertContains("adİos", "io", "UTF8_LCASE", false); + assertContains("adİos", "Io", "UTF8_LCASE", false); + assertContains("adİos", "i̇o", "UTF8_LCASE", true); + assertContains("adİos", "İo", "UTF8_LCASE", true); } private void assertStartsWith( @@ -121,9 +319,9 @@ public void testStartsWith() throws SparkException { assertStartsWith("", "", "UNICODE", true); assertStartsWith("c", "", "UNICODE", true); assertStartsWith("", "c", "UNICODE", false); - assertStartsWith("", "", "UTF8_BINARY_LCASE", true); - assertStartsWith("c", "", "UTF8_BINARY_LCASE", true); - assertStartsWith("", "c", "UTF8_BINARY_LCASE", false); + assertStartsWith("", "", "UTF8_LCASE", true); + assertStartsWith("c", "", "UTF8_LCASE", true); + assertStartsWith("", "c", "UTF8_LCASE", false); assertStartsWith("", "", "UNICODE_CI", true); assertStartsWith("c", "", "UNICODE_CI", true); assertStartsWith("", "c", "UNICODE_CI", false); @@ -134,19 +332,20 @@ public void testStartsWith() throws SparkException { assertStartsWith("abcde", "abcde", "UNICODE", true); assertStartsWith("abcde", "aBcDe", "UNICODE", false); assertStartsWith("abcde", "fghij", "UNICODE", false); - assertStartsWith("abcde", "A", "UTF8_BINARY_LCASE", true); - assertStartsWith("abcde", "AbCdE", "UTF8_BINARY_LCASE", true); - assertStartsWith("abcde", "X", "UTF8_BINARY_LCASE", false); + assertStartsWith("abcde", "A", "UTF8_LCASE", true); + assertStartsWith("abcde", "AbCdE", "UTF8_LCASE", true); + assertStartsWith("abcde", "X", "UTF8_LCASE", false); 
assertStartsWith("abcde", "a", "UNICODE_CI", true); assertStartsWith("abcde", "aBC", "UNICODE_CI", true); + assertStartsWith("abcde", "bcd", "UNICODE_CI", false); assertStartsWith("abcde", "123", "UNICODE_CI", false); // Case variation assertStartsWith("aBcDe", "abc", "UTF8_BINARY", false); assertStartsWith("aBcDe", "aBc", "UTF8_BINARY", true); assertStartsWith("aBcDe", "abcde", "UNICODE", false); assertStartsWith("aBcDe", "aBcDe", "UNICODE", true); - assertStartsWith("aBcDe", "abc", "UTF8_BINARY_LCASE", true); - assertStartsWith("aBcDe", "ABC", "UTF8_BINARY_LCASE", true); + assertStartsWith("aBcDe", "abc", "UTF8_LCASE", true); + assertStartsWith("aBcDe", "ABC", "UTF8_LCASE", true); assertStartsWith("aBcDe", "abcde", "UNICODE_CI", true); assertStartsWith("aBcDe", "AbCdE", "UNICODE_CI", true); // Accent variation @@ -154,8 +353,8 @@ public void testStartsWith() throws SparkException { assertStartsWith("aBcDe", "aBć", "UTF8_BINARY", false); assertStartsWith("aBcDe", "abćde", "UNICODE", false); assertStartsWith("aBcDe", "aBćDe", "UNICODE", false); - assertStartsWith("aBcDe", "abć", "UTF8_BINARY_LCASE", false); - assertStartsWith("aBcDe", "ABĆ", "UTF8_BINARY_LCASE", false); + assertStartsWith("aBcDe", "abć", "UTF8_LCASE", false); + assertStartsWith("aBcDe", "ABĆ", "UTF8_LCASE", false); assertStartsWith("aBcDe", "abćde", "UNICODE_CI", false); assertStartsWith("aBcDe", "AbĆdE", "UNICODE_CI", false); // Variable byte length characters @@ -167,14 +366,52 @@ public void testStartsWith() throws SparkException { assertStartsWith("ab世De", "AB世dE", "UNICODE", false); assertStartsWith("äbćδe", "äbćδe", "UNICODE", true); assertStartsWith("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertStartsWith("ab世De", "ab世", "UTF8_BINARY_LCASE", true); - assertStartsWith("ab世De", "aB世", "UTF8_BINARY_LCASE", true); - assertStartsWith("äbćδe", "äbć", "UTF8_BINARY_LCASE", true); - assertStartsWith("äbćδe", "äBc", "UTF8_BINARY_LCASE", false); + assertStartsWith("ab世De", "ab世", "UTF8_LCASE", true); + assertStartsWith("ab世De", "aB世", "UTF8_LCASE", true); + assertStartsWith("äbćδe", "äbć", "UTF8_LCASE", true); + assertStartsWith("äbćδe", "äBc", "UTF8_LCASE", false); assertStartsWith("ab世De", "ab世De", "UNICODE_CI", true); assertStartsWith("ab世De", "AB世dE", "UNICODE_CI", true); assertStartsWith("äbćδe", "ÄbćδE", "UNICODE_CI", true); assertStartsWith("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); + // Characters with the same binary lowercase representation + assertStartsWith("Kelvin.", "Kelvin", "UTF8_LCASE", true); + assertStartsWith("Kelvin.", "Kelvin", "UTF8_LCASE", true); + assertStartsWith("KKelvin.", "KKelvin", "UTF8_LCASE", true); + assertStartsWith("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); + assertStartsWith("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true); + assertStartsWith("KKelvin.", "KKelvin,", "UTF8_LCASE", false); + // Case-variable character length + assertStartsWith("i̇", "i", "UNICODE_CI", false); + assertStartsWith("i̇", "İ", "UNICODE_CI", true); + assertStartsWith("İ", "i", "UNICODE_CI", false); + assertStartsWith("İİİ", "i̇i̇", "UNICODE_CI", true); + assertStartsWith("İİİ", "i̇i", "UNICODE_CI", false); + assertStartsWith("İi̇İ", "i̇İ", "UNICODE_CI", true); + assertStartsWith("i̇İi̇i̇", "İi̇İi", "UNICODE_CI", false); + assertStartsWith("i̇onic", "io", "UNICODE_CI", false); + assertStartsWith("i̇onic", "Io", "UNICODE_CI", false); + assertStartsWith("i̇onic", "i̇o", "UNICODE_CI", true); + assertStartsWith("i̇onic", "İo", "UNICODE_CI", true); + assertStartsWith("İonic", "io", "UNICODE_CI", false); + 
assertStartsWith("İonic", "Io", "UNICODE_CI", false); + assertStartsWith("İonic", "i̇o", "UNICODE_CI", true); + assertStartsWith("İonic", "İo", "UNICODE_CI", true); + assertStartsWith("i̇", "i", "UTF8_LCASE", true); // != UNICODE_CI + assertStartsWith("i̇", "İ", "UTF8_LCASE", true); + assertStartsWith("İ", "i", "UTF8_LCASE", false); + assertStartsWith("İİİ", "i̇i̇", "UTF8_LCASE", true); + assertStartsWith("İİİ", "i̇i", "UTF8_LCASE", false); + assertStartsWith("İi̇İ", "i̇İ", "UTF8_LCASE", true); + assertStartsWith("i̇İi̇i̇", "İi̇İi", "UTF8_LCASE", true); // != UNICODE_CI + assertStartsWith("i̇onic", "io", "UTF8_LCASE", false); + assertStartsWith("i̇onic", "Io", "UTF8_LCASE", false); + assertStartsWith("i̇onic", "i̇o", "UTF8_LCASE", true); + assertStartsWith("i̇onic", "İo", "UTF8_LCASE", true); + assertStartsWith("İonic", "io", "UTF8_LCASE", false); + assertStartsWith("İonic", "Io", "UTF8_LCASE", false); + assertStartsWith("İonic", "i̇o", "UTF8_LCASE", true); + assertStartsWith("İonic", "İo", "UTF8_LCASE", true); } private void assertEndsWith(String pattern, String suffix, String collationName, boolean expected) @@ -194,9 +431,9 @@ public void testEndsWith() throws SparkException { assertEndsWith("", "", "UNICODE", true); assertEndsWith("c", "", "UNICODE", true); assertEndsWith("", "c", "UNICODE", false); - assertEndsWith("", "", "UTF8_BINARY_LCASE", true); - assertEndsWith("c", "", "UTF8_BINARY_LCASE", true); - assertEndsWith("", "c", "UTF8_BINARY_LCASE", false); + assertEndsWith("", "", "UTF8_LCASE", true); + assertEndsWith("c", "", "UTF8_LCASE", true); + assertEndsWith("", "c", "UTF8_LCASE", false); assertEndsWith("", "", "UNICODE_CI", true); assertEndsWith("c", "", "UNICODE_CI", true); assertEndsWith("", "c", "UNICODE_CI", false); @@ -207,19 +444,20 @@ public void testEndsWith() throws SparkException { assertEndsWith("abcde", "abcde", "UNICODE", true); assertEndsWith("abcde", "aBcDe", "UNICODE", false); assertEndsWith("abcde", "fghij", "UNICODE", false); - assertEndsWith("abcde", "E", "UTF8_BINARY_LCASE", true); - assertEndsWith("abcde", "AbCdE", "UTF8_BINARY_LCASE", true); - assertEndsWith("abcde", "X", "UTF8_BINARY_LCASE", false); + assertEndsWith("abcde", "E", "UTF8_LCASE", true); + assertEndsWith("abcde", "AbCdE", "UTF8_LCASE", true); + assertEndsWith("abcde", "X", "UTF8_LCASE", false); assertEndsWith("abcde", "e", "UNICODE_CI", true); assertEndsWith("abcde", "CDe", "UNICODE_CI", true); + assertEndsWith("abcde", "bcd", "UNICODE_CI", false); assertEndsWith("abcde", "123", "UNICODE_CI", false); // Case variation assertEndsWith("aBcDe", "cde", "UTF8_BINARY", false); assertEndsWith("aBcDe", "cDe", "UTF8_BINARY", true); assertEndsWith("aBcDe", "abcde", "UNICODE", false); assertEndsWith("aBcDe", "aBcDe", "UNICODE", true); - assertEndsWith("aBcDe", "cde", "UTF8_BINARY_LCASE", true); - assertEndsWith("aBcDe", "CDE", "UTF8_BINARY_LCASE", true); + assertEndsWith("aBcDe", "cde", "UTF8_LCASE", true); + assertEndsWith("aBcDe", "CDE", "UTF8_LCASE", true); assertEndsWith("aBcDe", "abcde", "UNICODE_CI", true); assertEndsWith("aBcDe", "AbCdE", "UNICODE_CI", true); // Accent variation @@ -227,8 +465,8 @@ public void testEndsWith() throws SparkException { assertEndsWith("aBcDe", "ćDe", "UTF8_BINARY", false); assertEndsWith("aBcDe", "abćde", "UNICODE", false); assertEndsWith("aBcDe", "aBćDe", "UNICODE", false); - assertEndsWith("aBcDe", "ćde", "UTF8_BINARY_LCASE", false); - assertEndsWith("aBcDe", "ĆDE", "UTF8_BINARY_LCASE", false); + assertEndsWith("aBcDe", "ćde", "UTF8_LCASE", false); + 
assertEndsWith("aBcDe", "ĆDE", "UTF8_LCASE", false); assertEndsWith("aBcDe", "abćde", "UNICODE_CI", false); assertEndsWith("aBcDe", "AbĆdE", "UNICODE_CI", false); // Variable byte length characters @@ -240,14 +478,904 @@ public void testEndsWith() throws SparkException { assertEndsWith("ab世De", "AB世dE", "UNICODE", false); assertEndsWith("äbćδe", "äbćδe", "UNICODE", true); assertEndsWith("äbćδe", "ÄBcΔÉ", "UNICODE", false); - assertEndsWith("ab世De", "世De", "UTF8_BINARY_LCASE", true); - assertEndsWith("ab世De", "世dE", "UTF8_BINARY_LCASE", true); - assertEndsWith("äbćδe", "ćδe", "UTF8_BINARY_LCASE", true); - assertEndsWith("äbćδe", "cδE", "UTF8_BINARY_LCASE", false); + assertEndsWith("ab世De", "世De", "UTF8_LCASE", true); + assertEndsWith("ab世De", "世dE", "UTF8_LCASE", true); + assertEndsWith("äbćδe", "ćδe", "UTF8_LCASE", true); + assertEndsWith("äbćδe", "cδE", "UTF8_LCASE", false); assertEndsWith("ab世De", "ab世De", "UNICODE_CI", true); assertEndsWith("ab世De", "AB世dE", "UNICODE_CI", true); assertEndsWith("äbćδe", "ÄbćδE", "UNICODE_CI", true); assertEndsWith("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false); + // Characters with the same binary lowercase representation + assertEndsWith("The Kelvin", "Kelvin", "UTF8_LCASE", true); + assertEndsWith("The Kelvin", "Kelvin", "UTF8_LCASE", true); + assertEndsWith("The KKelvin", "KKelvin", "UTF8_LCASE", true); + assertEndsWith("The 2 Kelvin", "2 Kelvin", "UTF8_LCASE", true); + assertEndsWith("The 2 Kelvin", "2 Kelvin", "UTF8_LCASE", true); + assertEndsWith("The KKelvin", "KKelvin,", "UTF8_LCASE", false); + // Case-variable character length + assertEndsWith("i̇", "\u0307", "UNICODE_CI", false); + assertEndsWith("i̇", "İ", "UNICODE_CI", true); + assertEndsWith("İ", "i", "UNICODE_CI", false); + assertEndsWith("İİİ", "i̇i̇", "UNICODE_CI", true); + assertEndsWith("İİİ", "ii̇", "UNICODE_CI", false); + assertEndsWith("İi̇İ", "İi̇", "UNICODE_CI", true); + assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", "UNICODE_CI", false); + assertEndsWith("the i̇o", "io", "UNICODE_CI", false); + assertEndsWith("the i̇o", "Io", "UNICODE_CI", false); + assertEndsWith("the i̇o", "i̇o", "UNICODE_CI", true); + assertEndsWith("the i̇o", "İo", "UNICODE_CI", true); + assertEndsWith("the İo", "io", "UNICODE_CI", false); + assertEndsWith("the İo", "Io", "UNICODE_CI", false); + assertEndsWith("the İo", "i̇o", "UNICODE_CI", true); + assertEndsWith("the İo", "İo", "UNICODE_CI", true); + assertEndsWith("i̇", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI + assertEndsWith("i̇", "İ", "UTF8_LCASE", true); + assertEndsWith("İ", "\u0307", "UTF8_LCASE", false); + assertEndsWith("İİİ", "i̇i̇", "UTF8_LCASE", true); + assertEndsWith("İİİ", "ii̇", "UTF8_LCASE", false); + assertEndsWith("İi̇İ", "İi̇", "UTF8_LCASE", true); + assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", "UTF8_LCASE", true); // != UNICODE_CI + assertEndsWith("i̇İi̇i̇", "\u0307İİ", "UTF8_LCASE", false); + assertEndsWith("the i̇o", "io", "UTF8_LCASE", false); + assertEndsWith("the i̇o", "Io", "UTF8_LCASE", false); + assertEndsWith("the i̇o", "i̇o", "UTF8_LCASE", true); + assertEndsWith("the i̇o", "İo", "UTF8_LCASE", true); + assertEndsWith("the İo", "io", "UTF8_LCASE", false); + assertEndsWith("the İo", "Io", "UTF8_LCASE", false); + assertEndsWith("the İo", "i̇o", "UTF8_LCASE", true); + assertEndsWith("the İo", "İo", "UTF8_LCASE", true); + } + + private void assertStringSplitSQL(String str, String delimiter, String collationName, + UTF8String[] expected) throws SparkException { + UTF8String s = UTF8String.fromString(str); + UTF8String d = 
UTF8String.fromString(delimiter); + int collationId = CollationFactory.collationNameToId(collationName); + assertArrayEquals(expected, CollationSupport.StringSplitSQL.exec(s, d, collationId)); + } + + @Test + public void testStringSplitSQL() throws SparkException { + // Possible splits + var empty_match = new UTF8String[] { UTF8String.fromString("") }; + var array_abc = new UTF8String[] { UTF8String.fromString("abc") }; + var array_1a2 = new UTF8String[] { UTF8String.fromString("1a2") }; + var array_AaXbB = new UTF8String[] { UTF8String.fromString("AaXbB") }; + var array_aBcDe = new UTF8String[] { UTF8String.fromString("aBcDe") }; + var array_special = new UTF8String[] { UTF8String.fromString("äb世De") }; + var array_abcde = new UTF8String[] { UTF8String.fromString("äbćδe") }; + var full_match = new UTF8String[] { UTF8String.fromString(""), UTF8String.fromString("") }; + var array_1_2 = new UTF8String[] { UTF8String.fromString("1"), UTF8String.fromString("2") }; + var array_A_B = new UTF8String[] { UTF8String.fromString("A"), UTF8String.fromString("B") }; + var array_a_e = new UTF8String[] { UTF8String.fromString("ä"), UTF8String.fromString("e") }; + var array_Aa_bB = new UTF8String[] { UTF8String.fromString("Aa"), UTF8String.fromString("bB") }; + // Edge cases + assertStringSplitSQL("", "", "UTF8_BINARY", empty_match); + assertStringSplitSQL("abc", "", "UTF8_BINARY", array_abc); + assertStringSplitSQL("", "abc", "UTF8_BINARY", empty_match); + assertStringSplitSQL("", "", "UNICODE", empty_match); + assertStringSplitSQL("abc", "", "UNICODE", array_abc); + assertStringSplitSQL("", "abc", "UNICODE", empty_match); + assertStringSplitSQL("", "", "UTF8_LCASE", empty_match); + assertStringSplitSQL("abc", "", "UTF8_LCASE", array_abc); + assertStringSplitSQL("", "abc", "UTF8_LCASE", empty_match); + assertStringSplitSQL("", "", "UNICODE_CI", empty_match); + assertStringSplitSQL("abc", "", "UNICODE_CI", array_abc); + assertStringSplitSQL("", "abc", "UNICODE_CI", empty_match); + // Basic tests + assertStringSplitSQL("1a2", "a", "UTF8_BINARY", array_1_2); + assertStringSplitSQL("1a2", "A", "UTF8_BINARY", array_1a2); + assertStringSplitSQL("1a2", "b", "UTF8_BINARY", array_1a2); + assertStringSplitSQL("1a2", "1a2", "UNICODE", full_match); + assertStringSplitSQL("1a2", "1A2", "UNICODE", array_1a2); + assertStringSplitSQL("1a2", "3b4", "UNICODE", array_1a2); + assertStringSplitSQL("1a2", "A", "UTF8_LCASE", array_1_2); + assertStringSplitSQL("1a2", "1A2", "UTF8_LCASE", full_match); + assertStringSplitSQL("1a2", "X", "UTF8_LCASE", array_1a2); + assertStringSplitSQL("1a2", "a", "UNICODE_CI", array_1_2); + assertStringSplitSQL("1a2", "A", "UNICODE_CI", array_1_2); + assertStringSplitSQL("1a2", "1A2", "UNICODE_CI", full_match); + assertStringSplitSQL("1a2", "123", "UNICODE_CI", array_1a2); + // Case variation + assertStringSplitSQL("AaXbB", "x", "UTF8_BINARY", array_AaXbB); + assertStringSplitSQL("AaXbB", "X", "UTF8_BINARY", array_Aa_bB); + assertStringSplitSQL("AaXbB", "axb", "UNICODE", array_AaXbB); + assertStringSplitSQL("AaXbB", "aXb", "UNICODE", array_A_B); + assertStringSplitSQL("AaXbB", "axb", "UTF8_LCASE", array_A_B); + assertStringSplitSQL("AaXbB", "AXB", "UTF8_LCASE", array_A_B); + assertStringSplitSQL("AaXbB", "axb", "UNICODE_CI", array_A_B); + assertStringSplitSQL("AaXbB", "AxB", "UNICODE_CI", array_A_B); + // Accent variation + assertStringSplitSQL("aBcDe", "bćd", "UTF8_BINARY", array_aBcDe); + assertStringSplitSQL("aBcDe", "BćD", "UTF8_BINARY", array_aBcDe); + assertStringSplitSQL("aBcDe", "abćde", 
"UNICODE", array_aBcDe); + assertStringSplitSQL("aBcDe", "aBćDe", "UNICODE", array_aBcDe); + assertStringSplitSQL("aBcDe", "bćd", "UTF8_LCASE", array_aBcDe); + assertStringSplitSQL("aBcDe", "BĆD", "UTF8_LCASE", array_aBcDe); + assertStringSplitSQL("aBcDe", "abćde", "UNICODE_CI", array_aBcDe); + assertStringSplitSQL("aBcDe", "AbĆdE", "UNICODE_CI", array_aBcDe); + // Variable byte length characters + assertStringSplitSQL("äb世De", "b世D", "UTF8_BINARY", array_a_e); + assertStringSplitSQL("äb世De", "B世d", "UTF8_BINARY", array_special); + assertStringSplitSQL("äbćδe", "bćδ", "UTF8_BINARY", array_a_e); + assertStringSplitSQL("äbćδe", "BcΔ", "UTF8_BINARY", array_abcde); + assertStringSplitSQL("äb世De", "äb世De", "UNICODE", full_match); + assertStringSplitSQL("äb世De", "äB世de", "UNICODE", array_special); + assertStringSplitSQL("äbćδe", "äbćδe", "UNICODE", full_match); + assertStringSplitSQL("äbćδe", "ÄBcΔÉ", "UNICODE", array_abcde); + assertStringSplitSQL("äb世De", "b世D", "UTF8_LCASE", array_a_e); + assertStringSplitSQL("äb世De", "B世d", "UTF8_LCASE", array_a_e); + assertStringSplitSQL("äbćδe", "bćδ", "UTF8_LCASE", array_a_e); + assertStringSplitSQL("äbćδe", "BcΔ", "UTF8_LCASE", array_abcde); + assertStringSplitSQL("äb世De", "ab世De", "UNICODE_CI", array_special); + assertStringSplitSQL("äb世De", "AB世dE", "UNICODE_CI", array_special); + assertStringSplitSQL("äbćδe", "ÄbćδE", "UNICODE_CI", full_match); + assertStringSplitSQL("äbćδe", "ÄBcΔÉ", "UNICODE_CI", array_abcde); + } + + private void assertUpper(String target, String collationName, String expected) + throws SparkException { + UTF8String target_utf8 = UTF8String.fromString(target); + UTF8String expected_utf8 = UTF8String.fromString(expected); + int collationId = CollationFactory.collationNameToId(collationName); + // Testing the new ICU-based implementation of the Upper function. + assertEquals(expected_utf8, CollationSupport.Upper.exec(target_utf8, collationId, true)); + // Testing the old JVM-based implementation of the Upper function. + assertEquals(expected_utf8, CollationSupport.Upper.exec(target_utf8, collationId, false)); + // Note: results should be the same in these tests for both ICU and JVM-based implementations. 
+ } + + @Test + public void testUpper() throws SparkException { + // Edge cases + assertUpper("", "UTF8_BINARY", ""); + assertUpper("", "UTF8_LCASE", ""); + assertUpper("", "UNICODE", ""); + assertUpper("", "UNICODE_CI", ""); + // Basic tests + assertUpper("abcde", "UTF8_BINARY", "ABCDE"); + assertUpper("abcde", "UTF8_LCASE", "ABCDE"); + assertUpper("abcde", "UNICODE", "ABCDE"); + assertUpper("abcde", "UNICODE_CI", "ABCDE"); + // Uppercase present + assertUpper("AbCdE", "UTF8_BINARY", "ABCDE"); + assertUpper("aBcDe", "UTF8_BINARY", "ABCDE"); + assertUpper("AbCdE", "UTF8_LCASE", "ABCDE"); + assertUpper("aBcDe", "UTF8_LCASE", "ABCDE"); + assertUpper("AbCdE", "UNICODE", "ABCDE"); + assertUpper("aBcDe", "UNICODE", "ABCDE"); + assertUpper("AbCdE", "UNICODE_CI", "ABCDE"); + assertUpper("aBcDe", "UNICODE_CI", "ABCDE"); + // Accent letters + assertUpper("aBćDe","UTF8_BINARY", "ABĆDE"); + assertUpper("aBćDe","UTF8_LCASE", "ABĆDE"); + assertUpper("aBćDe","UNICODE", "ABĆDE"); + assertUpper("aBćDe","UNICODE_CI", "ABĆDE"); + // Variable byte length characters + assertUpper("ab世De", "UTF8_BINARY", "AB世DE"); + assertUpper("äbćδe", "UTF8_BINARY", "ÄBĆΔE"); + assertUpper("ab世De", "UTF8_LCASE", "AB世DE"); + assertUpper("äbćδe", "UTF8_LCASE", "ÄBĆΔE"); + assertUpper("ab世De", "UNICODE", "AB世DE"); + assertUpper("äbćδe", "UNICODE", "ÄBĆΔE"); + assertUpper("ab世De", "UNICODE_CI", "AB世DE"); + assertUpper("äbćδe", "UNICODE_CI", "ÄBĆΔE"); + // Case-variable character length + assertUpper("i\u0307o", "UTF8_BINARY","I\u0307O"); + assertUpper("i\u0307o", "UTF8_LCASE","I\u0307O"); + assertUpper("i\u0307o", "UNICODE","I\u0307O"); + assertUpper("i\u0307o", "UNICODE_CI","I\u0307O"); + assertUpper("ß fi ffi ff st ῗ", "UTF8_BINARY","SS FI FFI FF ST \u0399\u0308\u0342"); + assertUpper("ß fi ffi ff st ῗ", "UTF8_LCASE","SS FI FFI FF ST \u0399\u0308\u0342"); + assertUpper("ß fi ffi ff st ῗ", "UNICODE","SS FI FFI FF ST \u0399\u0308\u0342"); + assertUpper("ß fi ffi ff st ῗ", "UNICODE","SS FI FFI FF ST \u0399\u0308\u0342"); + } + + private void assertLower(String target, String collationName, String expected) + throws SparkException { + UTF8String target_utf8 = UTF8String.fromString(target); + UTF8String expected_utf8 = UTF8String.fromString(expected); + int collationId = CollationFactory.collationNameToId(collationName); + // Testing the new ICU-based implementation of the Lower function. + assertEquals(expected_utf8, CollationSupport.Lower.exec(target_utf8, collationId, true)); + // Testing the old JVM-based implementation of the Lower function. + assertEquals(expected_utf8, CollationSupport.Lower.exec(target_utf8, collationId, false)); + // Note: results should be the same in these tests for both ICU and JVM-based implementations. 
+ } + + @Test + public void testLower() throws SparkException { + // Edge cases + assertLower("", "UTF8_BINARY", ""); + assertLower("", "UTF8_LCASE", ""); + assertLower("", "UNICODE", ""); + assertLower("", "UNICODE_CI", ""); + // Basic tests + assertLower("ABCDE", "UTF8_BINARY", "abcde"); + assertLower("ABCDE", "UTF8_LCASE", "abcde"); + assertLower("ABCDE", "UNICODE", "abcde"); + assertLower("ABCDE", "UNICODE_CI", "abcde"); + // Uppercase present + assertLower("AbCdE", "UTF8_BINARY", "abcde"); + assertLower("aBcDe", "UTF8_BINARY", "abcde"); + assertLower("AbCdE", "UTF8_LCASE", "abcde"); + assertLower("aBcDe", "UTF8_LCASE", "abcde"); + assertLower("AbCdE", "UNICODE", "abcde"); + assertLower("aBcDe", "UNICODE", "abcde"); + assertLower("AbCdE", "UNICODE_CI", "abcde"); + assertLower("aBcDe", "UNICODE_CI", "abcde"); + // Accent letters + assertLower("AbĆdE","UTF8_BINARY", "abćde"); + assertLower("AbĆdE","UTF8_LCASE", "abćde"); + assertLower("AbĆdE","UNICODE", "abćde"); + assertLower("AbĆdE","UNICODE_CI", "abćde"); + // Variable byte length characters + assertLower("aB世De", "UTF8_BINARY", "ab世de"); + assertLower("ÄBĆΔE", "UTF8_BINARY", "äbćδe"); + assertLower("aB世De", "UTF8_LCASE", "ab世de"); + assertLower("ÄBĆΔE", "UTF8_LCASE", "äbćδe"); + assertLower("aB世De", "UNICODE", "ab世de"); + assertLower("ÄBĆΔE", "UNICODE", "äbćδe"); + assertLower("aB世De", "UNICODE_CI", "ab世de"); + assertLower("ÄBĆΔE", "UNICODE_CI", "äbćδe"); + // Case-variable character length + assertLower("İo", "UTF8_BINARY","i\u0307o"); + assertLower("İo", "UTF8_LCASE","i\u0307o"); + assertLower("İo", "UNICODE","i\u0307o"); + assertLower("İo", "UNICODE_CI","i\u0307o"); + } + + private void assertInitCap(String target, String collationName, String expected) + throws SparkException { + UTF8String target_utf8 = UTF8String.fromString(target); + UTF8String expected_utf8 = UTF8String.fromString(expected); + int collationId = CollationFactory.collationNameToId(collationName); + // Testing the new ICU-based implementation of the InitCap function. + assertEquals(expected_utf8, CollationSupport.InitCap.exec(target_utf8, collationId, true)); + // Testing the old JVM-based implementation of the InitCap function. + assertEquals(expected_utf8, CollationSupport.InitCap.exec(target_utf8, collationId, false)); + // Note: results should be the same in these tests for both ICU and JVM-based implementations. 
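+ // (As with Upper and Lower, the note above is scoped to the inputs used in this suite; it is not a general guarantee that the two code paths always agree.)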
+ } + + @Test + public void testInitCap() throws SparkException { + // Edge cases + assertInitCap("", "UTF8_BINARY", ""); + assertInitCap("", "UTF8_LCASE", ""); + assertInitCap("", "UNICODE", ""); + assertInitCap("", "UNICODE_CI", ""); + // Basic tests + assertInitCap("ABCDE", "UTF8_BINARY", "Abcde"); + assertInitCap("ABCDE", "UTF8_LCASE", "Abcde"); + assertInitCap("ABCDE", "UNICODE", "Abcde"); + assertInitCap("ABCDE", "UNICODE_CI", "Abcde"); + // Uppercase present + assertInitCap("AbCdE", "UTF8_BINARY", "Abcde"); + assertInitCap("aBcDe", "UTF8_BINARY", "Abcde"); + assertInitCap("AbCdE", "UTF8_LCASE", "Abcde"); + assertInitCap("aBcDe", "UTF8_LCASE", "Abcde"); + assertInitCap("AbCdE", "UNICODE", "Abcde"); + assertInitCap("aBcDe", "UNICODE", "Abcde"); + assertInitCap("AbCdE", "UNICODE_CI", "Abcde"); + assertInitCap("aBcDe", "UNICODE_CI", "Abcde"); + // Accent letters + assertInitCap("AbĆdE", "UTF8_BINARY", "Abćde"); + assertInitCap("AbĆdE", "UTF8_LCASE", "Abćde"); + assertInitCap("AbĆdE", "UNICODE", "Abćde"); + assertInitCap("AbĆdE", "UNICODE_CI", "Abćde"); + // Variable byte length characters + assertInitCap("aB 世 De", "UTF8_BINARY", "Ab 世 De"); + assertInitCap("ÄBĆΔE", "UTF8_BINARY", "Äbćδe"); + assertInitCap("aB 世 De", "UTF8_LCASE", "Ab 世 De"); + assertInitCap("ÄBĆΔE", "UTF8_LCASE", "Äbćδe"); + assertInitCap("aB 世 De", "UNICODE", "Ab 世 De"); + assertInitCap("ÄBĆΔE", "UNICODE", "Äbćδe"); + assertInitCap("aB 世 de", "UNICODE_CI", "Ab 世 De"); + assertInitCap("ÄBĆΔE", "UNICODE_CI", "Äbćδe"); + // Case-variable character length + assertInitCap("İo", "UTF8_BINARY", "I\u0307o"); + assertInitCap("İo", "UTF8_LCASE", "İo"); + assertInitCap("İo", "UNICODE", "İo"); + assertInitCap("İo", "UNICODE_CI", "İo"); + assertInitCap("i\u0307o", "UTF8_BINARY", "I\u0307o"); + assertInitCap("i\u0307o", "UTF8_LCASE", "I\u0307o"); + assertInitCap("i\u0307o", "UNICODE", "I\u0307o"); + assertInitCap("i\u0307o", "UNICODE_CI", "I\u0307o"); + // Different possible word boundaries + assertInitCap("a b c", "UTF8_BINARY", "A B C"); + assertInitCap("a b c", "UNICODE", "A B C"); + assertInitCap("a b c", "UTF8_LCASE", "A B C"); + assertInitCap("a b c", "UNICODE_CI", "A B C"); + assertInitCap("a.b,c", "UTF8_BINARY", "A.b,c"); + assertInitCap("a.b,c", "UNICODE", "A.b,C"); + assertInitCap("a.b,c", "UTF8_LCASE", "A.b,C"); + assertInitCap("a.b,c", "UNICODE_CI", "A.b,C"); + assertInitCap("a. b-c", "UTF8_BINARY", "A. B-c"); + assertInitCap("a. b-c", "UNICODE", "A. B-C"); + assertInitCap("a. b-c", "UTF8_LCASE", "A. B-C"); + assertInitCap("a. b-c", "UNICODE_CI", "A. 
B-C"); + assertInitCap("a?b世c", "UTF8_BINARY", "A?b世c"); + assertInitCap("a?b世c", "UNICODE", "A?B世C"); + assertInitCap("a?b世c", "UTF8_LCASE", "A?B世C"); + assertInitCap("a?b世c", "UNICODE_CI", "A?B世C"); + // Titlecase characters that are different from uppercase characters + assertInitCap("dzDZDz", "UTF8_BINARY", "Dzdzdz"); + assertInitCap("dzDZDz", "UNICODE", "Dzdzdz"); + assertInitCap("dzDZDz", "UTF8_LCASE", "Dzdzdz"); + assertInitCap("dzDZDz", "UNICODE_CI", "Dzdzdz"); + assertInitCap("džaba Ljubav NJegova", "UTF8_BINARY", "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", "UNICODE", "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", "UTF8_LCASE", "Džaba Ljubav Njegova"); + assertInitCap("džaba Ljubav NJegova", "UNICODE_CI", "Džaba Ljubav Njegova"); + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UTF8_BINARY", + "ß fi ffi ff st Σημερινος Ασημενιος I\u0307ota"); + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UTF8_LCASE", + "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota"); + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE", + "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota"); + assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE_CI", + "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota"); + } + + private void assertStringInstr(String string, String substring, String collationName, + Integer expected) throws SparkException { + UTF8String str = UTF8String.fromString(string); + UTF8String substr = UTF8String.fromString(substring); + int collationId = CollationFactory.collationNameToId(collationName); + assertEquals(expected, CollationSupport.StringInstr.exec(str, substr, collationId) + 1); + } + + @Test + public void testStringInstr() throws SparkException { + assertStringInstr("aaads", "Aa", "UTF8_BINARY", 0); + assertStringInstr("aaaDs", "de", "UTF8_BINARY", 0); + assertStringInstr("aaads", "ds", "UTF8_BINARY", 4); + assertStringInstr("xxxx", "", "UTF8_BINARY", 1); + assertStringInstr("", "xxxx", "UTF8_BINARY", 0); + assertStringInstr("test大千世界X大千世界", "大千", "UTF8_BINARY", 5); + assertStringInstr("test大千世界X大千世界", "界X", "UTF8_BINARY", 8); + assertStringInstr("aaads", "Aa", "UTF8_LCASE", 1); + assertStringInstr("aaaDs", "de", "UTF8_LCASE", 0); + assertStringInstr("aaaDs", "ds", "UTF8_LCASE", 4); + assertStringInstr("xxxx", "", "UTF8_LCASE", 1); + assertStringInstr("", "xxxx", "UTF8_LCASE", 0); + assertStringInstr("test大千世界X大千世界", "大千", "UTF8_LCASE", 5); + assertStringInstr("test大千世界X大千世界", "界x", "UTF8_LCASE", 8); + assertStringInstr("aaads", "Aa", "UNICODE", 0); + assertStringInstr("aaads", "aa", "UNICODE", 1); + assertStringInstr("aaads", "de", "UNICODE", 0); + assertStringInstr("xxxx", "", "UNICODE", 1); + assertStringInstr("", "xxxx", "UNICODE", 0); + assertStringInstr("test大千世界X大千世界", "界x", "UNICODE", 0); + assertStringInstr("test大千世界X大千世界", "界X", "UNICODE", 8); + assertStringInstr("xxxx", "", "UNICODE_CI", 1); + assertStringInstr("", "xxxx", "UNICODE_CI", 0); + assertStringInstr("aaads", "AD", "UNICODE_CI", 3); + assertStringInstr("aaads", "dS", "UNICODE_CI", 4); + assertStringInstr("test大千世界X大千世界", "界y", "UNICODE_CI", 0); + assertStringInstr("test大千世界X大千世界", "界x", "UNICODE_CI", 8); + assertStringInstr("i̇", "i", "UNICODE_CI", 0); + assertStringInstr("i̇", "\u0307", "UNICODE_CI", 0); + assertStringInstr("i̇", "İ", "UNICODE_CI", 1); + assertStringInstr("İ", "i", "UNICODE_CI", 0); + assertStringInstr("İoi̇o12", "i̇o", "UNICODE_CI", 1); + assertStringInstr("i̇oİo12", "İo", "UNICODE_CI", 1); + assertStringInstr("abİoi̇o", "i̇o", 
"UNICODE_CI", 3); + assertStringInstr("abi̇oİo", "İo", "UNICODE_CI", 3); + assertStringInstr("ai̇oxXİo", "Xx", "UNICODE_CI", 5); + assertStringInstr("aİoi̇oxx", "XX", "UNICODE_CI", 7); + assertStringInstr("i̇", "i", "UTF8_LCASE", 1); // != UNICODE_CI + assertStringInstr("i̇", "\u0307", "UTF8_LCASE", 2); // != UNICODE_CI + assertStringInstr("i̇", "İ", "UTF8_LCASE", 1); + assertStringInstr("İ", "i", "UTF8_LCASE", 0); + assertStringInstr("İoi̇o12", "i̇o", "UTF8_LCASE", 1); + assertStringInstr("i̇oİo12", "İo", "UTF8_LCASE", 1); + assertStringInstr("abİoi̇o", "i̇o", "UTF8_LCASE", 3); + assertStringInstr("abi̇oİo", "İo", "UTF8_LCASE", 3); + assertStringInstr("abI\u0307oi̇o", "İo", "UTF8_LCASE", 3); + assertStringInstr("ai̇oxXİo", "Xx", "UTF8_LCASE", 5); + assertStringInstr("abİoi̇o", "\u0307o", "UTF8_LCASE", 6); + assertStringInstr("aİoi̇oxx", "XX", "UTF8_LCASE", 7); + } + + private void assertFindInSet(String word, UTF8String set, String collationName, + Integer expected) throws SparkException { + UTF8String w = UTF8String.fromString(word); + int collationId = CollationFactory.collationNameToId(collationName); + assertEquals(expected, CollationSupport.FindInSet.exec(w, set, collationId)); + } + + @Test + public void testFindInSet() throws SparkException { + assertFindInSet("AB", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); + assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 1); + assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 5); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_BINARY", 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_BINARY", 6); + assertFindInSet("", UTF8String.fromString("abc"), "UTF8_BINARY", 0); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); + assertFindInSet("c", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 4); + assertFindInSet("AB", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 3); + assertFindInSet("AbC", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 1); + assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); + assertFindInSet("XX", UTF8String.fromString("xx"), "UTF8_LCASE", 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_LCASE", 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_LCASE", 6); + assertFindInSet("", UTF8String.fromString("abc"), "UTF8_LCASE", 0); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_LCASE", 4); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); + assertFindInSet("ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 3); + assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); + assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE", 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE", 6); + assertFindInSet("", UTF8String.fromString("abc"), "UNICODE", 0); + assertFindInSet("xx", UTF8String.fromString("xx"), "UNICODE", 1); + assertFindInSet("界x", 
UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 0); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 5); + assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); + assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 4); + assertFindInSet("DeF", UTF8String.fromString("abc,b,ab,c,dEf"), "UNICODE_CI", 5); + assertFindInSet("DEFG", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0); + assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE_CI", 1); + assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE_CI", 6); + assertFindInSet("", UTF8String.fromString("abc"), "UNICODE_CI", 0); + assertFindInSet("XX", UTF8String.fromString("xx"), "UNICODE_CI", 1); + assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 4); + assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UNICODE_CI", 5); + assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 5); + assertFindInSet("i̇", UTF8String.fromString("İ"), "UNICODE_CI", 1); + assertFindInSet("i", UTF8String.fromString("İ"), "UNICODE_CI", 0); + assertFindInSet("i̇", UTF8String.fromString("i̇"), "UNICODE_CI", 1); + assertFindInSet("i", UTF8String.fromString("i̇"), "UNICODE_CI", 0); + assertFindInSet("i̇", UTF8String.fromString("İ,"), "UNICODE_CI", 1); + assertFindInSet("i", UTF8String.fromString("İ,"), "UNICODE_CI", 0); + assertFindInSet("i̇", UTF8String.fromString("i̇,"), "UNICODE_CI", 1); + assertFindInSet("i", UTF8String.fromString("i̇,"), "UNICODE_CI", 0); + assertFindInSet("i̇", UTF8String.fromString("ab,İ"), "UNICODE_CI", 2); + assertFindInSet("i", UTF8String.fromString("ab,İ"), "UNICODE_CI", 0); + assertFindInSet("i̇", UTF8String.fromString("ab,i̇"), "UNICODE_CI", 2); + assertFindInSet("i", UTF8String.fromString("ab,i̇"), "UNICODE_CI", 0); + assertFindInSet("i̇", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 2); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 0); + assertFindInSet("i̇", UTF8String.fromString("ab,i̇,12"), "UNICODE_CI", 2); + assertFindInSet("i", UTF8String.fromString("ab,i̇,12"), "UNICODE_CI", 0); + assertFindInSet("i̇o", UTF8String.fromString("ab,İo,12"), "UNICODE_CI", 2); + assertFindInSet("İo", UTF8String.fromString("ab,i̇o,12"), "UNICODE_CI", 2); + assertFindInSet("i̇", UTF8String.fromString("İ"), "UTF8_LCASE", 1); + assertFindInSet("i", UTF8String.fromString("İ"), "UTF8_LCASE", 0); + assertFindInSet("i̇", UTF8String.fromString("i̇"), "UTF8_LCASE", 1); + assertFindInSet("i", UTF8String.fromString("i̇"), "UTF8_LCASE", 0); + assertFindInSet("i̇", UTF8String.fromString("İ,"), "UTF8_LCASE", 1); + assertFindInSet("i", UTF8String.fromString("İ,"), "UTF8_LCASE", 0); + assertFindInSet("i̇", UTF8String.fromString("i̇,"), "UTF8_LCASE", 1); + assertFindInSet("i", UTF8String.fromString("i̇,"), "UTF8_LCASE", 0); + assertFindInSet("i̇", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 2); + assertFindInSet("i", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 0); + assertFindInSet("i̇", UTF8String.fromString("ab,i̇"), "UTF8_LCASE", 2); + assertFindInSet("i", UTF8String.fromString("ab,i̇"), "UTF8_LCASE", 0); + assertFindInSet("i̇", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 2); + assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 0); + assertFindInSet("i̇", UTF8String.fromString("ab,i̇,12"), "UTF8_LCASE", 2); + assertFindInSet("i", UTF8String.fromString("ab,i̇,12"), "UTF8_LCASE", 0); + assertFindInSet("i̇o", 
UTF8String.fromString("ab,İo,12"), "UTF8_LCASE", 2); + assertFindInSet("İo", UTF8String.fromString("ab,i̇o,12"), "UTF8_LCASE", 2); + // Invalid UTF8 strings + assertFindInSet("C", UTF8String.fromBytes( + new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), + "UTF8_BINARY", 3); + assertFindInSet("c", UTF8String.fromBytes( + new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), + "UTF8_LCASE", 2); + assertFindInSet("C", UTF8String.fromBytes( + new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), + "UNICODE", 2); + assertFindInSet("c", UTF8String.fromBytes( + new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }), + "UNICODE_CI", 2); + } + + private void assertReplace(String source, String search, String replace, String collationName, + String expected) throws SparkException { + UTF8String src = UTF8String.fromString(source); + UTF8String sear = UTF8String.fromString(search); + UTF8String repl = UTF8String.fromString(replace); + int collationId = CollationFactory.collationNameToId(collationName); + assertEquals(expected, CollationSupport.StringReplace + .exec(src, sear, repl, collationId).toString()); + } + + @Test + public void testReplace() throws SparkException { + assertReplace("r世eplace", "pl", "123", "UTF8_BINARY", "r世e123ace"); + assertReplace("replace", "pl", "", "UTF8_BINARY", "reace"); + assertReplace("repl世ace", "Pl", "", "UTF8_BINARY", "repl世ace"); + assertReplace("replace", "", "123", "UTF8_BINARY", "replace"); + assertReplace("abcabc", "b", "12", "UTF8_BINARY", "a12ca12c"); + assertReplace("abcdabcd", "bc", "", "UTF8_BINARY", "adad"); + assertReplace("r世eplace", "pl", "xx", "UTF8_LCASE", "r世exxace"); + assertReplace("repl世ace", "PL", "AB", "UTF8_LCASE", "reAB世ace"); + assertReplace("Replace", "", "123", "UTF8_LCASE", "Replace"); + assertReplace("re世place", "世", "x", "UTF8_LCASE", "rexplace"); + assertReplace("abcaBc", "B", "12", "UTF8_LCASE", "a12ca12c"); + assertReplace("AbcdabCd", "Bc", "", "UTF8_LCASE", "Adad"); + assertReplace("re世place", "plx", "123", "UNICODE", "re世place"); + assertReplace("世Replace", "re", "", "UNICODE", "世Replace"); + assertReplace("replace世", "", "123", "UNICODE", "replace世"); + assertReplace("aBc世abc", "b", "12", "UNICODE", "aBc世a12c"); + assertReplace("abcdabcd", "bc", "", "UNICODE", "adad"); + assertReplace("replace", "plx", "123", "UNICODE_CI", "replace"); + assertReplace("Replace", "re", "", "UNICODE_CI", "place"); + assertReplace("replace", "", "123", "UNICODE_CI", "replace"); + assertReplace("aBc世abc", "b", "12", "UNICODE_CI", "a12c世a12c"); + assertReplace("a世Bcdabcd", "bC", "", "UNICODE_CI", "a世dad"); + assertReplace("abi̇12", "i", "X", "UNICODE_CI", "abi̇12"); + assertReplace("abi̇12", "\u0307", "X", "UNICODE_CI", "abi̇12"); + assertReplace("abi̇12", "İ", "X", "UNICODE_CI", "abX12"); + assertReplace("abİ12", "i", "X", "UNICODE_CI", "abİ12"); + assertReplace("İi̇İi̇İi̇", "i̇", "x", "UNICODE_CI", "xxxxxx"); + assertReplace("İi̇İi̇İi̇", "i", "x", "UNICODE_CI", "İi̇İi̇İi̇"); + assertReplace("abİo12i̇o", "i̇o", "xx", "UNICODE_CI", "abxx12xx"); + assertReplace("abi̇o12i̇o", "İo", "yy", "UNICODE_CI", "abyy12yy"); + assertReplace("abi̇12", "i", "X", "UTF8_LCASE", "abX\u030712"); // != UNICODE_CI + assertReplace("abi̇12", "\u0307", "X", "UTF8_LCASE", "abiX12"); // != UNICODE_CI + assertReplace("abi̇12", "İ", "X", "UTF8_LCASE", "abX12"); + assertReplace("abİ12", "i", "X", "UTF8_LCASE", "abİ12"); + assertReplace("İi̇İi̇İi̇", "i̇", "x", 
"UTF8_LCASE", "xxxxxx"); + assertReplace("İi̇İi̇İi̇", "i", "x", "UTF8_LCASE", + "İx\u0307İx\u0307İx\u0307"); // != UNICODE_CI + assertReplace("abİo12i̇o", "i̇o", "xx", "UTF8_LCASE", "abxx12xx"); + assertReplace("abi̇o12i̇o", "İo", "yy", "UTF8_LCASE", "abyy12yy"); + } + + private void assertLocate(String substring, String string, Integer start, String collationName, + Integer expected) throws SparkException { + UTF8String substr = UTF8String.fromString(substring); + UTF8String str = UTF8String.fromString(string); + int collationId = CollationFactory.collationNameToId(collationName); + assertEquals(expected, CollationSupport.StringLocate.exec(str, substr, + start - 1, collationId) + 1); + } + + @Test + public void testLocate() throws SparkException { + // If you add tests with start < 1 be careful to understand the behavior of the indexOf method + // and usage of indexOf in the StringLocate class. + assertLocate("aa", "aaads", 1, "UTF8_BINARY", 1); + assertLocate("aa", "aaads", 2, "UTF8_BINARY", 2); + assertLocate("aa", "aaads", 3, "UTF8_BINARY", 0); + assertLocate("Aa", "aaads", 1, "UTF8_BINARY", 0); + assertLocate("Aa", "aAads", 1, "UTF8_BINARY", 2); + assertLocate("界x", "test大千世界X大千世界", 1, "UTF8_BINARY", 0); + assertLocate("界X", "test大千世界X大千世界", 1, "UTF8_BINARY", 8); + assertLocate("界", "test大千世界X大千世界", 13, "UTF8_BINARY", 13); + assertLocate("AA", "aaads", 1, "UTF8_LCASE", 1); + assertLocate("aa", "aAads", 2, "UTF8_LCASE", 2); + assertLocate("aa", "aaAds", 3, "UTF8_LCASE", 0); + assertLocate("abC", "abcabc", 1, "UTF8_LCASE", 1); + assertLocate("abC", "abCabc", 2, "UTF8_LCASE", 4); + assertLocate("abc", "abcabc", 4, "UTF8_LCASE", 4); + assertLocate("界x", "test大千世界X大千世界", 1, "UTF8_LCASE", 8); + assertLocate("界X", "test大千世界Xtest大千世界", 1, "UTF8_LCASE", 8); + assertLocate("界", "test大千世界X大千世界", 13, "UTF8_LCASE", 13); + assertLocate("大千", "test大千世界大千世界", 1, "UTF8_LCASE", 5); + assertLocate("大千", "test大千世界大千世界", 9, "UTF8_LCASE", 9); + assertLocate("大千", "大千世界大千世界", 1, "UTF8_LCASE", 1); + assertLocate("aa", "Aaads", 1, "UNICODE", 2); + assertLocate("AA", "aaads", 1, "UNICODE", 0); + assertLocate("aa", "aAads", 2, "UNICODE", 0); + assertLocate("aa", "aaAds", 3, "UNICODE", 0); + assertLocate("abC", "abcabc", 1, "UNICODE", 0); + assertLocate("abC", "abCabc", 2, "UNICODE", 0); + assertLocate("abC", "abCabC", 2, "UNICODE", 4); + assertLocate("abc", "abcabc", 1, "UNICODE", 1); + assertLocate("abc", "abcabc", 3, "UNICODE", 4); + assertLocate("界x", "test大千世界X大千世界", 1, "UNICODE", 0); + assertLocate("界X", "test大千世界X大千世界", 1, "UNICODE", 8); + assertLocate("界", "test大千世界X大千世界", 13, "UNICODE", 13); + assertLocate("AA", "aaads", 1, "UNICODE_CI", 1); + assertLocate("aa", "aAads", 2, "UNICODE_CI", 2); + assertLocate("aa", "aaAds", 3, "UNICODE_CI", 0); + assertLocate("abC", "abcabc", 1, "UNICODE_CI", 1); + assertLocate("abC", "abCabc", 2, "UNICODE_CI", 4); + assertLocate("abc", "abcabc", 4, "UNICODE_CI", 4); + assertLocate("界x", "test大千世界X大千世界", 1, "UNICODE_CI", 8); + assertLocate("界", "test大千世界X大千世界", 13, "UNICODE_CI", 13); + assertLocate("大千", "test大千世界大千世界", 1, "UNICODE_CI", 5); + assertLocate("大千", "test大千世界大千世界", 9, "UNICODE_CI", 9); + assertLocate("大千", "大千世界大千世界", 1, "UNICODE_CI", 1); + // Case-variable character length + assertLocate("\u0307", "i̇", 1, "UTF8_BINARY", 2); + assertLocate("\u0307", "İ", 1, "UTF8_LCASE", 0); // != UTF8_BINARY + assertLocate("i", "i̇", 1, "UNICODE_CI", 0); + assertLocate("\u0307", "i̇", 1, "UNICODE_CI", 0); + assertLocate("i̇", "i", 1, "UNICODE_CI", 0); + assertLocate("İ", 
"i̇", 1, "UNICODE_CI", 1); + assertLocate("İ", "i", 1, "UNICODE_CI", 0); + assertLocate("i", "i̇", 1, "UTF8_LCASE", 1); // != UNICODE_CI + assertLocate("\u0307", "i̇", 1, "UTF8_LCASE", 2); // != UNICODE_CI + assertLocate("i̇", "i", 1, "UTF8_LCASE", 0); + assertLocate("İ", "i̇", 1, "UTF8_LCASE", 1); + assertLocate("İ", "i", 1, "UTF8_LCASE", 0); + assertLocate("i̇o", "İo世界大千世界", 1, "UNICODE_CI", 1); + assertLocate("i̇o", "大千İo世界大千世界", 1, "UNICODE_CI", 3); + assertLocate("i̇o", "世界İo大千世界大千İo", 4, "UNICODE_CI", 11); + assertLocate("İo", "i̇o世界大千世界", 1, "UNICODE_CI", 1); + assertLocate("İo", "大千i̇o世界大千世界", 1, "UNICODE_CI", 3); + assertLocate("İo", "世界i̇o大千世界大千i̇o", 4, "UNICODE_CI", 12); + } + + private void assertSubstringIndex(String string, String delimiter, Integer count, + String collationName, String expected) throws SparkException { + UTF8String str = UTF8String.fromString(string); + UTF8String delim = UTF8String.fromString(delimiter); + int collationId = CollationFactory.collationNameToId(collationName); + assertEquals(expected, + CollationSupport.SubstringIndex.exec(str, delim, count, collationId).toString()); + } + + @Test + public void testSubstringIndex() throws SparkException { + assertSubstringIndex("wwwgapachegorg", "g", -3, "UTF8_BINARY", "apachegorg"); + assertSubstringIndex("www||apache||org", "||", 2, "UTF8_BINARY", "www||apache"); + assertSubstringIndex("aaaaaaaaaa", "aa", 2, "UTF8_BINARY", "a"); + assertSubstringIndex("AaAaAaAaAa", "aa", 2, "UTF8_LCASE", "A"); + assertSubstringIndex("www.apache.org", ".", 3, "UTF8_LCASE", "www.apache.org"); + assertSubstringIndex("wwwXapacheXorg", "x", 2, "UTF8_LCASE", "wwwXapache"); + assertSubstringIndex("wwwxapachexorg", "X", 1, "UTF8_LCASE", "www"); + assertSubstringIndex("www.apache.org", ".", 0, "UTF8_LCASE", ""); + assertSubstringIndex("www.apache.ORG", ".", -3, "UTF8_LCASE", "www.apache.ORG"); + assertSubstringIndex("wwwGapacheGorg", "g", 1, "UTF8_LCASE", "www"); + assertSubstringIndex("wwwGapacheGorg", "g", 3, "UTF8_LCASE", "wwwGapacheGor"); + assertSubstringIndex("gwwwGapacheGorg", "g", 3, "UTF8_LCASE", "gwwwGapache"); + assertSubstringIndex("wwwGapacheGorg", "g", -3, "UTF8_LCASE", "apacheGorg"); + assertSubstringIndex("wwwmapacheMorg", "M", -2, "UTF8_LCASE", "apacheMorg"); + assertSubstringIndex("www.apache.org", ".", -1, "UTF8_LCASE", "org"); + assertSubstringIndex("www.apache.org.", ".", -1, "UTF8_LCASE", ""); + assertSubstringIndex("", ".", -2, "UTF8_LCASE", ""); + assertSubstringIndex("test大千世界X大千世界", "x", -1, "UTF8_LCASE", "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, "UTF8_LCASE", "test大千世界"); + assertSubstringIndex("test大千世界大千世界", "千", 2, "UTF8_LCASE", "test大千世界大"); + assertSubstringIndex("www||APACHE||org", "||", 2, "UTF8_LCASE", "www||APACHE"); + assertSubstringIndex("www||APACHE||org", "||", -1, "UTF8_LCASE", "org"); + assertSubstringIndex("AaAaAaAaAa", "Aa", 2, "UNICODE", "Aa"); + assertSubstringIndex("wwwYapacheyorg", "y", 3, "UNICODE", "wwwYapacheyorg"); + assertSubstringIndex("www.apache.org", ".", 2, "UNICODE", "www.apache"); + assertSubstringIndex("wwwYapacheYorg", "Y", 1, "UNICODE", "www"); + assertSubstringIndex("wwwYapacheYorg", "y", 1, "UNICODE", "wwwYapacheYorg"); + assertSubstringIndex("wwwGapacheGorg", "g", 1, "UNICODE", "wwwGapacheGor"); + assertSubstringIndex("GwwwGapacheGorG", "G", 3, "UNICODE", "GwwwGapache"); + assertSubstringIndex("wwwGapacheGorG", "G", -3, "UNICODE", "apacheGorG"); + assertSubstringIndex("www.apache.org", ".", 0, "UNICODE", ""); + assertSubstringIndex("www.apache.org", ".", 
-3, "UNICODE", "www.apache.org"); + assertSubstringIndex("www.apache.org", ".", -2, "UNICODE", "apache.org"); + assertSubstringIndex("www.apache.org", ".", -1, "UNICODE", "org"); + assertSubstringIndex("", ".", -2, "UNICODE", ""); + assertSubstringIndex("test大千世界X大千世界", "X", -1, "UNICODE", "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, "UNICODE", "test大千世界"); + assertSubstringIndex("大x千世界大千世x界", "x", 1, "UNICODE", "大"); + assertSubstringIndex("大x千世界大千世x界", "x", -1, "UNICODE", "界"); + assertSubstringIndex("大x千世界大千世x界", "x", -2, "UNICODE", "千世界大千世x界"); + assertSubstringIndex("大千世界大千世界", "千", 2, "UNICODE", "大千世界大"); + assertSubstringIndex("www||apache||org", "||", 2, "UNICODE", "www||apache"); + assertSubstringIndex("AaAaAaAaAa", "aa", 2, "UNICODE_CI", "A"); + assertSubstringIndex("www.apache.org", ".", 3, "UNICODE_CI", "www.apache.org"); + assertSubstringIndex("wwwXapacheXorg", "x", 2, "UNICODE_CI", "wwwXapache"); + assertSubstringIndex("wwwxapacheXorg", "X", 1, "UNICODE_CI", "www"); + assertSubstringIndex("www.apache.org", ".", 0, "UNICODE_CI", ""); + assertSubstringIndex("wwwGapacheGorg", "G", 3, "UNICODE_CI", "wwwGapacheGor"); + assertSubstringIndex("gwwwGapacheGorg", "g", 3, "UNICODE_CI", "gwwwGapache"); + assertSubstringIndex("gwwwGapacheGorg", "g", -3, "UNICODE_CI", "apacheGorg"); + assertSubstringIndex("www.apache.ORG", ".", -3, "UNICODE_CI", "www.apache.ORG"); + assertSubstringIndex("wwwmapacheMorg", "M", -2, "UNICODE_CI", "apacheMorg"); + assertSubstringIndex("www.apache.org", ".", -1, "UNICODE_CI", "org"); + assertSubstringIndex("", ".", -2, "UNICODE_CI", ""); + assertSubstringIndex("test大千世界X大千世界", "X", -1, "UNICODE_CI", "大千世界"); + assertSubstringIndex("test大千世界X大千世界", "X", 1, "UNICODE_CI", "test大千世界"); + assertSubstringIndex("test大千世界大千世界", "千", 2, "UNICODE_CI", "test大千世界大"); + assertSubstringIndex("www||APACHE||org", "||", 2, "UNICODE_CI", "www||APACHE"); + assertSubstringIndex("abİo12", "i̇o", 1, "UNICODE_CI", "ab"); + assertSubstringIndex("abİo12", "i̇o", -1, "UNICODE_CI", "12"); + assertSubstringIndex("abi̇o12", "İo", 1, "UNICODE_CI", "ab"); + assertSubstringIndex("abi̇o12", "İo", -1, "UNICODE_CI", "12"); + assertSubstringIndex("ai̇bi̇o12", "İo", 1, "UNICODE_CI", "ai̇b"); + assertSubstringIndex("ai̇bi̇o12i̇o", "İo", 2, "UNICODE_CI", "ai̇bi̇o12"); + assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -1, "UNICODE_CI", ""); + assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -2, "UNICODE_CI", "12i̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UNICODE_CI", "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", -4, "UNICODE_CI", "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UNICODE_CI", "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", -4, "UNICODE_CI", "i̇o12i̇oİo"); + assertSubstringIndex("abi̇12", "i", 1, "UNICODE_CI", "abi̇12"); + assertSubstringIndex("abi̇12", "\u0307", 1, "UNICODE_CI", "abi̇12"); + assertSubstringIndex("abi̇12", "İ", 1, "UNICODE_CI", "ab"); + assertSubstringIndex("abİ12", "i", 1, "UNICODE_CI", "abİ12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UNICODE_CI", "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", -4, "UNICODE_CI", "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UNICODE_CI", "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", -4, "UNICODE_CI", "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, "UNICODE_CI", "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", 3, "UNICODE_CI", "ai̇bi̇oİo12"); + 
assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, "UNICODE_CI", "ai̇bİoi̇o12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", 3, "UNICODE_CI", "ai̇bİoi̇o12"); + assertSubstringIndex("abi̇12", "i", 1, "UTF8_LCASE", "ab"); // != UNICODE_CI + assertSubstringIndex("abi̇12", "\u0307", 1, "UTF8_LCASE", "abi"); // != UNICODE_CI + assertSubstringIndex("abi̇12", "İ", 1, "UTF8_LCASE", "ab"); + assertSubstringIndex("abİ12", "i", 1, "UTF8_LCASE", "abİ12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UTF8_LCASE", "İo12İoi̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", -4, "UTF8_LCASE", "İo12İoi̇o"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UTF8_LCASE", "i̇o12i̇oİo"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", -4, "UTF8_LCASE", "i̇o12i̇oİo"); + assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, "UTF8_LCASE", "bİoi̇o12i̇o"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, "UTF8_LCASE", "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", 3, "UTF8_LCASE", "ai̇bi̇oİo12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, "UTF8_LCASE", "ai̇bİoi̇o12"); + assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", 3, "UTF8_LCASE", "ai̇bİoi̇o12"); + assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, "UTF8_LCASE", "bİoi̇o12i̇o"); + } + + private void assertStringTrim( + String collation, + String sourceString, + String trimString, + String expectedResultString) throws SparkException { + int collationId = CollationFactory.collationNameToId(collation); + String result; + + if (trimString == null) { + result = CollationSupport.StringTrim.exec( + UTF8String.fromString(sourceString), collationId).toString(); + } else { + result = CollationSupport.StringTrim.exec( + UTF8String + .fromString(sourceString), UTF8String.fromString(trimString), collationId) + .toString(); + } + + assertEquals(expectedResultString, result); + } + + private void assertStringTrimLeft( + String collation, + String sourceString, + String trimString, + String expectedResultString) throws SparkException { + int collationId = CollationFactory.collationNameToId(collation); + String result; + + if (trimString == null) { + result = CollationSupport.StringTrimLeft.exec( + UTF8String.fromString(sourceString), collationId).toString(); + } else { + result = CollationSupport.StringTrimLeft.exec( + UTF8String + .fromString(sourceString), UTF8String.fromString(trimString), collationId) + .toString(); + } + + assertEquals(expectedResultString, result); + } + + private void assertStringTrimRight( + String collation, + String sourceString, + String trimString, + String expectedResultString) throws SparkException { + int collationId = CollationFactory.collationNameToId(collation); + String result; + + if (trimString == null) { + result = CollationSupport.StringTrimRight.exec( + UTF8String.fromString(sourceString), collationId).toString(); + } else { + result = CollationSupport.StringTrimRight.exec( + UTF8String + .fromString(sourceString), UTF8String.fromString(trimString), collationId) + .toString(); + } + + assertEquals(expectedResultString, result); + } + + @Test + public void testStringTrim() throws SparkException { + assertStringTrim("UTF8_BINARY", "asd", null, "asd"); + assertStringTrim("UTF8_BINARY", " asd ", null, "asd"); + assertStringTrim("UTF8_BINARY", " a世a ", null, "a世a"); + assertStringTrim("UTF8_BINARY", "asd", "x", "asd"); + assertStringTrim("UTF8_BINARY", "xxasdxx", "x", "asd"); + assertStringTrim("UTF8_BINARY", "xa世ax", "x", "a世a"); + + assertStringTrimLeft("UTF8_BINARY", "asd", null, "asd"); + 
assertStringTrimLeft("UTF8_BINARY", " asd ", null, "asd "); + assertStringTrimLeft("UTF8_BINARY", " a世a ", null, "a世a "); + assertStringTrimLeft("UTF8_BINARY", "asd", "x", "asd"); + assertStringTrimLeft("UTF8_BINARY", "xxasdxx", "x", "asdxx"); + assertStringTrimLeft("UTF8_BINARY", "xa世ax", "x", "a世ax"); + + assertStringTrimRight("UTF8_BINARY", "asd", null, "asd"); + assertStringTrimRight("UTF8_BINARY", " asd ", null, " asd"); + assertStringTrimRight("UTF8_BINARY", " a世a ", null, " a世a"); + assertStringTrimRight("UTF8_BINARY", "asd", "x", "asd"); + assertStringTrimRight("UTF8_BINARY", "xxasdxx", "x", "xxasd"); + assertStringTrimRight("UTF8_BINARY", "xa世ax", "x", "xa世a"); + + assertStringTrim("UTF8_LCASE", "asd", null, "asd"); + assertStringTrim("UTF8_LCASE", " asd ", null, "asd"); + assertStringTrim("UTF8_LCASE", " a世a ", null, "a世a"); + assertStringTrim("UTF8_LCASE", "asd", "x", "asd"); + assertStringTrim("UTF8_LCASE", "xxasdxx", "x", "asd"); + assertStringTrim("UTF8_LCASE", "xa世ax", "x", "a世a"); + + assertStringTrimLeft("UTF8_LCASE", "asd", null, "asd"); + assertStringTrimLeft("UTF8_LCASE", " asd ", null, "asd "); + assertStringTrimLeft("UTF8_LCASE", " a世a ", null, "a世a "); + assertStringTrimLeft("UTF8_LCASE", "asd", "x", "asd"); + assertStringTrimLeft("UTF8_LCASE", "xxasdxx", "x", "asdxx"); + assertStringTrimLeft("UTF8_LCASE", "xa世ax", "x", "a世ax"); + + assertStringTrimRight("UTF8_LCASE", "asd", null, "asd"); + assertStringTrimRight("UTF8_LCASE", " asd ", null, " asd"); + assertStringTrimRight("UTF8_LCASE", " a世a ", null, " a世a"); + assertStringTrimRight("UTF8_LCASE", "asd", "x", "asd"); + assertStringTrimRight("UTF8_LCASE", "xxasdxx", "x", "xxasd"); + assertStringTrimRight("UTF8_LCASE", "xa世ax", "x", "xa世a"); + + assertStringTrim("UTF8_LCASE", "asd", null, "asd"); + assertStringTrim("UTF8_LCASE", " asd ", null, "asd"); + assertStringTrim("UTF8_LCASE", " a世a ", null, "a世a"); + assertStringTrim("UTF8_LCASE", "asd", "x", "asd"); + assertStringTrim("UTF8_LCASE", "xxasdxx", "x", "asd"); + assertStringTrim("UTF8_LCASE", "xa世ax", "x", "a世a"); + + // Test cases where trimString has more than one character + assertStringTrim("UTF8_BINARY", "ddsXXXaa", "asd", "XXX"); + assertStringTrimLeft("UTF8_BINARY", "ddsXXXaa", "asd", "XXXaa"); + assertStringTrimRight("UTF8_BINARY", "ddsXXXaa", "asd", "ddsXXX"); + + assertStringTrim("UTF8_LCASE", "ddsXXXaa", "asd", "XXX"); + assertStringTrimLeft("UTF8_LCASE", "ddsXXXaa", "asd", "XXXaa"); + assertStringTrimRight("UTF8_LCASE", "ddsXXXaa", "asd", "ddsXXX"); + + // Test cases specific to collation type + // uppercase trim, lowercase src + assertStringTrim("UTF8_BINARY", "asd", "A", "asd"); + assertStringTrim("UTF8_LCASE", "asd", "A", "sd"); + + // lowercase trim, uppercase src + assertStringTrim("UTF8_BINARY", "ASD", "a", "ASD"); + assertStringTrim("UTF8_LCASE", "ASD", "a", "SD"); + + // uppercase and lowercase chars of different byte-length (utf8) + assertStringTrim("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimLeft("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); + assertStringTrimRight("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ"); + + assertStringTrim("UTF8_LCASE", "ẞaaaẞ", "ß", "aaa"); + assertStringTrimLeft("UTF8_LCASE", "ẞaaaẞ", "ß", "aaaẞ"); + assertStringTrimRight("UTF8_LCASE", "ẞaaaẞ", "ß", "ẞaaa"); + + assertStringTrim("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrimLeft("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); + assertStringTrimRight("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß"); + + assertStringTrim("UTF8_LCASE", "ßaaaß", "ẞ", "aaa"); + 
assertStringTrimLeft("UTF8_LCASE", "ßaaaß", "ẞ", "aaaß"); + assertStringTrimRight("UTF8_LCASE", "ßaaaß", "ẞ", "ßaaa"); + + // different byte-length (utf8) chars trimmed + assertStringTrim("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrimLeft("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimRight("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "Ëaaa"); + + assertStringTrim("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaa"); + assertStringTrimLeft("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaaẞ"); + assertStringTrimRight("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "Ëaaa"); } // TODO: Test more collation-aware string expressions. @@ -265,3 +1393,4 @@ public void testEndsWith() throws SparkException { // TODO: Test other collation-aware expressions. } +// checkstyle.on: AvoidEscapedUnicodeCharacters diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index 934b93c9345b9..d690da53c7c66 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ package org.apache.spark.unsafe.types; @@ -28,6 +28,7 @@ import org.apache.spark.unsafe.Platform; import org.junit.jupiter.api.Test; +import static org.apache.spark.unsafe.types.UTF8String.fromString; import static org.junit.jupiter.api.Assertions.*; import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET; @@ -107,29 +108,6 @@ public void binaryCompareTo() { assertTrue(fromString("你好123").binaryCompare(fromString("你好122")) > 0); } - @Test - public void lowercaseComparison() { - // SPARK-47693: Test optimized lowercase comparison of UTF8String instances - // ASCII - assertEquals(fromString("aaa").compareLowerCase(fromString("AAA")), 0); - assertTrue(fromString("aaa").compareLowerCase(fromString("AAAA")) < 0); - assertTrue(fromString("AAA").compareLowerCase(fromString("aaaa")) < 0); - assertTrue(fromString("a").compareLowerCase(fromString("B")) < 0); - assertTrue(fromString("b").compareLowerCase(fromString("A")) > 0); - assertEquals(fromString("aAa").compareLowerCase(fromString("AaA")), 0); - assertTrue(fromString("abcd").compareLowerCase(fromString("abC")) > 0); - assertTrue(fromString("ABC").compareLowerCase(fromString("abcd")) < 0); - assertEquals(fromString("abcd").compareLowerCase(fromString("abcd")), 0); - // non-ASCII - assertEquals(fromString("ü").compareLowerCase(fromString("Ü")), 0); - assertEquals(fromString("Äü").compareLowerCase(fromString("äÜ")), 0); - assertTrue(fromString("a").compareLowerCase(fromString("ä")) < 0); - assertTrue(fromString("a").compareLowerCase(fromString("Ä")) < 0); - assertTrue(fromString("A").compareLowerCase(fromString("ä")) < 0); - assertTrue(fromString("bä").compareLowerCase(fromString("aü")) > 0); - assertTrue(fromString("bxxxxxxxxxx").compareLowerCase(fromString("bü")) < 0); - } - protected static void testUpperandLower(String upper, String lower) { UTF8String us = fromString(upper); UTF8String ls = fromString(lower); @@ -902,4 +880,481 @@ public void skipWrongFirstByte() { assertEquals(1, fromBytes(c).numChars()); } } + + private void testMakeValid(String input, String expected) { + ByteArrayOutputStream exp = new ByteArrayOutputStream(); + for (String hex : expected.split(" ")) exp.write(Integer.parseInt(hex.substring(2), 16)); + ByteArrayOutputStream inp = new ByteArrayOutputStream(); + for (String hex : input.split(" ")) inp.write(Integer.parseInt(hex.substring(2), 16)); + assertEquals(fromBytes(exp.toByteArray()), fromBytes(inp.toByteArray()).makeValid()); + } + @Test + public void makeValid() { + // Basic tests + assertEquals(EMPTY_UTF8, EMPTY_UTF8.makeValid()); + assertEquals(fromString(""), fromString("").makeValid()); + assertEquals(fromString("abc"), fromString("abc").makeValid()); + assertEquals(fromString("hello"), fromString("hello").makeValid()); + assertEquals(fromString("大千世界"), fromString("大千世界").makeValid()); + assertEquals(fromBytes(new byte[] {}), fromBytes(new byte[] {}).makeValid()); + assertEquals(fromBytes(new byte[] {0x61}), fromBytes(new byte[] {0x61}).makeValid()); + assertEquals(fromBytes(new byte[] {0x7F}), fromBytes(new byte[] {0x7F}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xBD}), + fromBytes(new byte[] {(byte) 0xFF}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xC2, (byte) 0x80}), + fromBytes(new byte[] {(byte) 0xC2, (byte) 0x80}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xDF, (byte) 0xBF}), + fromBytes(new byte[] {(byte) 0xDF, (byte) 0xBF}).makeValid()); + assertEquals(fromString("��"), + fromBytes(new byte[] {(byte) 0xC0, (byte) 
0x80}).makeValid()); + assertEquals(fromString("��"), + fromBytes(new byte[] {(byte) 0xC1, (byte) 0xBF}).makeValid()); + assertEquals(fromString("��"), + fromBytes(new byte[] {(byte) 0xDF, (byte) 0xC0}).makeValid()); + assertEquals(fromString("��"), + fromBytes(new byte[] {(byte) 0xDF, (byte) 0xFF}).makeValid()); + assertEquals(fromString("��"), + fromBytes(new byte[] {(byte) 0x80, (byte) 0x80}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xE0, (byte) 0xA0, (byte) 0x80}), + fromBytes(new byte[] {(byte) 0xE0, (byte) 0xA0, (byte) 0x80}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xBF}), + fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xBF}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xBF}), + fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xBF}).makeValid()); + assertEquals(fromString("���"), + fromBytes(new byte[] {(byte) 0xE0, (byte) 0x9F, (byte) 0x80}).makeValid()); + assertEquals(fromString("���"), + fromBytes(new byte[] {(byte) 0xE0, (byte) 0xC0, (byte) 0x80}).makeValid()); + assertEquals(fromString("��"), + fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xC0}).makeValid()); + assertEquals(fromString("���"), + fromBytes(new byte[] {(byte) 0x80, (byte) 0x80, (byte) 0x80}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xF0, (byte) 0x90, (byte) 0x80, (byte) 0x80}), + fromBytes(new byte[] {(byte) 0xF0, (byte) 0x90, (byte) 0x80, (byte) 0x80}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xF4, (byte) 0x8F, (byte) 0xBF, (byte) 0xBF}), + fromBytes(new byte[] {(byte) 0xF4, (byte) 0x8F, (byte) 0xBF, (byte) 0xBF}).makeValid()); + assertEquals(fromString("����"), + fromBytes(new byte[] {(byte) 0xF0, (byte) 0x8F, (byte) 0x80, (byte) 0x80}).makeValid()); + assertEquals(fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xBD, (byte) 0x7F}), + fromBytes(new byte[] {(byte) 0xF0, (byte) 0x90, (byte) 0x80, (byte) 0x7F}).makeValid()); + assertEquals(fromString("����"), + fromBytes(new byte[] {(byte) 0xF4, (byte) 0x90, (byte) 0x80, (byte) 0x80}).makeValid()); + assertEquals(fromString("��"), + fromBytes(new byte[] {(byte) 0xF4, (byte) 0x8F, (byte) 0xBF, (byte) 0xC0}).makeValid()); + assertEquals(fromString("�����"), fromBytes(new byte[] + {(byte) 0xF8, (byte) 0x8F, (byte) 0x80, (byte) 0x80, (byte) 0x80}).makeValid()); + assertEquals(fromString("�����"), fromBytes(new byte[] + {(byte) 0xF8, (byte) 0x90, (byte) 0x80, (byte) 0x80, (byte) 0x80}).makeValid()); + assertEquals(fromString("�����"), fromBytes(new byte[] + {(byte) 0xFB, (byte) 0x8F, (byte) 0xBF, (byte) 0xBF, (byte) 0xBF}).makeValid()); + assertEquals(fromString("�����"), fromBytes(new byte[] + {(byte) 0xFB, (byte) 0x90, (byte) 0x80, (byte) 0x80, (byte) 0x80}).makeValid()); + // More tests + testMakeValid("0x42", "0x42"); + testMakeValid("0x80", "0xEF 0xBF 0xBD"); + testMakeValid("0xF4 0x92 0x12", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x12"); + testMakeValid("0x84 0xA5 0x63 0x64", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x63 0x64"); + testMakeValid("0xF4 0x9B", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testMakeValid("0xE2 0x0C", "0xEF 0xBF 0xBD 0x0C"); + testMakeValid("0xE8 0x3E", "0xEF 0xBF 0xBD 0x3E"); + testMakeValid("0x17 0x1B 0xC2", "0x17 0x1B 0xEF 0xBF 0xBD"); + testMakeValid("0x29 0xB2 0x7C 0xA0", "0x29 0xEF 0xBF 0xBD 0x7C 0xEF 0xBF 0xBD"); + testMakeValid("0x13 0x68 0x28 0x2A 0x83", "0x13 0x68 0x28 0x2A 0xEF 0xBF 0xBD"); + testMakeValid("0x3B 0xFE 0xC2 0xB7 0x9E", "0x3B 0xEF 0xBF 0xBD 0xC2 0xB7 0xEF 0xBF 0xBD"); + 
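Editor's note: each testMakeValid case encodes its input as space-separated hex bytes and expects invalid sequences to be replaced with U+FFFD (0xEF 0xBF 0xBD in UTF-8). The JDK performs a comparable repair when decoding with replacement, as sketched below; this is not UTF8String.makeValid itself, and the exact number of replacement characters emitted per malformed sequence can differ between implementations.

```java
import java.nio.charset.StandardCharsets;

// Sketch of the same repair idea using the JDK decoder, which substitutes U+FFFD
// for malformed UTF-8 input when constructing a String.
public final class MakeValidSketch {
  public static void main(String[] args) {
    byte[] invalid = {0x41, (byte) 0x80, 0x42};             // 'A', stray continuation byte, 'B'
    String repaired = new String(invalid, StandardCharsets.UTF_8);
    System.out.println(repaired);                           // A�B
    System.out.println(repaired.codePointAt(1) == 0xFFFD);  // true
  }
}
```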
testMakeValid("0xFC", "0xEF 0xBF 0xBD"); + testMakeValid("0x8E 0xBD", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testMakeValid("0x6A 0x8B 0x5C 0x5A", "0x6A 0xEF 0xBF 0xBD 0x5C 0x5A"); + testMakeValid("0xB4 0xC0", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testMakeValid("0xB0 0xD3", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testMakeValid("0xAA", "0xEF 0xBF 0xBD"); + testMakeValid("0x1A 0x66", "0x1A 0x66"); + testMakeValid("0x32 0xB5 0x5F", "0x32 0xEF 0xBF 0xBD 0x5F"); + testMakeValid("0x70 0xB6 0x69 0xBC", "0x70 0xEF 0xBF 0xBD 0x69 0xEF 0xBF 0xBD"); + testMakeValid("0xDC 0x15 0x82", "0xEF 0xBF 0xBD 0x15 0xEF 0xBF 0xBD"); + testMakeValid("0x7F 0x3F 0x72 0xBB", "0x7F 0x3F 0x72 0xEF 0xBF 0xBD"); + testMakeValid("0x3C 0x3D 0x1F 0x6C 0x75", "0x3C 0x3D 0x1F 0x6C 0x75"); + testMakeValid("0xBF 0x10 0xF4", "0xEF 0xBF 0xBD 0x10 0xEF 0xBF 0xBD"); + testMakeValid("0xBF 0xDD 0x89", "0xEF 0xBF 0xBD 0xDD 0x89"); + testMakeValid("0x85 0xD4 0x90 0x5E 0x6C", "0xEF 0xBF 0xBD 0xD4 0x90 0x5E 0x6C"); + testMakeValid("0x0A 0x92 0x3C 0x17 0x7D", "0x0A 0xEF 0xBF 0xBD 0x3C 0x17 0x7D"); + testMakeValid("0x49 0x4E 0x2C 0x7B 0x1C", "0x49 0x4E 0x2C 0x7B 0x1C"); + testMakeValid("0x2E 0x0C 0x3E 0x02 0x9D", "0x2E 0x0C 0x3E 0x02 0xEF 0xBF 0xBD"); + testMakeValid("0xF9 0xF2 0x11", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x11"); + testMakeValid("0xD1", "0xEF 0xBF 0xBD"); + testMakeValid("0x74 0x35 0x56 0xC4", "0x74 0x35 0x56 0xEF 0xBF 0xBD"); + testMakeValid("0xBE 0x45 0x7F", "0xEF 0xBF 0xBD 0x45 0x7F"); + testMakeValid("0x2E", "0x2E"); + testMakeValid("0x3B 0xC3", "0x3B 0xEF 0xBF 0xBD"); + testMakeValid("0xF9 0x58 0xE0 0x84", "0xEF 0xBF 0xBD 0x58 0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testMakeValid("0x72 0xF9 0x4F", "0x72 0xEF 0xBF 0xBD 0x4F"); + testMakeValid("0xF2 0x2A 0x38", "0xEF 0xBF 0xBD 0x2A 0x38"); + testMakeValid("0x66 0xFC 0x8D 0x5F", "0x66 0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x5F"); + testMakeValid("0x6D", "0x6D"); + testMakeValid("0x6E 0xDF 0xDD", "0x6E 0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testMakeValid("0x50 0xE8 0x45 0xDB", "0x50 0xEF 0xBF 0xBD 0x45 0xEF 0xBF 0xBD"); + testMakeValid("0x71 0x06 0x1F 0x87", "0x71 0x06 0x1F 0xEF 0xBF 0xBD"); + testMakeValid("0x02", "0x02"); + testMakeValid("0x20 0x2B 0x38 0xCB 0xDF", "0x20 0x2B 0x38 0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testMakeValid("0x10", "0x10"); + testMakeValid("0x15", "0x15"); + testMakeValid("0xC5 0xEA 0x2A", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x2A"); + testMakeValid("0x72 0x2D 0xBB 0x06", "0x72 0x2D 0xEF 0xBF 0xBD 0x06"); + testMakeValid("0xB7", "0xEF 0xBF 0xBD"); + testMakeValid("0xB8 0xB5", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testMakeValid("0x9C 0x76 0x17", "0xEF 0xBF 0xBD 0x76 0x17"); + } + + private void testIsValid(String input, String expected) { + ByteArrayOutputStream inp = new ByteArrayOutputStream(); + for (String hex : input.split(" ")) inp.write(Integer.parseInt(hex.substring(2), 16)); + assertEquals(input.equals(expected), fromBytes(inp.toByteArray()).isValid()); + } + @Test + public void isValid() { + // Basic tests + assertTrue(EMPTY_UTF8.isValid()); + assertTrue(fromString("").isValid()); + assertTrue(fromString("abc").isValid()); + assertTrue(fromString("hello").isValid()); + assertTrue(fromString("大千世界").isValid()); + assertTrue(fromString("数据砖头").isValid()); + assertTrue(fromBytes(new byte[] {}).isValid()); + assertTrue(fromBytes(new byte[] {0x61}).isValid()); + assertTrue(fromBytes(new byte[] {0x7F}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0xFF}).isValid()); + assertTrue(fromBytes(new byte[] {(byte) 0xC2, (byte) 0x80}).isValid()); + assertTrue(fromBytes(new byte[] 
{(byte) 0xDF, (byte) 0xBF}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0xC0, (byte) 0x80}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0xC1, (byte) 0xBF}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0xDF, (byte) 0xC0}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0xDF, (byte) 0xFF}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0x80, (byte) 0x80}).isValid()); + assertTrue(fromBytes(new byte[] {(byte) 0xE0, (byte) 0xA0, (byte) 0x80}).isValid()); + assertTrue(fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xBF}).isValid()); + assertTrue(fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xBF}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0xE0, (byte) 0x9F, (byte) 0x80}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0xE0, (byte) 0xC0, (byte) 0x80}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0xEF, (byte) 0xBF, (byte) 0xC0}).isValid()); + assertFalse(fromBytes(new byte[] {(byte) 0x80, (byte) 0x80, (byte) 0x80}).isValid()); + assertTrue(fromBytes( + new byte[] {(byte) 0xF0, (byte) 0x90, (byte) 0x80, (byte) 0x80}).isValid()); + assertTrue(fromBytes( + new byte[] {(byte) 0xF4, (byte) 0x8F, (byte) 0xBF, (byte) 0xBF}).isValid()); + assertFalse(fromBytes( + new byte[] {(byte) 0xF0, (byte) 0x8F, (byte) 0x80, (byte) 0x80}).isValid()); + assertFalse(fromBytes( + new byte[] {(byte) 0xF0, (byte) 0x90, (byte) 0x80, (byte) 0x7F}).isValid()); + assertFalse(fromBytes( + new byte[] {(byte) 0xF4, (byte) 0x90, (byte) 0x80, (byte) 0x80}).isValid()); + assertFalse(fromBytes( + new byte[] {(byte) 0xF4, (byte) 0x8F, (byte) 0xBF, (byte) 0xC0}).isValid()); + assertFalse(fromBytes( + new byte[] {(byte) 0xF8, (byte) 0x8F, (byte) 0x80, (byte) 0x80, (byte) 0x80}).isValid()); + assertFalse(fromBytes( + new byte[] {(byte) 0xF8, (byte) 0x90, (byte) 0x80, (byte) 0x80, (byte) 0x80}).isValid()); + assertFalse(fromBytes( + new byte[] {(byte) 0xFB, (byte) 0x8F, (byte) 0xBF, (byte) 0xBF, (byte) 0xBF}).isValid()); + assertFalse(fromBytes( + new byte[] {(byte) 0xFB, (byte) 0x90, (byte) 0x80, (byte) 0x80, (byte) 0x80}).isValid()); + // More tests + testIsValid("0x42", "0x42"); + testIsValid("0x80", "0xEF 0xBF 0xBD"); + testIsValid("0xF4 0x92 0x12", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x12"); + testIsValid("0x84 0xA5 0x63 0x64", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x63 0x64"); + testIsValid("0xF4 0x9B", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testIsValid("0xE2 0x0C", "0xEF 0xBF 0xBD 0x0C"); + testIsValid("0xE8 0x3E", "0xEF 0xBF 0xBD 0x3E"); + testIsValid("0x17 0x1B 0xC2", "0x17 0x1B 0xEF 0xBF 0xBD"); + testIsValid("0x29 0xB2 0x7C 0xA0", "0x29 0xEF 0xBF 0xBD 0x7C 0xEF 0xBF 0xBD"); + testIsValid("0x13 0x68 0x28 0x2A 0x83", "0x13 0x68 0x28 0x2A 0xEF 0xBF 0xBD"); + testIsValid("0x3B 0xFE 0xC2 0xB7 0x9E", "0x3B 0xEF 0xBF 0xBD 0xC2 0xB7 0xEF 0xBF 0xBD"); + testIsValid("0xFC", "0xEF 0xBF 0xBD"); + testIsValid("0x8E 0xBD", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testIsValid("0x6A 0x8B 0x5C 0x5A", "0x6A 0xEF 0xBF 0xBD 0x5C 0x5A"); + testIsValid("0xB4 0xC0", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testIsValid("0xB0 0xD3", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testIsValid("0xAA", "0xEF 0xBF 0xBD"); + testIsValid("0x1A 0x66", "0x1A 0x66"); + testIsValid("0x32 0xB5 0x5F", "0x32 0xEF 0xBF 0xBD 0x5F"); + testIsValid("0x70 0xB6 0x69 0xBC", "0x70 0xEF 0xBF 0xBD 0x69 0xEF 0xBF 0xBD"); + testIsValid("0xDC 0x15 0x82", "0xEF 0xBF 0xBD 0x15 0xEF 0xBF 0xBD"); + testIsValid("0x7F 0x3F 0x72 0xBB", "0x7F 0x3F 0x72 0xEF 0xBF 0xBD"); + testIsValid("0x3C 0x3D 0x1F 0x6C 0x75", "0x3C 0x3D 0x1F 
0x6C 0x75"); + testIsValid("0xBF 0x10 0xF4", "0xEF 0xBF 0xBD 0x10 0xEF 0xBF 0xBD"); + testIsValid("0xBF 0xDD 0x89", "0xEF 0xBF 0xBD 0xDD 0x89"); + testIsValid("0x85 0xD4 0x90 0x5E 0x6C", "0xEF 0xBF 0xBD 0xD4 0x90 0x5E 0x6C"); + testIsValid("0x0A 0x92 0x3C 0x17 0x7D", "0x0A 0xEF 0xBF 0xBD 0x3C 0x17 0x7D"); + testIsValid("0x49 0x4E 0x2C 0x7B 0x1C", "0x49 0x4E 0x2C 0x7B 0x1C"); + testIsValid("0x2E 0x0C 0x3E 0x02 0x9D", "0x2E 0x0C 0x3E 0x02 0xEF 0xBF 0xBD"); + testIsValid("0xF9 0xF2 0x11", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x11"); + testIsValid("0xD1", "0xEF 0xBF 0xBD"); + testIsValid("0x74 0x35 0x56 0xC4", "0x74 0x35 0x56 0xEF 0xBF 0xBD"); + testIsValid("0xBE 0x45 0x7F", "0xEF 0xBF 0xBD 0x45 0x7F"); + testIsValid("0x2E", "0x2E"); + testIsValid("0x3B 0xC3", "0x3B 0xEF 0xBF 0xBD"); + testIsValid("0xF9 0x58 0xE0 0x84", "0xEF 0xBF 0xBD 0x58 0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testIsValid("0x72 0xF9 0x4F", "0x72 0xEF 0xBF 0xBD 0x4F"); + testIsValid("0xF2 0x2A 0x38", "0xEF 0xBF 0xBD 0x2A 0x38"); + testIsValid("0x66 0xFC 0x8D 0x5F", "0x66 0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x5F"); + testIsValid("0x6D", "0x6D"); + testIsValid("0x6E 0xDF 0xDD", "0x6E 0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testIsValid("0x50 0xE8 0x45 0xDB", "0x50 0xEF 0xBF 0xBD 0x45 0xEF 0xBF 0xBD"); + testIsValid("0x71 0x06 0x1F 0x87", "0x71 0x06 0x1F 0xEF 0xBF 0xBD"); + testIsValid("0x02", "0x02"); + testIsValid("0x20 0x2B 0x38 0xCB 0xDF", "0x20 0x2B 0x38 0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testIsValid("0x10", "0x10"); + testIsValid("0x15", "0x15"); + testIsValid("0xC5 0xEA 0x2A", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD 0x2A"); + testIsValid("0x72 0x2D 0xBB 0x06", "0x72 0x2D 0xEF 0xBF 0xBD 0x06"); + testIsValid("0xB7", "0xEF 0xBF 0xBD"); + testIsValid("0xB8 0xB5", "0xEF 0xBF 0xBD 0xEF 0xBF 0xBD"); + testIsValid("0x9C 0x76 0x17", "0xEF 0xBF 0xBD 0x76 0x17"); + } + + @Test + public void testGetByte() { + // Valid UTF-8 string + String validString = "abcde"; + UTF8String validUTF8String = fromString(validString); + // Valid byte index handling + for (int i = 0; i < validString.length(); ++i) { + assertEquals(validString.charAt(i), validUTF8String.getByte(i)); + } + // Invalid byte index handling + assertEquals(0, validUTF8String.getByte(-1)); + assertEquals(0, validUTF8String.getByte(validString.length())); + assertEquals(0, validUTF8String.getByte(validString.length() + 1)); + + // Invalid UTF-8 string + byte[] invalidString = new byte[] {(byte) 0x41, (byte) 0x42, (byte) 0x80}; + UTF8String invalidUTF8String = fromBytes(invalidString); + // Valid byte index handling + for (int i = 0; i < invalidString.length; ++i) { + assertEquals(invalidString[i], invalidUTF8String.getByte(i)); + } + // Invalid byte index handling + assertEquals(0, invalidUTF8String.getByte(-1)); + assertEquals(0, invalidUTF8String.getByte(invalidString.length)); + assertEquals(0, invalidUTF8String.getByte(invalidString.length + 1)); + } + + @Test + public void testGetChar() { + // Valid UTF-8 string + String str = "abcde"; + UTF8String s = fromString(str); + // Valid character index handling + for (int i = 0; i < str.length(); ++i) { + assertEquals(str.charAt(i), s.getChar(i)); + } + // Invalid character index handling + assertThrows(IndexOutOfBoundsException.class, () -> s.getChar(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s.getChar(str.length())); + assertThrows(IndexOutOfBoundsException.class, () -> s.getChar(str.length() + 1)); + + // Invalid UTF-8 string + byte[] invalidString = new byte[] {(byte) 0x41, (byte) 0x42, (byte) 0x80}; + UTF8String invalidUTF8String = 
fromBytes(invalidString); + // Valid byte index handling + for (int i = 0; i < invalidString.length; ++i) { + if (Character.isValidCodePoint(invalidString[i])) { + assertEquals(invalidString[i], invalidUTF8String.getChar(i)); + } else { + assertEquals(0, invalidUTF8String.getChar(i)); + } + } + // Invalid byte index handling + assertThrows(IndexOutOfBoundsException.class, () -> s.getChar(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s.getChar(str.length())); + assertThrows(IndexOutOfBoundsException.class, () -> s.getChar(str.length() + 1)); + } + + @Test + public void testCodePointFrom() { + // Valid UTF-8 string + String str = "abcde"; + UTF8String s = fromString(str); + // Valid character index handling + for (int i = 0; i < str.length(); ++i) { + assertEquals(str.charAt(i), s.codePointFrom(i)); + } + // Invalid character index handling + assertThrows(IndexOutOfBoundsException.class, () -> s.codePointFrom(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s.codePointFrom(str.length())); + assertThrows(IndexOutOfBoundsException.class, () -> s.codePointFrom(str.length() + 1)); + + // Invalid UTF-8 string + byte[] invalidString = new byte[] {(byte) 0x41, (byte) 0x42, (byte) 0x80}; + UTF8String invalidUTF8String = fromBytes(invalidString); + // Valid byte index handling + for (int i = 0; i < invalidString.length; ++i) { + if (Character.isValidCodePoint(invalidString[i])) { + assertEquals(invalidString[i], invalidUTF8String.codePointFrom(i)); + } else { + assertEquals(0, invalidUTF8String.codePointFrom(i)); + } + } + // Invalid byte index handling + assertThrows(IndexOutOfBoundsException.class, () -> s.codePointFrom(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s.codePointFrom(str.length())); + assertThrows(IndexOutOfBoundsException.class, () -> s.codePointFrom(str.length() + 1)); + } + + @Test + public void utf8StringCodePoints() { + String s = "aéह 日å!"; + UTF8String s0 = fromString(s); + for (int i = 0; i < s.length(); ++i) { + assertEquals(s.codePointAt(i), s0.getChar(i)); + } + + UTF8String s1 = fromBytes(new byte[] {0x41, (byte) 0xC3, (byte) 0xB1, (byte) 0xE2, + (byte) 0x82, (byte) 0xAC, (byte) 0xF0, (byte) 0x90, (byte) 0x8D, (byte) 0x88}); + // numBytesForFirstByte + assertEquals(1, UTF8String.numBytesForFirstByte(s1.getByte(0))); + assertEquals(2, UTF8String.numBytesForFirstByte(s1.getByte(1))); + assertEquals(3, UTF8String.numBytesForFirstByte(s1.getByte(3))); + assertEquals(4, UTF8String.numBytesForFirstByte(s1.getByte(6))); + // getByte + assertEquals((byte) 0x41, s1.getByte(0)); + assertEquals((byte) 0xC3, s1.getByte(1)); + assertEquals((byte) 0xE2, s1.getByte(3)); + assertEquals((byte) 0xF0, s1.getByte(6)); + // codePointFrom + assertEquals(0x41, s1.codePointFrom(0)); + assertEquals(0xF1, s1.codePointFrom(1)); + assertEquals(0x20AC, s1.codePointFrom(3)); + assertEquals(0x10348, s1.codePointFrom(6)); + assertThrows(IndexOutOfBoundsException.class, () -> s1.codePointFrom(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s1.codePointFrom(99)); + // getChar + assertEquals(0x41, s1.getChar(0)); + assertEquals(0xF1, s1.getChar(1)); + assertEquals(0x20AC, s1.getChar(2)); + assertEquals(0x10348, s1.getChar(3)); + assertThrows(IndexOutOfBoundsException.class, () -> s1.getChar(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s1.getChar(99)); + + UTF8String s2 = fromString("Añ€𐍈"); + // numBytesForFirstByte + assertEquals(1, UTF8String.numBytesForFirstByte(s2.getByte(0))); + assertEquals(2, 
UTF8String.numBytesForFirstByte(s2.getByte(1))); + assertEquals(3, UTF8String.numBytesForFirstByte(s2.getByte(3))); + assertEquals(4, UTF8String.numBytesForFirstByte(s2.getByte(6))); + // getByte + assertEquals((byte) 0x41, s2.getByte(0)); + assertEquals((byte) 0xC3, s2.getByte(1)); + assertEquals((byte) 0xE2, s2.getByte(3)); + assertEquals((byte) 0xF0, s2.getByte(6)); + // codePointFrom + assertEquals(0x41, s2.codePointFrom(0)); + assertEquals(0xF1, s2.codePointFrom(1)); + assertEquals(0x20AC, s2.codePointFrom(3)); + assertEquals(0x10348, s2.codePointFrom(6)); + assertThrows(IndexOutOfBoundsException.class, () -> s2.codePointFrom(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s2.codePointFrom(99)); + // getChar + assertEquals(0x41, s2.getChar(0)); + assertEquals(0xF1, s2.getChar(1)); + assertEquals(0x20AC, s2.getChar(2)); + assertEquals(0x10348, s2.getChar(3)); + assertThrows(IndexOutOfBoundsException.class, () -> s2.getChar(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s2.getChar(99)); + + UTF8String s3 = EMPTY_UTF8; + // codePointFrom + assertThrows(IndexOutOfBoundsException.class, () -> s3.codePointFrom(0)); + assertThrows(IndexOutOfBoundsException.class, () -> s3.codePointFrom(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s3.codePointFrom(99)); + // getChar + assertThrows(IndexOutOfBoundsException.class, () -> s3.getChar(0)); + assertThrows(IndexOutOfBoundsException.class, () -> s3.getChar(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> s3.getChar(99)); + } + + private void testCodePointIterator(UTF8String utf8String) { + CodePointIteratorType iteratorMode = utf8String.isValid() ? + CodePointIteratorType.CODE_POINT_ITERATOR_ASSUME_VALID : + CodePointIteratorType.CODE_POINT_ITERATOR_MAKE_VALID; + Iterator iterator = utf8String.codePointIterator(iteratorMode); + for (int i = 0; i < utf8String.numChars(); ++i) { + assertTrue(iterator.hasNext()); + int codePoint = (utf8String.isValid() ? utf8String : utf8String.makeValid()).getChar(i); + assertEquals(codePoint, (int) iterator.next()); + } + assertFalse(iterator.hasNext()); + } + @Test + public void codePointIterator() { + // Valid UTF8 strings. + testCodePointIterator(fromString("")); + testCodePointIterator(fromString("abc")); + testCodePointIterator(fromString("a!2&^R")); + testCodePointIterator(fromString("aéह 日å!")); + testCodePointIterator(fromBytes(new byte[] {(byte) 0x41})); + testCodePointIterator(fromBytes(new byte[] {(byte) 0xC2, (byte) 0xA3})); + testCodePointIterator(fromBytes(new byte[] {(byte) 0xE2, (byte) 0x82, (byte) 0xAC})); + // Invalid UTF8 strings. + testCodePointIterator(fromBytes(new byte[] {(byte) 0xFF})); + testCodePointIterator(fromBytes(new byte[] {(byte) 0x80})); + testCodePointIterator(fromBytes(new byte[] {(byte) 0xC2, (byte) 0x80})); + testCodePointIterator(fromBytes(new byte[] {(byte) 0xE2, (byte) 0x82, (byte) 0x80})); + testCodePointIterator(fromBytes(new byte[] {(byte) 0x41, (byte) 0x80, (byte) 0x42})); + testCodePointIterator(fromBytes(new byte[] { + (byte) 0x41, (byte) 0xC2, (byte) 0x80, (byte) 0x42})); + testCodePointIterator(fromBytes(new byte[] { + (byte) 0x41, (byte) 0xE2, (byte) 0x82, (byte) 0x80, (byte) 0x42})); + } + + private void testReverseCodePointIterator(UTF8String utf8String) { + CodePointIteratorType iteratorMode = utf8String.isValid() ? 
+ CodePointIteratorType.CODE_POINT_ITERATOR_ASSUME_VALID : + CodePointIteratorType.CODE_POINT_ITERATOR_MAKE_VALID; + Iterator iterator = utf8String.codePointIterator(iteratorMode); + for (int i = 0; i < utf8String.numChars(); ++i) { + assertTrue(iterator.hasNext()); + int codePoint = (utf8String.isValid() ? utf8String : utf8String.makeValid()).getChar(i); + assertEquals(codePoint, (int) iterator.next()); + } + assertFalse(iterator.hasNext()); + } + @Test + public void reverseCodePointIterator() { + // Valid UTF8 strings + testReverseCodePointIterator(fromString("")); + testReverseCodePointIterator(fromString("abc")); + testReverseCodePointIterator(fromString("a!2&^R")); + testReverseCodePointIterator(fromString("aéह 日å!")); + testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0x41})); + testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xC2, (byte) 0xA3})); + testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xE2, (byte) 0x82, (byte) 0xAC})); + // Invalid UTF8 strings + testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xFF})); + testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0x80})); + testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xC2, (byte) 0x80})); + testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0xE2, (byte) 0x82, (byte) 0x80})); + testReverseCodePointIterator(fromBytes(new byte[] {(byte) 0x41, (byte) 0x80, (byte) 0x42})); + testReverseCodePointIterator(fromBytes(new byte[] { + (byte) 0x41, (byte) 0xC2, (byte) 0x80, (byte) 0x42})); + testReverseCodePointIterator(fromBytes(new byte[] { + (byte) 0x41, (byte) 0xE2, (byte) 0x82, (byte) 0x80, (byte) 0x42})); + } + + @Test + public void toBinaryString() { + assertEquals(ZERO_UTF8, UTF8String.toBinaryString(0)); + assertEquals(UTF8String.fromString("1"), UTF8String.toBinaryString(1)); + assertEquals(UTF8String.fromString("10"), UTF8String.toBinaryString(2)); + assertEquals(UTF8String.fromString("100"), UTF8String.toBinaryString(4)); + assertEquals(UTF8String.fromString("111"), UTF8String.toBinaryString(7)); + assertEquals( + UTF8String.fromString("1111111111111111111111111111111111111111111111111111111111110011"), + UTF8String.toBinaryString(-13)); + assertEquals( + UTF8String.fromString("1000000000000000000000000000000000000000000000000000000000000000"), + UTF8String.toBinaryString(Long.MIN_VALUE)); + assertEquals( + UTF8String.fromString("111111111111111111111111111111111111111111111111111111111111111"), + UTF8String.toBinaryString(Long.MAX_VALUE)); + } } diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala index 768d26bf0e11e..3c29daeff168f 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala @@ -20,7 +20,10 @@ package org.apache.spark.unsafe.types import scala.collection.parallel.immutable.ParSeq import scala.jdk.CollectionConverters.MapHasAsScala +import com.ibm.icu.util.ULocale + import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.util.CollationFactory.fetchCollation // scalastyle:off import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.must.Matchers @@ -30,31 +33,93 @@ import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8} class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ignore funsuite test("collationId 
stability") { - val utf8Binary = fetchCollation(0) + assert(INDETERMINATE_COLLATION_ID == -1) + + assert(UTF8_BINARY_COLLATION_ID == 0) + val utf8Binary = fetchCollation(UTF8_BINARY_COLLATION_ID) assert(utf8Binary.collationName == "UTF8_BINARY") assert(utf8Binary.supportsBinaryEquality) - val utf8BinaryLcase = fetchCollation(1) - assert(utf8BinaryLcase.collationName == "UTF8_BINARY_LCASE") + assert(UTF8_LCASE_COLLATION_ID == 1) + val utf8BinaryLcase = fetchCollation(UTF8_LCASE_COLLATION_ID) + assert(utf8BinaryLcase.collationName == "UTF8_LCASE") assert(!utf8BinaryLcase.supportsBinaryEquality) - val unicode = fetchCollation(2) + assert(UNICODE_COLLATION_ID == (1 << 29)) + val unicode = fetchCollation(UNICODE_COLLATION_ID) assert(unicode.collationName == "UNICODE") - assert(unicode.supportsBinaryEquality); + assert(!unicode.supportsBinaryEquality) - val unicodeCi = fetchCollation(3) + assert(UNICODE_CI_COLLATION_ID == ((1 << 29) | (1 << 17))) + val unicodeCi = fetchCollation(UNICODE_CI_COLLATION_ID) assert(unicodeCi.collationName == "UNICODE_CI") assert(!unicodeCi.supportsBinaryEquality) } - test("fetch invalid collation name") { - val error = intercept[SparkException] { - fetchCollation("UTF8_BS") + test("UTF8_BINARY and ICU root locale collation names") { + // Collation name already normalized. + Seq( + "UTF8_BINARY", + "UTF8_LCASE", + "UNICODE", + "UNICODE_CI", + "UNICODE_AI", + "UNICODE_CI_AI" + ).foreach(collationName => { + val col = fetchCollation(collationName) + assert(col.collationName == collationName) + }) + // Collation name normalization. + Seq( + // ICU root locale. + ("UNICODE_CS", "UNICODE"), + ("UNICODE_CS_AS", "UNICODE"), + ("UNICODE_CI_AS", "UNICODE_CI"), + ("UNICODE_AI_CS", "UNICODE_AI"), + ("UNICODE_AI_CI", "UNICODE_CI_AI"), + // Randomized case collation names. 
+ ("utf8_binary", "UTF8_BINARY"), + ("UtF8_LcasE", "UTF8_LCASE"), + ("unicode", "UNICODE"), + ("UnICoDe_cs_aI", "UNICODE_AI") + ).foreach{ + case (name, normalized) => + val col = fetchCollation(name) + assert(col.collationName == normalized) } + } - assert(error.getErrorClass === "COLLATION_INVALID_NAME") - assert(error.getMessageParameters.asScala === - Map("proposal" -> "UTF8_BINARY", "collationName" -> "UTF8_BS")) + test("fetch invalid UTF8_BINARY and ICU root locale collation names") { + Seq( + ("UTF8_BINARY_CS", "UTF8_BINARY"), + ("UTF8_BINARY_AS", "UTF8_BINARY"), // this should be UNICODE_AS + ("UTF8_BINARY_CS_AS","UTF8_BINARY"), // this should be UNICODE_CS_AS + ("UTF8_BINARY_AS_CS","UTF8_BINARY"), + ("UTF8_BINARY_CI","UTF8_BINARY"), + ("UTF8_BINARY_AI","UTF8_BINARY"), + ("UTF8_BINARY_CI_AI","UTF8_BINARY"), + ("UTF8_BINARY_AI_CI","UTF8_BINARY"), + ("UTF8_BS","UTF8_LCASE"), + ("BINARY_UTF8","ar_SAU"), + ("UTF8_BINARY_A","UTF8_BINARY"), + ("UNICODE_X","UNICODE"), + ("UNICODE_CI_X","UNICODE"), + ("UNICODE_LCASE_X","UNICODE"), + ("UTF8_UNICODE","UTF8_LCASE"), + ("UTF8_BINARY_UNICODE","UTF8_BINARY"), + ("CI_UNICODE", "UNICODE"), + ("LCASE_UNICODE", "UNICODE"), + ("UNICODE_UNSPECIFIED", "UNICODE"), + ("UNICODE_CI_UNSPECIFIED", "UNICODE"), + ("UNICODE_UNSPECIFIED_CI_UNSPECIFIED", "UNICODE"), + ("UNICODE_INDETERMINATE", "UNICODE"), + ("UNICODE_CI_INDETERMINATE", "UNICODE") + ).foreach{case (collationName, proposals) => + val error = intercept[SparkException] { fetchCollation(collationName) } + assert(error.getErrorClass === "COLLATION_INVALID_NAME") + assert(error.getMessageParameters.asScala === Map( + "collationName" -> collationName, "proposals" -> proposals)) + } } case class CollationTestCase[R](collationName: String, s1: String, s2: String, expectedResult: R) @@ -64,18 +129,24 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig CollationTestCase("UTF8_BINARY", "aaa", "aaa", true), CollationTestCase("UTF8_BINARY", "aaa", "AAA", false), CollationTestCase("UTF8_BINARY", "aaa", "bbb", false), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "aaa", true), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AAA", true), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AaA", true), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AaA", true), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "aa", false), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "bbb", false), + CollationTestCase("UTF8_BINARY", "å", "a\u030A", false), + CollationTestCase("UTF8_LCASE", "aaa", "aaa", true), + CollationTestCase("UTF8_LCASE", "aaa", "AAA", true), + CollationTestCase("UTF8_LCASE", "aaa", "AaA", true), + CollationTestCase("UTF8_LCASE", "aaa", "AaA", true), + CollationTestCase("UTF8_LCASE", "aaa", "aa", false), + CollationTestCase("UTF8_LCASE", "aaa", "bbb", false), + CollationTestCase("UTF8_LCASE", "å", "a\u030A", false), CollationTestCase("UNICODE", "aaa", "aaa", true), CollationTestCase("UNICODE", "aaa", "AAA", false), CollationTestCase("UNICODE", "aaa", "bbb", false), + CollationTestCase("UNICODE", "å", "a\u030A", true), CollationTestCase("UNICODE_CI", "aaa", "aaa", true), CollationTestCase("UNICODE_CI", "aaa", "AAA", true), - CollationTestCase("UNICODE_CI", "aaa", "bbb", false)) + CollationTestCase("UNICODE_CI", "aaa", "bbb", false), + CollationTestCase("UNICODE_CI", "å", "a\u030A", true), + CollationTestCase("UNICODE_CI", "Å", "a\u030A", true) + ) checks.foreach(testCase => { val collation = fetchCollation(testCase.collationName) @@ -94,12 +165,12 @@ class 
CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig CollationTestCase("UTF8_BINARY", "aaa", "AAA", 1), CollationTestCase("UTF8_BINARY", "aaa", "bbb", -1), CollationTestCase("UTF8_BINARY", "aaa", "BBB", 1), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "aaa", 0), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AAA", 0), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AaA", 0), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "AaA", 0), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "aa", 1), - CollationTestCase("UTF8_BINARY_LCASE", "aaa", "bbb", -1), + CollationTestCase("UTF8_LCASE", "aaa", "aaa", 0), + CollationTestCase("UTF8_LCASE", "aaa", "AAA", 0), + CollationTestCase("UTF8_LCASE", "aaa", "AaA", 0), + CollationTestCase("UTF8_LCASE", "aaa", "AaA", 0), + CollationTestCase("UTF8_LCASE", "aaa", "aa", 1), + CollationTestCase("UTF8_LCASE", "aaa", "bbb", -1), CollationTestCase("UNICODE", "aaa", "aaa", 0), CollationTestCase("UNICODE", "aaa", "AAA", -1), CollationTestCase("UNICODE", "aaa", "bbb", -1), @@ -152,4 +223,246 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig } }) } + + test("test collation caching") { + Seq( + "UTF8_BINARY", + "UTF8_LCASE", + "UNICODE", + "UNICODE_CI", + "UNICODE_AI", + "UNICODE_CI_AI", + "UNICODE_AI_CI" + ).foreach(collationId => { + val col1 = fetchCollation(collationId) + val col2 = fetchCollation(collationId) + assert(col1 eq col2) // Check for reference equality. + }) + } + + test("collations with ICU non-root localization") { + Seq( + // Language only. + "en", + "en_CS", + "en_CI", + "en_AS", + "en_AI", + // Language + 3-letter country code. + "en_USA", + "en_USA_CS", + "en_USA_CI", + "en_USA_AS", + "en_USA_AI", + // Language + script code. + "sr_Cyrl", + "sr_Cyrl_CS", + "sr_Cyrl_CI", + "sr_Cyrl_AS", + "sr_Cyrl_AI", + // Language + script code + 3-letter country code. + "sr_Cyrl_SRB", + "sr_Cyrl_SRB_CS", + "sr_Cyrl_SRB_CI", + "sr_Cyrl_SRB_AS", + "sr_Cyrl_SRB_AI" + ).foreach(collationICU => { + val col = fetchCollation(collationICU) + assert(col.collator.getLocale(ULocale.VALID_LOCALE) != ULocale.ROOT) + }) + } + + test("invalid names of collations with ICU non-root localization") { + Seq( + ("en_US", "en_USA"), // Must use 3-letter country code + ("eN_US", "en_USA"), // verify that proper casing is captured in error. + ("enn", "en, nn, bn"), + ("en_AAA", "en_USA"), + ("en_Something", "UNICODE"), + ("en_Something_USA", "en_USA"), + ("en_LCASE", "en_USA"), + ("en_UCASE", "en_USA"), + ("en_CI_LCASE", "UNICODE"), + ("en_CI_UCASE", "en_USA"), + ("en_CI_UNSPECIFIED", "en_USA"), + ("en_USA_UNSPECIFIED", "en_USA"), + ("en_USA_UNSPECIFIED_CI", "en_USA_CI"), + ("en_INDETERMINATE", "en_USA"), + ("en_USA_INDETERMINATE", "en_USA"), + ("en_Latn_USA", "en_USA"), + ("en_Cyrl_USA", "en_USA"), + ("en_USA_AAA", "en_USA"), + ("sr_Cyrl_SRB_AAA", "sr_Cyrl_SRB"), + // Invalid ordering of language, script and country code. + ("USA_en", "en"), + ("sr_SRB_Cyrl", "sr_Cyrl"), + ("SRB_sr", "ar_SAU"), + ("SRB_sr_Cyrl", "bs_Cyrl"), + ("SRB_Cyrl_sr", "sr_Cyrl_SRB"), + ("Cyrl_sr", "sr_Cyrl_SRB"), + ("Cyrl_sr_SRB", "sr_Cyrl_SRB"), + ("Cyrl_SRB_sr", "sr_Cyrl_SRB"), + // Collation specifiers in the middle of locale. 
+ ("CI_en", "ceb"), + ("USA_CI_en", "UNICODE"), + ("en_CI_USA", "en_USA"), + ("CI_sr_Cyrl_SRB", "sr_Cyrl_SRB"), + ("sr_CI_Cyrl_SRB", "sr_Cyrl_SRB"), + ("sr_Cyrl_CI_SRB", "sr_Cyrl_SRB"), + ("CI_Cyrl_sr", "sr_Cyrl_SRB"), + ("Cyrl_CI_sr", "he_ISR"), + ("Cyrl_CI_sr_SRB", "sr_Cyrl_SRB"), + ("Cyrl_sr_CI_SRB", "sr_Cyrl_SRB"), + // no locale specified + ("_CI_AI", "af_CI_AI, am_CI_AI, ar_CI_AI"), + ("", "af, am, ar") + ).foreach { case (collationName, proposals) => { + val error = intercept[SparkException] { fetchCollation(collationName) } + assert(error.getErrorClass === "COLLATION_INVALID_NAME") + + assert(error.getMessageParameters.asScala === Map( + "collationName" -> collationName, "proposals" -> proposals)) + }} + } + + test("collations name normalization for ICU non-root localization") { + Seq( + ("en_USA", "en_USA"), + ("en_CS", "en"), + ("en_AS", "en"), + ("en_CS_AS", "en"), + ("en_AS_CS", "en"), + ("en_CI", "en_CI"), + ("en_AI", "en_AI"), + ("en_AI_CI", "en_CI_AI"), + ("en_CI_AI", "en_CI_AI"), + ("en_CS_AI", "en_AI"), + ("en_AI_CS", "en_AI"), + ("en_CI_AS", "en_CI"), + ("en_AS_CI", "en_CI"), + ("en_USA_AI_CI", "en_USA_CI_AI"), + // Randomized case. + ("EN_USA", "en_USA"), + ("SR_CYRL", "sr_Cyrl"), + ("sr_cyrl_srb", "sr_Cyrl_SRB"), + ("sR_cYRl_sRb", "sr_Cyrl_SRB") + ).foreach { + case (name, normalized) => + val col = fetchCollation(name) + assert(col.collationName == normalized) + } + } + + test("invalid collationId") { + val badCollationIds = Seq( + INDETERMINATE_COLLATION_ID, // Indeterminate collation. + 1 << 30, // User-defined collation range. + (1 << 30) | 1, // User-defined collation range. + (1 << 30) | (1 << 29), // User-defined collation range. + 1 << 1, // UTF8_BINARY mandatory zero bit 1 breach. + 1 << 2, // UTF8_BINARY mandatory zero bit 2 breach. + 1 << 3, // UTF8_BINARY mandatory zero bit 3 breach. + 1 << 4, // UTF8_BINARY mandatory zero bit 4 breach. + 1 << 5, // UTF8_BINARY mandatory zero bit 5 breach. + 1 << 6, // UTF8_BINARY mandatory zero bit 6 breach. + 1 << 7, // UTF8_BINARY mandatory zero bit 7 breach. + 1 << 8, // UTF8_BINARY mandatory zero bit 8 breach. + 1 << 9, // UTF8_BINARY mandatory zero bit 9 breach. + 1 << 10, // UTF8_BINARY mandatory zero bit 10 breach. + 1 << 11, // UTF8_BINARY mandatory zero bit 11 breach. + 1 << 12, // UTF8_BINARY mandatory zero bit 12 breach. + 1 << 13, // UTF8_BINARY mandatory zero bit 13 breach. + 1 << 14, // UTF8_BINARY mandatory zero bit 14 breach. + 1 << 15, // UTF8_BINARY mandatory zero bit 15 breach. + 1 << 16, // UTF8_BINARY mandatory zero bit 16 breach. + 1 << 17, // UTF8_BINARY mandatory zero bit 17 breach. + 1 << 18, // UTF8_BINARY mandatory zero bit 18 breach. + 1 << 19, // UTF8_BINARY mandatory zero bit 19 breach. + 1 << 20, // UTF8_BINARY mandatory zero bit 20 breach. + 1 << 23, // UTF8_BINARY mandatory zero bit 23 breach. + 1 << 24, // UTF8_BINARY mandatory zero bit 24 breach. + 1 << 25, // UTF8_BINARY mandatory zero bit 25 breach. + 1 << 26, // UTF8_BINARY mandatory zero bit 26 breach. + 1 << 27, // UTF8_BINARY mandatory zero bit 27 breach. + 1 << 28, // UTF8_BINARY mandatory zero bit 28 breach. + (1 << 29) | (1 << 12), // ICU mandatory zero bit 12 breach. + (1 << 29) | (1 << 13), // ICU mandatory zero bit 13 breach. + (1 << 29) | (1 << 14), // ICU mandatory zero bit 14 breach. + (1 << 29) | (1 << 15), // ICU mandatory zero bit 15 breach. + (1 << 29) | (1 << 18), // ICU mandatory zero bit 18 breach. + (1 << 29) | (1 << 19), // ICU mandatory zero bit 19 breach. 
+ (1 << 29) | (1 << 20), // ICU mandatory zero bit 20 breach. + (1 << 29) | (1 << 21), // ICU mandatory zero bit 21 breach. + (1 << 29) | (1 << 22), // ICU mandatory zero bit 22 breach. + (1 << 29) | (1 << 23), // ICU mandatory zero bit 23 breach. + (1 << 29) | (1 << 24), // ICU mandatory zero bit 24 breach. + (1 << 29) | (1 << 25), // ICU mandatory zero bit 25 breach. + (1 << 29) | (1 << 26), // ICU mandatory zero bit 26 breach. + (1 << 29) | (1 << 27), // ICU mandatory zero bit 27 breach. + (1 << 29) | (1 << 28), // ICU mandatory zero bit 28 breach. + (1 << 29) | 0xFFFF // ICU with invalid locale id. + ) + badCollationIds.foreach(collationId => { + // Assumptions about collation id will break and assert statement will fail. + intercept[AssertionError](fetchCollation(collationId)) + }) + } + + test("repeated and/or incompatible and/or misplaced specifiers in collation name") { + Seq( + ("UTF8_LCASE_LCASE", "UTF8_LCASE"), + ("UNICODE_CS_CS", "UNICODE_CS"), + ("UNICODE_CI_CI", "UNICODE_CI"), + ("UNICODE_CI_CS", "UNICODE_CS"), + ("UNICODE_CS_CI", "UNICODE_CS"), + ("UNICODE_AS_AS", "UNICODE_AS"), + ("UNICODE_AI_AI", "UNICODE_AI"), + ("UNICODE_AS_AI", "UNICODE_AS"), + ("UNICODE_AI_AS", "UNICODE_AS"), + ("UNICODE_AS_CS_AI", "UNICODE_AS_CS"), + ("UNICODE_CS_AI_CI", "UNICODE_CS_AI"), + ("UNICODE_CS_AS_CI_AI", "UNICODE_CS_AS"), + ("UNICODE__CS__AS", "UNICODE_AS"), + ("UNICODE-CS-AS", "UNICODE"), + ("UNICODECSAS", "UNICODE"), + ("_CS_AS_UNICODE", "UNICODE") + ).foreach { case (collationName, proposals) => + val error = intercept[SparkException] { + fetchCollation(collationName) + } + + assert(error.getErrorClass === "COLLATION_INVALID_NAME") + assert(error.getMessageParameters.asScala === Map( + "collationName" -> collationName, "proposals" -> proposals)) + } + } + + test("basic ICU collator checks") { + Seq( + CollationTestCase("UNICODE_CI", "a", "A", true), + CollationTestCase("UNICODE_CI", "a", "å", false), + CollationTestCase("UNICODE_CI", "a", "Å", false), + CollationTestCase("UNICODE_AI", "a", "A", false), + CollationTestCase("UNICODE_AI", "a", "å", true), + CollationTestCase("UNICODE_AI", "a", "Å", false), + CollationTestCase("UNICODE_CI_AI", "a", "A", true), + CollationTestCase("UNICODE_CI_AI", "a", "å", true), + CollationTestCase("UNICODE_CI_AI", "a", "Å", true) + ).foreach(testCase => { + val collation = fetchCollation(testCase.collationName) + assert(collation.equalsFunction(toUTF8(testCase.s1), toUTF8(testCase.s2)) == + testCase.expectedResult) + }) + Seq( + CollationTestCase("en", "a", "A", -1), + CollationTestCase("en_CI", "a", "A", 0), + CollationTestCase("en_AI", "a", "å", 0), + CollationTestCase("sv", "Kypper", "Köpfe", -1), + CollationTestCase("de", "Kypper", "Köpfe", 1) + ).foreach(testCase => { + val collation = fetchCollation(testCase.collationName) + val result = collation.comparator.compare(toUTF8(testCase.s1), toUTF8(testCase.s2)) + assert(Integer.signum(result) == testCase.expectedResult) + }) + } } diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java b/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java new file mode 100644 index 0000000000000..8c210a4fab3c3 --- /dev/null +++ b/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java @@ -0,0 +1,246 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.internal; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Consumer; + +import org.apache.logging.log4j.CloseableThreadContext; +import org.apache.logging.log4j.message.MessageFactory; +import org.apache.logging.log4j.message.ParameterizedMessageFactory; +// checkstyle.off: RegexpSinglelineJava +import org.slf4j.Logger; +// checkstyle.on: RegexpSinglelineJava + +// checkstyle.off: RegexpSinglelineJava +/** + * Guidelines for the Structured Logging Framework - Java Logging + *
+ * + * Use the `org.apache.spark.internal.SparkLoggerFactory` to get the logger instance in Java code: + * Getting Logger Instance: + * Instead of using `org.slf4j.LoggerFactory`, use `org.apache.spark.internal.SparkLoggerFactory` + * to ensure structured logging. + *
+ * + * import org.apache.spark.internal.SparkLogger; + * import org.apache.spark.internal.SparkLoggerFactory; + * private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class); + *
+ * + * Logging Messages with Variables: + * When logging messages with variables, wrap all the variables with `MDC`s and they will be + * automatically added to the Mapped Diagnostic Context (MDC). + *
+ * + * import org.apache.spark.internal.LogKeys; + * import org.apache.spark.internal.MDC; + * logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID$.MODULE$, i)); + *
+ * + * Constant String Messages: + * For logging constant string messages, use the standard logging methods. + *
+ * + * logger.error("Failed to abort the writer after failing to write map output.", e); + *
+ * + * If you want to output logs in `java code` through the structured log framework, + * you can define `custom LogKey` and use it in `java` code as follows: + *
+ * + * // To add a `custom LogKey`, implement `LogKey` + * public static class CUSTOM_LOG_KEY implements LogKey { } + * import org.apache.spark.internal.MDC; + * logger.error("Unable to delete key {} for cache", MDC.of(CUSTOM_LOG_KEY, "key")); + */ +// checkstyle.on: RegexpSinglelineJava +public class SparkLogger { + + private static final MessageFactory MESSAGE_FACTORY = ParameterizedMessageFactory.INSTANCE; + private final Logger slf4jLogger; + + SparkLogger(Logger slf4jLogger) { + this.slf4jLogger = slf4jLogger; + } + + public boolean isErrorEnabled() { + return slf4jLogger.isErrorEnabled(); + } + + public void error(String msg) { + slf4jLogger.error(msg); + } + + public void error(String msg, Throwable throwable) { + slf4jLogger.error(msg, throwable); + } + + public void error(String msg, MDC... mdcs) { + if (mdcs == null || mdcs.length == 0) { + slf4jLogger.error(msg); + } else if (slf4jLogger.isErrorEnabled()) { + withLogContext(msg, mdcs, null, mt -> slf4jLogger.error(mt.message)); + } + } + + public void error(String msg, Throwable throwable, MDC... mdcs) { + if (mdcs == null || mdcs.length == 0) { + slf4jLogger.error(msg, throwable); + } else if (slf4jLogger.isErrorEnabled()) { + withLogContext(msg, mdcs, throwable, mt -> slf4jLogger.error(mt.message, mt.throwable)); + } + } + + public boolean isWarnEnabled() { + return slf4jLogger.isWarnEnabled(); + } + + public void warn(String msg) { + slf4jLogger.warn(msg); + } + + public void warn(String msg, Throwable throwable) { + slf4jLogger.warn(msg, throwable); + } + + public void warn(String msg, MDC... mdcs) { + if (mdcs == null || mdcs.length == 0) { + slf4jLogger.warn(msg); + } else if (slf4jLogger.isWarnEnabled()) { + withLogContext(msg, mdcs, null, mt -> slf4jLogger.warn(mt.message)); + } + } + + public void warn(String msg, Throwable throwable, MDC... mdcs) { + if (mdcs == null || mdcs.length == 0) { + slf4jLogger.warn(msg, throwable); + } else if (slf4jLogger.isWarnEnabled()) { + withLogContext(msg, mdcs, throwable, mt -> slf4jLogger.warn(mt.message, mt.throwable)); + } + } + + public boolean isInfoEnabled() { + return slf4jLogger.isInfoEnabled(); + } + + public void info(String msg) { + slf4jLogger.info(msg); + } + + public void info(String msg, Throwable throwable) { + slf4jLogger.info(msg, throwable); + } + + public void info(String msg, MDC... mdcs) { + if (mdcs == null || mdcs.length == 0) { + slf4jLogger.info(msg); + } else if (slf4jLogger.isInfoEnabled()) { + withLogContext(msg, mdcs, null, mt -> slf4jLogger.info(mt.message)); + } + } + + public void info(String msg, Throwable throwable, MDC... mdcs) { + if (mdcs == null || mdcs.length == 0) { + slf4jLogger.info(msg, throwable); + } else if (slf4jLogger.isInfoEnabled()) { + withLogContext(msg, mdcs, throwable, mt -> slf4jLogger.info(mt.message, mt.throwable)); + } + } + + public boolean isDebugEnabled() { + return slf4jLogger.isDebugEnabled(); + } + + public void debug(String msg) { + slf4jLogger.debug(msg); + } + + public void debug(String format, Object arg) { + slf4jLogger.debug(format, arg); + } + + public void debug(String format, Object arg1, Object arg2) { + slf4jLogger.debug(format, arg1, arg2); + } + + public void debug(String format, Object... 
arguments) { + slf4jLogger.debug(format, arguments); + } + + public void debug(String msg, Throwable throwable) { + slf4jLogger.debug(msg, throwable); + } + + public boolean isTraceEnabled() { + return slf4jLogger.isTraceEnabled(); + } + + public void trace(String msg) { + slf4jLogger.trace(msg); + } + + public void trace(String format, Object arg) { + slf4jLogger.trace(format, arg); + } + + public void trace(String format, Object arg1, Object arg2) { + slf4jLogger.trace(format, arg1, arg2); + } + + public void trace(String format, Object... arguments) { + slf4jLogger.trace(format, arguments); + } + + public void trace(String msg, Throwable throwable) { + slf4jLogger.trace(msg, throwable); + } + + private void withLogContext( + String pattern, + MDC[] mdcs, + Throwable throwable, + Consumer func) { + Map context = new HashMap<>(); + Object[] args = new Object[mdcs.length]; + for (int index = 0; index < mdcs.length; index++) { + MDC mdc = mdcs[index]; + String value = (mdc.value() != null) ? mdc.value().toString() : null; + if (Logging$.MODULE$.isStructuredLoggingEnabled()) { + context.put(mdc.key().name(), value); + } + args[index] = value; + } + MessageThrowable messageThrowable = MessageThrowable.of( + MESSAGE_FACTORY.newMessage(pattern, args).getFormattedMessage(), throwable); + try (CloseableThreadContext.Instance ignored = CloseableThreadContext.putAll(context)) { + func.accept(messageThrowable); + } + } + + private record MessageThrowable(String message, Throwable throwable) { + static MessageThrowable of(String message, Throwable throwable) { + return new MessageThrowable(message, throwable); + } + } + + public Logger getSlf4jLogger() { + return slf4jLogger; + } +} diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java b/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java new file mode 100644 index 0000000000000..a59c007362419 --- /dev/null +++ b/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.internal; + +// checkstyle.off: RegexpSinglelineJava +import org.slf4j.LoggerFactory; +// checkstyle.on: RegexpSinglelineJava + +public class SparkLoggerFactory { + + public static SparkLogger getLogger(String name) { + return new SparkLogger(LoggerFactory.getLogger(name)); + } + + public static SparkLogger getLogger(Class clazz) { + return new SparkLogger(LoggerFactory.getLogger(clazz)); + } +} diff --git a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java index 8e1cc470e0ccf..90dddc2cb08c1 100644 --- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java +++ b/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java @@ -29,15 +29,18 @@ import java.util.regex.Pattern; import org.apache.commons.lang3.SystemUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * General utilities available in the network package. Many of these are sourced from Spark's * own Utils, just accessible within this package. */ public class JavaUtils { - private static final Logger logger = LoggerFactory.getLogger(JavaUtils.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(JavaUtils.class); /** * Define a default value for driver memory here since this value is referenced across the code @@ -112,7 +115,7 @@ public static void deleteRecursively(File file, FilenameFilter filter) throws IO return; } catch (IOException e) { logger.warn("Attempt to delete using native Unix OS command failed for path = {}. " + - "Falling back to Java IO way", file.getAbsolutePath(), e); + "Falling back to Java IO way", e, MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath())); } } @@ -228,6 +231,8 @@ private static boolean isSymlink(File file) throws IOException { Map.entry("pb", ByteUnit.PiB)); } + private static final Pattern TIME_STRING_PATTERN = Pattern.compile("(-?[0-9]+)([a-z]+)?"); + /** * Convert a passed time string (e.g. 50s, 100ms, or 250us) to a time count in the given unit. * The unit is also considered the default if the given string does not specify a unit. @@ -236,7 +241,7 @@ public static long timeStringAs(String str, TimeUnit unit) { String lower = str.toLowerCase(Locale.ROOT).trim(); try { - Matcher m = Pattern.compile("(-?[0-9]+)([a-z]+)?").matcher(lower); + Matcher m = TIME_STRING_PATTERN.matcher(lower); if (!m.matches()) { throw new NumberFormatException("Failed to parse time string: " + str); } @@ -276,6 +281,11 @@ public static long timeStringAsSec(String str) { return timeStringAs(str, TimeUnit.SECONDS); } + private static final Pattern BYTE_STRING_PATTERN = + Pattern.compile("([0-9]+)([a-z]+)?"); + private static final Pattern BYTE_STRING_FRACTION_PATTERN = + Pattern.compile("([0-9]+\\.[0-9]+)([a-z]+)?"); + /** * Convert a passed byte string (e.g. 50b, 100kb, or 250mb) to the given. If no suffix is * provided, a direct conversion to the provided unit is attempted. 
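Editor's note: the JavaUtils hunks above hoist the time- and byte-string regexes into static final Pattern fields so each call reuses the compiled pattern instead of invoking Pattern.compile on every parse. Below is a simplified, standalone sketch of the same refactor; the regex string matches the diff, but the suffix handling is reduced to a few units and is not the full JavaUtils logic.

```java
import java.util.Locale;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Simplified sketch of the precompiled-pattern refactor: the regex is compiled once,
// stored in a static final field, and reused by every call.
public final class TimeStringSketch {
  private static final Pattern TIME_STRING_PATTERN = Pattern.compile("(-?[0-9]+)([a-z]+)?");

  static long timeStringAsSeconds(String str) {
    String lower = str.toLowerCase(Locale.ROOT).trim();
    Matcher m = TIME_STRING_PATTERN.matcher(lower);
    if (!m.matches()) {
      throw new NumberFormatException("Failed to parse time string: " + str);
    }
    long value = Long.parseLong(m.group(1));
    String suffix = m.group(2);
    // Reduced suffix table for illustration; the real method supports more units.
    TimeUnit unit;
    if (suffix == null || suffix.equals("s")) {
      unit = TimeUnit.SECONDS;
    } else if (suffix.equals("ms")) {
      unit = TimeUnit.MILLISECONDS;
    } else if (suffix.equals("min")) {
      unit = TimeUnit.MINUTES;
    } else {
      throw new NumberFormatException("Unrecognized time suffix in sketch: " + suffix);
    }
    return TimeUnit.SECONDS.convert(value, unit);
  }

  public static void main(String[] args) {
    System.out.println(timeStringAsSeconds("50s"));     // 50
    System.out.println(timeStringAsSeconds("2500ms"));  // 2
  }
}
```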
@@ -284,8 +294,8 @@ public static long byteStringAs(String str, ByteUnit unit) { String lower = str.toLowerCase(Locale.ROOT).trim(); try { - Matcher m = Pattern.compile("([0-9]+)([a-z]+)?").matcher(lower); - Matcher fractionMatcher = Pattern.compile("([0-9]+\\.[0-9]+)([a-z]+)?").matcher(lower); + Matcher m = BYTE_STRING_PATTERN.matcher(lower); + Matcher fractionMatcher = BYTE_STRING_FRACTION_PATTERN.matcher(lower); if (m.matches()) { long val = Long.parseLong(m.group(1)); @@ -396,7 +406,7 @@ public static File createDirectory(String root, String namePrefix) throws IOExce dir = new File(root, namePrefix + "-" + UUID.randomUUID()); Files.createDirectories(dir.toPath()); } catch (IOException | SecurityException e) { - logger.error("Failed to create directory " + dir, e); + logger.error("Failed to create directory {}", e, MDC.of(LogKeys.PATH$.MODULE$, dir)); dir = null; } } diff --git a/common/utils/src/main/resources/error/README.md b/common/utils/src/main/resources/error/README.md index e2f68a1af9f4a..575e2ebad35a3 100644 --- a/common/utils/src/main/resources/error/README.md +++ b/common/utils/src/main/resources/error/README.md @@ -16,9 +16,9 @@ The error state / SQLSTATE itself is comprised of two parts: 2. Error sub-class Acceptable values for these various error parts are defined in the following files: -* `error-classes.json` -* `error-states.json` -* `error-conditions.json` +* [`error-classes.json`](error-classes.json) +* [`error-states.json`](error-states.json) +* [`error-conditions.json`](error-conditions.json) The terms error class, state, and condition come from the SQL standard. @@ -34,6 +34,7 @@ The terms error class, state, and condition come from the SQL standard. * Error condition: `AS_OF_JOIN` * Error sub-condition: `TOLERANCE_IS_NON_NEGATIVE` * Error sub-condition: `TOLERANCE_IS_UNFOLDABLE` + * Error sub-condition: `UNSUPPORTED_DIRECTION` ### Inconsistent Use of the Term "Error Class" @@ -41,7 +42,7 @@ Unfortunately, we have historically used the term "error class" inconsistently t Fixing this will require renaming `SparkException.errorClass` to `SparkException.errorCondition` and making similar changes to `ErrorClassesJsonReader` and other parts of the codebase. We will address this in [SPARK-47429]. Until that is complete, we will have to live with the fact that a string like `DATATYPE_MISSING_SIZE` is called an "error condition" in our user-facing documentation but an "error class" in the code. -For more details, please see [SPARK-46810][SPARK-46810]. +For more details, please see [SPARK-46810]. [SPARK-46810]: https://issues.apache.org/jira/browse/SPARK-46810 [SPARK-47429]: https://issues.apache.org/jira/browse/SPARK-47429 @@ -51,9 +52,9 @@ For more details, please see [SPARK-46810][SPARK-46810]. 1. Check if the error is an internal error. Internal errors are bugs in the code that we do not expect users to encounter; this does not include unsupported operations. If true, use the error condition `INTERNAL_ERROR` and skip to step 4. -2. Check if an appropriate error condition already exists in `error-conditions.json`. +2. Check if an appropriate error condition already exists in [`error-conditions.json`](error-conditions.json). If true, use the error condition and skip to step 4. -3. Add a new condition to `error-conditions.json`. If the new condition requires a new error state, add the new error state to `error-states.json`. +3. Add a new condition to [`error-conditions.json`](error-conditions.json). 
If the new condition requires a new error state, add the new error state to [`error-states.json`](error-states.json). 4. Check if the exception type already extends `SparkThrowable`. If true, skip to step 6. 5. Mix `SparkThrowable` into the exception. @@ -165,7 +166,7 @@ For example: The existing `XXKD0` is used for an internal analyzer error. #### ANSI/ISO standard -The SQLSTATEs in `error-states.json` are collated from: +The SQLSTATEs in [`error-states.json`](error-states.json) are collated from: - SQL2016 - DB2 zOS/LUW - PostgreSQL 15 diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index e1c8c881f98f3..9a3011635daa3 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -90,6 +90,11 @@ "message" : [ "The input argument `tolerance` must be a constant." ] + }, + "UNSUPPORTED_DIRECTION" : { + "message" : [ + "Unsupported as-of join direction ''. Supported as-of join direction include: ." + ] } }, "sqlState" : "42604" @@ -101,6 +106,13 @@ ], "sqlState" : "22KD3" }, + "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE" : { + "message" : [ + "Cannot call the SQL function because the Avro data source is not loaded.", + "Please restart your job or session with the 'spark-avro' package loaded, such as by using the --packages argument on the command line, and then retry your query or command again." + ], + "sqlState" : "22KD3" + }, "BATCH_METADATA_NOT_FOUND" : { "message" : [ "Unable to find batch ." @@ -119,12 +131,24 @@ ], "sqlState" : "42KDE" }, + "CANNOT_ALTER_COLLATION_BUCKET_COLUMN" : { + "message" : [ + "ALTER TABLE (ALTER|CHANGE) COLUMN cannot change collation of type/subtypes of bucket columns, but found the bucket column in the table ." + ], + "sqlState" : "428FR" + }, "CANNOT_ALTER_PARTITION_COLUMN" : { "message" : [ "ALTER TABLE (ALTER|CHANGE) COLUMN is not supported for partition columns, but found the partition column in the table ." ], "sqlState" : "428FR" }, + "CANNOT_ASSIGN_EVENT_TIME_COLUMN_WITHOUT_WATERMARK" : { + "message" : [ + "Watermark needs to be defined to reassign event time column. Failed to find watermark definition in the streaming query." + ], + "sqlState" : "42611" + }, "CANNOT_CAST_DATATYPE" : { "message" : [ "Cannot cast to ." @@ -212,6 +236,11 @@ "Error reading delta file of : does not exist." ] }, + "CANNOT_READ_MISSING_SNAPSHOT_FILE" : { + "message" : [ + "Error reading snapshot file of : does not exist." + ] + }, "CANNOT_READ_SNAPSHOT_FILE_KEY_SIZE" : { "message" : [ "Error reading snapshot file of : key size cannot be ." @@ -227,6 +256,11 @@ "Error reading streaming state file of does not exist. If the stream job is restarted with a new or updated state operation, please create a new checkpoint location or clear the existing checkpoint location." ] }, + "SNAPSHOT_PARTITION_ID_NOT_FOUND" : { + "message" : [ + "Partition id not found for state of operator at ." + ] + }, "UNCATEGORIZED" : { "message" : [ "" @@ -463,7 +497,13 @@ }, "COLLATION_INVALID_NAME" : { "message" : [ - "The value does not represent a correct collation name. Suggested valid collation name: []." + "The value does not represent a correct collation name. Suggested valid collation names: []." + ], + "sqlState" : "42704" + }, + "COLLATION_INVALID_PROVIDER" : { + "message" : [ + "The value does not represent a correct collation provider. Supported providers are: []." 
], "sqlState" : "42704" }, @@ -736,6 +776,11 @@ "Input to the function cannot contain elements of the \"MAP\" type. In Spark, same maps may have different hashcode, thus hash expressions are prohibited on \"MAP\" elements. To restore previous behavior set \"spark.sql.legacy.allowHashOnMapType\" to \"true\"." ] }, + "HASH_VARIANT_TYPE" : { + "message" : [ + "Input to the function cannot contain elements of the \"VARIANT\" type yet." + ] + }, "INPUT_SIZE_NOT_ONE" : { "message" : [ "Length of should be 1." @@ -753,7 +798,7 @@ }, "INVALID_JSON_SCHEMA" : { "message" : [ - "Input schema must be a struct, an array or a map." + "Input schema must be a struct, an array, a map or a variant." ] }, "INVALID_MAP_KEY_TYPE" : { @@ -1036,7 +1081,7 @@ }, "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT" : { "message" : [ - "Call to function is invalid because it includes multiple argument assignments to the same parameter name ." + "Call to routine is invalid because it includes multiple argument assignments to the same parameter name ." ], "subClass" : { "BOTH_POSITIONAL_AND_NAMED" : { @@ -1052,6 +1097,14 @@ }, "sqlState" : "4274K" }, + "EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED" : { + "message" : [ + "Previous node emitted a row with eventTime= which is older than current_watermark_value=", + "This can lead to correctness issues in the stateful operators downstream in the execution pipeline.", + "Please correct the operator logic to emit rows after current global watermark value." + ], + "sqlState" : "42815" + }, "EMPTY_JSON_FIELD_VALUE" : { "message" : [ "Failed to parse an empty string for data type ." @@ -1224,6 +1277,11 @@ "List namespaces." ] }, + "LOAD_TABLE" : { + "message" : [ + "Load the table ." + ] + }, "NAMESPACE_EXISTS" : { "message" : [ "Check that the namespace exists." @@ -1313,7 +1371,20 @@ ], "sqlState" : "2203G" }, - "FIELDS_ALREADY_EXISTS" : { + "FAILED_TO_PARSE_TOO_COMPLEX" : { + "message" : [ + "The statement, including potential SQL functions and referenced views, was too complex to parse.", + "To mitigate this error divide the statement into multiple, less complex chunks." + ], + "sqlState" : "54001" + }, + "FEATURE_NOT_ENABLED" : { + "message" : [ + "The feature is not enabled. Consider setting the config to to enable this capability." + ], + "sqlState" : "56038" + }, + "FIELD_ALREADY_EXISTS" : { "message" : [ "Cannot column, because already exists in ." ], @@ -1883,7 +1954,7 @@ "subClass" : { "DEFAULT_COLLATION" : { "message" : [ - "Cannot resolve the given default collation. Did you mean ''?" + "Cannot resolve the given default collation. Suggested valid collation names: ['']?" ] }, "TIME_ZONE" : { @@ -1983,6 +2054,11 @@ "Delimiter cannot be empty string." ] }, + "NULL_VALUE" : { + "message" : [ + "Delimiter cannot be null." + ] + }, "SINGLE_BACKSLASH" : { "message" : [ "Single backslash is prohibited. It has special meaning as beginning of an escape sequence. To get the backslash character, pass a string with two backslashes as the delimiter." @@ -2304,12 +2380,24 @@ }, "sqlState" : "42K0K" }, + "INVALID_JOIN_TYPE_FOR_JOINWITH" : { + "message" : [ + "Invalid join type in joinWith: ." + ], + "sqlState" : "42613" + }, "INVALID_JSON_DATA_TYPE" : { "message" : [ "Failed to convert the JSON string '' to a data type. Please enter a valid data type." ], "sqlState" : "2203G" }, + "INVALID_JSON_DATA_TYPE_FOR_COLLATIONS" : { + "message" : [ + "Collations can only be applied to string types, but the JSON data type is ." 
+ ], + "sqlState" : "2203G" + }, "INVALID_JSON_ROOT_FIELD" : { "message" : [ "Cannot convert JSON root field to target Spark type." @@ -2621,6 +2709,12 @@ ], "sqlState" : "42000" }, + "INVALID_SINGLE_VARIANT_COLUMN" : { + "message" : [ + "The `singleVariantColumn` option cannot be used if there is also a user specified schema." + ], + "sqlState" : "42613" + }, "INVALID_SQL_ARG" : { "message" : [ "The argument of `sql()` is invalid. Consider to replace it either by a SQL literal or by collection constructor functions such as `map()`, `array()`, `struct()`." @@ -2637,9 +2731,9 @@ "ANALYZE TABLE(S) ... COMPUTE STATISTICS ... must be either NOSCAN or empty." ] }, - "CREATE_FUNC_WITH_IF_NOT_EXISTS_AND_REPLACE" : { + "CREATE_ROUTINE_WITH_IF_NOT_EXISTS_AND_REPLACE" : { "message" : [ - "CREATE FUNCTION with both IF NOT EXISTS and REPLACE is not allowed." + "Cannot create a routine with both IF NOT EXISTS and REPLACE specified." ] }, "CREATE_TEMP_FUNC_WITH_DATABASE" : { @@ -2825,6 +2919,12 @@ ], "sqlState" : "42000" }, + "INVALID_UTF8_STRING" : { + "message" : [ + "Invalid UTF8 byte sequence found in string: ." + ], + "sqlState" : "22029" + }, "INVALID_VARIABLE_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE" : { "message" : [ "Variable type must be string type but got ." @@ -2922,6 +3022,12 @@ ], "sqlState" : "42710" }, + "MALFORMED_CHARACTER_CODING" : { + "message" : [ + "Invalid value found when performing with " + ], + "sqlState" : "22000" + }, "MALFORMED_CSV_RECORD" : { "message" : [ "Malformed CSV record: " @@ -3154,6 +3260,12 @@ ], "sqlState" : "42809" }, + "NOT_NULL_ASSERT_VIOLATION" : { + "message" : [ + "NULL value appeared in non-nullable field: If the schema is inferred from a Scala tuple/case class, or a Java bean, please try to use scala.Option[_] or other nullable types (such as java.lang.Integer instead of int/scala.Int)." + ], + "sqlState" : "42000" + }, "NOT_NULL_CONSTRAINT_VIOLATION" : { "message" : [ "Assigning a NULL is not allowed here." @@ -3256,6 +3368,12 @@ ], "sqlState" : "42000" }, + "NULL_DATA_SOURCE_OPTION" : { + "message" : [ + "Data source read/write option

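For context on the conditions added above: a condition defined in `error-conditions.json` is raised from Scala through `SparkThrowable` implementations such as `SparkException`. A hedged sketch, assuming the `SparkException(errorClass, messageParameters, cause)` constructor; the condition and parameter names below are hypothetical placeholders, not entries from this patch:

```scala
import org.apache.spark.SparkException

// Hypothetical condition and parameter names, for illustration only.
def failOnUnsupportedDirection(direction: String): Nothing = {
  throw new SparkException(
    errorClass = "MY_FEATURE.UNSUPPORTED_DIRECTION", // would be defined in error-conditions.json
    messageParameters = Map("direction" -> direction),
    cause = null)
}
```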
+ * + * `LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs. + * Follow these guidelines when adding a new LogKey:
+ * <ul>
+ *   <li> Define all structured logging keys in `LogKey.scala`, and sort them alphabetically for ease of search. </li>
+ *   <li> Use `UPPER_SNAKE_CASE` for key names. </li>
+ *   <li> Key names should be both simple and broad, yet include specific identifiers like `STAGE_ID`, `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use `MAX_ATTEMPTS` as a general key instead of creating separate keys for each scenario such as `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`. This balances simplicity with the detail needed for effective logging. </li>
+ *   <li> Use abbreviations in names if they are widely understood, such as `APP_ID` for APPLICATION_ID, and `K8S` for KUBERNETES. </li>
+ *   <li> For time-related keys, use milliseconds as the unit of time. </li>
+ * </ul>
*/ -object LogKey extends Enumeration { - val ACCUMULATOR_ID = Value - val ANALYSIS_ERROR = Value - val APP_DESC = Value - val APP_ID = Value - val APP_STATE = Value - val BATCH_ID = Value - val BLOCK_ID = Value - val BLOCK_MANAGER_ID = Value - val BROADCAST_ID = Value - val BUCKET = Value - val BYTECODE_SIZE = Value - val CACHE_AUTO_REMOVED_SIZE = Value - val CACHE_UNTIL_HIGHEST_CONSUMED_SIZE = Value - val CACHE_UNTIL_LAST_PRODUCED_SIZE = Value - val CATEGORICAL_FEATURES = Value - val CLASS_LOADER = Value - val CLASS_NAME = Value - val CLUSTER_ID = Value - val CODEC_LEVEL = Value - val CODEC_NAME = Value - val COLUMN_DATA_TYPE_SOURCE = Value - val COLUMN_DATA_TYPE_TARGET = Value - val COLUMN_DEFAULT_VALUE = Value - val COLUMN_NAME = Value - val COMMAND = Value - val COMMAND_OUTPUT = Value - val COMPONENT = Value - val CONFIG = Value - val CONFIG2 = Value - val CONFIG3 = Value - val CONFIG4 = Value - val CONFIG5 = Value - val CONSUMER = Value - val CONTAINER = Value - val CONTAINER_ID = Value - val COUNT = Value - val CSV_HEADER_COLUMN_NAME = Value - val CSV_HEADER_COLUMN_NAMES = Value - val CSV_HEADER_LENGTH = Value - val CSV_SCHEMA_FIELD_NAME = Value - val CSV_SCHEMA_FIELD_NAMES = Value - val CSV_SOURCE = Value - val DATA = Value - val DATABASE_NAME = Value - val DATAFRAME_CACHE_ENTRY = Value - val DATAFRAME_ID = Value - val DESCRIPTION = Value - val DRIVER_ID = Value - val DROPPED_PARTITIONS = Value - val DURATION = Value - val END_POINT = Value - val ENGINE = Value - val ERROR = Value - val EVENT_LOOP = Value - val EVENT_QUEUE = Value - val EXECUTE_INFO = Value - val EXECUTE_KEY = Value - val EXECUTOR_ENV_REGEX = Value - val EXECUTOR_ID = Value - val EXECUTOR_IDS = Value - val EXECUTOR_STATE = Value - val EXIT_CODE = Value - val EXPRESSION_TERMS = Value - val FAILURES = Value - val FALLBACK_VERSION = Value - val FIELD_NAME = Value - val FILE_FORMAT = Value - val FILE_FORMAT2 = Value - val FROM_OFFSET = Value - val FUNCTION_NAME = Value - val FUNCTION_PARAMETER = Value - val GROUP_ID = Value - val HADOOP_VERSION = Value - val HISTORY_DIR = Value - val HIVE_OPERATION_STATE = Value - val HIVE_OPERATION_TYPE = Value - val HOST = Value - val HOST_PORT = Value - val INDEX = Value - val INFERENCE_MODE = Value - val INITIAL_CAPACITY = Value - val INTERVAL = Value - val JOB_ID = Value - val JOIN_CONDITION = Value - val JOIN_CONDITION_SUB_EXPRESSION = Value - val KAFKA_PULLS_COUNT = Value - val KAFKA_RECORDS_PULLED_COUNT = Value - val KEY = Value - val LAST_ACCESS_TIME = Value - val LEARNING_RATE = Value - val LINE = Value - val LINE_NUM = Value - val LISTENER = Value - val LOAD_FACTOR = Value - val LOG_TYPE = Value - val MASTER_URL = Value - val MAX_ATTEMPTS = Value - val MAX_CACHE_UNTIL_HIGHEST_CONSUMED_SIZE = Value - val MAX_CACHE_UNTIL_LAST_PRODUCED_SIZE = Value - val MAX_CAPACITY = Value - val MAX_CATEGORIES = Value - val MAX_EXECUTOR_FAILURES = Value - val MAX_SIZE = Value - val MERGE_DIR_NAME = Value - val MESSAGE = Value - val METHOD_NAME = Value - val MIN_SIZE = Value - val NEW_VALUE = Value - val NUM_COLUMNS = Value - val NUM_ITERATIONS = Value - val OBJECT_ID = Value - val OFFSET = Value - val OFFSETS = Value - val OLD_BLOCK_MANAGER_ID = Value - val OLD_VALUE = Value - val OPTIMIZER_CLASS_NAME = Value - val OP_ID = Value - val OP_TYPE = Value - val PARSE_MODE = Value - val PARTITION_ID = Value - val PARTITION_SPECIFICATION = Value - val PARTITION_SPECS = Value - val PATH = Value - val PATHS = Value - val POD_ID = Value - val POD_NAME = Value - val POD_NAMESPACE = Value - val POD_PHASE 
= Value - val POLICY = Value - val PORT = Value - val PRODUCER_ID = Value - val QUERY_CACHE_VALUE = Value - val QUERY_HINT = Value - val QUERY_ID = Value - val QUERY_PLAN = Value - val QUERY_PLAN_COMPARISON = Value - val QUERY_PLAN_LENGTH_ACTUAL = Value - val QUERY_PLAN_LENGTH_MAX = Value - val RANGE = Value - val RDD_ID = Value - val REASON = Value - val REATTACHABLE = Value - val RECEIVED_BLOCK_INFO = Value - val REDUCE_ID = Value - val RELATION_NAME = Value - val REMAINING_PARTITIONS = Value - val RESOURCE_NAME = Value - val RETRY_COUNT = Value - val RETRY_INTERVAL = Value - val RULE_BATCH_NAME = Value - val RULE_NAME = Value - val RULE_NUMBER_OF_RUNS = Value - val RUN_ID = Value - val SCHEMA = Value - val SCHEMA2 = Value - val SERVICE_NAME = Value - val SESSION_HOLD_INFO = Value - val SESSION_ID = Value - val SESSION_KEY = Value - val SHARD_ID = Value - val SHUFFLE_BLOCK_INFO = Value - val SHUFFLE_ID = Value - val SHUFFLE_MERGE_ID = Value - val SIZE = Value - val SLEEP_TIME = Value - val SQL_TEXT = Value - val STAGE_ID = Value - val STATEMENT_ID = Value - val STATUS = Value - val STREAM_ID = Value - val STREAM_NAME = Value - val SUBMISSION_ID = Value - val SUBSAMPLING_RATE = Value - val TABLE_NAME = Value - val TASK_ATTEMPT_ID = Value - val TASK_ID = Value - val TASK_NAME = Value - val TASK_SET_NAME = Value - val TASK_STATE = Value - val THREAD = Value - val THREAD_NAME = Value - val TID = Value - val TIME = Value - val TIMEOUT = Value - val TIME_UNITS = Value - val TIP = Value - val TOPIC = Value - val TOPIC_PARTITION = Value - val TOPIC_PARTITIONS = Value - val TOPIC_PARTITION_OFFSET = Value - val TOPIC_PARTITION_OFFSET_RANGE = Value - val TOTAL_EFFECTIVE_TIME = Value - val TOTAL_RECORDS_READ = Value - val TOTAL_SIZE = Value - val TOTAL_TIME = Value - val TOTAL_TIME_READ = Value - val UNSUPPORTED_EXPRESSION = Value - val UNSUPPORTED_HINT_REASON = Value - val UNTIL_OFFSET = Value - val URI = Value - val USER_ID = Value - val USER_NAME = Value - val WAIT_RESULT_TIME = Value - val WAIT_SEND_TIME = Value - val WAIT_TIME = Value - val WATERMARK_CONSTRAINT = Value - val WORKER_URL = Value - val XSD_PATH = Value +trait LogKey { + private lazy val _name: String = getClass.getSimpleName.stripSuffix("$").toLowerCase(Locale.ROOT) + def name: String = _name +} - type LogKey = Value +/** + * Various keys used for mapped diagnostic contexts(MDC) in logging. All structured logging keys + * should be defined here for standardization. 
+ */ +private[spark] object LogKeys { + case object ACCUMULATOR_ID extends LogKey + case object ACL_ENABLED extends LogKey + case object ACTUAL_NUM_FILES extends LogKey + case object ACTUAL_PARTITION_COLUMN extends LogKey + case object ADDED_JARS extends LogKey + case object ADMIN_ACLS extends LogKey + case object ADMIN_ACL_GROUPS extends LogKey + case object ADVISORY_TARGET_SIZE extends LogKey + case object AGGREGATE_FUNCTIONS extends LogKey + case object ALIGNED_FROM_TIME extends LogKey + case object ALIGNED_TO_TIME extends LogKey + case object ALPHA extends LogKey + case object ANALYSIS_ERROR extends LogKey + case object APP_ATTEMPT_ID extends LogKey + case object APP_ATTEMPT_SHUFFLE_MERGE_ID extends LogKey + case object APP_DESC extends LogKey + case object APP_EXECUTOR_ID extends LogKey + case object APP_ID extends LogKey + case object APP_NAME extends LogKey + case object APP_STATE extends LogKey + case object ARCHIVE_NAME extends LogKey + case object ARGS extends LogKey + case object ARTIFACTS extends LogKey + case object ARTIFACT_ID extends LogKey + case object ATTRIBUTE_MAP extends LogKey + case object AUTH_ENABLED extends LogKey + case object AVG_BATCH_PROC_TIME extends LogKey + case object BACKUP_FILE extends LogKey + case object BARRIER_EPOCH extends LogKey + case object BARRIER_ID extends LogKey + case object BATCH_ID extends LogKey + case object BATCH_NAME extends LogKey + case object BATCH_TIMESTAMP extends LogKey + case object BATCH_WRITE extends LogKey + case object BIND_ADDRESS extends LogKey + case object BLOCK_ID extends LogKey + case object BLOCK_IDS extends LogKey + case object BLOCK_MANAGER_ID extends LogKey + case object BLOCK_MANAGER_IDS extends LogKey + case object BLOCK_TYPE extends LogKey + case object BOOT extends LogKey + case object BOOTSTRAP_TIME extends LogKey + case object BROADCAST extends LogKey + case object BROADCAST_ID extends LogKey + case object BROADCAST_OUTPUT_STATUS_SIZE extends LogKey + case object BUCKET extends LogKey + case object BYTECODE_SIZE extends LogKey + case object BYTE_BUFFER extends LogKey + case object BYTE_SIZE extends LogKey + case object CACHED_TABLE_PARTITION_METADATA_SIZE extends LogKey + case object CACHE_AUTO_REMOVED_SIZE extends LogKey + case object CACHE_UNTIL_HIGHEST_CONSUMED_SIZE extends LogKey + case object CACHE_UNTIL_LAST_PRODUCED_SIZE extends LogKey + case object CALL_SITE_LONG_FORM extends LogKey + case object CALL_SITE_SHORT_FORM extends LogKey + case object CANCEL_FUTURE_JOBS extends LogKey + case object CATALOG_NAME extends LogKey + case object CATEGORICAL_FEATURES extends LogKey + case object CHECKPOINT_FILE extends LogKey + case object CHECKPOINT_INTERVAL extends LogKey + case object CHECKPOINT_LOCATION extends LogKey + case object CHECKPOINT_PATH extends LogKey + case object CHECKPOINT_ROOT extends LogKey + case object CHECKPOINT_TIME extends LogKey + case object CHOSEN_WATERMARK extends LogKey + case object CLASSIFIER extends LogKey + case object CLASS_LOADER extends LogKey + case object CLASS_NAME extends LogKey + case object CLASS_PATH extends LogKey + case object CLASS_PATHS extends LogKey + case object CLAUSES extends LogKey + case object CLEANUP_LOCAL_DIRS extends LogKey + case object CLUSTER_CENTROIDS extends LogKey + case object CLUSTER_ID extends LogKey + case object CLUSTER_LABEL extends LogKey + case object CLUSTER_LEVEL extends LogKey + case object CLUSTER_WEIGHT extends LogKey + case object CODEC_LEVEL extends LogKey + case object CODEC_NAME extends LogKey + case object CODEGEN_STAGE_ID extends 
LogKey + case object COLUMN_DATA_TYPE_SOURCE extends LogKey + case object COLUMN_DATA_TYPE_TARGET extends LogKey + case object COLUMN_DEFAULT_VALUE extends LogKey + case object COLUMN_NAME extends LogKey + case object COMMAND extends LogKey + case object COMMAND_OUTPUT extends LogKey + case object COMMITTED_VERSION extends LogKey + case object COMPACT_INTERVAL extends LogKey + case object COMPONENT extends LogKey + case object COMPUTE extends LogKey + case object CONFIG extends LogKey + case object CONFIG2 extends LogKey + case object CONFIG3 extends LogKey + case object CONFIG4 extends LogKey + case object CONFIG5 extends LogKey + case object CONFIG_DEPRECATION_MESSAGE extends LogKey + case object CONFIG_KEY_UPDATED extends LogKey + case object CONFIG_VERSION extends LogKey + case object CONSUMER extends LogKey + case object CONTAINER extends LogKey + case object CONTAINER_ID extends LogKey + case object CONTAINER_STATE extends LogKey + case object CONTEXT extends LogKey + case object COST extends LogKey + case object COUNT extends LogKey + case object CREATED_POOL_NAME extends LogKey + case object CREATION_SITE extends LogKey + case object CREDENTIALS_RENEWAL_INTERVAL_RATIO extends LogKey + case object CROSS_VALIDATION_METRIC extends LogKey + case object CROSS_VALIDATION_METRICS extends LogKey + case object CSV_HEADER_COLUMN_NAME extends LogKey + case object CSV_HEADER_COLUMN_NAMES extends LogKey + case object CSV_HEADER_LENGTH extends LogKey + case object CSV_SCHEMA_FIELD_NAME extends LogKey + case object CSV_SCHEMA_FIELD_NAMES extends LogKey + case object CSV_SOURCE extends LogKey + case object CURRENT_BATCH_ID extends LogKey + case object CURRENT_DISK_SIZE extends LogKey + case object CURRENT_FILE extends LogKey + case object CURRENT_MEMORY_SIZE extends LogKey + case object CURRENT_PATH extends LogKey + case object CURRENT_TIME extends LogKey + case object DATA extends LogKey + case object DATABASE_NAME extends LogKey + case object DATAFRAME_CACHE_ENTRY extends LogKey + case object DATAFRAME_ID extends LogKey + case object DATA_FILE extends LogKey + case object DATA_SOURCE extends LogKey + case object DATA_SOURCES extends LogKey + case object DEFAULT_COMPACT_INTERVAL extends LogKey + case object DEFAULT_ISOLATION_LEVEL extends LogKey + case object DEFAULT_NAME extends LogKey + case object DEFAULT_VALUE extends LogKey + case object DELAY extends LogKey + case object DELEGATE extends LogKey + case object DELTA extends LogKey + case object DEPRECATED_KEY extends LogKey + case object DERIVATIVE extends LogKey + case object DESCRIPTION extends LogKey + case object DESIRED_NUM_PARTITIONS extends LogKey + case object DESIRED_TREE_DEPTH extends LogKey + case object DESTINATION_PATH extends LogKey + case object DFS_FILE extends LogKey + case object DIFF_DELTA extends LogKey + case object DIVISIBLE_CLUSTER_INDICES_SIZE extends LogKey + case object DRIVER_ID extends LogKey + case object DRIVER_MEMORY_SIZE extends LogKey + case object DRIVER_STATE extends LogKey + case object DROPPED_PARTITIONS extends LogKey + case object DSTREAM extends LogKey + case object DURATION extends LogKey + case object EARLIEST_LOADED_VERSION extends LogKey + case object EFFECTIVE_STORAGE_LEVEL extends LogKey + case object ELAPSED_TIME extends LogKey + case object ENCODING extends LogKey + case object END_INDEX extends LogKey + case object END_POINT extends LogKey + case object END_VERSION extends LogKey + case object ENGINE extends LogKey + case object EPOCH extends LogKey + case object ERROR extends LogKey + case 
object ESTIMATOR_PARAM_MAP extends LogKey + case object EVALUATED_FILTERS extends LogKey + case object EVENT extends LogKey + case object EVENT_LOG_DESTINATION extends LogKey + case object EVENT_LOOP extends LogKey + case object EVENT_NAME extends LogKey + case object EVENT_QUEUE extends LogKey + case object EXCEPTION extends LogKey + case object EXECUTE_INFO extends LogKey + case object EXECUTE_KEY extends LogKey + case object EXECUTION_MEMORY_SIZE extends LogKey + case object EXECUTION_PLAN_LEAVES extends LogKey + case object EXECUTOR_BACKEND extends LogKey + case object EXECUTOR_ENVS extends LogKey + case object EXECUTOR_ENV_REGEX extends LogKey + case object EXECUTOR_ID extends LogKey + case object EXECUTOR_IDS extends LogKey + case object EXECUTOR_LAUNCH_COMMANDS extends LogKey + case object EXECUTOR_MEMORY_SIZE extends LogKey + case object EXECUTOR_RESOURCES extends LogKey + case object EXECUTOR_SHUFFLE_INFO extends LogKey + case object EXECUTOR_STATE extends LogKey + case object EXECUTOR_TIMEOUT extends LogKey + case object EXECUTOR_USER_CLASS_PATH_FIRST extends LogKey + case object EXEC_AMOUNT extends LogKey + case object EXISTING_FILE extends LogKey + case object EXISTING_PATH extends LogKey + case object EXIT_CODE extends LogKey + case object EXPECTED_NUM_FILES extends LogKey + case object EXPECTED_PARTITION_COLUMN extends LogKey + case object EXPIRY_TIMESTAMP extends LogKey + case object EXPR extends LogKey + case object EXPR_TERMS extends LogKey + case object EXTENDED_EXPLAIN_GENERATOR extends LogKey + case object FAILED_STAGE extends LogKey + case object FAILED_STAGE_NAME extends LogKey + case object FAILURES extends LogKey + case object FALLBACK_VERSION extends LogKey + case object FEATURE_COLUMN extends LogKey + case object FEATURE_DIMENSION extends LogKey + case object FEATURE_NAME extends LogKey + case object FETCH_SIZE extends LogKey + case object FIELD_NAME extends LogKey + case object FILES extends LogKey + case object FILE_ABSOLUTE_PATH extends LogKey + case object FILE_END_OFFSET extends LogKey + case object FILE_FORMAT extends LogKey + case object FILE_FORMAT2 extends LogKey + case object FILE_LENGTH_XATTR extends LogKey + case object FILE_MODIFICATION_TIME extends LogKey + case object FILE_NAME extends LogKey + case object FILE_NAME2 extends LogKey + case object FILE_NAME3 extends LogKey + case object FILE_START_OFFSET extends LogKey + case object FILE_SYSTEM extends LogKey + case object FILE_VERSION extends LogKey + case object FILTER extends LogKey + case object FINAL_CONTEXT extends LogKey + case object FINAL_OUTPUT_PATH extends LogKey + case object FINAL_PATH extends LogKey + case object FINISH_TRIGGER_DURATION extends LogKey + case object FREE_MEMORY_SIZE extends LogKey + case object FROM_OFFSET extends LogKey + case object FROM_TIME extends LogKey + case object FS_DATA_OUTPUT_STREAM extends LogKey + case object FUNCTION_NAME extends LogKey + case object FUNCTION_PARAM extends LogKey + case object GLOBAL_INIT_FILE extends LogKey + case object GLOBAL_WATERMARK extends LogKey + case object GROUP_BY_EXPRS extends LogKey + case object GROUP_ID extends LogKey + case object HADOOP_VERSION extends LogKey + case object HASH_JOIN_KEYS extends LogKey + case object HASH_MAP_SIZE extends LogKey + case object HEARTBEAT extends LogKey + case object HEARTBEAT_INTERVAL extends LogKey + case object HISTORY_DIR extends LogKey + case object HIVE_CLIENT_VERSION extends LogKey + case object HIVE_METASTORE_VERSION extends LogKey + case object HIVE_OPERATION_STATE extends LogKey + 
case object HIVE_OPERATION_TYPE extends LogKey + case object HOST extends LogKey + case object HOSTS extends LogKey + case object HOST_LOCAL_BLOCKS_SIZE extends LogKey + case object HOST_PORT extends LogKey + case object HOST_PORT2 extends LogKey + case object HUGE_METHOD_LIMIT extends LogKey + case object HYBRID_STORE_DISK_BACKEND extends LogKey + case object IDENTIFIER extends LogKey + case object INCOMPATIBLE_TYPES extends LogKey + case object INDEX extends LogKey + case object INDEX_FILE extends LogKey + case object INDEX_NAME extends LogKey + case object INFERENCE_MODE extends LogKey + case object INIT extends LogKey + case object INITIAL_CAPACITY extends LogKey + case object INITIAL_HEARTBEAT_INTERVAL extends LogKey + case object INIT_MODE extends LogKey + case object INPUT extends LogKey + case object INPUT_SPLIT extends LogKey + case object INTEGRAL extends LogKey + case object INTERVAL extends LogKey + case object ISOLATION_LEVEL extends LogKey + case object ISSUE_DATE extends LogKey + case object IS_NETWORK_REQUEST_DONE extends LogKey + case object JAR_ENTRY extends LogKey + case object JAR_MESSAGE extends LogKey + case object JAR_URL extends LogKey + case object JAVA_VERSION extends LogKey + case object JAVA_VM_NAME extends LogKey + case object JOB_ID extends LogKey + case object JOIN_CONDITION extends LogKey + case object JOIN_CONDITION_SUB_EXPR extends LogKey + case object JOIN_TYPE extends LogKey + case object K8S_CONTEXT extends LogKey + case object KEY extends LogKey + case object KEY2 extends LogKey + case object KEYTAB extends LogKey + case object KEYTAB_FILE extends LogKey + case object KILL_EXECUTORS extends LogKey + case object LABEL_COLUMN extends LogKey + case object LARGEST_CLUSTER_INDEX extends LogKey + case object LAST_ACCESS_TIME extends LogKey + case object LAST_VALID_TIME extends LogKey + case object LATEST_BATCH_ID extends LogKey + case object LATEST_COMMITTED_BATCH_ID extends LogKey + case object LATEST_SHUFFLE_MERGE_ID extends LogKey + case object LEARNING_RATE extends LogKey + case object LEFT_EXPR extends LogKey + case object LEFT_LOGICAL_PLAN_STATS_SIZE_IN_BYTES extends LogKey + case object LINE extends LogKey + case object LINE_NUM extends LogKey + case object LISTENER extends LogKey + case object LOADED_VERSION extends LogKey + case object LOAD_FACTOR extends LogKey + case object LOAD_TIME extends LogKey + case object LOCALE extends LogKey + case object LOCAL_BLOCKS_SIZE extends LogKey + case object LOCAL_SCRATCH_DIR extends LogKey + case object LOCATION extends LogKey + case object LOGICAL_PLAN extends LogKey + case object LOGICAL_PLAN_COLUMNS extends LogKey + case object LOGICAL_PLAN_LEAVES extends LogKey + case object LOG_ID extends LogKey + case object LOG_LEVEL extends LogKey + case object LOG_OFFSET extends LogKey + case object LOG_TYPE extends LogKey + case object LOWER_BOUND extends LogKey + case object MALFORMATTED_STRING extends LogKey + case object MAP_ID extends LogKey + case object MASTER_URL extends LogKey + case object MAX_ATTEMPTS extends LogKey + case object MAX_CACHE_UNTIL_HIGHEST_CONSUMED_SIZE extends LogKey + case object MAX_CACHE_UNTIL_LAST_PRODUCED_SIZE extends LogKey + case object MAX_CAPACITY extends LogKey + case object MAX_CATEGORIES extends LogKey + case object MAX_EXECUTOR_FAILURES extends LogKey + case object MAX_FILE_VERSION extends LogKey + case object MAX_JVM_METHOD_PARAMS_LENGTH extends LogKey + case object MAX_MEMORY_SIZE extends LogKey + case object MAX_METHOD_CODE_SIZE extends LogKey + case object MAX_NUM_BINS extends 
LogKey + case object MAX_NUM_CHUNKS extends LogKey + case object MAX_NUM_FILES extends LogKey + case object MAX_NUM_LOG_POLICY extends LogKey + case object MAX_NUM_PARTITIONS extends LogKey + case object MAX_NUM_POSSIBLE_BINS extends LogKey + case object MAX_NUM_ROWS_IN_MEMORY_BUFFER extends LogKey + case object MAX_SERVICE_NAME_LENGTH extends LogKey + case object MAX_SIZE extends LogKey + case object MAX_SLOTS extends LogKey + case object MAX_SPLIT_BYTES extends LogKey + case object MAX_TABLE_PARTITION_METADATA_SIZE extends LogKey + case object MEMORY_CONSUMER extends LogKey + case object MEMORY_POOL_NAME extends LogKey + case object MEMORY_SIZE extends LogKey + case object MEMORY_THRESHOLD_SIZE extends LogKey + case object MERGE_DIR_NAME extends LogKey + case object MESSAGE extends LogKey + case object METADATA extends LogKey + case object METADATA_DIRECTORY extends LogKey + case object METADATA_JSON extends LogKey + case object META_FILE extends LogKey + case object METHOD_NAME extends LogKey + case object METHOD_PARAM_TYPES extends LogKey + case object METRICS_JSON extends LogKey + case object METRIC_NAME extends LogKey + case object MINI_BATCH_FRACTION extends LogKey + case object MIN_COMPACTION_BATCH_ID extends LogKey + case object MIN_NUM_FREQUENT_PATTERN extends LogKey + case object MIN_POINT_PER_CLUSTER extends LogKey + case object MIN_RATE extends LogKey + case object MIN_SHARE extends LogKey + case object MIN_SIZE extends LogKey + case object MIN_TIME extends LogKey + case object MIN_VERSION_NUM extends LogKey + case object MISSING_PARENT_STAGES extends LogKey + case object MODEL_WEIGHTS extends LogKey + case object MODULE_NAME extends LogKey + case object NAMESPACE extends LogKey + case object NETWORK_IF extends LogKey + case object NEW_FEATURE_COLUMN_NAME extends LogKey + case object NEW_LABEL_COLUMN_NAME extends LogKey + case object NEW_PATH extends LogKey + case object NEW_RDD_ID extends LogKey + case object NEW_STATE extends LogKey + case object NEW_VALUE extends LogKey + case object NEXT_RENEWAL_TIME extends LogKey + case object NODES extends LogKey + case object NODE_LOCATION extends LogKey + case object NON_BUILT_IN_CONNECTORS extends LogKey + case object NORM extends LogKey + case object NUM_ADDED_PARTITIONS extends LogKey + case object NUM_APPS extends LogKey + case object NUM_ATTEMPT extends LogKey + case object NUM_BIN extends LogKey + case object NUM_BLOCKS extends LogKey + case object NUM_BROADCAST_BLOCK extends LogKey + case object NUM_BYTES extends LogKey + case object NUM_BYTES_CURRENT extends LogKey + case object NUM_BYTES_EVICTED extends LogKey + case object NUM_BYTES_MAX extends LogKey + case object NUM_BYTES_TO_FREE extends LogKey + case object NUM_BYTES_TO_WARN extends LogKey + case object NUM_BYTES_USED extends LogKey + case object NUM_CATEGORIES extends LogKey + case object NUM_CHECKSUM_FILE extends LogKey + case object NUM_CHUNKS extends LogKey + case object NUM_CLASSES extends LogKey + case object NUM_COEFFICIENTS extends LogKey + case object NUM_COLUMNS extends LogKey + case object NUM_CONCURRENT_WRITER extends LogKey + case object NUM_CORES extends LogKey + case object NUM_DATA_FILE extends LogKey + case object NUM_DATA_FILES extends LogKey + case object NUM_DECOMMISSIONED extends LogKey + case object NUM_DRIVERS extends LogKey + case object NUM_DROPPED_PARTITIONS extends LogKey + case object NUM_EFFECTIVE_RULE_OF_RUNS extends LogKey + case object NUM_ELEMENTS_SPILL_THRESHOLD extends LogKey + case object NUM_EVENTS extends LogKey + case object 
NUM_EXAMPLES extends LogKey + case object NUM_EXECUTORS extends LogKey + case object NUM_EXECUTORS_EXITED extends LogKey + case object NUM_EXECUTORS_KILLED extends LogKey + case object NUM_EXECUTOR_CORES extends LogKey + case object NUM_EXECUTOR_CORES_REMAINING extends LogKey + case object NUM_EXECUTOR_CORES_TOTAL extends LogKey + case object NUM_EXECUTOR_DESIRED extends LogKey + case object NUM_EXECUTOR_LAUNCH extends LogKey + case object NUM_EXECUTOR_TARGET extends LogKey + case object NUM_FAILURES extends LogKey + case object NUM_FEATURES extends LogKey + case object NUM_FILES extends LogKey + case object NUM_FILES_COPIED extends LogKey + case object NUM_FILES_FAILED_TO_DELETE extends LogKey + case object NUM_FILES_REUSED extends LogKey + case object NUM_FREQUENT_ITEMS extends LogKey + case object NUM_HOST_LOCAL_BLOCKS extends LogKey + case object NUM_INDEX_FILE extends LogKey + case object NUM_INDEX_FILES extends LogKey + case object NUM_ITERATIONS extends LogKey + case object NUM_KAFKA_PULLS extends LogKey + case object NUM_KAFKA_RECORDS_PULLED extends LogKey + case object NUM_LEADING_SINGULAR_VALUES extends LogKey + case object NUM_LEFT_PARTITION_VALUES extends LogKey + case object NUM_LOADED_ENTRIES extends LogKey + case object NUM_LOCAL_BLOCKS extends LogKey + case object NUM_LOCAL_DIRS extends LogKey + case object NUM_LOCAL_FREQUENT_PATTERN extends LogKey + case object NUM_MERGERS extends LogKey + case object NUM_MERGER_LOCATIONS extends LogKey + case object NUM_META_FILES extends LogKey + case object NUM_NODES extends LogKey + case object NUM_PARTITIONS extends LogKey + case object NUM_PARTITIONS2 extends LogKey + case object NUM_PATHS extends LogKey + case object NUM_PEERS extends LogKey + case object NUM_PEERS_REPLICATED_TO extends LogKey + case object NUM_PEERS_TO_REPLICATE_TO extends LogKey + case object NUM_PENDING_LAUNCH_TASKS extends LogKey + case object NUM_POD extends LogKey + case object NUM_POD_SHARED_SLOT extends LogKey + case object NUM_POD_TARGET extends LogKey + case object NUM_POINT extends LogKey + case object NUM_PREFIXES extends LogKey + case object NUM_PRUNED extends LogKey + case object NUM_PUSH_MERGED_LOCAL_BLOCKS extends LogKey + case object NUM_RECEIVERS extends LogKey + case object NUM_RECORDS_READ extends LogKey + case object NUM_RELEASED_LOCKS extends LogKey + case object NUM_REMAINED extends LogKey + case object NUM_REMOTE_BLOCKS extends LogKey + case object NUM_REMOVED_WORKERS extends LogKey + case object NUM_REPLICAS extends LogKey + case object NUM_REQUESTS extends LogKey + case object NUM_REQUEST_SYNC_TASK extends LogKey + case object NUM_RESOURCE_SLOTS extends LogKey + case object NUM_RETRIES extends LogKey + case object NUM_RETRY extends LogKey + case object NUM_RIGHT_PARTITION_VALUES extends LogKey + case object NUM_ROWS extends LogKey + case object NUM_RULE_OF_RUNS extends LogKey + case object NUM_SEQUENCES extends LogKey + case object NUM_SLOTS extends LogKey + case object NUM_SPILL_INFOS extends LogKey + case object NUM_SPILL_WRITERS extends LogKey + case object NUM_SUB_DIRS extends LogKey + case object NUM_SUCCESSFUL_TASKS extends LogKey + case object NUM_TASKS extends LogKey + case object NUM_TASK_CPUS extends LogKey + case object NUM_TRAIN_WORD extends LogKey + case object NUM_UNFINISHED_DECOMMISSIONED extends LogKey + case object NUM_VERSIONS_RETAIN extends LogKey + case object NUM_WEIGHTED_EXAMPLES extends LogKey + case object NUM_WORKERS extends LogKey + case object OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD extends LogKey + case object 
OBJECT_ID extends LogKey + case object OFFSET extends LogKey + case object OFFSETS extends LogKey + case object OFFSET_SEQUENCE_METADATA extends LogKey + case object OLD_BLOCK_MANAGER_ID extends LogKey + case object OLD_GENERATION_GC extends LogKey + case object OLD_VALUE extends LogKey + case object OPEN_COST_IN_BYTES extends LogKey + case object OPERATION_HANDLE extends LogKey + case object OPERATION_HANDLE_ID extends LogKey + case object OPERATION_ID extends LogKey + case object OPTIMIZED_PLAN_COLUMNS extends LogKey + case object OPTIMIZER_CLASS_NAME extends LogKey + case object OPTIONS extends LogKey + case object OP_ID extends LogKey + case object OP_TYPE extends LogKey + case object ORIGINAL_DISK_SIZE extends LogKey + case object ORIGINAL_MEMORY_SIZE extends LogKey + case object OS_ARCH extends LogKey + case object OS_NAME extends LogKey + case object OS_VERSION extends LogKey + case object OUTPUT extends LogKey + case object OVERHEAD_MEMORY_SIZE extends LogKey + case object PAGE_SIZE extends LogKey + case object PARENT_STAGES extends LogKey + case object PARSE_MODE extends LogKey + case object PARTITIONED_FILE_READER extends LogKey + case object PARTITIONER extends LogKey + case object PARTITION_ID extends LogKey + case object PARTITION_IDS extends LogKey + case object PARTITION_SIZE extends LogKey + case object PARTITION_SPECIFICATION extends LogKey + case object PARTITION_SPECS extends LogKey + case object PATH extends LogKey + case object PATHS extends LogKey + case object PEER extends LogKey + case object PERCENT extends LogKey + case object PIPELINE_STAGE_UID extends LogKey + case object PLUGIN_NAME extends LogKey + case object POD_ID extends LogKey + case object POD_NAME extends LogKey + case object POD_NAMESPACE extends LogKey + case object POD_PHASE extends LogKey + case object POD_STATE extends LogKey + case object POINT_OF_CENTER extends LogKey + case object POLICY extends LogKey + case object POOL_NAME extends LogKey + case object PORT extends LogKey + case object PORT2 extends LogKey + case object POST_SCAN_FILTERS extends LogKey + case object PREDICATE extends LogKey + case object PREDICATES extends LogKey + case object PREFERRED_SERVICE_NAME extends LogKey + case object PREFIX extends LogKey + case object PRETTY_ID_STRING extends LogKey + case object PRINCIPAL extends LogKey + case object PROCESS extends LogKey + case object PROCESSING_TIME extends LogKey + case object PRODUCER_ID extends LogKey + case object PROPERTY_NAME extends LogKey + case object PROPORTIONAL extends LogKey + case object PROTOCOL_VERSION extends LogKey + case object PROVIDER extends LogKey + case object PUSHED_FILTERS extends LogKey + case object PUSH_MERGED_LOCAL_BLOCKS_SIZE extends LogKey + case object PVC_METADATA_NAME extends LogKey + case object PYTHON_EXEC extends LogKey + case object PYTHON_PACKAGES extends LogKey + case object PYTHON_VERSION extends LogKey + case object PYTHON_WORKER_MODULE extends LogKey + case object PYTHON_WORKER_RESPONSE extends LogKey + case object QUANTILES extends LogKey + case object QUERY_CACHE_VALUE extends LogKey + case object QUERY_HINT extends LogKey + case object QUERY_ID extends LogKey + case object QUERY_PLAN extends LogKey + case object QUERY_PLAN_COMPARISON extends LogKey + case object QUERY_PLAN_LENGTH_ACTUAL extends LogKey + case object QUERY_PLAN_LENGTH_MAX extends LogKey + case object QUERY_RUN_ID extends LogKey + case object RANGE extends LogKey + case object RATE_LIMIT extends LogKey + case object RATIO extends LogKey + case object RDD extends 
LogKey + case object RDD_CHECKPOINT_DIR extends LogKey + case object RDD_DEBUG_STRING extends LogKey + case object RDD_DESCRIPTION extends LogKey + case object RDD_ID extends LogKey + case object READ_LIMIT extends LogKey + case object REASON extends LogKey + case object REATTACHABLE extends LogKey + case object RECEIVED_BLOCK_INFO extends LogKey + case object RECEIVED_BLOCK_TRACKER_LOG_EVENT extends LogKey + case object RECEIVER_ID extends LogKey + case object RECEIVER_IDS extends LogKey + case object RECORDS extends LogKey + case object RECOVERY_STATE extends LogKey + case object REDACTED_STATEMENT extends LogKey + case object REDUCE_ID extends LogKey + case object REGEX extends LogKey + case object REGISTERED_EXECUTOR_FILE extends LogKey + case object REGISTER_MERGE_RESULTS extends LogKey + case object RELATION_NAME extends LogKey + case object RELATION_OUTPUT extends LogKey + case object RELATIVE_TOLERANCE extends LogKey + case object RELEASED_LOCKS extends LogKey + case object REMAINING_PARTITIONS extends LogKey + case object REMOTE_ADDRESS extends LogKey + case object REMOTE_BLOCKS_SIZE extends LogKey + case object REMOVE_FROM_MASTER extends LogKey + case object REPORT_DETAILS extends LogKey + case object REQUESTER_SIZE extends LogKey + case object REQUEST_EXECUTORS extends LogKey + case object REQUEST_ID extends LogKey + case object RESOURCE extends LogKey + case object RESOURCE_NAME extends LogKey + case object RESOURCE_PROFILE_ID extends LogKey + case object RESOURCE_PROFILE_IDS extends LogKey + case object RESOURCE_PROFILE_TO_TOTAL_EXECS extends LogKey + case object RESPONSE_BODY_SIZE extends LogKey + case object RESULT extends LogKey + case object RESULT_SIZE_BYTES extends LogKey + case object RESULT_SIZE_BYTES_MAX extends LogKey + case object RETRY_INTERVAL extends LogKey + case object RETRY_WAIT_TIME extends LogKey + case object RIGHT_EXPR extends LogKey + case object RIGHT_LOGICAL_PLAN_STATS_SIZE_IN_BYTES extends LogKey + case object RMSE extends LogKey + case object ROCKS_DB_LOG_LEVEL extends LogKey + case object ROCKS_DB_LOG_MESSAGE extends LogKey + case object RPC_ADDRESS extends LogKey + case object RPC_ENDPOINT_REF extends LogKey + case object RPC_MESSAGE_CAPACITY extends LogKey + case object RULE_NAME extends LogKey + case object RUN_ID extends LogKey + case object SCALA_VERSION extends LogKey + case object SCALING_DOWN_RATIO extends LogKey + case object SCALING_UP_RATIO extends LogKey + case object SCHEDULER_POOL_NAME extends LogKey + case object SCHEDULING_MODE extends LogKey + case object SCHEMA extends LogKey + case object SCHEMA2 extends LogKey + case object SERVER_NAME extends LogKey + case object SERVICE_NAME extends LogKey + case object SERVLET_CONTEXT_HANDLER_PATH extends LogKey + case object SESSION_HANDLE extends LogKey + case object SESSION_HOLD_INFO extends LogKey + case object SESSION_ID extends LogKey + case object SESSION_KEY extends LogKey + case object SET_CLIENT_INFO_REQUEST extends LogKey + case object SHARD_ID extends LogKey + case object SHORTER_SERVICE_NAME extends LogKey + case object SHORT_USER_NAME extends LogKey + case object SHUFFLE_BLOCK_INFO extends LogKey + case object SHUFFLE_DB_BACKEND_KEY extends LogKey + case object SHUFFLE_DB_BACKEND_NAME extends LogKey + case object SHUFFLE_ID extends LogKey + case object SHUFFLE_MERGE_ID extends LogKey + case object SHUFFLE_MERGE_RECOVERY_FILE extends LogKey + case object SHUFFLE_SERVICE_CONF_OVERLAY_URL extends LogKey + case object SHUFFLE_SERVICE_METRICS_NAMESPACE extends LogKey + case object 
SHUFFLE_SERVICE_NAME extends LogKey + case object SIGMAS_LENGTH extends LogKey + case object SIGNAL extends LogKey + case object SINK extends LogKey + case object SIZE extends LogKey + case object SLEEP_TIME extends LogKey + case object SLIDE_DURATION extends LogKey + case object SMALLEST_CLUSTER_INDEX extends LogKey + case object SNAPSHOT_VERSION extends LogKey + case object SOCKET_ADDRESS extends LogKey + case object SOURCE extends LogKey + case object SOURCE_PATH extends LogKey + case object SPARK_BRANCH extends LogKey + case object SPARK_BUILD_DATE extends LogKey + case object SPARK_BUILD_USER extends LogKey + case object SPARK_DATA_STREAM extends LogKey + case object SPARK_PLAN_ID extends LogKey + case object SPARK_REPO_URL extends LogKey + case object SPARK_REVISION extends LogKey + case object SPARK_VERSION extends LogKey + case object SPILL_TIMES extends LogKey + case object SQL_TEXT extends LogKey + case object SRC_PATH extends LogKey + case object STAGE extends LogKey + case object STAGES extends LogKey + case object STAGE_ATTEMPT extends LogKey + case object STAGE_ID extends LogKey + case object STAGE_NAME extends LogKey + case object START_INDEX extends LogKey + case object STATEMENT_ID extends LogKey + case object STATE_STORE_ID extends LogKey + case object STATE_STORE_PROVIDER extends LogKey + case object STATE_STORE_VERSION extends LogKey + case object STATS extends LogKey + case object STATUS extends LogKey + case object STDERR extends LogKey + case object STOP_SITE_SHORT_FORM extends LogKey + case object STORAGE_LEVEL extends LogKey + case object STORAGE_LEVEL_DESERIALIZED extends LogKey + case object STORAGE_LEVEL_REPLICATION extends LogKey + case object STORAGE_MEMORY_SIZE extends LogKey + case object STORE_ID extends LogKey + case object STREAMING_CONTEXT extends LogKey + case object STREAMING_DATA_SOURCE_DESCRIPTION extends LogKey + case object STREAMING_DATA_SOURCE_NAME extends LogKey + case object STREAMING_OFFSETS_END extends LogKey + case object STREAMING_OFFSETS_START extends LogKey + case object STREAMING_QUERY_PROGRESS extends LogKey + case object STREAMING_SOURCE extends LogKey + case object STREAMING_TABLE extends LogKey + case object STREAMING_WRITE extends LogKey + case object STREAM_CHUNK_ID extends LogKey + case object STREAM_ID extends LogKey + case object STREAM_NAME extends LogKey + case object SUBMISSION_ID extends LogKey + case object SUBSAMPLING_RATE extends LogKey + case object SUB_QUERY extends LogKey + case object TABLE_NAME extends LogKey + case object TABLE_TYPE extends LogKey + case object TABLE_TYPES extends LogKey + case object TAG extends LogKey + case object TARGET_NUM_EXECUTOR extends LogKey + case object TARGET_NUM_EXECUTOR_DELTA extends LogKey + case object TARGET_PATH extends LogKey + case object TARGET_SIZE extends LogKey + case object TASK_ATTEMPT_ID extends LogKey + case object TASK_ID extends LogKey + case object TASK_INDEX extends LogKey + case object TASK_LOCALITY extends LogKey + case object TASK_NAME extends LogKey + case object TASK_REQUIREMENTS extends LogKey + case object TASK_RESOURCES extends LogKey + case object TASK_RESOURCE_ASSIGNMENTS extends LogKey + case object TASK_SET_MANAGER extends LogKey + case object TASK_SET_NAME extends LogKey + case object TASK_STATE extends LogKey + case object TEMP_FILE extends LogKey + case object TEMP_OUTPUT_PATH extends LogKey + case object TEMP_PATH extends LogKey + case object TEST_SIZE extends LogKey + case object THREAD extends LogKey + case object THREAD_ID extends LogKey + case 
object THREAD_NAME extends LogKey + case object THREAD_POOL_KEEPALIVE_TIME extends LogKey + case object THREAD_POOL_SIZE extends LogKey + case object THREAD_POOL_WAIT_QUEUE_SIZE extends LogKey + case object THRESHOLD extends LogKey + case object THRESH_TIME extends LogKey + case object TIME extends LogKey + case object TIMEOUT extends LogKey + case object TIMER extends LogKey + case object TIMESTAMP extends LogKey + case object TIME_UNITS extends LogKey + case object TIP extends LogKey + case object TOKEN extends LogKey + case object TOKEN_KIND extends LogKey + case object TOKEN_REGEX extends LogKey + case object TOKEN_RENEWER extends LogKey + case object TOPIC extends LogKey + case object TOPIC_PARTITION extends LogKey + case object TOPIC_PARTITIONS extends LogKey + case object TOPIC_PARTITION_OFFSET extends LogKey + case object TOPIC_PARTITION_OFFSET_RANGE extends LogKey + case object TOTAL extends LogKey + case object TOTAL_EFFECTIVE_TIME extends LogKey + case object TOTAL_SIZE extends LogKey + case object TOTAL_TIME extends LogKey + case object TOTAL_TIME_READ extends LogKey + case object TO_TIME extends LogKey + case object TRAINING_SIZE extends LogKey + case object TRAIN_VALIDATION_SPLIT_METRIC extends LogKey + case object TRAIN_VALIDATION_SPLIT_METRICS extends LogKey + case object TRANSFER_TYPE extends LogKey + case object TREE_NODE extends LogKey + case object TRIGGER_INTERVAL extends LogKey + case object UI_FILTER extends LogKey + case object UI_FILTER_PARAMS extends LogKey + case object UI_PROXY_BASE extends LogKey + case object UNKNOWN_PARAM extends LogKey + case object UNSUPPORTED_EXPR extends LogKey + case object UNSUPPORTED_HINT_REASON extends LogKey + case object UNTIL_OFFSET extends LogKey + case object UPPER_BOUND extends LogKey + case object URI extends LogKey + case object URIS extends LogKey + case object URL extends LogKey + case object URL2 extends LogKey + case object URLS extends LogKey + case object USER_ID extends LogKey + case object USER_NAME extends LogKey + case object UUID extends LogKey + case object VALUE extends LogKey + case object VERSION_NUM extends LogKey + case object VIRTUAL_CORES extends LogKey + case object VOCAB_SIZE extends LogKey + case object WAIT_RESULT_TIME extends LogKey + case object WAIT_SEND_TIME extends LogKey + case object WATERMARK_CONSTRAINT extends LogKey + case object WEB_URL extends LogKey + case object WEIGHT extends LogKey + case object WORKER extends LogKey + case object WORKER_HOST extends LogKey + case object WORKER_ID extends LogKey + case object WORKER_PORT extends LogKey + case object WORKER_URL extends LogKey + case object WRITE_AHEAD_LOG_INFO extends LogKey + case object WRITE_AHEAD_LOG_RECORD_HANDLE extends LogKey + case object WRITE_JOB_UUID extends LogKey + case object XML_SCHEDULING_MODE extends LogKey + case object XSD_PATH extends LogKey + case object YOUNG_GENERATION_GC extends LogKey + case object ZERO_TIME extends LogKey } diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala index 607f3637e6418..8eea9b44da26d 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala @@ -17,8 +17,6 @@ package org.apache.spark.internal -import java.util.Locale - import scala.jdk.CollectionConverters._ import org.apache.logging.log4j.{CloseableThreadContext, Level, LogManager} @@ -29,9 +27,46 @@ import 
org.apache.logging.log4j.core.filter.AbstractFilter import org.slf4j.{Logger, LoggerFactory} import org.apache.spark.internal.Logging.SparkShellLoggingFilter -import org.apache.spark.internal.LogKey.LogKey import org.apache.spark.util.SparkClassUtils +/** + * Guidelines for the Structured Logging Framework - Scala Logging + *
+ * + * Use the `org.apache.spark.internal.Logging` trait for logging in Scala code: + * Logging Messages with Variables: + * When logging a message with variables, wrap all the variables with `MDC`s and they will be + * automatically added to the Mapped Diagnostic Context (MDC). + * This allows for structured logging and better log analysis. + *
+ * + * logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, app.id)}") + *
+ * + * Constant String Messages: + * If you are logging a constant string message, use the log methods that accept a constant + * string. + *
+ * + * logInfo("StateStore stopped") + *
+ * + * Exceptions: + * To ensure logs are compatible with Spark SQL and log analysis tools, avoid + * `Exception.printStackTrace()`. Use `logError`, `logWarning`, and `logInfo` methods from + * the `Logging` trait to log exceptions, maintaining structured and parsable logs. + *
+ * + * If you want to output logs in `scala code` through the structured log framework, + * you can define `custom LogKey` and use it in `scala` code as follows: + *
+ * + * // To add a `custom LogKey`, implement `LogKey` + * case object CUSTOM_LOG_KEY extends LogKey + * import org.apache.spark.internal.MDC; + * logInfo(log"${MDC(CUSTOM_LOG_KEY, "key")}") + */ + /** * Mapped Diagnostic Context (MDC) that will be used in log messages. * The values of the MDC will be inline in the log message, while the key-value pairs will be @@ -42,6 +77,10 @@ case class MDC(key: LogKey, value: Any) { "the class of value cannot be MessageWithContext") } +object MDC { + def of(key: LogKey, value: Any): MDC = MDC(key, value) +} + /** * Wrapper class for log messages that include a logging context. * This is used as the return type of the string interpolator `LogStringContext`. @@ -60,9 +99,11 @@ case class MessageWithContext(message: String, context: java.util.HashMap[String * Companion class for lazy evaluation of the MessageWithContext instance. */ class LogEntry(messageWithContext: => MessageWithContext) { - def message: String = messageWithContext.message + private lazy val cachedMessageWithContext: MessageWithContext = messageWithContext + + def message: String = cachedMessageWithContext.message - def context: java.util.HashMap[String, String] = messageWithContext.context + def context: java.util.HashMap[String, String] = cachedMessageWithContext.context } /** @@ -104,18 +145,18 @@ trait Logging { implicit class LogStringContext(val sc: StringContext) { def log(args: MDC*): MessageWithContext = { val processedParts = sc.parts.iterator - val sb = new StringBuilder(processedParts.next()) + val sb = new StringBuilder(StringContext.processEscapes(processedParts.next())) val context = new java.util.HashMap[String, String]() args.foreach { mdc => val value = if (mdc.value != null) mdc.value.toString else null sb.append(value) if (Logging.isStructuredLoggingEnabled) { - context.put(mdc.key.toString.toLowerCase(Locale.ROOT), value) + context.put(mdc.key.name, value) } if (processedParts.hasNext) { - sb.append(processedParts.next()) + sb.append(StringContext.processEscapes(processedParts.next())) } } diff --git a/common/utils/src/main/scala/org/apache/spark/internal/README.md b/common/utils/src/main/scala/org/apache/spark/internal/README.md deleted file mode 100644 index 81c542fd3d9c6..0000000000000 --- a/common/utils/src/main/scala/org/apache/spark/internal/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# Guidelines for the Structured Logging Framework - -## LogKey - -LogKeys serve as identifiers for mapped diagnostic contexts (MDC) within logs. Follow these guidelines when adding new LogKeys: -* Define all structured logging keys in `LogKey.scala`, and sort them alphabetically for ease of search. -* Use `UPPER_SNAKE_CASE` for key names. -* Key names should be both simple and broad, yet include specific identifiers like `STAGE_ID`, `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use `MAX_ATTEMPTS` as a general key instead of creating separate keys for each scenario such as `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`. This balances simplicity with the detail needed for effective logging. -* Use abbreviations in names if they are widely understood, such as `APP_ID` for APPLICATION_ID, and `K8S` for KUBERNETES. -* For time-related keys, use milliseconds as the unit of time. - -## Exceptions - -To ensure logs are compatible with Spark SQL and log analysis tools, avoid `Exception.printStackTrace()`. Use `logError`, `logWarning`, and `logInfo` methods from the `Logging` trait to log exceptions, maintaining structured and parsable logs. 
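To make the Scaladoc above concrete, here is a hedged sketch of the Scala-side pattern this patch migrates to: wrap variables in `MDC` with a predefined `LogKeys` entry, declare a custom `LogKey` where needed, and pass throwables to the log method rather than printing stack traces. The class and custom key are hypothetical, and the sketch assumes code living inside Spark that can use the internal `Logging` trait; the same pattern appears in the `SparkErrorUtils` and `SparkFileUtils` changes below.

```scala
import org.apache.spark.internal.{Logging, LogKey, LogKeys, MDC}

// Hypothetical custom key; any object extending LogKey gets a lowercase MDC name.
case object RECOVERY_REASON extends LogKey

// Hypothetical component, for illustration only.
class RecoveryService extends Logging {
  def recover(appId: String): Unit = {
    // Variables wrapped in MDC are added to the structured `context` map.
    logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, appId)}")
    try {
      // ... recovery work ...
    } catch {
      case e: Exception =>
        // Pass the throwable to the log method instead of calling e.printStackTrace().
        logWarning(log"Recovery failed: ${MDC(RECOVERY_REASON, e.getMessage)}", e)
    }
  }
}
```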
diff --git a/common/utils/src/main/scala/org/apache/spark/util/LogUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/LogUtils.scala new file mode 100644 index 0000000000000..5a798ffad3a92 --- /dev/null +++ b/common/utils/src/main/scala/org/apache/spark/util/LogUtils.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.util + +import org.apache.spark.annotation.DeveloperApi + +/** + * :: : DeveloperApi :: + * Utils for querying Spark logs with Spark SQL. + * + * @since 4.0.0 + */ +@DeveloperApi +object LogUtils { + /** + * Schema for structured Spark logs. + * Example usage: + * val logDf = spark.read.schema(LOG_SCHEMA).json("path/to/logs") + */ + val LOG_SCHEMA: String = """ + |ts TIMESTAMP, + |level STRING, + |msg STRING, + |context map, + |exception STRUCT< + | class STRING, + | msg STRING, + | stacktrace ARRAY> + |>, + |logger STRING""".stripMargin +} diff --git a/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala index 08291859a32cc..546981c8b5435 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala @@ -36,7 +36,7 @@ import org.apache.ivy.plugins.repository.file.FileRepository import org.apache.ivy.plugins.resolver.{ChainResolver, FileSystemResolver, IBiblioResolver} import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.util.ArrayImplicits._ /** Provides utility functions to be used inside SparkSubmit. */ @@ -215,7 +215,7 @@ private[spark] object MavenUtils extends Logging { if (artifactInfo.getExt == "jar") { true } else { - logInfo(s"Skipping non-jar dependency ${artifactInfo.getId}") + logInfo(log"Skipping non-jar dependency ${MDC(LogKeys.ARTIFACT_ID, artifactInfo.getId)}") false } } @@ -462,14 +462,13 @@ private[spark] object MavenUtils extends Logging { val sysOut = System.out // Default configuration name for ivy val ivyConfName = "default" - - // A Module descriptor must be specified. Entries are dummy strings - val md = getModuleDescriptor - - md.setDefaultConf(ivyConfName) + var md: DefaultModuleDescriptor = null try { // To prevent ivy from logging to system out System.setOut(printStream) + // A Module descriptor must be specified. 
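Circling back to the `LogUtils.LOG_SCHEMA` constant added above: once the structured framework emits JSON logs, they can be loaded back with Spark SQL. A hedged sketch; the path and the filter are illustrative:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.util.LogUtils

// Hypothetical driver object, for illustration only.
object StructuredLogQuery {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("structured-log-query").getOrCreate()

    // Point this at the JSON log files produced by the structured logging framework.
    val logDf = spark.read.schema(LogUtils.LOG_SCHEMA).json("path/to/structured/logs")

    // MDC key/value pairs land in the `context` map column, keyed by the lowercase LogKey name.
    logDf.filter("level = 'ERROR'")
      .select("ts", "msg", "context")
      .show(truncate = false)

    spark.stop()
  }
}
```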
Entries are dummy strings + md = getModuleDescriptor + md.setDefaultConf(ivyConfName) val artifacts = extractMavenCoordinates(coordinates) // Directories for caching downloads through ivy and storing the jars when maven coordinates // are supplied to spark-submit @@ -516,8 +515,9 @@ private[spark] object MavenUtils extends Logging { val failedReports = rr.getArtifactsReports(DownloadStatus.FAILED, true) if (failedReports.nonEmpty && noCacheIvySettings.isDefined) { val failedArtifacts = failedReports.map(r => r.getArtifact) - logInfo(s"Download failed: ${failedArtifacts.mkString("[", ", ", "]")}, " + - s"attempt to retry while skipping local-m2-cache.") + logInfo(log"Download failed: " + + log"${MDC(LogKeys.ARTIFACTS, failedArtifacts.mkString("[", ", ", "]"))}, " + + log"attempt to retry while skipping local-m2-cache.") failedArtifacts.foreach(artifact => { clearInvalidIvyCacheFiles(artifact.getModuleRevisionId, ivySettings.getDefaultCache) }) @@ -548,7 +548,9 @@ private[spark] object MavenUtils extends Logging { } } finally { System.setOut(sysOut) - clearIvyResolutionFiles(md.getModuleRevisionId, ivySettings.getDefaultCache, ivyConfName) + if (md != null) { + clearIvyResolutionFiles(md.getModuleRevisionId, ivySettings.getDefaultCache, ivyConfName) + } } } } diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkErrorUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkErrorUtils.scala index 8194d1e424173..9f604e4bf47f2 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/SparkErrorUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkErrorUtils.scala @@ -21,7 +21,7 @@ import java.nio.charset.StandardCharsets.UTF_8 import scala.util.control.NonFatal -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} private[spark] trait SparkErrorUtils extends Logging { /** @@ -74,7 +74,8 @@ private[spark] trait SparkErrorUtils extends Logging { } catch { case t: Throwable if (originalThrowable != null && originalThrowable != t) => originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in finally: ${t.getMessage}", t) + logWarning( + log"Suppressing exception in finally: ${MDC(LogKeys.MESSAGE, t.getMessage)}", t) throw originalThrowable } } diff --git a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala index e12f8acdadd3c..22f03df1b2697 100644 --- a/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala +++ b/common/utils/src/main/scala/org/apache/spark/util/SparkFileUtils.scala @@ -20,7 +20,7 @@ import java.io.File import java.net.{URI, URISyntaxException} import java.nio.file.Files -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.network.util.JavaUtils private[spark] trait SparkFileUtils extends Logging { @@ -77,12 +77,12 @@ private[spark] trait SparkFileUtils extends Logging { // remove the check when we're sure that Files.createDirectories() would never fail silently. 
Files.createDirectories(dir.toPath) if ( !dir.exists() || !dir.isDirectory) { - logError(s"Failed to create directory " + dir) + logError(log"Failed to create directory ${MDC(LogKeys.PATH, dir)}") } dir.isDirectory } catch { case e: Exception => - logError(s"Failed to create directory " + dir, e) + logError(log"Failed to create directory ${MDC(LogKeys.PATH, dir)}", e) false } } diff --git a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java new file mode 100644 index 0000000000000..6bfe595def1d4 --- /dev/null +++ b/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util; + +import org.apache.logging.log4j.Level; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; + +public class PatternSparkLoggerSuite extends SparkLoggerSuiteBase { + + private static final SparkLogger LOGGER = + SparkLoggerFactory.getLogger(PatternSparkLoggerSuite.class); + + private String toRegexPattern(Level level, String msg) { + return msg + .replace("", level.toString()) + .replace("", className()); + } + + @Override + SparkLogger logger() { + return LOGGER; + } + + @Override + String className() { + return PatternSparkLoggerSuite.class.getSimpleName(); + } + + @Override + String logFilePath() { + return "target/pattern.log"; + } + + @Override + String expectedPatternForBasicMsg(Level level) { + return toRegexPattern(level, ".* : This is a log message\n"); + } + + @Override + String expectedPatternForBasicMsgWithEscapeChar(Level level) { + return toRegexPattern(level, + ".* : This is a log message\\nThis is a new line \\t other msg\\n"); + } + + @Override + String expectedPatternForBasicMsgWithException(Level level) { + return toRegexPattern(level, """ + .* : This is a log message + [\\s\\S]*"""); + } + + @Override + String expectedPatternForMsgWithMDC(Level level) { + return toRegexPattern(level, ".* : Lost executor 1.\n"); + } + + @Override + String expectedPatternForMsgWithMDCs(Level level) { + return toRegexPattern(level, + ".* : Lost executor 1, reason: the shuffle data is too large\n"); + } + + @Override + String expectedPatternForMsgWithMDCsAndException(Level level) { + return toRegexPattern(level,""" + .* : Lost executor 1, reason: the shuffle data is too large + [\\s\\S]*"""); + } + + @Override + String expectedPatternForMsgWithMDCValueIsNull(Level level) { + return toRegexPattern(level, ".* : Lost executor null.\n"); + } + + @Override + String expectedPatternForScalaCustomLogKey(Level level) { + return toRegexPattern(level, ".* : Scala custom log message.\n"); + } + + @Override + 
String expectedPatternForJavaCustomLogKey(Level level) { + return toRegexPattern(level, ".* : Java custom log message.\n"); + } +} diff --git a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java b/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java new file mode 100644 index 0000000000000..186088ede1d0b --- /dev/null +++ b/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.List; + +import org.apache.commons.lang3.tuple.Pair; +import org.apache.logging.log4j.Level; +import org.junit.jupiter.api.Test; + +import org.apache.spark.internal.LogKey; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; +import org.apache.spark.internal.SparkLogger; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public abstract class SparkLoggerSuiteBase { + + abstract SparkLogger logger(); + abstract String className(); + abstract String logFilePath(); + + private File logFile() throws IOException { + String pwd = new File(".").getCanonicalPath(); + return new File(pwd + File.separator + logFilePath()); + } + + // Return the newly added log contents in the log file after executing the function `f` + private String captureLogOutput(Runnable func) throws IOException { + String content = ""; + if (logFile().exists()) { + content = Files.readString(logFile().toPath()); + } + func.run(); + String newContent = Files.readString(logFile().toPath()); + return newContent.substring(content.length()); + } + + @FunctionalInterface + private interface ExpectedResult { + String apply(Level level) throws IOException; + } + + private void checkLogOutput(Level level, Runnable func, ExpectedResult result) { + try { + assertTrue(captureLogOutput(func).matches(result.apply(level))); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private final String basicMsg = "This is a log message"; + + private final String basicMsgWithEscapeChar = + "This is a log message\nThis is a new line \t other msg"; + + private final MDC executorIDMDC = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1"); + private final String msgWithMDC = "Lost executor {}."; + + private final MDC[] mdcs = new MDC[] { + MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1"), + MDC.of(LogKeys.REASON$.MODULE$, "the shuffle data is too large")}; + private final String msgWithMDCs = "Lost executor {}, reason: {}"; + + private final MDC[] emptyMDCs = new MDC[0]; + + private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, null); + + private final MDC scalaCustomLogMDC = + 
MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "Scala custom log message."); + + private final MDC javaCustomLogMDC = + MDC.of(JavaCustomLogKeys.CUSTOM_LOG_KEY, "Java custom log message."); + + // test for basic message (without any mdc) + abstract String expectedPatternForBasicMsg(Level level); + + // test for basic message (with escape char) + abstract String expectedPatternForBasicMsgWithEscapeChar(Level level); + + // test for basic message and exception + abstract String expectedPatternForBasicMsgWithException(Level level); + + // test for message (with mdc) + abstract String expectedPatternForMsgWithMDC(Level level); + + // test for message (with mdcs) + abstract String expectedPatternForMsgWithMDCs(Level level); + + // test for message (with mdcs and exception) + abstract String expectedPatternForMsgWithMDCsAndException(Level level); + + // test for message (with empty mdcs and exception) + String expectedPatternForMsgWithEmptyMDCsAndException(Level level) { + return expectedPatternForBasicMsgWithException(level); + } + + // test for message (with mdc - the value is null) + abstract String expectedPatternForMsgWithMDCValueIsNull(Level level); + + // test for scala custom LogKey + abstract String expectedPatternForScalaCustomLogKey(Level level); + + // test for java custom LogKey + abstract String expectedPatternForJavaCustomLogKey(Level level); + + @Test + public void testBasicMsg() { + Runnable errorFn = () -> logger().error(basicMsg); + Runnable warnFn = () -> logger().warn(basicMsg); + Runnable infoFn = () -> logger().info(basicMsg); + Runnable debugFn = () -> logger().debug(basicMsg); + Runnable traceFn = () -> logger().trace(basicMsg); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn), + Pair.of(Level.DEBUG, debugFn), + Pair.of(Level.TRACE, traceFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForBasicMsg)); + } + + @Test + public void testBasicMsgWithEscapeChar() { + Runnable errorFn = () -> logger().error(basicMsgWithEscapeChar); + Runnable warnFn = () -> logger().warn(basicMsgWithEscapeChar); + Runnable infoFn = () -> logger().info(basicMsgWithEscapeChar); + Runnable debugFn = () -> logger().debug(basicMsgWithEscapeChar); + Runnable traceFn = () -> logger().trace(basicMsgWithEscapeChar); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn), + Pair.of(Level.DEBUG, debugFn), + Pair.of(Level.TRACE, traceFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), + this::expectedPatternForBasicMsgWithEscapeChar)); + } + + @Test + public void testBasicLoggerWithException() { + Throwable exception = new RuntimeException("OOM"); + Runnable errorFn = () -> logger().error(basicMsg, exception); + Runnable warnFn = () -> logger().warn(basicMsg, exception); + Runnable infoFn = () -> logger().info(basicMsg, exception); + Runnable debugFn = () -> logger().debug(basicMsg, exception); + Runnable traceFn = () -> logger().trace(basicMsg, exception); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn), + Pair.of(Level.DEBUG, debugFn), + Pair.of(Level.TRACE, traceFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), + this::expectedPatternForBasicMsgWithException)); + } + + @Test + public void testLoggerWithMDC() { + Runnable errorFn = () -> logger().error(msgWithMDC, executorIDMDC); + Runnable warnFn = () -> logger().warn(msgWithMDC, executorIDMDC); + Runnable infoFn 
= () -> logger().info(msgWithMDC, executorIDMDC); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForMsgWithMDC)); + } + + @Test + public void testLoggerWithMDCs() { + Runnable errorFn = () -> logger().error(msgWithMDCs, mdcs); + Runnable warnFn = () -> logger().warn(msgWithMDCs, mdcs); + Runnable infoFn = () -> logger().info(msgWithMDCs, mdcs); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForMsgWithMDCs)); + } + + @Test + public void testLoggerWithEmptyMDCsAndException() { + Throwable exception = new RuntimeException("OOM"); + Runnable errorFn = () -> logger().error(basicMsg, exception, emptyMDCs); + Runnable warnFn = () -> logger().warn(basicMsg, exception, emptyMDCs); + Runnable infoFn = () -> logger().info(basicMsg, exception, emptyMDCs); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), + this::expectedPatternForMsgWithEmptyMDCsAndException)); + } + + @Test + public void testLoggerWithMDCsAndException() { + Throwable exception = new RuntimeException("OOM"); + Runnable errorFn = () -> logger().error(msgWithMDCs, exception, mdcs); + Runnable warnFn = () -> logger().warn(msgWithMDCs, exception, mdcs); + Runnable infoFn = () -> logger().info(msgWithMDCs, exception, mdcs); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), + this::expectedPatternForMsgWithMDCsAndException) + ); + } + + @Test + public void testLoggerWithMDCValueIsNull() { + Runnable errorFn = () -> logger().error(msgWithMDC, executorIDMDCValueIsNull); + Runnable warnFn = () -> logger().warn(msgWithMDC, executorIDMDCValueIsNull); + Runnable infoFn = () -> logger().info(msgWithMDC, executorIDMDCValueIsNull); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), + this::expectedPatternForMsgWithMDCValueIsNull)); + } + + @Test + public void testLoggerWithScalaCustomLogKey() { + Runnable errorFn = () -> logger().error("{}", scalaCustomLogMDC); + Runnable warnFn = () -> logger().warn("{}", scalaCustomLogMDC); + Runnable infoFn = () -> logger().info("{}", scalaCustomLogMDC); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForScalaCustomLogKey)); + } + + @Test + public void testLoggerWithJavaCustomLogKey() { + Runnable errorFn = () -> logger().error("{}", javaCustomLogMDC); + Runnable warnFn = () -> logger().warn("{}", javaCustomLogMDC); + Runnable infoFn = () -> logger().info("{}", javaCustomLogMDC); + List.of( + Pair.of(Level.ERROR, errorFn), + Pair.of(Level.WARN, warnFn), + Pair.of(Level.INFO, infoFn)).forEach(pair -> + checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForJavaCustomLogKey)); + } +} + +class JavaCustomLogKeys { + // Custom `LogKey` must be `implements LogKey` + public static class CUSTOM_LOG_KEY implements LogKey { } + + // Singleton + public static final CUSTOM_LOG_KEY CUSTOM_LOG_KEY = new CUSTOM_LOG_KEY(); +} 
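Taken together with `LogUtils.LOG_SCHEMA` introduced earlier in this diff, the structured output these suites write (for example `target/structured.log`) can be loaded back into Spark SQL. A hedged sketch, assuming a local SparkSession and that a JSON log file already exists at that path:

import org.apache.spark.sql.SparkSession
import org.apache.spark.util.LogUtils

val spark = SparkSession.builder().master("local[*]").appName("log-analysis").getOrCreate()

// Parse the JSON logs with the documented schema, then query them like any other table.
val logDf = spark.read.schema(LogUtils.LOG_SCHEMA).json("target/structured.log")
logDf.where("level = 'ERROR'")
  .selectExpr("ts", "msg", "context['executor_id'] AS executor_id", "exception.msg AS error")
  .show(truncate = false)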
diff --git a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java new file mode 100644 index 0000000000000..6959fe11820ff --- /dev/null +++ b/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.logging.log4j.Level; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; + +public class StructuredSparkLoggerSuite extends SparkLoggerSuiteBase { + + private static final SparkLogger LOGGER = + SparkLoggerFactory.getLogger(StructuredSparkLoggerSuite.class); + + private static final ObjectMapper JSON_MAPPER = new ObjectMapper(); + private String compactAndToRegexPattern(Level level, String json) { + try { + return JSON_MAPPER.readTree(json).toString() + .replace("", level.toString()) + .replace("", className()) + .replace("", "[^\"]+") + .replace("\"\"", ".*") + .replace("{", "\\{") + "\n"; + } catch (JsonProcessingException e) { + throw new RuntimeException(e); + } + } + + @Override + SparkLogger logger() { + return LOGGER; + } + + @Override + String className() { + return StructuredSparkLoggerSuite.class.getSimpleName(); + } + + @Override + String logFilePath() { + return "target/structured.log"; + } + + @Override + String expectedPatternForBasicMsg(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "This is a log message", + "logger": "" + }"""); + } + + @Override + String expectedPatternForBasicMsgWithEscapeChar(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "This is a log message\\\\nThis is a new line \\\\t other msg", + "logger": "" + }"""); + } + + @Override + String expectedPatternForBasicMsgWithException(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "This is a log message", + "exception": { + "class": "java.lang.RuntimeException", + "msg": "OOM", + "stacktrace": "" + }, + "logger": "" + }"""); + } + + @Override + String expectedPatternForMsgWithMDC(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "Lost executor 1.", + "context": { + "executor_id": "1" + }, + "logger": "" + }"""); + } + + @Override + String expectedPatternForMsgWithMDCs(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "Lost executor 1, reason: the shuffle data is too large", + "context": { + 
"executor_id": "1", + "reason": "the shuffle data is too large" + }, + "logger": "" + }"""); + } + + @Override + String expectedPatternForMsgWithMDCsAndException(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "Lost executor 1, reason: the shuffle data is too large", + "context": { + "executor_id": "1", + "reason": "the shuffle data is too large" + }, + "exception": { + "class": "java.lang.RuntimeException", + "msg": "OOM", + "stacktrace": "" + }, + "logger": "" + }"""); + } + + @Override + String expectedPatternForMsgWithMDCValueIsNull(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "Lost executor null.", + "context": { + "executor_id": null + }, + "logger": "" + }"""); + } + + @Override + String expectedPatternForScalaCustomLogKey(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "Scala custom log message.", + "context": { + "custom_log_key": "Scala custom log message." + }, + "logger": "" + }"""); + } + + @Override + String expectedPatternForJavaCustomLogKey(Level level) { + return compactAndToRegexPattern(level, """ + { + "ts": "", + "level": "", + "msg": "Java custom log message.", + "context": { + "custom_log_key": "Java custom log message." + }, + "logger": "" + }"""); + } +} + diff --git a/common/utils/src/test/resources/log4j2.properties b/common/utils/src/test/resources/log4j2.properties index e3bd8689993d6..cb38f5b55a0ba 100644 --- a/common/utils/src/test/resources/log4j2.properties +++ b/common/utils/src/test/resources/log4j2.properties @@ -39,12 +39,22 @@ appender.pattern.layout.type = PatternLayout appender.pattern.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex # Custom loggers -logger.structured.name = org.apache.spark.util.StructuredLoggingSuite -logger.structured.level = trace -logger.structured.appenderRefs = structured -logger.structured.appenderRef.structured.ref = structured - -logger.pattern.name = org.apache.spark.util.PatternLoggingSuite -logger.pattern.level = trace -logger.pattern.appenderRefs = pattern -logger.pattern.appenderRef.pattern.ref = pattern +logger.structured_logging.name = org.apache.spark.util.StructuredLoggingSuite +logger.structured_logging.level = trace +logger.structured_logging.appenderRefs = structured +logger.structured_logging.appenderRef.structured.ref = structured + +logger.pattern_logging.name = org.apache.spark.util.PatternLoggingSuite +logger.pattern_logging.level = trace +logger.pattern_logging.appenderRefs = pattern +logger.pattern_logging.appenderRef.pattern.ref = pattern + +logger.structured_logger.name = org.apache.spark.util.StructuredSparkLoggerSuite +logger.structured_logger.level = trace +logger.structured_logger.appenderRefs = structured +logger.structured_logger.appenderRef.structured.ref = structured + +logger.pattern_logger.name = org.apache.spark.util.PatternSparkLoggerSuite +logger.pattern_logger.level = trace +logger.pattern_logger.appenderRefs = pattern +logger.pattern_logger.appenderRef.pattern.ref = pattern diff --git a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala index 24a24538ad72b..17e360f510a24 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/LogKeySuite.scala @@ -22,16 +22,16 @@ import java.nio.file.{Files, Path} import java.util.{ArrayList => JList} import 
scala.jdk.CollectionConverters._ +import scala.reflect.runtime.universe._ import org.apache.commons.io.FileUtils import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite -import org.apache.spark.internal.{Logging, LogKey} -import org.apache.spark.internal.LogKey.LogKey +import org.apache.spark.internal.{Logging, LogKeys} // scalastyle:off line.size.limit /** - * To re-generate the LogKey class file, run: + * To re-generate the file `LogKey.scala`, run: * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "common-utils/testOnly org.apache.spark.util.LogKeySuite" * }}} @@ -57,20 +57,20 @@ class LogKeySuite private val logKeyFilePath = getWorkspaceFilePath("common", "utils", "src", "main", "scala", "org", "apache", "spark", "internal", "LogKey.scala") - // regenerate the file `LogKey.scala` with its enumeration fields sorted alphabetically + // regenerate the file `LogKey.scala` with its members sorted alphabetically private def regenerateLogKeyFile( - originalKeys: Seq[LogKey], sortedKeys: Seq[LogKey]): Unit = { + originalKeys: Seq[String], sortedKeys: Seq[String]): Unit = { if (originalKeys != sortedKeys) { val logKeyFile = logKeyFilePath.toFile - logInfo(s"Regenerating LogKey file $logKeyFile") + logInfo(s"Regenerating the file $logKeyFile") val originalContents = FileUtils.readLines(logKeyFile, StandardCharsets.UTF_8) val sortedContents = new JList[String]() var firstMatch = false originalContents.asScala.foreach { line => - if (line.trim.startsWith("val ") && line.trim.endsWith(" = Value")) { + if (line.trim.startsWith("case object ") && line.trim.endsWith(" extends LogKey")) { if (!firstMatch) { - sortedKeys.foreach { logKey => - sortedContents.add(s" val ${logKey.toString} = Value") + sortedKeys.foreach { key => + sortedContents.add(s" case object $key extends LogKey") } firstMatch = true } @@ -83,14 +83,21 @@ class LogKeySuite } } - test("LogKey enumeration fields are correctly sorted") { - val originalKeys = LogKey.values.toSeq - val sortedKeys = originalKeys.sortBy(_.toString) + test("The members of LogKeys are correctly sorted") { + val originalKeys = getAllLogKeys.reverse + val sortedKeys = originalKeys.sorted if (regenerateGoldenFiles) { regenerateLogKeyFile(originalKeys, sortedKeys) } else { assert(originalKeys === sortedKeys, - "LogKey enumeration fields must be sorted alphabetically") + "The members of LogKeys must be sorted alphabetically") } } + + private def getAllLogKeys: Seq[String] = { + val logKeysType = typeOf[LogKeys.type] + val classSymbol = logKeysType.typeSymbol.asClass + val members = classSymbol.typeSignature.members + members.filter(m => m.isTerm && !m.isMethod).map(_.name.toString).toSeq + } } diff --git a/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala index 1983f185e8c87..7631c25662219 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/MDCSuite.scala @@ -22,7 +22,7 @@ import scala.jdk.CollectionConverters._ import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{EXIT_CODE, OFFSET, RANGE} +import org.apache.spark.internal.LogKeys.{EXIT_CODE, OFFSET, RANGE} class MDCSuite extends AnyFunSuite // scalastyle:ignore funsuite diff --git a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala 
b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala index d06405ab6d990..2ba2b15c49f33 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/PatternLoggingSuite.scala @@ -34,6 +34,19 @@ class PatternLoggingSuite extends LoggingSuiteBase with BeforeAndAfterAll { s""".*$level $className: This is a log message\n""" } + override def expectedPatternForBasicMsgWithEscapeChar(level: Level): String = { + s""".*$level $className: This is a log message\nThis is a new line \t other msg\n""" + } + + override def expectedPatternForBasicMsgWithEscapeCharMDC(level: Level): String = { + s""".*$level $className: This is a log message\nThis is a new line \t other msg\n""" + } + + override def expectedPatternForMsgWithMDCAndEscapeChar(level: Level): String = { + s""".*$level $className: The first message\nthe first new line\tthe first other msg\n""" + + s"""[\\s\\S]*The second message\nthe second new line\tthe second other msg\n""" + } + override def expectedPatternForBasicMsgWithException(level: Level): String = { s""".*$level $className: This is a log message\n[\\s\\S]*""" } @@ -47,6 +60,10 @@ class PatternLoggingSuite extends LoggingSuiteBase with BeforeAndAfterAll { override def expectedPatternForMsgWithMDCAndException(level: Level): String = s""".*$level $className: Error in executor 1.\njava.lang.RuntimeException: OOM\n[\\s\\S]*""" + override def expectedPatternForCustomLogKey(level: Level): String = { + s""".*$level $className: Custom log message.\n""" + } + override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = { val pattern = s""".*$level $className: Min Size: 2, Max Size: 4. Please double check.\n""" diff --git a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala index fe03c190fa85e..b3e103f46337c 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala @@ -25,8 +25,7 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.logging.log4j.Level import org.scalatest.funsuite.AnyFunSuite // scalastyle:ignore funsuite -import org.apache.spark.internal.{LogEntry, Logging, MDC} -import org.apache.spark.internal.LogKey.{EXECUTOR_ID, MAX_SIZE, MIN_SIZE} +import org.apache.spark.internal.{LogEntry, Logging, LogKey, LogKeys, MDC, MessageWithContext} trait LoggingSuiteBase extends AnyFunSuite // scalastyle:ignore funsuite @@ -54,19 +53,38 @@ trait LoggingSuiteBase def basicMsg: String = "This is a log message" - def msgWithMDC: LogEntry = log"Lost executor ${MDC(EXECUTOR_ID, "1")}." + def basicMsgWithEscapeChar: String = "This is a log message\nThis is a new line \t other msg" - def msgWithMDCValueIsNull: LogEntry = log"Lost executor ${MDC(EXECUTOR_ID, null)}." + def basicMsgWithEscapeCharMDC: LogEntry = + log"This is a log message\nThis is a new line \t other msg" - def msgWithMDCAndException: LogEntry = log"Error in executor ${MDC(EXECUTOR_ID, "1")}." 
+ // scalastyle:off line.size.limit + def msgWithMDCAndEscapeChar: LogEntry = + log"The first message\nthe first new line\tthe first other msg\n${MDC(LogKeys.PATHS, "C:\\Users\\run-all_1.R\nC:\\Users\\run-all_2.R")}\nThe second message\nthe second new line\tthe second other msg" + // scalastyle:on line.size.limit - def msgWithConcat: LogEntry = log"Min Size: ${MDC(MIN_SIZE, "2")}, " + - log"Max Size: ${MDC(MAX_SIZE, "4")}. " + + def msgWithMDC: LogEntry = log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, "1")}." + + def msgWithMDCValueIsNull: LogEntry = log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, null)}." + + def msgWithMDCAndException: LogEntry = log"Error in executor ${MDC(LogKeys.EXECUTOR_ID, "1")}." + + def msgWithConcat: LogEntry = log"Min Size: ${MDC(LogKeys.MIN_SIZE, "2")}, " + + log"Max Size: ${MDC(LogKeys.MAX_SIZE, "4")}. " + log"Please double check." // test for basic message (without any mdc) def expectedPatternForBasicMsg(level: Level): String + // test for basic message (with escape char) + def expectedPatternForBasicMsgWithEscapeChar(level: Level): String + + // test for basic message (with escape char mdc) + def expectedPatternForBasicMsgWithEscapeCharMDC(level: Level): String + + // test for message (with mdc and escape char) + def expectedPatternForMsgWithMDCAndEscapeChar(level: Level): String + // test for basic message and exception def expectedPatternForBasicMsgWithException(level: Level): String @@ -79,6 +97,9 @@ trait LoggingSuiteBase // test for message and exception def expectedPatternForMsgWithMDCAndException(level: Level): String + // test for custom LogKey + def expectedPatternForCustomLogKey(level: Level): String + def verifyMsgWithConcat(level: Level, logOutput: String): Unit test("Basic logging") { @@ -93,6 +114,43 @@ trait LoggingSuiteBase } } + test("Basic logging with escape char") { + Seq( + (Level.ERROR, () => logError(basicMsgWithEscapeChar)), + (Level.WARN, () => logWarning(basicMsgWithEscapeChar)), + (Level.INFO, () => logInfo(basicMsgWithEscapeChar)), + (Level.DEBUG, () => logDebug(basicMsgWithEscapeChar)), + (Level.TRACE, () => logTrace(basicMsgWithEscapeChar))).foreach { case (level, logFunc) => + val logOutput = captureLogOutput(logFunc) + assert(expectedPatternForBasicMsgWithEscapeChar(level).r.matches(logOutput)) + } + } + + test("Basic logging with escape char MDC") { + Seq( + (Level.ERROR, () => logError(basicMsgWithEscapeCharMDC)), + (Level.WARN, () => logWarning(basicMsgWithEscapeCharMDC)), + (Level.INFO, () => logInfo(basicMsgWithEscapeCharMDC)), + (Level.DEBUG, () => logDebug(basicMsgWithEscapeCharMDC)), + (Level.TRACE, () => logTrace(basicMsgWithEscapeCharMDC))).foreach { case (level, logFunc) => + val logOutput = captureLogOutput(logFunc) + assert(expectedPatternForBasicMsgWithEscapeCharMDC(level).r.matches(logOutput)) + } + } + + test("Logging with MDC and escape char") { + Seq( + (Level.ERROR, () => logError(msgWithMDCAndEscapeChar)), + (Level.WARN, () => logWarning(msgWithMDCAndEscapeChar)), + (Level.INFO, () => logInfo(msgWithMDCAndEscapeChar)), + (Level.DEBUG, () => logDebug(msgWithMDCAndEscapeChar)), + (Level.TRACE, () => logTrace(msgWithMDCAndEscapeChar)) + ).foreach { case (level, logFunc) => + val logOutput = captureLogOutput(logFunc) + assert(expectedPatternForMsgWithMDCAndEscapeChar(level).r.matches(logOutput)) + } + } + test("Basic logging with Exception") { val exception = new RuntimeException("OOM") Seq( @@ -144,6 +202,20 @@ trait LoggingSuiteBase } } + private val customLog = log"${MDC(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log 
message.")}" + test("Logging with custom LogKey") { + Seq( + (Level.ERROR, () => logError(customLog)), + (Level.WARN, () => logWarning(customLog)), + (Level.INFO, () => logInfo(customLog)), + (Level.DEBUG, () => logDebug(customLog)), + (Level.TRACE, () => logTrace(customLog))).foreach { + case (level, logFunc) => + val logOutput = captureLogOutput(logFunc) + assert(expectedPatternForCustomLogKey(level).r.matches(logOutput)) + } + } + test("Logging with concat") { Seq( (Level.ERROR, () => logError(msgWithConcat)), @@ -156,10 +228,41 @@ trait LoggingSuiteBase verifyMsgWithConcat(level, logOutput) } } + + test("LogEntry should construct MessageWithContext only once") { + var constructionCount = 0 + + def constructMessageWithContext(): MessageWithContext = { + constructionCount += 1 + log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, "1")}." + } + logInfo(constructMessageWithContext()) + assert(constructionCount === 1) + } + + test("LogEntry should construct MessageWithContext only once II") { + var constructionCount = 0 + var constructionCount2 = 0 + + def executorId(): String = { + constructionCount += 1 + "1" + } + + def workerId(): String = { + constructionCount2 += 1 + "2" + } + + logInfo(log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, executorId())}." + + log"worker id ${MDC(LogKeys.WORKER_ID, workerId())}") + assert(constructionCount === 1) + assert(constructionCount2 === 1) + } } class StructuredLoggingSuite extends LoggingSuiteBase { - override def className: String = classOf[StructuredLoggingSuite].getName + override def className: String = classOf[StructuredLoggingSuite].getSimpleName override def logFilePath: String = "target/structured.log" private val jsonMapper = new ObjectMapper().registerModule(DefaultScalaModule) @@ -167,6 +270,7 @@ class StructuredLoggingSuite extends LoggingSuiteBase { jsonMapper.readTree(json).toString. replace("", """[^"]+"""). replace("""""""", """.*"""). + replace("", """.*"""). replace("{", """\{""") + "\n" } @@ -181,6 +285,44 @@ class StructuredLoggingSuite extends LoggingSuiteBase { }""") } + override def expectedPatternForBasicMsgWithEscapeChar(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "", + "level": "$level", + "msg": "This is a log message\\\\nThis is a new line \\\\t other msg", + "logger": "$className" + }""") + } + + override def expectedPatternForBasicMsgWithEscapeCharMDC(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "", + "level": "$level", + "msg": "This is a log message\\\\nThis is a new line \\\\t other msg", + "logger": "$className" + }""") + } + + override def expectedPatternForMsgWithMDCAndEscapeChar(level: Level): String = { + // scalastyle:off line.size.limit + compactAndToRegexPattern( + s""" + { + "ts": "", + "level": "$level", + "msg": "The first message\\\\nthe first new line\\\\tthe first other msg\\\\n\\\\nThe second message\\\\nthe second new line\\\\tthe second other msg", + "context": { + "paths": "" + }, + "logger": "$className" + }""") + // scalastyle:on line.size.limit + } + override def expectedPatternForBasicMsgWithException(level: Level): String = { compactAndToRegexPattern( s""" @@ -244,6 +386,21 @@ class StructuredLoggingSuite extends LoggingSuiteBase { }""") } + override def expectedPatternForCustomLogKey(level: Level): String = { + compactAndToRegexPattern( + s""" + { + "ts": "", + "level": "$level", + "msg": "Custom log message.", + "context": { + "custom_log_key": "Custom log message." 
+ }, + "logger": "$className" + }""" + ) + } + override def verifyMsgWithConcat(level: Level, logOutput: String): Unit = { val pattern1 = compactAndToRegexPattern( s""" @@ -272,4 +429,18 @@ class StructuredLoggingSuite extends LoggingSuiteBase { }""") assert(pattern1.r.matches(logOutput) || pattern2.r.matches(logOutput)) } + + test("process escape sequences") { + assert(log"\n".message == "\n") + assert(log"\t".message == "\t") + assert(log"\b".message == "\b") + assert(log"\r".message == "\r") + assert((log"\r" + log"\n" + log"\t" + log"\b").message == "\r\n\t\b") + assert((log"\r${MDC(LogKeys.EXECUTOR_ID, 1)}\n".message == "\r1\n")) + } +} + +object CustomLogKeys { + // Custom `LogKey` must be `extends LogKey` + case object CUSTOM_LOG_KEY extends LogKey } diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java index ea7a7674baf57..2afba81d192e9 100644 --- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java +++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantBuilder.java @@ -223,7 +223,7 @@ public void appendFloat(float f) { public void appendBinary(byte[] binary) { checkCapacity(1 + U32_SIZE + binary.length); - writeBuffer[writePos++] = primitiveHeader(LONG_STR); + writeBuffer[writePos++] = primitiveHeader(BINARY); writeLong(writeBuffer, writePos, binary.length, U32_SIZE); writePos += U32_SIZE; System.arraycopy(binary, 0, writeBuffer, writePos, binary.length); diff --git a/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java b/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java index e4e9cc8b4cfac..84e3a45e4b0ee 100644 --- a/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java +++ b/common/variant/src/main/java/org/apache/spark/types/variant/VariantUtil.java @@ -392,6 +392,13 @@ public static double getDouble(byte[] value, int pos) { return Double.longBitsToDouble(readLong(value, pos + 1, 8)); } + // Check whether the precision and scale of the decimal are within the limit. + private static void checkDecimal(BigDecimal d, int maxPrecision) { + if (d.precision() > maxPrecision || d.scale() > maxPrecision) { + throw malformedVariant(); + } + } + // Get a decimal value from variant value `value[pos...]`. // Throw `MALFORMED_VARIANT` if the variant is malformed. public static BigDecimal getDecimal(byte[] value, int pos) { @@ -399,14 +406,18 @@ public static BigDecimal getDecimal(byte[] value, int pos) { int basicType = value[pos] & BASIC_TYPE_MASK; int typeInfo = (value[pos] >> BASIC_TYPE_BITS) & TYPE_INFO_MASK; if (basicType != PRIMITIVE) throw unexpectedType(Type.DECIMAL); - int scale = value[pos + 1]; + // Interpret the scale byte as unsigned. If it is a negative byte, the unsigned value must be + // greater than `MAX_DECIMAL16_PRECISION` and will trigger an error in `checkDecimal`. 
+ int scale = value[pos + 1] & 0xFF; BigDecimal result; switch (typeInfo) { case DECIMAL4: result = BigDecimal.valueOf(readLong(value, pos + 2, 4), scale); + checkDecimal(result, MAX_DECIMAL4_PRECISION); break; case DECIMAL8: result = BigDecimal.valueOf(readLong(value, pos + 2, 8), scale); + checkDecimal(result, MAX_DECIMAL8_PRECISION); break; case DECIMAL16: checkIndex(pos + 17, value.length); @@ -417,6 +428,7 @@ public static BigDecimal getDecimal(byte[] value, int pos) { bytes[i] = value[pos + 17 - i]; } result = new BigDecimal(new BigInteger(bytes), scale); + checkDecimal(result, MAX_DECIMAL16_PRECISION); break; default: throw unexpectedType(Type.DECIMAL); diff --git a/conf/log4j2.properties.pattern-layout-template b/conf/log4j2.properties.pattern-layout-template new file mode 100644 index 0000000000000..ab96e03baed20 --- /dev/null +++ b/conf/log4j2.properties.pattern-layout-template @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the console +rootLogger.level = info +rootLogger.appenderRef.stdout.ref = console + +# In the pattern layout configuration below, we specify an explicit `%ex` conversion +# pattern for logging Throwables. If this was omitted, then (by default) Log4J would +# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional +# class packaging information. That extra information can sometimes add a substantial +# performance overhead, so we disable it in our default logging config. +# For more information, see SPARK-39361. +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex + +# Set the default spark-shell/spark-sql log level to WARN. When running the +# spark-shell/spark-sql, the log level for these classes is used to overwrite +# the root logger's log level, so that the user can have different defaults +# for the shell and regular Spark apps. 
+logger.repl.name = org.apache.spark.repl.Main +logger.repl.level = warn + +logger.thriftserver.name = org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver +logger.thriftserver.level = warn + +# Settings to quiet third party logs that are too verbose +logger.jetty1.name = org.sparkproject.jetty +logger.jetty1.level = warn +logger.jetty2.name = org.sparkproject.jetty.util.component.AbstractLifeCycle +logger.jetty2.level = error +logger.replexprTyper.name = org.apache.spark.repl.SparkIMain$exprTyper +logger.replexprTyper.level = info +logger.replSparkILoopInterpreter.name = org.apache.spark.repl.SparkILoop$SparkILoopInterpreter +logger.replSparkILoopInterpreter.level = info +logger.parquet1.name = org.apache.parquet +logger.parquet1.level = error +logger.parquet2.name = parquet +logger.parquet2.level = error + +# SPARK-9183: Settings to avoid annoying messages when looking up nonexistent UDFs in SparkSQL with Hive support +logger.RetryingHMSHandler.name = org.apache.hadoop.hive.metastore.RetryingHMSHandler +logger.RetryingHMSHandler.level = fatal +logger.FunctionRegistry.name = org.apache.hadoop.hive.ql.exec.FunctionRegistry +logger.FunctionRegistry.level = error + +# For deploying Spark ThriftServer +# SPARK-34128: Suppress undesirable TTransportException warnings involved in THRIFT-4805 +appender.console.filter.1.type = RegexFilter +appender.console.filter.1.regex = .*Thrift error occurred during processing of message.* +appender.console.filter.1.onMatch = deny +appender.console.filter.1.onMismatch = neutral diff --git a/conf/log4j2.properties.template b/conf/log4j2.properties.template index ab96e03baed20..8767245314449 100644 --- a/conf/log4j2.properties.template +++ b/conf/log4j2.properties.template @@ -19,17 +19,11 @@ rootLogger.level = info rootLogger.appenderRef.stdout.ref = console -# In the pattern layout configuration below, we specify an explicit `%ex` conversion -# pattern for logging Throwables. If this was omitted, then (by default) Log4J would -# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional -# class packaging information. That extra information can sometimes add a substantial -# performance overhead, so we disable it in our default logging config. -# For more information, see SPARK-39361. appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR -appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex +appender.console.layout.type = JsonTemplateLayout +appender.console.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json # Set the default spark-shell/spark-sql log level to WARN. 
When running the # spark-shell/spark-sql, the log level for these classes is used to overwrite diff --git a/connector/avro/benchmarks/AvroReadBenchmark-jdk21-results.txt b/connector/avro/benchmarks/AvroReadBenchmark-jdk21-results.txt index 360f94dfd1e07..e0d9f9b90121f 100644 --- a/connector/avro/benchmarks/AvroReadBenchmark-jdk21-results.txt +++ b/connector/avro/benchmarks/AvroReadBenchmark-jdk21-results.txt @@ -2,140 +2,140 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1709 1724 21 9.2 108.7 1.0X +Sum 2124 2129 8 7.4 135.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1690 1699 13 9.3 107.4 1.0X +Sum 2124 2129 7 7.4 135.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1673 1678 6 9.4 106.4 1.0X +Sum 2082 2096 19 7.6 132.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1913 1917 6 8.2 121.6 1.0X +Sum 2079 2091 17 7.6 132.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1779 1780 2 8.8 113.1 1.0X +Sum 2070 2078 11 7.6 131.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1810 1823 19 8.7 115.0 1.0X +Sum 2005 2023 25 7.8 127.5 1.0X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 
21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of columns 3263 3268 8 3.2 311.2 1.0X +Sum of columns 3598 3606 12 2.9 343.1 1.0X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column 2226 2231 7 7.1 141.5 1.0X -Partition column 1842 1855 18 8.5 117.1 1.2X -Both columns 2182 2187 7 7.2 138.7 1.0X +Data column 2081 2094 19 7.6 132.3 1.0X +Partition column 1913 1917 5 8.2 121.7 1.1X +Both columns 2141 2171 43 7.3 136.1 1.0X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 2093 2103 15 5.0 199.6 1.0X +Sum of string length 2299 2337 53 4.6 219.3 1.0X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 3044 3076 45 3.4 290.3 1.0X +Sum of string length 3094 3118 33 3.4 295.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 2320 2331 15 4.5 221.2 1.0X +Sum of string length 2162 2213 72 4.9 206.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 1292 1310 24 8.1 123.3 1.0X +Sum of string length 1205 1210 7 8.7 
114.9 1.0X ================================================================================================ Select All From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Wide Column Scan from 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select of all columns 19999 20110 158 0.0 39997.0 1.0X +Select of all columns 17970 18066 135 0.0 35940.5 1.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 2703 2707 5 0.4 2578.2 1.0X +Sum of single column 3222 3242 29 0.3 3072.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 5225 5235 14 0.2 4983.2 1.0X +Sum of single column 6336 6343 9 0.2 6043.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 7775 7778 4 0.1 7414.5 1.0X +Sum of single column 9410 9463 75 0.1 8974.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 5486 5506 19 0.2 5485.8 1.0X -pushdown disabled 5371 5400 25 0.2 5371.5 1.0X -w/ filters 2237 2254 24 0.4 2236.6 2.5X +w/o filters 5535 5544 10 0.2 5535.0 1.0X +pushdown disabled 5450 5479 29 0.2 5450.1 1.0X +w/ filters 2335 2340 9 0.4 2334.5 2.4X diff --git a/connector/avro/benchmarks/AvroReadBenchmark-results.txt b/connector/avro/benchmarks/AvroReadBenchmark-results.txt index 633a0dfcf3bd8..f1065f98b81a2 100644 --- a/connector/avro/benchmarks/AvroReadBenchmark-results.txt +++ b/connector/avro/benchmarks/AvroReadBenchmark-results.txt @@ -2,140 +2,140 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on 
Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1696 1700 6 9.3 107.8 1.0X +Sum 1986 2030 63 7.9 126.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1641 1663 32 9.6 104.3 1.0X +Sum 1983 2021 54 7.9 126.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1588 1588 1 9.9 100.9 1.0X +Sum 1955 1977 30 8.0 124.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1845 1847 4 8.5 117.3 1.0X +Sum 1939 1958 26 8.1 123.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1649 1650 1 9.5 104.8 1.0X +Sum 1961 1963 3 8.0 124.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum 1720 1722 3 9.1 109.4 1.0X +Sum 1944 1946 3 8.1 123.6 1.0X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of columns 3223 3237 19 3.3 307.4 1.0X +Sum of columns 3345 3376 44 3.1 319.0 1.0X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 
64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column 1907 1924 24 8.2 121.2 1.0X -Partition column 1709 1724 21 9.2 108.7 1.1X -Both columns 2040 2046 8 7.7 129.7 0.9X +Data column 2006 2022 22 7.8 127.5 1.0X +Partition column 1761 1765 5 8.9 112.0 1.1X +Both columns 2054 2068 20 7.7 130.6 1.0X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 1993 2012 28 5.3 190.1 1.0X +Sum of string length 2002 2024 31 5.2 191.0 1.0X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 3143 3184 58 3.3 299.7 1.0X +Sum of string length 3103 3141 54 3.4 295.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 2223 2224 1 4.7 212.0 1.0X +Sum of string length 2056 2064 11 5.1 196.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of string length 1216 1221 7 8.6 116.0 1.0X +Sum of string length 1084 1086 3 9.7 103.3 1.0X ================================================================================================ Select All From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Wide Column Scan from 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Select of all columns 19275 19347 102 0.0 38549.7 1.0X +Select of all columns 19331 19457 177 0.0 38662.8 1.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 2529 2532 4 0.4 2412.0 1.0X +Sum of single column 3178 3191 18 0.3 3030.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 4969 4970 2 0.2 4739.1 1.0X +Sum of single column 6288 6406 167 0.2 5996.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Sum of single column 7413 7503 127 0.1 7069.6 1.0X +Sum of single column 9478 9487 12 0.1 9039.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 5399 5410 11 0.2 5399.0 1.0X -pushdown disabled 5302 5320 16 0.2 5301.9 1.0X -w/ filters 2108 2178 96 0.5 2107.7 2.6X +w/o filters 5488 5511 22 0.2 5488.5 1.0X +pushdown disabled 5495 5510 15 0.2 5494.9 1.0X +w/ filters 2218 2232 12 0.5 2218.1 2.5X diff --git a/connector/avro/benchmarks/AvroWriteBenchmark-jdk21-results.txt b/connector/avro/benchmarks/AvroWriteBenchmark-jdk21-results.txt index cdeabe1275140..f49e7db17093e 100644 --- a/connector/avro/benchmarks/AvroWriteBenchmark-jdk21-results.txt +++ b/connector/avro/benchmarks/AvroWriteBenchmark-jdk21-results.txt @@ -1,56 +1,56 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1473 1503 43 10.7 93.6 1.0X -Output Single Double Column 1491 1514 32 10.5 94.8 1.0X -Output Int and String Column 3118 3125 9 5.0 198.3 0.5X -Output Partitions 2901 2953 74 5.4 184.4 0.5X -Output Buckets 3624 3634 14 4.3 230.4 0.4X +Output 
Single Int Column 1589 1616 39 9.9 101.0 1.0X +Output Single Double Column 1522 1536 20 10.3 96.8 1.0X +Output Int and String Column 3264 3266 2 4.8 207.5 0.5X +Output Partitions 3054 3094 57 5.1 194.2 0.5X +Output Buckets 4024 4078 76 3.9 255.9 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro compression with different codec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BZIP2: 114516 115179 937 0.0 1145165.0 1.0X -DEFLATE: 6419 6424 7 0.0 64190.4 17.8X -UNCOMPRESSED: 5241 5276 50 0.0 52408.0 21.9X -SNAPPY: 4659 4674 20 0.0 46592.4 24.6X -XZ: 56023 58323 3252 0.0 560230.7 2.0X -ZSTANDARD: 5025 5075 70 0.0 50251.0 22.8X +BZIP2: 115682 115702 29 0.0 1156821.8 1.0X +DEFLATE: 6294 6309 20 0.0 62944.7 18.4X +UNCOMPRESSED: 5130 5148 26 0.0 51301.8 22.5X +SNAPPY: 4611 4643 45 0.0 46106.1 25.1X +XZ: 64308 64406 139 0.0 643084.7 1.8X +ZSTANDARD: 4651 4687 51 0.0 46509.7 24.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro deflate with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -DEFLATE: deflate.level=1 4886 4908 31 0.0 48858.7 1.0X -DEFLATE: deflate.level=3 4884 4902 26 0.0 48842.0 1.0X -DEFLATE: deflate.level=5 6412 6472 85 0.0 64120.5 0.8X -DEFLATE: deflate.level=7 6498 6515 24 0.0 64982.4 0.8X -DEFLATE: deflate.level=9 6749 6761 17 0.0 67490.9 0.7X +DEFLATE: deflate.level=1 4666 4669 5 0.0 46656.5 1.0X +DEFLATE: deflate.level=3 4646 4648 3 0.0 46463.0 1.0X +DEFLATE: deflate.level=5 6223 6230 11 0.0 62226.1 0.7X +DEFLATE: deflate.level=7 6272 6282 15 0.0 62715.3 0.7X +DEFLATE: deflate.level=9 6628 6635 9 0.0 66283.6 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro xz with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -XZ: xz.level=1 12318 12319 1 0.0 123181.8 1.0X -XZ: xz.level=3 22329 22469 197 0.0 223294.2 0.6X -XZ: xz.level=5 46825 47393 804 0.0 468247.0 0.3X -XZ: xz.level=7 68116 68235 168 0.0 681159.0 0.2X -XZ: xz.level=9 146500 146710 297 0.0 1464999.7 0.1X +XZ: xz.level=1 12512 12545 46 0.0 125121.6 1.0X +XZ: xz.level=3 23744 23832 124 0.0 237441.6 0.5X +XZ: xz.level=5 48209 50241 2874 0.0 482091.5 0.3X +XZ: xz.level=7 69424 69655 327 0.0 694240.2 0.2X +XZ: xz.level=9 142278 142354 108 0.0 1422778.3 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro zstandard with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------- -ZSTANDARD: zstandard.level=1 4767 4801 48 0.0 47673.3 1.0X -ZSTANDARD: zstandard.level=1, zstandard.bufferPool.enabled=true 4825 4855 42 0.0 48252.8 1.0X 
-ZSTANDARD: zstandard.level=3 4872 4919 65 0.0 48724.3 1.0X -ZSTANDARD: zstandard.level=3, zstandard.bufferPool.enabled=true 4763 4825 89 0.0 47628.2 1.0X -ZSTANDARD: zstandard.level=5 5169 5178 13 0.0 51687.2 0.9X -ZSTANDARD: zstandard.level=5, zstandard.bufferPool.enabled=true 5005 5018 18 0.0 50046.5 1.0X -ZSTANDARD: zstandard.level=7 5502 5507 7 0.0 55020.7 0.9X -ZSTANDARD: zstandard.level=7, zstandard.bufferPool.enabled=true 5327 5364 53 0.0 53270.3 0.9X -ZSTANDARD: zstandard.level=9 6089 6093 5 0.0 60890.2 0.8X -ZSTANDARD: zstandard.level=9, zstandard.bufferPool.enabled=true 6136 6174 53 0.0 61365.0 0.8X +ZSTANDARD: zstandard.level=1 4669 4670 2 0.0 46688.6 1.0X +ZSTANDARD: zstandard.level=1, zstandard.bufferPool.enabled=true 4689 4701 16 0.0 46893.1 1.0X +ZSTANDARD: zstandard.level=3 4805 4819 20 0.0 48048.3 1.0X +ZSTANDARD: zstandard.level=3, zstandard.bufferPool.enabled=true 4667 4670 4 0.0 46666.8 1.0X +ZSTANDARD: zstandard.level=5 4985 5014 41 0.0 49852.2 0.9X +ZSTANDARD: zstandard.level=5, zstandard.bufferPool.enabled=true 4950 4991 59 0.0 49499.4 0.9X +ZSTANDARD: zstandard.level=7 5282 5291 13 0.0 52820.2 0.9X +ZSTANDARD: zstandard.level=7, zstandard.bufferPool.enabled=true 5221 5260 55 0.0 52208.0 0.9X +ZSTANDARD: zstandard.level=9 5997 6034 52 0.0 59974.4 0.8X +ZSTANDARD: zstandard.level=9, zstandard.bufferPool.enabled=true 5888 5949 85 0.0 58885.0 0.8X diff --git a/connector/avro/benchmarks/AvroWriteBenchmark-results.txt b/connector/avro/benchmarks/AvroWriteBenchmark-results.txt index c817dc6337b53..658b9ad7851d4 100644 --- a/connector/avro/benchmarks/AvroWriteBenchmark-results.txt +++ b/connector/avro/benchmarks/AvroWriteBenchmark-results.txt @@ -1,56 +1,56 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1440 1454 20 10.9 91.5 1.0X -Output Single Double Column 1631 1646 21 9.6 103.7 0.9X -Output Int and String Column 3281 3290 13 4.8 208.6 0.4X -Output Partitions 2854 2866 18 5.5 181.5 0.5X -Output Buckets 3590 3599 12 4.4 228.3 0.4X +Output Single Int Column 1566 1615 69 10.0 99.6 1.0X +Output Single Double Column 1718 1720 3 9.2 109.2 0.9X +Output Int and String Column 3250 3250 0 4.8 206.6 0.5X +Output Partitions 2869 2870 0 5.5 182.4 0.5X +Output Buckets 3655 3660 7 4.3 232.4 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro compression with different codec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -BZIP2: 132631 133654 1446 0.0 1326312.3 1.0X -DEFLATE: 6609 6653 63 0.0 66086.0 20.1X -UNCOMPRESSED: 5556 5556 1 0.0 55555.7 23.9X -SNAPPY: 4880 4880 0 0.0 48799.4 27.2X -XZ: 55326 55531 290 0.0 553260.0 2.4X -ZSTANDARD: 5044 5079 50 0.0 50437.1 26.3X +BZIP2: 131005 132600 2255 0.0 1310049.5 1.0X +DEFLATE: 6673 6696 34 0.0 66725.6 19.6X +UNCOMPRESSED: 5469 5506 51 0.0 54692.2 24.0X +SNAPPY: 4970 5003 47 0.0 49696.0 26.4X +XZ: 55374 55620 347 0.0 553743.6 2.4X +ZSTANDARD: 4998 5044 64 0.0 49984.1 26.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro deflate with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -DEFLATE: deflate.level=1 5017 5074 81 0.0 50166.3 1.0X -DEFLATE: deflate.level=3 5002 5011 13 0.0 50021.7 1.0X -DEFLATE: deflate.level=5 6549 6579 43 0.0 65487.6 0.8X -DEFLATE: deflate.level=7 6699 6725 37 0.0 66987.3 0.7X -DEFLATE: deflate.level=9 6939 6948 13 0.0 69392.2 0.7X +DEFLATE: deflate.level=1 4996 5017 30 0.0 49961.8 1.0X +DEFLATE: deflate.level=3 5013 5026 18 0.0 50129.7 1.0X +DEFLATE: deflate.level=5 6557 6574 23 0.0 65574.0 0.8X +DEFLATE: deflate.level=7 6593 6624 44 0.0 65929.1 0.8X +DEFLATE: deflate.level=9 6973 6983 14 0.0 69725.4 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro xz with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -XZ: xz.level=1 12308 12335 39 0.0 123075.6 1.0X -XZ: xz.level=3 22817 22991 247 0.0 228166.4 0.5X -XZ: xz.level=5 48180 48301 172 0.0 481795.0 0.3X -XZ: xz.level=7 70991 72142 1628 0.0 709910.8 0.2X -XZ: xz.level=9 159363 160356 1405 0.0 1593630.7 0.1X +XZ: xz.level=1 12335 12414 113 0.0 123346.1 1.0X +XZ: xz.level=3 22830 22901 101 0.0 228298.8 0.5X +XZ: xz.level=5 47861 48099 336 0.0 478610.6 0.3X +XZ: xz.level=7 71299 71967 944 0.0 712993.0 0.2X +XZ: xz.level=9 159311 159585 388 0.0 1593106.7 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Avro zstandard with different levels: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------- -ZSTANDARD: zstandard.level=1 4915 4931 23 0.0 49148.9 1.0X -ZSTANDARD: zstandard.level=1, zstandard.bufferPool.enabled=true 4893 4925 46 0.0 48931.9 1.0X -ZSTANDARD: zstandard.level=3 5049 5066 24 0.0 50491.9 1.0X -ZSTANDARD: zstandard.level=3, zstandard.bufferPool.enabled=true 4962 4969 10 0.0 49621.0 1.0X -ZSTANDARD: zstandard.level=5 5295 5373 110 0.0 52946.1 0.9X -ZSTANDARD: zstandard.level=5, zstandard.bufferPool.enabled=true 5237 5238 1 0.0 52368.1 0.9X -ZSTANDARD: zstandard.level=7 5750 5789 54 0.0 57503.5 0.9X -ZSTANDARD: zstandard.level=7, zstandard.bufferPool.enabled=true 5658 5711 75 0.0 56577.4 0.9X -ZSTANDARD: zstandard.level=9 6719 6733 20 0.0 67194.8 0.7X -ZSTANDARD: zstandard.level=9, zstandard.bufferPool.enabled=true 6694 6711 25 0.0 66935.4 0.7X +ZSTANDARD: zstandard.level=1 4917 4951 48 0.0 49169.8 1.0X +ZSTANDARD: zstandard.level=1, zstandard.bufferPool.enabled=true 4885 4904 26 0.0 48848.3 1.0X +ZSTANDARD: zstandard.level=3 5045 5051 9 0.0 50448.8 1.0X +ZSTANDARD: zstandard.level=3, zstandard.bufferPool.enabled=true 4926 4931 7 0.0 49258.9 1.0X +ZSTANDARD: zstandard.level=5 5366 5437 101 0.0 53656.6 0.9X +ZSTANDARD: zstandard.level=5, zstandard.bufferPool.enabled=true 5261 5305 62 0.0 52610.6 0.9X +ZSTANDARD: zstandard.level=7 5673 5680 9 0.0 56731.6 0.9X +ZSTANDARD: zstandard.level=7, 
zstandard.bufferPool.enabled=true 5592 5615 33 0.0 55917.0 0.9X +ZSTANDARD: zstandard.level=9 6662 6663 2 0.0 66620.2 0.7X +ZSTANDARD: zstandard.level=9, zstandard.bufferPool.enabled=true 6759 6760 1 0.0 67591.8 0.7X diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 4bedd625e6091..7cbc30f1fb3dc 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -32,7 +32,7 @@ import org.apache.hadoop.mapreduce.Job import org.apache.spark.{SparkException, SparkIllegalArgumentException} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CODEC_LEVEL, CODEC_NAME, CONFIG, PATH} +import org.apache.spark.internal.LogKeys.{CODEC_LEVEL, CODEC_NAME, CONFIG, PATH} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.avro.AvroCompressionCodec._ import org.apache.spark.sql.avro.AvroOptions.IGNORE_EXTENSION diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index 387526d40f68f..b2285aa966ddb 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -208,14 +208,12 @@ object SchemaConverters { // could be "a" and "A" and we need to distinguish them. In this case, we throw // an exception. // Stable id prefix can be empty so the name of the field can be just the type. - val tempFieldName = - s"${stableIdPrefixForUnionType}${s.getName.toLowerCase(Locale.ROOT)}" - if (fieldNameSet.contains(tempFieldName)) { + val tempFieldName = s"${stableIdPrefixForUnionType}${s.getName}" + if (!fieldNameSet.add(tempFieldName.toLowerCase(Locale.ROOT))) { throw new IncompatibleSchemaException( - "Cannot generate stable indentifier for Avro union type due to name " + + "Cannot generate stable identifier for Avro union type due to name " + s"conflict of type name ${s.getName}") } - fieldNameSet.add(tempFieldName) tempFieldName } else { s"member$i" diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala index d16ddb4973205..c807685db0f0c 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala @@ -26,7 +26,7 @@ import org.apache.avro.generic.{GenericDatumWriter, GenericRecord, GenericRecord import org.apache.avro.io.EncoderFactory import org.apache.spark.SparkException -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.execution.LocalTableScanExec import org.apache.spark.sql.functions.{col, lit, struct} import org.apache.spark.sql.internal.SQLConf @@ -286,4 +286,85 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession { assert(msg.contains("Invalid default for field id: null not a \"long\"")) } } + + test("SPARK-48545: from_avro and to_avro SQL functions") { + withTable("t") { + sql( + """ + |create table t as + | select named_struct('u', named_struct('member0', member0, 'member1', member1)) as s + | from values (1, null), (null, 'a') tab(member0, member1) + |""".stripMargin) + val 
jsonFormatSchema = + """ + |{ + | "type": "record", + | "name": "struct", + | "fields": [{ + | "name": "u", + | "type": ["int","string"] + | }] + |} + |""".stripMargin + val toAvroSql = + s""" + |select to_avro(s, '$jsonFormatSchema') as result from t + |""".stripMargin + val avroResult = spark.sql(toAvroSql).collect() + assert(avroResult != null) + checkAnswer( + spark.sql(s"select from_avro(result, '$jsonFormatSchema', map()).u from ($toAvroSql)"), + Seq(Row(Row(1, null)), + Row(Row(null, "a")))) + + // Negative tests. + checkError( + exception = intercept[AnalysisException](sql( + s""" + |select to_avro(s, 42) as result from t + |""".stripMargin)), + errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + parameters = Map("sqlExpr" -> "\"toavro(s, 42)\"", + "msg" -> ("The second argument of the TO_AVRO SQL function must be a constant string " + + "containing the JSON representation of the schema to use for converting the value to " + + "AVRO format"), + "hint" -> ""), + queryContext = Array(ExpectedContext( + fragment = "to_avro(s, 42)", + start = 8, + stop = 21))) + checkError( + exception = intercept[AnalysisException](sql( + s""" + |select from_avro(s, 42, '') as result from t + |""".stripMargin)), + errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + parameters = Map("sqlExpr" -> "\"fromavro(s, 42, )\"", + "msg" -> ("The second argument of the FROM_AVRO SQL function must be a constant string " + + "containing the JSON representation of the schema to use for converting the value " + + "from AVRO format"), + "hint" -> ""), + queryContext = Array(ExpectedContext( + fragment = "from_avro(s, 42, '')", + start = 8, + stop = 27))) + checkError( + exception = intercept[AnalysisException](sql( + s""" + |select from_avro(s, '$jsonFormatSchema', 42) as result from t + |""".stripMargin)), + errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + parameters = Map( + "sqlExpr" -> + s"\"fromavro(s, $jsonFormatSchema, 42)\"".stripMargin, + "msg" -> ("The third argument of the FROM_AVRO SQL function must be a constant map of " + + "strings to strings containing the options to use for converting the value " + + "from AVRO format"), + "hint" -> ""), + queryContext = Array(ExpectedContext( + fragment = s"from_avro(s, '$jsonFormatSchema', 42)", + start = 8, + stop = 138))) + } + } } diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index 67a09812b3f2e..42c13f5e20873 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -377,7 +377,7 @@ abstract class AvroSuite "", Seq()) } - assert(e.getMessage.contains("Cannot generate stable indentifier")) + assert(e.getMessage.contains("Cannot generate stable identifier")) } { val e = intercept[Exception] { @@ -388,7 +388,7 @@ abstract class AvroSuite "", Seq()) } - assert(e.getMessage.contains("Cannot generate stable indentifier")) + assert(e.getMessage.contains("Cannot generate stable identifier")) } // Two array types or two map types are not allowed in union. 
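(Aside on the SPARK-48545 test above: a minimal sketch of how the new to_avro/from_avro SQL functions could be driven from Scala. It assumes an active SparkSession named `spark` and a table `t` with a struct column `s` like the one the test creates; the schema JSON mirrors the test, and the view name `encoded` is illustrative rather than part of the patch.)

  // Hypothetical driver for the SQL functions exercised by the test; not part of the patch.
  val jsonFormatSchema =
    """{"type":"record","name":"struct","fields":[{"name":"u","type":["int","string"]}]}"""
  // Encode the struct column to Avro binary with to_avro, then decode it back with from_avro;
  // the third argument of from_avro is an options map and may be left empty.
  spark.sql(s"select to_avro(s, '$jsonFormatSchema') as result from t")
    .createOrReplaceTempView("encoded")
  spark.sql(s"select from_avro(result, '$jsonFormatSchema', map()).u from encoded").show()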
{ @@ -441,6 +441,33 @@ abstract class AvroSuite } } + test("SPARK-47904: Test that field name case is preserved") { + checkUnionStableId( + List( + Schema.createEnum("myENUM", "", null, List[String]("E1", "e2").asJava), + Schema.createRecord("myRecord", "", null, false, + List[Schema.Field](new Schema.Field("f", Schema.createFixed("myField", "", null, 6))) + .asJava), + Schema.createRecord("myRecord2", "", null, false, + List[Schema.Field](new Schema.Field("F", Schema.create(Type.FLOAT))) + .asJava)), + "struct, " + + "member_myRecord2: struct>", + Seq()) + + { + val e = intercept[Exception] { + checkUnionStableId( + List( + Schema.createRecord("myRecord", "", null, false, List[Schema.Field]().asJava), + Schema.createRecord("myrecord", "", null, false, List[Schema.Field]().asJava)), + "", + Seq()) + } + assert(e.getMessage.contains("Cannot generate stable identifier")) + } + } + test("SPARK-46930: Use custom prefix for stable ids when converting Union type") { // Test default "member_" prefix. checkUnionStableId( @@ -1624,7 +1651,7 @@ abstract class AvroSuite errorClass = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", parameters = Map( "columnName" -> "`testType()`", - "columnType" -> "\"INTERVAL\"", + "columnType" -> "UDT(\"INTERVAL\")", "format" -> "Avro") ) } diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala b/connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala index 80f0d6bc7b6eb..7b16a75d62164 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala @@ -66,7 +66,7 @@ object AvroReadBenchmark extends SqlBasedBenchmark { import spark.implicits._ spark.range(values).map(_ => Random.nextLong()).createOrReplaceTempView("t1") - prepareTable(dir, spark.sql(s"SELECT CAST(value as ${dataType.sql}) id FROM t1")) + prepareTable(dir, spark.sql(s"SELECT mod(value, 255) id FROM t1")) benchmark.addCase("Sum") { _ => spark.sql("SELECT sum(id) FROM avroTable").noop() @@ -87,7 +87,7 @@ object AvroReadBenchmark extends SqlBasedBenchmark { prepareTable( dir, - spark.sql("SELECT CAST(value AS INT) AS c1, CAST(value as STRING) AS c2 FROM t1")) + spark.sql(s"SELECT value % ${Int.MaxValue} AS c1, CAST(value as STRING) AS c2 FROM t1")) benchmark.addCase("Sum of columns") { _ => spark.sql("SELECT sum(c1), sum(length(c2)) FROM avroTable").noop() @@ -106,7 +106,8 @@ object AvroReadBenchmark extends SqlBasedBenchmark { import spark.implicits._ spark.range(values).map(_ => Random.nextLong()).createOrReplaceTempView("t1") - prepareTable(dir, spark.sql("SELECT value % 2 AS p, value AS id FROM t1"), Some("p")) + prepareTable(dir, + spark.sql(s"SELECT value % 2 AS p, value % ${Int.MaxValue} AS id FROM t1"), Some("p")) benchmark.addCase("Data column") { _ => spark.sql("SELECT sum(id) FROM avroTable").noop() @@ -176,7 +177,7 @@ object AvroReadBenchmark extends SqlBasedBenchmark { withTempTable("t1", "avroTable") { import spark.implicits._ val middle = width / 2 - val selectExpr = (1 to width).map(i => s"value as c$i") + val selectExpr = (1 to width).map(i => s"value % ${Int.MaxValue} as c$i") spark.range(values).map(_ => Random.nextLong()).toDF() .selectExpr(selectExpr: _*).createOrReplaceTempView("t1") @@ -198,7 +199,6 @@ object AvroReadBenchmark extends SqlBasedBenchmark { withTempPath { dir => withTempTable("t1", "avroTable") { import spark.implicits._ - val middle = width / 
2 val selectExpr = (1 to width).map(i => s"value as c$i") spark.range(values).map(_ => Random.nextLong()).toDF() .selectExpr(selectExpr: _*) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Column.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Column.scala index c23d49440248c..3562675898224 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Column.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Column.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import scala.jdk.CollectionConverters._ -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.connect.proto import org.apache.spark.connect.proto.Expression.SortOrder.NullOrdering import org.apache.spark.connect.proto.Expression.SortOrder.SortDirection @@ -52,7 +52,7 @@ import org.apache.spark.util.ArrayImplicits._ * * @since 3.4.0 */ -class Column private[sql] (@DeveloperApi val expr: proto.Expression) extends Logging { +class Column(@DeveloperApi val expr: proto.Expression) extends Logging { private[sql] def this(name: String, planId: Option[Long]) = this(Column.nameToExpression(name, planId)) @@ -1323,13 +1323,15 @@ class Column private[sql] (@DeveloperApi val expr: proto.Expression) extends Log def over(): Column = over(Window.spec) } -private[sql] object Column { +object Column { - def apply(name: String): Column = new Column(name) + private[sql] def apply(name: String): Column = new Column(name) - def apply(name: String, planId: Option[Long]): Column = new Column(name, planId) + private[sql] def apply(name: String, planId: Option[Long]): Column = new Column(name, planId) - def nameToExpression(name: String, planId: Option[Long] = None): proto.Expression = { + private[sql] def nameToExpression( + name: String, + planId: Option[Long] = None): proto.Expression = { val builder = proto.Expression.newBuilder() name match { case "*" => @@ -1344,23 +1346,14 @@ private[sql] object Column { builder.build() } - private[sql] def apply(f: proto.Expression.Builder => Unit): Column = { + @Since("4.0.0") + @DeveloperApi + def apply(f: proto.Expression.Builder => Unit): Column = { val builder = proto.Expression.newBuilder() f(builder) new Column(builder.build()) } - @DeveloperApi - @deprecated("Use forExtension(Array[Byte]) instead", "4.0.0") - def apply(extension: com.google.protobuf.Any): Column = { - apply(_.setExtension(extension)) - } - - @DeveloperApi - def forExtension(extension: Array[Byte]): Column = { - apply(_.setExtension(com.google.protobuf.Any.parseFrom(extension))) - } - private[sql] def fn(name: String, inputs: Column*): Column = { fn(name, isDistinct = false, inputs: _*) } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala index 9a42afebf8f2b..e831c264e632a 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders._ import org.apache.spark.sql.catalyst.expressions.OrderUtils import org.apache.spark.sql.connect.client.SparkResult import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, StorageLevelProtoConverter, UdfUtils} -import org.apache.spark.sql.expressions.ScalarUserDefinedFunction +import 
org.apache.spark.sql.expressions.ScalaUserDefinedFunction import org.apache.spark.sql.functions.{struct, to_json} import org.apache.spark.sql.streaming.DataStreamWriter import org.apache.spark.sql.types.{Metadata, StructType} @@ -1387,7 +1387,7 @@ class Dataset[T] private[sql] ( * @since 3.5.0 */ def reduce(func: (T, T) => T): T = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( function = func, inputEncoders = agnosticEncoder :: agnosticEncoder :: Nil, outputEncoder = agnosticEncoder) @@ -2705,7 +2705,7 @@ class Dataset[T] private[sql] ( * @since 3.5.0 */ def filter(func: T => Boolean): Dataset[T] = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( function = func, inputEncoders = agnosticEncoder :: Nil, outputEncoder = PrimitiveBooleanEncoder) @@ -2758,7 +2758,7 @@ class Dataset[T] private[sql] ( */ def mapPartitions[U: Encoder](func: Iterator[T] => Iterator[U]): Dataset[U] = { val outputEncoder = encoderFor[U] - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( function = func, inputEncoders = agnosticEncoder :: Nil, outputEncoder = outputEncoder) @@ -2830,7 +2830,7 @@ class Dataset[T] private[sql] ( */ @deprecated("use flatMap() or select() with functions.explode() instead", "3.5.0") def explode[A <: Product: TypeTag](input: Column*)(f: Row => IterableOnce[A]): DataFrame = { - val generator = ScalarUserDefinedFunction( + val generator = ScalaUserDefinedFunction( UdfUtils.iterableOnceToSeq(f), UnboundRowEncoder :: Nil, ScalaReflection.encoderFor[Seq[A]]) @@ -2862,7 +2862,7 @@ class Dataset[T] private[sql] ( @deprecated("use flatMap() or select() with functions.explode() instead", "3.5.0") def explode[A, B: TypeTag](inputColumn: String, outputColumn: String)( f: A => IterableOnce[B]): DataFrame = { - val generator = ScalarUserDefinedFunction( + val generator = ScalaUserDefinedFunction( UdfUtils.iterableOnceToSeq(f), Nil, ScalaReflection.encoderFor[Seq[B]]) @@ -3337,24 +3337,170 @@ class Dataset[T] private[sql] ( } } + /** + * Define (named) metrics to observe on the Dataset. This method returns an 'observed' Dataset + * that returns the same result as the input, with the following guarantees:

+ *   • It will compute the defined aggregates (metrics) on all the data that is flowing through the Dataset at that point.
+ *   • It will report the value of the defined aggregate columns as soon as we reach a completion point. A completion point is currently defined as the end of a query.
Please note that continuous execution is currently not supported. + * + * The metrics columns must either contain a literal (e.g. lit(42)), or should contain one or + * more aggregate functions (e.g. sum(a) or sum(a + b) + avg(c) - lit(1)). Expressions that + * contain references to the input Dataset's columns must always be wrapped in an aggregate + * function. + * + * A user can retrieve the metrics by calling + * `org.apache.spark.sql.Dataset.collectResult().getObservedMetrics`. + * + * {{{ + * // Observe row count (rows) and highest id (maxid) in the Dataset while writing it + * val observed_ds = ds.observe("my_metrics", count(lit(1)).as("rows"), max($"id").as("maxid")) + * observed_ds.write.parquet("ds.parquet") + * val metrics = observed_ds.collectResult().getObservedMetrics + * }}} + * + * @group typedrel + * @since 4.0.0 + */ + @scala.annotation.varargs def observe(name: String, expr: Column, exprs: Column*): Dataset[T] = { - throw new UnsupportedOperationException("observe is not implemented.") + sparkSession.newDataset(agnosticEncoder) { builder => + builder.getCollectMetricsBuilder + .setInput(plan.getRoot) + .setName(name) + .addAllMetrics((expr +: exprs).map(_.expr).asJava) + } } - def checkpoint(): Dataset[T] = { - throw new UnsupportedOperationException("checkpoint is not implemented.") + /** + * Observe (named) metrics through an `org.apache.spark.sql.Observation` instance. This is + * equivalent to calling `observe(String, Column, Column*)` but does not require to collect all + * results before returning the metrics - the metrics are filled during iterating the results, + * as soon as they are available. This method does not support streaming datasets. + * + * A user can retrieve the metrics by accessing `org.apache.spark.sql.Observation.get`. + * + * {{{ + * // Observe row count (rows) and highest id (maxid) in the Dataset while writing it + * val observation = Observation("my_metrics") + * val observed_ds = ds.observe(observation, count(lit(1)).as("rows"), max($"id").as("maxid")) + * observed_ds.write.parquet("ds.parquet") + * val metrics = observation.get + * }}} + * + * @throws IllegalArgumentException + * If this is a streaming Dataset (this.isStreaming == true) + * + * @group typedrel + * @since 4.0.0 + */ + @scala.annotation.varargs + def observe(observation: Observation, expr: Column, exprs: Column*): Dataset[T] = { + val df = observe(observation.name, expr, exprs: _*) + sparkSession.registerObservation(df.getPlanId.get, observation) + df } - def checkpoint(eager: Boolean): Dataset[T] = { - throw new UnsupportedOperationException("checkpoint is not implemented.") - } + /** + * Eagerly checkpoint a Dataset and return the new Dataset. Checkpointing can be used to + * truncate the logical plan of this Dataset, which is especially useful in iterative algorithms + * where the plan may grow exponentially. It will be saved to files inside the checkpoint + * directory set with `SparkContext#setCheckpointDir`. + * + * @group basic + * @since 4.0.0 + */ + def checkpoint(): Dataset[T] = checkpoint(eager = true, reliableCheckpoint = true) - def localCheckpoint(): Dataset[T] = { - throw new UnsupportedOperationException("localCheckpoint is not implemented.") - } + /** + * Returns a checkpointed version of this Dataset. Checkpointing can be used to truncate the + * logical plan of this Dataset, which is especially useful in iterative algorithms where the + * plan may grow exponentially. 
It will be saved to files inside the checkpoint directory set + * with `SparkContext#setCheckpointDir`. + * + * @param eager + * Whether to checkpoint this dataframe immediately + * + * @note + * When checkpoint is used with eager = false, the final data that is checkpointed after the + * first action may be different from the data that was used during the job due to + * non-determinism of the underlying operation and retries. If checkpoint is used to achieve + * saving a deterministic snapshot of the data, eager = true should be used. Otherwise, it is + * only deterministic after the first execution, after the checkpoint was finalized. + * + * @group basic + * @since 4.0.0 + */ + def checkpoint(eager: Boolean): Dataset[T] = + checkpoint(eager = eager, reliableCheckpoint = true) + + /** + * Eagerly locally checkpoints a Dataset and return the new Dataset. Checkpointing can be used + * to truncate the logical plan of this Dataset, which is especially useful in iterative + * algorithms where the plan may grow exponentially. Local checkpoints are written to executor + * storage and despite potentially faster they are unreliable and may compromise job completion. + * + * @group basic + * @since 4.0.0 + */ + def localCheckpoint(): Dataset[T] = checkpoint(eager = true, reliableCheckpoint = false) - def localCheckpoint(eager: Boolean): Dataset[T] = { - throw new UnsupportedOperationException("localCheckpoint is not implemented.") + /** + * Locally checkpoints a Dataset and return the new Dataset. Checkpointing can be used to + * truncate the logical plan of this Dataset, which is especially useful in iterative algorithms + * where the plan may grow exponentially. Local checkpoints are written to executor storage and + * despite potentially faster they are unreliable and may compromise job completion. + * + * @param eager + * Whether to checkpoint this dataframe immediately + * + * @note + * When checkpoint is used with eager = false, the final data that is checkpointed after the + * first action may be different from the data that was used during the job due to + * non-determinism of the underlying operation and retries. If checkpoint is used to achieve + * saving a deterministic snapshot of the data, eager = true should be used. Otherwise, it is + * only deterministic after the first execution, after the checkpoint was finalized. + * + * @group basic + * @since 4.0.0 + */ + def localCheckpoint(eager: Boolean): Dataset[T] = + checkpoint(eager = eager, reliableCheckpoint = false) + + /** + * Returns a checkpointed version of this Dataset. + * + * @param eager + * Whether to checkpoint this dataframe immediately + * @param reliableCheckpoint + * Whether to create a reliable checkpoint saved to files inside the checkpoint directory. 
If + * false creates a local checkpoint using the caching subsystem + */ + private def checkpoint(eager: Boolean, reliableCheckpoint: Boolean): Dataset[T] = { + sparkSession.newDataset(agnosticEncoder) { builder => + val command = sparkSession.newCommand { builder => + builder.getCheckpointCommandBuilder + .setLocal(!reliableCheckpoint) + .setEager(eager) + .setRelation(this.plan.getRoot) + } + val responseIter = sparkSession.execute(command) + try { + val response = responseIter + .find(_.hasCheckpointCommandResult) + .getOrElse(throw new RuntimeException("CheckpointCommandResult must be present")) + + val cachedRemoteRelation = response.getCheckpointCommandResult.getRelation + sparkSession.cleaner.register(cachedRemoteRelation) + + // Update the builder with the values from the result. + builder.setCachedRemoteRelation(cachedRemoteRelation) + } finally { + // consume the rest of the iterator + responseIter.foreach(_ => ()) + } + } } /** diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index e38adb9b0b27e..953cf23afc330 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -27,7 +27,7 @@ import org.apache.spark.connect.proto import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.ProductEncoder import org.apache.spark.sql.connect.common.UdfUtils -import org.apache.spark.sql.expressions.ScalarUserDefinedFunction +import org.apache.spark.sql.expressions.ScalaUserDefinedFunction import org.apache.spark.sql.functions.col import org.apache.spark.sql.streaming.{GroupState, GroupStateTimeout, OutputMode, StatefulProcessor, StatefulProcessorWithInitialState, TimeMode} @@ -1031,7 +1031,7 @@ private class KeyValueGroupedDatasetImpl[K, V, IK, IV]( override def reduceGroups(f: (V, V) => V): Dataset[(K, V)] = { val inputEncoders = Seq(vEncoder, vEncoder) - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( function = f, inputEncoders = inputEncoders, outputEncoder = vEncoder) @@ -1091,7 +1091,7 @@ private class KeyValueGroupedDatasetImpl[K, V, IK, IV]( private def getUdf[U: Encoder](nf: AnyRef, outputEncoder: AgnosticEncoder[U])( inEncoders: AgnosticEncoder[_]*): proto.CommonInlineUserDefinedFunction = { val inputEncoders = kEncoder +: inEncoders // Apply keyAs changes by setting kEncoder - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( function = nf, inputEncoders = inputEncoders, outputEncoder = outputEncoder) @@ -1110,7 +1110,7 @@ private object KeyValueGroupedDatasetImpl { ds: Dataset[V], kEncoder: AgnosticEncoder[K], groupingFunc: V => K): KeyValueGroupedDatasetImpl[K, V, K, V] = { - val gf = ScalarUserDefinedFunction( + val gf = ScalaUserDefinedFunction( function = groupingFunc, inputEncoders = ds.agnosticEncoder :: Nil, // Using the original value and key encoders outputEncoder = kEncoder) @@ -1132,7 +1132,7 @@ private object KeyValueGroupedDatasetImpl { vEncoder: AgnosticEncoder[V], groupingExprs: Seq[Column]): KeyValueGroupedDatasetImpl[K, V, K, V] = { // Use a dummy udf to pass the K V encoders - val dummyGroupingFunc = ScalarUserDefinedFunction( + val dummyGroupingFunc = ScalaUserDefinedFunction( function = UdfUtils.noOp[V, K](), inputEncoders = vEncoder :: Nil, 
outputEncoder = kEncoder).apply(col("*")) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Observation.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Observation.scala new file mode 100644 index 0000000000000..75629b6000f91 --- /dev/null +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Observation.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.util.UUID + +class Observation(name: String) extends ObservationBase(name) { + + /** + * Create an Observation instance without providing a name. This generates a random name. + */ + def this() = this(UUID.randomUUID().toString) +} + +/** + * (Scala-specific) Create instances of Observation via Scala `apply`. + * @since 4.0.0 + */ +object Observation { + + /** + * Observation constructor for creating an anonymous observation. + */ + def apply(): Observation = new Observation() + + /** + * Observation constructor for creating a named observation. 
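(Aside on the new connect-side Observation class: a minimal usage sketch pairing it with Dataset.observe, following the scaladoc example earlier in this patch. `ds` stands for any Dataset, the metric names and output path are illustrative, and `spark.implicits._` is assumed in scope for the `$"id"` syntax.)

  import org.apache.spark.sql.functions.{count, lit, max}

  val observation = Observation("my_metrics")
  // Attach named metrics; they are filled in while the write below consumes the result.
  val observed = ds.observe(observation, count(lit(1)).as("rows"), max($"id").as("maxid"))
  observed.write.parquet("/tmp/ds.parquet")
  // Blocks until the metrics for this execution are available.
  val metrics = observation.get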
+ */ + def apply(name: String): Observation = new Observation(name) + +} diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index 6c626fd716d5b..7799d395d5c6a 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -149,6 +149,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") val newIntSeqEncoder: Encoder[Seq[Int]] = newSeqEncoder(PrimitiveIntEncoder) /** @@ -156,6 +157,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") val newLongSeqEncoder: Encoder[Seq[Long]] = newSeqEncoder(PrimitiveLongEncoder) /** @@ -163,6 +165,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") val newDoubleSeqEncoder: Encoder[Seq[Double]] = newSeqEncoder(PrimitiveDoubleEncoder) /** @@ -170,6 +173,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") val newFloatSeqEncoder: Encoder[Seq[Float]] = newSeqEncoder(PrimitiveFloatEncoder) /** @@ -177,6 +181,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") val newByteSeqEncoder: Encoder[Seq[Byte]] = newSeqEncoder(PrimitiveByteEncoder) /** @@ -184,6 +189,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") val newShortSeqEncoder: Encoder[Seq[Short]] = newSeqEncoder(PrimitiveShortEncoder) /** @@ -191,6 +197,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") val newBooleanSeqEncoder: Encoder[Seq[Boolean]] = newSeqEncoder(PrimitiveBooleanEncoder) /** @@ -198,6 +205,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") val newStringSeqEncoder: Encoder[Seq[String]] = newSeqEncoder(StringEncoder) /** @@ -205,6 +213,7 @@ abstract class SQLImplicits private[sql] (session: SparkSession) extends LowPrio * @deprecated * use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newProductSeqEncoder[A <: Product: TypeTag]: Encoder[Seq[A]] = newSeqEncoder(ScalaReflection.encoderFor[A]) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index 5a2d9bc44c9f7..80336fb1eaea4 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -18,6 +18,7 @@ package 
org.apache.spark.sql import java.io.Closeable import java.net.URI +import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.TimeUnit._ import java.util.concurrent.atomic.{AtomicLong, AtomicReference} @@ -28,7 +29,7 @@ import com.google.common.cache.{CacheBuilder, CacheLoader} import io.grpc.ClientInterceptor import org.apache.arrow.memory.RootAllocator -import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} import org.apache.spark.connect.proto import org.apache.spark.connect.proto.ExecutePlanResponse import org.apache.spark.internal.Logging @@ -36,11 +37,11 @@ import org.apache.spark.sql.catalog.Catalog import org.apache.spark.sql.catalyst.{JavaTypeInference, ScalaReflection} import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, RowEncoder} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{BoxedLongEncoder, UnboundRowEncoder} -import org.apache.spark.sql.connect.client.{ClassFinder, SparkConnectClient, SparkResult} +import org.apache.spark.sql.connect.client.{ClassFinder, CloseableIterator, SparkConnectClient, SparkResult} import org.apache.spark.sql.connect.client.SparkConnectClient.Configuration import org.apache.spark.sql.connect.client.arrow.ArrowSerializer import org.apache.spark.sql.functions.lit -import org.apache.spark.sql.internal.{CatalogImpl, SqlApiConf} +import org.apache.spark.sql.internal.{CatalogImpl, SessionCleaner, SqlApiConf} import org.apache.spark.sql.streaming.DataStreamReader import org.apache.spark.sql.streaming.StreamingQueryManager import org.apache.spark.sql.types.StructType @@ -72,6 +73,7 @@ class SparkSession private[sql] ( with Logging { private[this] val allocator = new RootAllocator() + private[sql] lazy val cleaner = new SessionCleaner(this) // a unique session ID for this session from client. private[sql] def sessionId: String = client.sessionId @@ -80,6 +82,8 @@ class SparkSession private[sql] ( client.analyze(proto.AnalyzePlanRequest.AnalyzeCase.SPARK_VERSION).getSparkVersion.getVersion } + private[sql] val observationRegistry = new ConcurrentHashMap[Long, Observation]() + /** * Runtime configuration interface for Spark. 
* @@ -482,11 +486,15 @@ class SparkSession private[sql] ( } } - private[sql] def newDataFrame(f: proto.Relation.Builder => Unit): DataFrame = { + @Since("4.0.0") + @DeveloperApi + def newDataFrame(f: proto.Relation.Builder => Unit): DataFrame = { newDataset(UnboundRowEncoder)(f) } - private[sql] def newDataset[T](encoder: AgnosticEncoder[T])( + @Since("4.0.0") + @DeveloperApi + def newDataset[T](encoder: AgnosticEncoder[T])( f: proto.Relation.Builder => Unit): Dataset[T] = { val builder = proto.Relation.newBuilder() f(builder) @@ -495,30 +503,6 @@ class SparkSession private[sql] ( new Dataset[T](this, plan, encoder) } - @DeveloperApi - @deprecated("Use newDataFrame(Array[Byte]) instead", "4.0.0") - def newDataFrame(extension: com.google.protobuf.Any): DataFrame = { - newDataFrame(_.setExtension(extension)) - } - - @DeveloperApi - @deprecated("Use newDataFrame(Array[Byte], AgnosticEncoder[T]) instead", "4.0.0") - def newDataset[T]( - extension: com.google.protobuf.Any, - encoder: AgnosticEncoder[T]): Dataset[T] = { - newDataset(encoder)(_.setExtension(extension)) - } - - @DeveloperApi - def newDataFrame(extension: Array[Byte]): DataFrame = { - newDataFrame(_.setExtension(com.google.protobuf.Any.parseFrom(extension))) - } - - @DeveloperApi - def newDataset[T](extension: Array[Byte], encoder: AgnosticEncoder[T]): Dataset[T] = { - newDataset(encoder)(_.setExtension(com.google.protobuf.Any.parseFrom(extension))) - } - private[sql] def newCommand[T](f: proto.Command.Builder => Unit): proto.Command = { val builder = proto.Command.newBuilder() f(builder) @@ -552,8 +536,12 @@ class SparkSession private[sql] ( private[sql] def execute[T](plan: proto.Plan, encoder: AgnosticEncoder[T]): SparkResult[T] = { val value = client.execute(plan) - val result = new SparkResult(value, allocator, encoder, timeZoneId) - result + new SparkResult( + value, + allocator, + encoder, + timeZoneId, + Some(setMetricsAndUnregisterObservation)) } private[sql] def execute(f: proto.Relation.Builder => Unit): Unit = { @@ -565,34 +553,23 @@ class SparkSession private[sql] ( client.execute(plan).foreach(_ => ()) } - private[sql] def execute(command: proto.Command): Seq[ExecutePlanResponse] = { + @Since("4.0.0") + @DeveloperApi + def execute(command: proto.Command): Seq[ExecutePlanResponse] = { val plan = proto.Plan.newBuilder().setCommand(command).build() // .toSeq forces that the iterator is consumed and closed. On top, ignore all // progress messages. client.execute(plan).filter(!_.hasExecutionProgress).toSeq } + private[sql] def execute(plan: proto.Plan): CloseableIterator[ExecutePlanResponse] = + client.execute(plan) + private[sql] def registerUdf(udf: proto.CommonInlineUserDefinedFunction): Unit = { val command = proto.Command.newBuilder().setRegisterFunction(udf).build() execute(command) } - @DeveloperApi - @deprecated("Use execute(Array[Byte]) instead", "4.0.0") - def execute(extension: com.google.protobuf.Any): Unit = { - val command = proto.Command.newBuilder().setExtension(extension).build() - execute(command) - } - - @DeveloperApi - def execute(extension: Array[Byte]): Unit = { - val command = proto.Command - .newBuilder() - .setExtension(com.google.protobuf.Any.parseFrom(extension)) - .build() - execute(command) - } - /** * Add a single artifact to the client session. 
* @@ -813,6 +790,21 @@ class SparkSession private[sql] ( * Set to false to prevent client.releaseSession on close() (testing only) */ private[sql] var releaseSessionOnClose = true + + private[sql] def registerObservation(planId: Long, observation: Observation): Unit = { + if (observationRegistry.putIfAbsent(planId, observation) != null) { + throw new IllegalArgumentException("An Observation can be used with a Dataset only once") + } + } + + private[sql] def setMetricsAndUnregisterObservation( + planId: Long, + metrics: Map[String, Any]): Unit = { + val observationOrNull = observationRegistry.remove(planId) + if (observationOrNull != null) { + observationOrNull.setMetricsAndNotify(Some(metrics)) + } + } } // The minimal builder needed to create a spark session. @@ -837,10 +829,16 @@ object SparkSession extends Logging { /** * Set the (global) default [[SparkSession]], and (thread-local) active [[SparkSession]] when - * they are not set yet. + * they are not set yet or the associated [[SparkConnectClient]] is unusable. */ private def setDefaultAndActiveSession(session: SparkSession): Unit = { - defaultSession.compareAndSet(null, session) + val currentDefault = defaultSession.getAcquire + if (currentDefault == null || !currentDefault.client.isSessionValid) { + // Update `defaultSession` if it is null or the contained session is not valid. There is a + // chance that the following `compareAndSet` fails if a new default session has just been set, + // but that does not matter since that event has happened after this method was invoked. + defaultSession.compareAndSet(currentDefault, session) + } if (getActiveSession.isEmpty) { setActiveSession(session) } @@ -980,7 +978,7 @@ object SparkSession extends Logging { def appName(name: String): Builder = this private def tryCreateSessionFromClient(): Option[SparkSession] = { - if (client != null) { + if (client != null && client.isSessionValid) { Option(new SparkSession(client, planIdGenerator)) } else { None @@ -1032,7 +1030,16 @@ object SparkSession extends Logging { */ def getOrCreate(): SparkSession = { val session = tryCreateSessionFromClient() - .getOrElse(sessions.get(builder.configuration)) + .getOrElse({ + var existingSession = sessions.get(builder.configuration) + if (!existingSession.client.isSessionValid) { + // If the cached session has become invalid, e.g., due to a server restart, the cache + // entry is invalidated. + sessions.invalidate(builder.configuration) + existingSession = sessions.get(builder.configuration) + } + existingSession + }) setDefaultAndActiveSession(session) applyOptions(session) session @@ -1040,11 +1047,13 @@ object SparkSession extends Logging { } /** - * Returns the default SparkSession. + * Returns the default SparkSession. If the previously set default SparkSession becomes + * unusable, returns None. * * @since 3.5.0 */ - def getDefaultSession: Option[SparkSession] = Option(defaultSession.get()) + def getDefaultSession: Option[SparkSession] = + Option(defaultSession.get()).filter(_.client.isSessionValid) /** * Sets the default SparkSession. @@ -1065,11 +1074,13 @@ object SparkSession extends Logging { } /** - * Returns the active SparkSession for the current thread. + * Returns the active SparkSession for the current thread. If the previously set active + * SparkSession becomes unusable, returns None. 
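Together these validity checks keep a stale client, e.g. one whose session died with a server restart, from being returned by the builder cache or the default/active accessors. A rough illustration of the intended behaviour; the connection string and the restart are hypothetical:

{{{
val first = SparkSession.builder().remote("sc://localhost").getOrCreate()
// ... the server restarts, so the client session behind `first` becomes invalid ...

// getOrCreate() notices the invalid cached session, evicts the cache entry and builds a
// fresh one; getDefaultSession/getActiveSession likewise stop returning the stale session.
val second = SparkSession.builder().remote("sc://localhost").getOrCreate()
assert(second ne first)
assert(SparkSession.getDefaultSession.forall(_ ne first))
}}}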
* * @since 3.5.0 */ - def getActiveSession: Option[SparkSession] = Option(activeThreadSession.get()) + def getActiveSession: Option[SparkSession] = + Option(activeThreadSession.get()).filter(_.client.isSessionValid) /** * Changes the SparkSession that will be returned in this thread and its children when diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index 2e8211a0966e7..5965a2b7a61de 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -22,7 +22,7 @@ import scala.reflect.runtime.universe.{typeTag, TypeTag} import org.apache.spark.internal.Logging import org.apache.spark.sql.api.java._ import org.apache.spark.sql.connect.common.UdfUtils -import org.apache.spark.sql.expressions.{ScalarUserDefinedFunction, UserDefinedFunction} +import org.apache.spark.sql.expressions.{ScalaUserDefinedFunction, UserDefinedFunction} import org.apache.spark.sql.types.DataType /** @@ -62,7 +62,7 @@ class UDFRegistration(session: SparkSession) extends Logging { */ def register(name: String, udf: UserDefinedFunction): UserDefinedFunction = { udf.withName(name) match { - case scalarUdf: ScalarUserDefinedFunction => + case scalarUdf: ScalaUserDefinedFunction => session.registerUdf(scalarUdf.toProto) scalarUdf case other => @@ -97,7 +97,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register[RT: TypeTag](name: String, func: () => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction(func, typeTag[RT]) + val udf = ScalaUserDefinedFunction(func, typeTag[RT]) register(name, udf) } @@ -108,7 +108,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register[RT: TypeTag, A1: TypeTag](name: String, func: (A1) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction(func, typeTag[RT], typeTag[A1]) + val udf = ScalaUserDefinedFunction(func, typeTag[RT], typeTag[A1]) register(name, udf) } @@ -121,7 +121,7 @@ class UDFRegistration(session: SparkSession) extends Logging { def register[RT: TypeTag, A1: TypeTag, A2: TypeTag]( name: String, func: (A1, A2) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction(func, typeTag[RT], typeTag[A1], typeTag[A2]) + val udf = ScalaUserDefinedFunction(func, typeTag[RT], typeTag[A1], typeTag[A2]) register(name, udf) } @@ -134,7 +134,7 @@ class UDFRegistration(session: SparkSession) extends Logging { def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( name: String, func: (A1, A2, A3) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction(func, typeTag[RT], typeTag[A1], typeTag[A2], typeTag[A3]) + val udf = ScalaUserDefinedFunction(func, typeTag[RT], typeTag[A1], typeTag[A2], typeTag[A3]) register(name, udf) } @@ -147,7 +147,7 @@ class UDFRegistration(session: SparkSession) extends Logging { def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( name: String, func: (A1, A2, A3, A4) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -166,7 +166,7 @@ class UDFRegistration(session: SparkSession) extends Logging { def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag]( name: String, func: (A1, A2, A3, A4, A5) => RT): 
UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -191,7 +191,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A4: TypeTag, A5: TypeTag, A6: TypeTag](name: String, func: (A1, A2, A3, A4, A5, A6) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -220,7 +220,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A7: TypeTag]( name: String, func: (A1, A2, A3, A4, A5, A6, A7) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -251,7 +251,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A8: TypeTag]( name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -284,7 +284,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A9: TypeTag]( name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -319,7 +319,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A10: TypeTag]( name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -356,7 +356,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A11: TypeTag]( name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -395,7 +395,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A12: TypeTag]( name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -437,7 +437,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13) => RT) : UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -481,7 +481,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14) => RT) : UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -527,7 +527,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15) => RT) : UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -575,7 +575,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16) => RT) : UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -625,7 +625,7 @@ class UDFRegistration(session: SparkSession) extends Logging { 
name: String, func: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17) => RT) : UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -694,7 +694,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A16, A17, A18) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -766,7 +766,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A17, A18, A19) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -841,7 +841,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A18, A19, A20) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -919,7 +919,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A19, A20, A21) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -1000,7 +1000,7 @@ class UDFRegistration(session: SparkSession) extends Logging { A20, A21, A22) => RT): UserDefinedFunction = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( func, typeTag[RT], typeTag[A1], @@ -1037,7 +1037,7 @@ class UDFRegistration(session: SparkSession) extends Logging { // | * @since $version // | */ // |def register(name: String, f: UDF$i[$extTypeArgs], returnType: DataType): Unit = { - // | val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + // | val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) // | register(name, udf) // |}""".stripMargin) // } @@ -1047,7 +1047,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF0[_], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1056,7 +1056,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF1[_, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1065,7 +1065,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF2[_, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1074,7 +1074,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF3[_, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1083,7 +1083,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1092,7 +1092,7 @@ class 
UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1101,7 +1101,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1110,7 +1110,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1119,7 +1119,7 @@ class UDFRegistration(session: SparkSession) extends Logging { * @since 3.5.0 */ def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1131,7 +1131,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1143,7 +1143,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1155,7 +1155,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1167,7 +1167,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1179,7 +1179,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1191,7 +1191,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1203,7 +1203,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: 
DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1215,7 +1215,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1227,7 +1227,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1239,7 +1239,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1251,7 +1251,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1263,7 +1263,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1275,7 +1275,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } @@ -1287,7 +1287,7 @@ class UDFRegistration(session: SparkSession) extends Logging { name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { - val udf = ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + val udf = ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) register(name, udf) } // scalastyle:on line.size.limit diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/application/ConnectRepl.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/application/ConnectRepl.scala index 0360a40578869..9fd3ae4368f4c 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/application/ConnectRepl.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/application/ConnectRepl.scala @@ -26,6 +26,7 @@ import ammonite.compiler.iface.CodeWrapper import ammonite.util.{Bind, Imports, Name, Util} import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connect.client.{SparkConnectClient, SparkConnectClientParser} @@ -55,6 +56,10 @@ object ConnectRepl { inputStream: 
InputStream = System.in, outputStream: OutputStream = System.out, errorStream: OutputStream = System.err): Unit = { + // For interpreters, structured logging is disabled by default to avoid generating mixed + // plain text and structured logs on the same console. + Logging.disableStructuredLogging() + // Build the client. val client = try { diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala new file mode 100644 index 0000000000000..91c8fb57c31bf --- /dev/null +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.expressions + +import org.apache.spark.sql.{Encoder, TypedColumn} + +/** + * A base class for user-defined aggregations, which can be used in `Dataset` operations to take + * all of the elements of a group and reduce them to a single value. + * + * For example, the following aggregator extracts an `int` from a specific class and adds them up: + * {{{ + * case class Data(i: Int) + * + * val customSummer = new Aggregator[Data, Int, Int] { + * def zero: Int = 0 + * def reduce(b: Int, a: Data): Int = b + a.i + * def merge(b1: Int, b2: Int): Int = b1 + b2 + * def finish(r: Int): Int = r + * def bufferEncoder: Encoder[Int] = Encoders.scalaInt + * def outputEncoder: Encoder[Int] = Encoders.scalaInt + * } + * + * spark.udf.register("customSummer", udaf(customSummer)) + * val ds: Dataset[Data] = ... + * val aggregated = ds.selectExpr("customSummer(i)") + * }}} + * + * Based loosely on Aggregator from Algebird: https://github.com/twitter/algebird + * + * @tparam IN + * The input type for the aggregation. + * @tparam BUF + * The type of the intermediate value of the reduction. + * @tparam OUT + * The type of the final output result. + * @since 4.0.0 + */ +@SerialVersionUID(2093413866369130093L) +abstract class Aggregator[-IN, BUF, OUT] extends Serializable { + + /** + * A zero value for this aggregation. Should satisfy the property that any b + zero = b. + * @since 4.0.0 + */ + def zero: BUF + + /** + * Combine two values to produce a new value. For performance, the function may modify `b` and + * return it instead of constructing new object for b. + * @since 4.0.0 + */ + def reduce(b: BUF, a: IN): BUF + + /** + * Merge two intermediate values. + * @since 4.0.0 + */ + def merge(b1: BUF, b2: BUF): BUF + + /** + * Transform the output of the reduction. + * @since 4.0.0 + */ + def finish(reduction: BUF): OUT + + /** + * Specifies the `Encoder` for the intermediate value type. 
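The contract reads as a distributed fold: each partition is folded with `reduce` starting from `zero`, partial buffers are combined with `merge`, and `finish` maps the final buffer to the output. A purely local sketch, reusing `Data` and `customSummer` from the example above:

{{{
// Simulate two partitions being aggregated and then merged.
val partitions = Seq(Seq(Data(1), Data(2)), Seq(Data(3), Data(4)))
val buffers    = partitions.map(_.foldLeft(customSummer.zero)(customSummer.reduce))
val result     = customSummer.finish(buffers.reduce(customSummer.merge))   // 10
}}}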
+ * @since 4.0.0 + */ + def bufferEncoder: Encoder[BUF] + + /** + * Specifies the `Encoder` for the final output value type. + * @since 4.0.0 + */ + def outputEncoder: Encoder[OUT] + + /** + * Returns this `Aggregator` as a `TypedColumn` that can be used in `Dataset`. operations. + */ + def toColumn: TypedColumn[IN, OUT] = { + throw new UnsupportedOperationException("toColumn is not implemented.") + } +} diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala index c4431e9a87f12..f4499858306a1 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/expressions/UserDefinedFunction.scala @@ -94,16 +94,17 @@ sealed abstract class UserDefinedFunction { } /** - * Holder class for a scalar user-defined function and it's input/output encoder(s). + * Holder class for a scala user-defined function and it's input/output encoder(s). */ -case class ScalarUserDefinedFunction private[sql] ( +case class ScalaUserDefinedFunction private[sql] ( // SPARK-43198: Eagerly serialize to prevent the UDF from containing a reference to this class. serializedUdfPacket: Array[Byte], inputTypes: Seq[proto.DataType], outputType: proto.DataType, name: Option[String], override val nullable: Boolean, - override val deterministic: Boolean) + override val deterministic: Boolean, + aggregate: Boolean) extends UserDefinedFunction { private[this] lazy val udf = { @@ -114,6 +115,7 @@ case class ScalarUserDefinedFunction private[sql] ( .addAllInputTypes(inputTypes.asJava) .setOutputType(outputType) .setNullable(nullable) + .setAggregate(aggregate) scalaUdfBuilder.build() } @@ -129,11 +131,11 @@ case class ScalarUserDefinedFunction private[sql] ( name.foreach(udfBuilder.setFunctionName) } - override def withName(name: String): ScalarUserDefinedFunction = copy(name = Option(name)) + override def withName(name: String): ScalaUserDefinedFunction = copy(name = Option(name)) - override def asNonNullable(): ScalarUserDefinedFunction = copy(nullable = false) + override def asNonNullable(): ScalaUserDefinedFunction = copy(nullable = false) - override def asNondeterministic(): ScalarUserDefinedFunction = copy(deterministic = false) + override def asNondeterministic(): ScalaUserDefinedFunction = copy(deterministic = false) def toProto: proto.CommonInlineUserDefinedFunction = { val builder = proto.CommonInlineUserDefinedFunction.newBuilder() @@ -146,7 +148,7 @@ case class ScalarUserDefinedFunction private[sql] ( } } -object ScalarUserDefinedFunction { +object ScalaUserDefinedFunction { private val LAMBDA_DESERIALIZATION_ERR_MSG: String = "cannot assign instance of java.lang.invoke.SerializedLambda to field" @@ -169,9 +171,9 @@ object ScalarUserDefinedFunction { private[sql] def apply( function: AnyRef, returnType: TypeTag[_], - parameterTypes: TypeTag[_]*): ScalarUserDefinedFunction = { + parameterTypes: TypeTag[_]*): ScalaUserDefinedFunction = { - ScalarUserDefinedFunction( + ScalaUserDefinedFunction( function = function, // Input can be a row because the input data schema can be found from the plan. 
inputEncoders = @@ -183,22 +185,24 @@ object ScalarUserDefinedFunction { private[sql] def apply( function: AnyRef, inputEncoders: Seq[AgnosticEncoder[_]], - outputEncoder: AgnosticEncoder[_]): ScalarUserDefinedFunction = { + outputEncoder: AgnosticEncoder[_], + aggregate: Boolean = false): ScalaUserDefinedFunction = { SparkConnectClosureCleaner.clean(function) val udfPacketBytes = SparkSerDeUtils.serialize(UdfPacket(function, inputEncoders, outputEncoder)) checkDeserializable(udfPacketBytes) - ScalarUserDefinedFunction( + ScalaUserDefinedFunction( serializedUdfPacket = udfPacketBytes, inputTypes = inputEncoders.map(_.dataType).map(DataTypeProtoConverter.toConnectProtoType), outputType = DataTypeProtoConverter.toConnectProtoType(outputEncoder.dataType), name = None, nullable = true, - deterministic = true) + deterministic = true, + aggregate = aggregate) } - private[sql] def apply(function: AnyRef, returnType: DataType): ScalarUserDefinedFunction = { - ScalarUserDefinedFunction( + private[sql] def apply(function: AnyRef, returnType: DataType): ScalaUserDefinedFunction = { + ScalaUserDefinedFunction( function = function, inputEncoders = Seq.empty[AgnosticEncoder[_]], outputEncoder = RowEncoder.encoderForDataType(returnType, lenient = false)) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index f267baf9854e9..eae239a25589c 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -23,11 +23,12 @@ import scala.reflect.runtime.universe.{typeTag, TypeTag} import org.apache.spark.connect.proto import org.apache.spark.sql.api.java._ +import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.PrimitiveLongEncoder import org.apache.spark.sql.connect.common.LiteralValueProtoConverter._ import org.apache.spark.sql.connect.common.UdfUtils import org.apache.spark.sql.errors.DataTypeErrors -import org.apache.spark.sql.expressions.{ScalarUserDefinedFunction, UserDefinedFunction} +import org.apache.spark.sql.expressions.{Aggregator, ScalaUserDefinedFunction, UserDefinedFunction} import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.types.DataType.parseTypeWithFallback import org.apache.spark.util.SparkClassUtils @@ -75,6 +76,7 @@ import org.apache.spark.util.SparkClassUtils * @groupname struct_funcs Struct functions * @groupname csv_funcs CSV functions * @groupname json_funcs JSON functions + * @groupname variant_funcs VARIANT functions * @groupname xml_funcs XML functions * @groupname url_funcs URL functions * @groupname partition_transforms Partition transform functions @@ -1931,6 +1933,14 @@ object functions { */ def try_divide(left: Column, right: Column): Column = Column.fn("try_divide", left, right) + /** + * Returns the remainder of `dividend``/``divisor`. Its result is always null if `divisor` is 0. + * + * @group math_funcs + * @since 4.0.0 + */ + def try_remainder(left: Column, right: Column): Column = Column.fn("try_remainder", left, right) + /** * Returns `left``*``right` and the result is null on overflow. The acceptable input types are * the same with the `*` operator. 
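A short usage sketch of the new null-safe remainder next to the existing `try_divide`; the column names are placeholders:

{{{
import org.apache.spark.sql.functions.{col, try_divide, try_remainder}

// Both return NULL instead of failing when the divisor is 0.
df.select(
  try_divide(col("amount"), col("parts")).as("per_part"),
  try_remainder(col("amount"), col("parts")).as("left_over"))
}}}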
@@ -4197,6 +4207,20 @@ object functions { */ def split(str: Column, pattern: String): Column = Column.fn("split", str, lit(pattern)) + /** + * Splits str around matches of the given pattern. + * + * @param str + * a string expression to split + * @param pattern + * a column of string representing a regular expression. The regex string should be a Java + * regular expression. + * + * @group string_funcs + * @since 4.0.0 + */ + def split(str: Column, pattern: Column): Column = Column.fn("split", str, pattern) + /** * Splits str around matches of the given pattern. * @@ -4218,6 +4242,27 @@ object functions { def split(str: Column, pattern: String, limit: Int): Column = Column.fn("split", str, lit(pattern), lit(limit)) + /** + * Splits str around matches of the given pattern. + * + * @param str + * a string expression to split + * @param pattern + * a column of string representing a regular expression. The regex string should be a Java + * regular expression. + * @param limit + * a column of integer expression which controls the number of times the regex is applied. + *
  • limit greater than 0: The resulting array's length will not be more than limit, + * and the resulting array's last entry will contain all input beyond the last matched + * regex.
  • limit less than or equal to 0: `regex` will be applied as many times as + * possible, and the resulting array can be of any size.
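Because both `pattern` and `limit` may now be column expressions, the regex and the split count can vary per row. A small sketch; column names are assumed:

{{{
import org.apache.spark.sql.functions.{col, lit, split}

df.select(
  split(col("line"), col("sep_regex")).as("parts"),          // per-row regex
  split(col("line"), lit(","), lit(2)).as("head_and_rest"))  // Column-typed limit
}}}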
+ * + * @group string_funcs + * @since 4.0.0 + */ + def split(str: Column, pattern: Column, limit: Column): Column = + Column.fn("split", str, pattern, limit) + /** * Substring starts at `pos` and is of length `len` when str is String type or returns the slice * of byte array that starts at `pos` in byte and is of length `len` when str is Binary type @@ -4231,6 +4276,19 @@ object functions { def substring(str: Column, pos: Int, len: Int): Column = Column.fn("substring", str, lit(pos), lit(len)) + /** + * Substring starts at `pos` and is of length `len` when str is String type or returns the slice + * of byte array that starts at `pos` in byte and is of length `len` when str is Binary type + * + * @note + * The position is not zero based, but 1 based index. + * + * @group string_funcs + * @since 4.0.0 + */ + def substring(str: Column, pos: Column, len: Column): Column = + Column.fn("substring", str, pos, len) + /** * Returns the substring from string str before count occurrences of the delimiter delim. If * count is positive, everything the left of the final delimiter (counting from left) is @@ -5909,6 +5967,25 @@ object functions { */ def timestamp_micros(e: Column): Column = Column.fn("timestamp_micros", e) + /** + * Gets the difference between the timestamps in the specified units by truncating the fraction + * part. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def timestamp_diff(unit: String, start: Column, end: Column): Column = + Column.fn("timestampdiff", lit(unit), start, end) + + /** + * Adds the specified number of units to the given timestamp. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def timestamp_add(unit: String, quantity: Column, ts: Column): Column = + Column.fn("timestampadd", lit(unit), quantity, ts) + /** * Parses the `timestamp` expression with the `format` expression to a timestamp without time * zone. Returns null with invalid input. @@ -6965,16 +7042,92 @@ object functions { } /** - * Parses a JSON string and constructs a Variant value. + * Parses a JSON string and constructs a Variant value. Returns null if the input string is not + * a valid JSON value. * * @param json * a string column that contains JSON data. * - * @group json_funcs + * @group variant_funcs + * @since 4.0.0 + */ + def try_parse_json(json: Column): Column = Column.fn("try_parse_json", json) + + /** + * Parses a JSON string and constructs a Variant value. + * + * @param json + * a string column that contains JSON data. + * @group variant_funcs * @since 4.0.0 */ def parse_json(json: Column): Column = Column.fn("parse_json", json) + /** + * Check if a variant value is a variant null. Returns true if and only if the input is a + * variant null and false otherwise (including in the case of SQL NULL). + * + * @param v + * a variant column. + * @group variant_funcs + * @since 4.0.0 + */ + def is_variant_null(v: Column): Column = Column.fn("is_variant_null", v) + + /** + * Extracts a sub-variant from `v` according to `path`, and then cast the sub-variant to + * `targetType`. Returns null if the path does not exist. Throws an exception if the cast fails. + * + * @param v + * a variant column. + * @param path + * the extraction path. A valid path should start with `$` and is followed by zero or more + * segments like `[123]`, `.name`, `['name']`, or `["name"]`. + * @param targetType + * the target data type to cast into, in a DDL-formatted string. 
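Taken together, the new VARIANT helpers cover parsing, extraction and schema inspection. A short sketch; the `payload` column and the paths are placeholders:

{{{
import org.apache.spark.sql.functions._

val parsed = df.select(try_parse_json(col("payload")).as("v"))   // NULL for invalid JSON
parsed.select(
  variant_get(col("v"), "$.user.id", "bigint").as("user_id"),
  try_variant_get(col("v"), "$.user.name", "string").as("user_name"),  // NULL if the cast fails
  is_variant_null(col("v")).as("is_variant_null"),
  schema_of_variant(col("v")).as("schema"))
}}}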
+ * @group variant_funcs + * @since 4.0.0 + */ + def variant_get(v: Column, path: String, targetType: String): Column = + Column.fn("variant_get", v, lit(path), lit(targetType)) + + /** + * Extracts a sub-variant from `v` according to `path`, and then cast the sub-variant to + * `targetType`. Returns null if the path does not exist or the cast fails.. + * + * @param v + * a variant column. + * @param path + * the extraction path. A valid path should start with `$` and is followed by zero or more + * segments like `[123]`, `.name`, `['name']`, or `["name"]`. + * @param targetType + * the target data type to cast into, in a DDL-formatted string. + * @group variant_funcs + * @since 4.0.0 + */ + def try_variant_get(v: Column, path: String, targetType: String): Column = + Column.fn("try_variant_get", v, lit(path), lit(targetType)) + + /** + * Returns schema in the SQL format of a variant. + * + * @param v + * a variant column. + * @group variant_funcs + * @since 4.0.0 + */ + def schema_of_variant(v: Column): Column = Column.fn("schema_of_variant", v) + + /** + * Returns the merged schema in the SQL format of a variant column. + * + * @param v + * a variant column. + * @group variant_funcs + * @since 4.0.0 + */ + def schema_of_variant_agg(v: Column): Column = Column.fn("schema_of_variant_agg", v) + /** * Parses a JSON string and infers its schema in DDL format. * @@ -7078,9 +7231,9 @@ object functions { /** * Returns length of array or map. * - * The function returns null for null input if spark.sql.legacy.sizeOfNull is set to false or - * spark.sql.ansi.enabled is set to true. Otherwise, the function returns -1 for null input. - * With the default settings, the function returns -1 for null input. + * This function returns -1 for null input only if spark.sql.ansi.enabled is false and + * spark.sql.legacy.sizeOfNull is true. Otherwise, it returns null for null input. With the + * default settings, the function returns null for null input. * * @group collection_funcs * @since 3.4.0 @@ -7556,9 +7709,9 @@ object functions { /** * Returns length of array or map. This is an alias of `size` function. * - * The function returns null for null input if spark.sql.legacy.sizeOfNull is set to false or - * spark.sql.ansi.enabled is set to true. Otherwise, the function returns -1 for null input. - * With the default settings, the function returns -1 for null input. + * This function returns -1 for null input only if spark.sql.ansi.enabled is false and + * spark.sql.legacy.sizeOfNull is true. Otherwise, it returns null for null input. With the + * default settings, the function returns null for null input. * * @group collection_funcs * @since 3.5.0 @@ -7974,6 +8127,87 @@ object functions { // scalastyle:off line.size.limit + /** + * Obtains a `UserDefinedFunction` that wraps the given `Aggregator` so that it may be used with + * untyped Data Frames. + * {{{ + * val agg = // Aggregator[IN, BUF, OUT] + * + * // declare a UDF based on agg + * val aggUDF = udaf(agg) + * val aggData = df.agg(aggUDF($"colname")) + * + * // register agg as a named function + * spark.udf.register("myAggName", udaf(agg)) + * }}} + * + * @tparam IN + * the aggregator input type + * @tparam BUF + * the aggregating buffer type + * @tparam OUT + * the finalized output type + * + * @param agg + * the typed Aggregator + * + * @return + * a UserDefinedFunction that can be used as an aggregating expression. + * + * @group udf_funcs + * @note + * The input encoder is inferred from the input type IN. 
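End to end, an `Aggregator` flows through `udaf` into both untyped DataFrame aggregation and SQL registration. A sketch reusing `customSummer` from the `Aggregator` scaladoc; `df` and the table name are placeholders:

{{{
import org.apache.spark.sql.functions.{col, udaf}

val summer = udaf(customSummer)            // input encoder inferred from IN = Data
df.agg(summer(col("i")))                   // untyped aggregation

spark.udf.register("customSummer", summer) // also callable from SQL
spark.sql("SELECT customSummer(i) FROM data_table")
}}}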
+ * @since 4.0.0 + */ + def udaf[IN: TypeTag, BUF, OUT](agg: Aggregator[IN, BUF, OUT]): UserDefinedFunction = { + udaf(agg, ScalaReflection.encoderFor[IN]) + } + + /** + * Obtains a `UserDefinedFunction` that wraps the given `Aggregator` so that it may be used with + * untyped Data Frames. + * {{{ + * Aggregator agg = // custom Aggregator + * Encoder enc = // input encoder + * + * // declare a UDF based on agg + * UserDefinedFunction aggUDF = udaf(agg, enc) + * DataFrame aggData = df.agg(aggUDF($"colname")) + * + * // register agg as a named function + * spark.udf.register("myAggName", udaf(agg, enc)) + * }}} + * + * @tparam IN + * the aggregator input type + * @tparam BUF + * the aggregating buffer type + * @tparam OUT + * the finalized output type + * + * @param agg + * the typed Aggregator + * @param inputEncoder + * a specific input encoder to use + * + * @return + * a UserDefinedFunction that can be used as an aggregating expression + * + * @group udf_funcs + * @note + * This overloading takes an explicit input encoder, to support UDAF declarations in Java. + * @since 4.0.0 + */ + def udaf[IN, BUF, OUT]( + agg: Aggregator[IN, BUF, OUT], + inputEncoder: Encoder[IN]): UserDefinedFunction = { + ScalaUserDefinedFunction( + agg, + Seq(encoderFor(inputEncoder)), + encoderFor(agg.outputEncoder), + aggregate = true) + } + /** * Defines a Scala closure of 0 arguments as user-defined function (UDF). The data types are * automatically inferred based on the Scala closure's signature. By default the returned UDF is @@ -7984,7 +8218,7 @@ object functions { * @since 3.4.0 */ def udf[RT: TypeTag](f: () => RT): UserDefinedFunction = { - ScalarUserDefinedFunction(f, typeTag[RT]) + ScalaUserDefinedFunction(f, typeTag[RT]) } /** @@ -7997,7 +8231,7 @@ object functions { * @since 3.4.0 */ def udf[RT: TypeTag, A1: TypeTag](f: A1 => RT): UserDefinedFunction = { - ScalarUserDefinedFunction(f, typeTag[RT], typeTag[A1]) + ScalaUserDefinedFunction(f, typeTag[RT], typeTag[A1]) } /** @@ -8010,7 +8244,7 @@ object functions { * @since 3.4.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag](f: (A1, A2) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction(f, typeTag[RT], typeTag[A1], typeTag[A2]) + ScalaUserDefinedFunction(f, typeTag[RT], typeTag[A1], typeTag[A2]) } /** @@ -8024,7 +8258,7 @@ object functions { */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag]( f: (A1, A2, A3) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction(f, typeTag[RT], typeTag[A1], typeTag[A2], typeTag[A3]) + ScalaUserDefinedFunction(f, typeTag[RT], typeTag[A1], typeTag[A2], typeTag[A3]) } /** @@ -8038,7 +8272,7 @@ object functions { */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag]( f: (A1, A2, A3, A4) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction(f, typeTag[RT], typeTag[A1], typeTag[A2], typeTag[A3], typeTag[A4]) + ScalaUserDefinedFunction(f, typeTag[RT], typeTag[A1], typeTag[A2], typeTag[A3], typeTag[A4]) } /** @@ -8052,7 +8286,7 @@ object functions { */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag]( f: (A1, A2, A3, A4, A5) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction( + ScalaUserDefinedFunction( f, typeTag[RT], typeTag[A1], @@ -8079,7 +8313,7 @@ object functions { A4: TypeTag, A5: TypeTag, A6: TypeTag](f: (A1, A2, A3, A4, A5, A6) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction( + ScalaUserDefinedFunction( f, typeTag[RT], typeTag[A1], @@ -8108,7 +8342,7 @@ object functions { A5: TypeTag, A6: TypeTag, A7: 
TypeTag](f: (A1, A2, A3, A4, A5, A6, A7) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction( + ScalaUserDefinedFunction( f, typeTag[RT], typeTag[A1], @@ -8139,7 +8373,7 @@ object functions { A6: TypeTag, A7: TypeTag, A8: TypeTag](f: (A1, A2, A3, A4, A5, A6, A7, A8) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction( + ScalaUserDefinedFunction( f, typeTag[RT], typeTag[A1], @@ -8172,7 +8406,7 @@ object functions { A7: TypeTag, A8: TypeTag, A9: TypeTag](f: (A1, A2, A3, A4, A5, A6, A7, A8, A9) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction( + ScalaUserDefinedFunction( f, typeTag[RT], typeTag[A1], @@ -8207,7 +8441,7 @@ object functions { A8: TypeTag, A9: TypeTag, A10: TypeTag](f: (A1, A2, A3, A4, A5, A6, A7, A8, A9, A10) => RT): UserDefinedFunction = { - ScalarUserDefinedFunction( + ScalaUserDefinedFunction( f, typeTag[RT], typeTag[A1], @@ -8236,7 +8470,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF0[_], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8249,7 +8483,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF1[_, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8262,7 +8496,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF2[_, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8275,7 +8509,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF3[_, _, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8288,7 +8522,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF4[_, _, _, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8301,7 +8535,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF5[_, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8314,7 +8548,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF6[_, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8327,7 +8561,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8340,7 +8574,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8353,7 +8587,7 @@ object functions { * @since 3.5.0 */ def udf(f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } /** @@ -8368,7 +8602,7 @@ object functions { def udf( f: UDF10[_, _, _, _, _, 
_, _, _, _, _, _], returnType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(UdfUtils.wrap(f), returnType) + ScalaUserDefinedFunction(UdfUtils.wrap(f), returnType) } // scalastyle:off line.size.limit @@ -8398,7 +8632,7 @@ object functions { "Please use Scala `udf` method without return type parameter.", "3.0.0") def udf(f: AnyRef, dataType: DataType): UserDefinedFunction = { - ScalarUserDefinedFunction(f, dataType) + ScalaUserDefinedFunction(f, dataType) } /** diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/SessionCleaner.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/SessionCleaner.scala new file mode 100644 index 0000000000000..21e4f4d141a89 --- /dev/null +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/internal/SessionCleaner.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.internal + +import java.lang.ref.Cleaner + +import org.apache.spark.connect.proto +import org.apache.spark.internal.Logging +import org.apache.spark.sql.SparkSession + +private[sql] class SessionCleaner(session: SparkSession) extends Logging { + private val cleaner = Cleaner.create() + + /** Register a CachedRemoteRelation for cleanup when it is garbage collected. 
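`SessionCleaner` builds on `java.lang.ref.Cleaner`: the registered action runs after the tracked object becomes unreachable, and it must not capture that object (here only the relation id is captured). A generic sketch of the pattern, independent of Spark:

{{{
import java.lang.ref.Cleaner

val cleaner = Cleaner.create()

final class CachedHandle(val id: String)
var handle = new CachedHandle("df-123")

// Capture only the id; capturing `handle` itself would keep it reachable forever.
val id = handle.id
cleaner.register(handle, () => println(s"releasing server-side cache for $id"))

handle = null   // once unreachable and collected, the action runs on the cleaner thread
}}}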
*/ + def register(relation: proto.CachedRemoteRelation): Unit = { + val dfID = relation.getRelationId + cleaner.register(relation, () => doCleanupCachedRemoteRelation(dfID)) + } + + private[sql] def doCleanupCachedRemoteRelation(dfID: String): Unit = { + try { + if (!session.client.channel.isShutdown) { + session.execute { + session.newCommand { builder => + builder.getRemoveCachedRemoteRelationCommandBuilder + .setRelation(proto.CachedRemoteRelation.newBuilder().setRelationId(dfID).build()) + } + } + } + } catch { + case e: Throwable => logError("Error in cleaning thread", e) + } + } +} diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala index f05d29c6f1ab4..fe68f3cb0b572 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.execution.streaming.AvailableNowTrigger import org.apache.spark.sql.execution.streaming.ContinuousTrigger import org.apache.spark.sql.execution.streaming.OneTimeTrigger import org.apache.spark.sql.execution.streaming.ProcessingTimeTrigger +import org.apache.spark.sql.streaming.StreamingQueryListener.QueryStartedEvent import org.apache.spark.sql.types.NullType import org.apache.spark.util.SparkSerDeUtils @@ -297,6 +298,11 @@ final class DataStreamWriter[T] private[sql] (ds: Dataset[T]) extends Logging { .build() val resp = ds.sparkSession.execute(startCmd).head + if (resp.getWriteStreamOperationStartResult.hasQueryStartedEventJson) { + val event = QueryStartedEvent.fromJson( + resp.getWriteStreamOperationStartResult.getQueryStartedEventJson) + ds.sparkSession.streams.streamingQueryListenerBus.postToAll(event) + } RemoteStreamingQuery.fromStartCommandResponse(ds.sparkSession, resp) } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala index 404bd1b078ba4..fcb4bdcb327bc 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala @@ -19,8 +19,9 @@ package org.apache.spark.sql.streaming import java.util.UUID -import org.json4s.{JObject, JString} -import org.json4s.JsonAST.JValue +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} +import org.json4s.{JObject, JString, JValue} import org.json4s.JsonDSL.{jobject2assoc, pair2Assoc} import org.json4s.jackson.JsonMethods.{compact, render} @@ -120,6 +121,21 @@ object StreamingQueryListener extends Serializable { } } + private[spark] object QueryStartedEvent { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.registerModule(DefaultScalaModule) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret + } + + private[spark] def jsonString(event: QueryStartedEvent): String = + mapper.writeValueAsString(event) + + private[spark] def fromJson(json: String): QueryStartedEvent = + mapper.readValue[QueryStartedEvent](json) + } + /** * Event representing any progress updates in a 
query. * @param progress @@ -136,6 +152,21 @@ object StreamingQueryListener extends Serializable { private def jsonValue: JValue = JObject("progress" -> progress.jsonValue) } + private[spark] object QueryProgressEvent { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.registerModule(DefaultScalaModule) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret + } + + private[spark] def jsonString(event: QueryProgressEvent): String = + mapper.writeValueAsString(event) + + private[spark] def fromJson(json: String): QueryProgressEvent = + mapper.readValue[QueryProgressEvent](json) + } + /** * Event representing that query is idle and waiting for new data to process. * @@ -161,6 +192,21 @@ object StreamingQueryListener extends Serializable { } } + private[spark] object QueryIdleEvent { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.registerModule(DefaultScalaModule) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret + } + + private[spark] def jsonString(event: QueryTerminatedEvent): String = + mapper.writeValueAsString(event) + + private[spark] def fromJson(json: String): QueryTerminatedEvent = + mapper.readValue[QueryTerminatedEvent](json) + } + /** * Event representing that termination of a query. * @@ -199,4 +245,19 @@ object StreamingQueryListener extends Serializable { ("errorClassOnException" -> JString(errorClassOnException.orNull)) } } + + private[spark] object QueryTerminatedEvent { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.registerModule(DefaultScalaModule) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret + } + + private[spark] def jsonString(event: QueryTerminatedEvent): String = + mapper.writeValueAsString(event) + + private[spark] def fromJson(json: String): QueryTerminatedEvent = + mapper.readValue[QueryTerminatedEvent](json) + } } diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListenerBus.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListenerBus.scala new file mode 100644 index 0000000000000..c2934bcfa7058 --- /dev/null +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListenerBus.scala @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming + +import java.util.concurrent.CopyOnWriteArrayList + +import scala.jdk.CollectionConverters._ + +import org.apache.spark.connect.proto.{Command, ExecutePlanResponse, Plan, StreamingQueryEventType} +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.connect.client.CloseableIterator +import org.apache.spark.sql.streaming.StreamingQueryListener.{Event, QueryIdleEvent, QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} + +class StreamingQueryListenerBus(sparkSession: SparkSession) extends Logging { + private val listeners = new CopyOnWriteArrayList[StreamingQueryListener]() + private var executionThread: Option[Thread] = Option.empty + + val lock = new Object() + + def close(): Unit = { + listeners.forEach(remove(_)) + } + + def append(listener: StreamingQueryListener): Unit = lock.synchronized { + listeners.add(listener) + + if (listeners.size() == 1) { + var iter: Option[CloseableIterator[ExecutePlanResponse]] = Option.empty + try { + iter = Some(registerServerSideListener()) + } catch { + case e: Exception => + logWarning("Failed to add the listener, please add it again.", e) + listeners.remove(listener) + return + } + executionThread = Some(new Thread(new Runnable { + def run(): Unit = { + queryEventHandler(iter.get) + } + })) + // Start the thread + executionThread.get.start() + } + } + + def remove(listener: StreamingQueryListener): Unit = lock.synchronized { + if (listeners.size() == 1) { + val cmdBuilder = Command.newBuilder() + cmdBuilder.getStreamingQueryListenerBusCommandBuilder + .setRemoveListenerBusListener(true) + try { + sparkSession.execute(cmdBuilder.build()) + } catch { + case e: Exception => + logWarning("Failed to remove the listener, please remove it again.", e) + return + } + if (executionThread.isDefined) { + executionThread.get.interrupt() + executionThread = Option.empty + } + } + listeners.remove(listener) + } + + def list(): Array[StreamingQueryListener] = lock.synchronized { + listeners.asScala.toArray + } + + def registerServerSideListener(): CloseableIterator[ExecutePlanResponse] = { + val cmdBuilder = Command.newBuilder() + cmdBuilder.getStreamingQueryListenerBusCommandBuilder + .setAddListenerBusListener(true) + + val plan = Plan.newBuilder().setCommand(cmdBuilder.build()).build() + val iterator = sparkSession.client.execute(plan) + while (iterator.hasNext) { + val response = iterator.next() + if (response.getStreamingQueryListenerEventsResult.hasListenerBusListenerAdded && + response.getStreamingQueryListenerEventsResult.getListenerBusListenerAdded) { + return iterator + } + } + iterator + } + + def queryEventHandler(iter: CloseableIterator[ExecutePlanResponse]): Unit = { + try { + while (iter.hasNext) { + val response = iter.next() + val listenerEvents = response.getStreamingQueryListenerEventsResult.getEventsList + listenerEvents.forEach(event => { + event.getEventType match { + case StreamingQueryEventType.QUERY_PROGRESS_EVENT => + postToAll(QueryProgressEvent.fromJson(event.getEventJson)) + case StreamingQueryEventType.QUERY_IDLE_EVENT => + postToAll(QueryIdleEvent.fromJson(event.getEventJson)) + case StreamingQueryEventType.QUERY_TERMINATED_EVENT => + postToAll(QueryTerminatedEvent.fromJson(event.getEventJson)) + case _ => + logWarning(log"Unknown StreamingQueryListener event: ${MDC(LogKeys.EVENT, event)}") + } + }) + } + } catch { + case e: Exception => + logWarning( + "StreamingQueryListenerBus Handler thread received 
exception, all client" + + " side listeners are removed and handler thread is terminated.", + e) + lock.synchronized { + executionThread = Option.empty + listeners.forEach(remove(_)) + } + } + } + + def postToAll(event: Event): Unit = lock.synchronized { + listeners.forEach(listener => + try { + event match { + case t: QueryStartedEvent => + listener.onQueryStarted(t) + case t: QueryProgressEvent => + listener.onQueryProgress(t) + case t: QueryIdleEvent => + listener.onQueryIdle(t) + case t: QueryTerminatedEvent => + listener.onQueryTerminated(t) + case _ => + logWarning(log"Unknown StreamingQueryListener event: ${MDC(LogKeys.EVENT, event)}") + } + } catch { + case e: Exception => + logWarning(log"Listener ${MDC(LogKeys.LISTENER, listener)} threw an exception", e) + }) + } +} diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index fd33efd721932..7efced227d6d1 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -22,16 +22,13 @@ import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap} import scala.jdk.CollectionConverters._ -import com.google.protobuf.ByteString - import org.apache.spark.annotation.Evolving import org.apache.spark.connect.proto.Command import org.apache.spark.connect.proto.StreamingQueryManagerCommand import org.apache.spark.connect.proto.StreamingQueryManagerCommandResult import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.connect.common.{InvalidPlanInput, StreamingListenerPacket} -import org.apache.spark.util.SparkSerDeUtils +import org.apache.spark.sql.connect.common.InvalidPlanInput /** * A class to manage all the [[StreamingQuery]] active in a `SparkSession`. @@ -50,6 +47,12 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo private lazy val listenerCache: ConcurrentMap[String, StreamingQueryListener] = new ConcurrentHashMap() + private[spark] val streamingQueryListenerBus = new StreamingQueryListenerBus(sparkSession) + + private[spark] def close(): Unit = { + streamingQueryListenerBus.close() + } + /** * Returns a list of active queries associated with this SQLContext * @@ -153,17 +156,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo * @since 3.5.0 */ def addListener(listener: StreamingQueryListener): Unit = { - // TODO: [SPARK-44400] Improve the Listener to provide users a way to access the Spark session - // and perform arbitrary actions inside the Listener. Right now users can use - // `val spark = SparkSession.builder.getOrCreate()` to create a Spark session inside the - // Listener, but this is a legacy session instead of a connect remote session. 
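For orientation, this is roughly how the new bus is consumed from application code: listeners are ordinary `StreamingQueryListener` implementations, and the `StreamingQueryManager` changes around this point simply delegate `addListener`/`removeListener`/`listListeners` to `StreamingQueryListenerBus`. A hedged usage sketch, assuming an existing Connect `SparkSession` is passed in:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

object ListenerUsageSketch {
  def register(spark: SparkSession): StreamingQueryListener = {
    val listener = new StreamingQueryListener {
      override def onQueryStarted(event: QueryStartedEvent): Unit =
        println(s"started: ${event.id}")
      override def onQueryProgress(event: QueryProgressEvent): Unit =
        println(s"progress: batch ${event.progress.batchId}")
      override def onQueryIdle(event: QueryIdleEvent): Unit = ()
      override def onQueryTerminated(event: QueryTerminatedEvent): Unit =
        println(s"terminated: ${event.id}")
    }
    // The first listener added opens the long-running server-side event stream;
    // later adds and removes only touch the client-side list until the last
    // listener is removed, which tears the server-side listener down.
    spark.streams.addListener(listener)
    listener
  }
}
```

Removing the listener later with `spark.streams.removeListener(listener)` mirrors the `remove` path above.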
- val id = UUID.randomUUID.toString - cacheListenerById(id, listener) - executeManagerCmd( - _.getAddListenerBuilder - .setListenerPayload(ByteString.copyFrom(SparkSerDeUtils - .serialize(StreamingListenerPacket(id, listener)))) - .setId(id)) + streamingQueryListenerBus.append(listener) } /** @@ -172,11 +165,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo * @since 3.5.0 */ def removeListener(listener: StreamingQueryListener): Unit = { - val id = getIdByListener(listener) - executeManagerCmd( - _.getRemoveListenerBuilder - .setId(id)) - removeCachedListener(id) + streamingQueryListenerBus.remove(listener) } /** @@ -185,10 +174,7 @@ class StreamingQueryManager private[sql] (sparkSession: SparkSession) extends Lo * @since 3.5.0 */ def listListeners(): Array[StreamingQueryListener] = { - executeManagerCmd(_.setListListeners(true)).getListListeners.getListenerIdsList.asScala - .filter(listenerCache.containsKey(_)) - .map(listenerCache.get(_)) - .toArray + streamingQueryListenerBus.list() } private def executeManagerCmd( diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/progress.scala index a0c124f810e92..ebd13bc248f97 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -322,7 +322,7 @@ private object SafeJsonSerializer { /** Convert map to JValue while handling empty maps. Also, this sorts the keys. */ def safeMapToJValue[T](map: ju.Map[String, T], valueToJValue: T => JValue): JValue = { - if (map.isEmpty) return JNothing + if (map == null || map.isEmpty) return JNothing val keys = map.asScala.keySet.toSeq.sorted keys.map { k => k -> valueToJValue(map.get(k)): JObject }.reduce(_ ~ _) } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala index d646fad00c075..0e3a683d2701d 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CatalogSuite.scala @@ -22,11 +22,11 @@ import java.io.{File, FilenameFilter} import org.apache.commons.io.FileUtils import org.apache.spark.SparkException -import org.apache.spark.sql.test.{RemoteSparkSession, SQLHelper} +import org.apache.spark.sql.test.{ConnectFunSuite, RemoteSparkSession, SQLHelper} import org.apache.spark.sql.types.{DoubleType, LongType, StructType} import org.apache.spark.storage.StorageLevel -class CatalogSuite extends RemoteSparkSession with SQLHelper { +class CatalogSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelper { test("Database APIs") { val currentDb = spark.catalog.currentDatabase diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CheckpointSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CheckpointSuite.scala new file mode 100644 index 0000000000000..e57b051890f56 --- /dev/null +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/CheckpointSuite.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql + +import java.io.{ByteArrayOutputStream, PrintStream} + +import scala.concurrent.duration.DurationInt + +import org.apache.commons.io.output.TeeOutputStream +import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} +import org.scalatest.exceptions.TestFailedDueToTimeoutException + +import org.apache.spark.SparkException +import org.apache.spark.connect.proto +import org.apache.spark.sql.test.{ConnectFunSuite, RemoteSparkSession, SQLHelper} + +class CheckpointSuite extends ConnectFunSuite with RemoteSparkSession with SQLHelper { + + private def captureStdOut(block: => Unit): String = { + val currentOut = Console.out + val capturedOut = new ByteArrayOutputStream() + val newOut = new PrintStream(new TeeOutputStream(currentOut, capturedOut)) + Console.withOut(newOut) { + block + } + capturedOut.toString + } + + private def checkFragments(result: String, fragmentsToCheck: Seq[String]): Unit = { + fragmentsToCheck.foreach { fragment => + assert(result.contains(fragment)) + } + } + + private def testCapturedStdOut(block: => Unit, fragmentsToCheck: String*): Unit = { + checkFragments(captureStdOut(block), fragmentsToCheck) + } + + test("checkpoint") { + val df = spark.range(100).localCheckpoint() + testCapturedStdOut(df.explain(), "ExistingRDD") + } + + test("checkpoint gc") { + val df = spark.range(100).localCheckpoint(eager = true) + val encoder = df.agnosticEncoder + val dfId = df.plan.getRoot.getCachedRemoteRelation.getRelationId + spark.cleaner.doCleanupCachedRemoteRelation(dfId) + + val ex = intercept[SparkException] { + spark + .newDataset(encoder) { builder => + builder.setCachedRemoteRelation( + proto.CachedRemoteRelation + .newBuilder() + .setRelationId(dfId) + .build()) + } + .collect() + } + assert(ex.getMessage.contains(s"No DataFrame with id $dfId is found")) + } + + // This test is flaky because cannot guarantee GC + // You can locally run this to verify the behavior. + ignore("checkpoint gc derived DataFrame") { + var df1 = spark.range(100).localCheckpoint(eager = true) + var derived = df1.repartition(10) + val encoder = df1.agnosticEncoder + val dfId = df1.plan.getRoot.getCachedRemoteRelation.getRelationId + + df1 = null + System.gc() + Thread.sleep(3000L) + + def condition(): Unit = { + val ex = intercept[SparkException] { + spark + .newDataset(encoder) { builder => + builder.setCachedRemoteRelation( + proto.CachedRemoteRelation + .newBuilder() + .setRelationId(dfId) + .build()) + } + .collect() + } + assert(ex.getMessage.contains(s"No DataFrame with id $dfId is found")) + } + + intercept[TestFailedDueToTimeoutException] { + eventually(timeout(5.seconds), interval(1.second))(condition()) + } + + // GC triggers remove the cached remote relation + derived = null + System.gc() + Thread.sleep(3000L) + + // Check the state was removed up on garbage-collection. 
+ eventually(timeout(60.seconds), interval(1.second))(condition()) + } +} diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientDataFrameStatSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientDataFrameStatSuite.scala index 299ff7ff4fe3a..88281352f2479 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientDataFrameStatSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientDataFrameStatSuite.scala @@ -22,9 +22,9 @@ import java.util.Random import org.scalatest.matchers.must.Matchers._ import org.apache.spark.SparkIllegalArgumentException -import org.apache.spark.sql.test.RemoteSparkSession +import org.apache.spark.sql.test.{ConnectFunSuite, RemoteSparkSession} -class ClientDataFrameStatSuite extends RemoteSparkSession { +class ClientDataFrameStatSuite extends ConnectFunSuite with RemoteSparkSession { private def toLetter(i: Int): String = (i + 97).toChar.toString test("approxQuantile") { diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientDatasetSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientDatasetSuite.scala index 4a32b8460bce1..9d6f07cf603aa 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientDatasetSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientDatasetSuite.scala @@ -162,30 +162,6 @@ class ClientDatasetSuite extends ConnectFunSuite with BeforeAndAfterEach { } } - test("command extension deprecated") { - val extension = proto.ExamplePluginCommand.newBuilder().setCustomField("abc").build() - val command = proto.Command - .newBuilder() - .setExtension(com.google.protobuf.Any.pack(extension)) - .build() - val expectedPlan = proto.Plan.newBuilder().setCommand(command).build() - ss.execute(com.google.protobuf.Any.pack(extension)) - val actualPlan = service.getAndClearLatestInputPlan() - assert(actualPlan.equals(expectedPlan)) - } - - test("command extension") { - val extension = proto.ExamplePluginCommand.newBuilder().setCustomField("abc").build() - val command = proto.Command - .newBuilder() - .setExtension(com.google.protobuf.Any.pack(extension)) - .build() - val expectedPlan = proto.Plan.newBuilder().setCommand(command).build() - ss.execute(com.google.protobuf.Any.pack(extension).toByteArray) - val actualPlan = service.getAndClearLatestInputPlan() - assert(actualPlan.equals(expectedPlan)) - } - test("serialize as null") { val session = newSparkSession() val ds = session.range(10) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index a0729adb89609..255dd76697987 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -22,6 +22,8 @@ import java.time.DateTimeException import java.util.Properties import scala.collection.mutable +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration.DurationInt import scala.jdk.CollectionConverters._ import org.apache.commons.io.FileUtils @@ -38,11 +40,16 @@ import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connect.client.{SparkConnectClient, SparkResult} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SqlApiConf 
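The suite above exercises the new client-side `localCheckpoint` support together with the session cleaner. A minimal sketch of the user-facing flow (assuming a Connect `SparkSession`; the cleanup comment describes expected behaviour rather than an API the caller invokes directly):

```scala
import org.apache.spark.sql.SparkSession

object LocalCheckpointSketch {
  def run(spark: SparkSession): Unit = {
    // Eagerly materialize the plan on the server; the returned Dataset is backed
    // by a cached remote relation instead of the original logical plan.
    val df = spark.range(100).localCheckpoint(eager = true)
    df.explain()        // the printed plan is rooted at the checkpointed (ExistingRDD) data
    println(df.count()) // 100, served from the checkpointed result

    // Once `df` becomes unreachable on the client, the session cleaner is expected
    // to send a RemoveCachedRemoteRelationCommand so the server can drop the state.
  }
}
```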
-import org.apache.spark.sql.test.{IntegrationTestUtils, RemoteSparkSession, SQLHelper} +import org.apache.spark.sql.test.{ConnectFunSuite, IntegrationTestUtils, RemoteSparkSession, SQLHelper} import org.apache.spark.sql.test.SparkConnectServerUtils.port import org.apache.spark.sql.types._ +import org.apache.spark.util.SparkThreadUtils -class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateMethodTester { +class ClientE2ETestSuite + extends ConnectFunSuite + with RemoteSparkSession + with SQLHelper + with PrivateMethodTester { test("throw SparkException with null filename in stack trace elements") { withSQLConf("spark.sql.connect.enrichError.enabled" -> "true") { @@ -1511,6 +1518,46 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateM (0 until 5).foreach(i => assert(row.get(i * 2) === row.get(i * 2 + 1))) } } + + test("Observable metrics") { + val df = spark.range(99).withColumn("extra", col("id") - 1) + val ob1 = new Observation("ob1") + val observedDf = df.observe(ob1, min("id"), avg("id"), max("id")) + val observedObservedDf = observedDf.observe("ob2", min("extra"), avg("extra"), max("extra")) + + val ob1Schema = new StructType() + .add("min(id)", LongType) + .add("avg(id)", DoubleType) + .add("max(id)", LongType) + val ob2Schema = new StructType() + .add("min(extra)", LongType) + .add("avg(extra)", DoubleType) + .add("max(extra)", LongType) + val ob1Metrics = Map("ob1" -> new GenericRowWithSchema(Array(0, 49, 98), ob1Schema)) + val ob2Metrics = Map("ob2" -> new GenericRowWithSchema(Array(-1, 48, 97), ob2Schema)) + + assert(df.collectResult().getObservedMetrics === Map.empty) + assert(observedDf.collectResult().getObservedMetrics === ob1Metrics) + assert(observedObservedDf.collectResult().getObservedMetrics === ob1Metrics ++ ob2Metrics) + } + + test("Observation.get is blocked until the query is finished") { + val df = spark.range(99).withColumn("extra", col("id") - 1) + val observation = new Observation("ob1") + val observedDf = df.observe(observation, min("id"), avg("id"), max("id")) + + // Start a new thread to get the observation + val future = Future(observation.get)(ExecutionContext.global) + // make sure the thread is blocked right now + val e = intercept[java.util.concurrent.TimeoutException] { + SparkThreadUtils.awaitResult(future, 2.seconds) + } + assert(e.getMessage.contains("Future timed out")) + observedDf.collect() + // make sure the thread is unblocked after the query is finished + val metrics = SparkThreadUtils.awaitResult(future, 2.seconds) + assert(metrics === Map("min(id)" -> 0, "avg(id)" -> 49, "max(id)" -> 98)) + } } private[sql] case class ClassData(a: String, b: Int) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionSuite.scala index b77e92995624f..8a783d880560e 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionSuite.scala @@ -20,10 +20,10 @@ package org.apache.spark.sql import scala.jdk.CollectionConverters._ import org.apache.spark.sql.internal.SqlApiConf -import org.apache.spark.sql.test.{QueryTest, SQLHelper} +import org.apache.spark.sql.test.{QueryTest, RemoteSparkSession} import org.apache.spark.sql.types.{StringType, StructType} -class DataFrameNaFunctionSuite extends QueryTest with SQLHelper { +class 
DataFrameNaFunctionSuite extends QueryTest with RemoteSparkSession { private def createDF(): DataFrame = { val sparkSession = spark import sparkSession.implicits._ diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala index 91516b0069b25..988774d5eec94 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/KeyValueGroupedDatasetE2ETestSuite.scala @@ -22,7 +22,7 @@ import java.util.Arrays import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Append import org.apache.spark.sql.functions._ import org.apache.spark.sql.streaming.{GroupState, GroupStateTimeout} -import org.apache.spark.sql.test.{QueryTest, SQLHelper} +import org.apache.spark.sql.test.{QueryTest, RemoteSparkSession} import org.apache.spark.sql.types._ import org.apache.spark.util.SparkSerDeUtils @@ -33,7 +33,7 @@ case class ClickState(id: String, count: Int) /** * All tests in this class requires client UDF artifacts synced with the server. */ -class KeyValueGroupedDatasetE2ETestSuite extends QueryTest with SQLHelper { +class KeyValueGroupedDatasetE2ETestSuite extends QueryTest with RemoteSparkSession { lazy val session: SparkSession = spark import session.implicits._ diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index 5844df8a4889c..77be7c5de04af 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.{functions => fn} import org.apache.spark.sql.avro.{functions => avroFn} import org.apache.spark.sql.catalyst.ScalaReflection import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.StringEncoder +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.connect.client.SparkConnectClient import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions.lit @@ -699,7 +700,8 @@ class PlanGenerationTestSuite } test("select collated string") { - val schema = StructType(StructField("s", StringType(1)) :: Nil) + val schema = + StructType(StructField("s", StringType(CollationFactory.UTF8_LCASE_COLLATION_ID)) :: Nil) createLocalRelation(schema.catalogString).select("s") } @@ -1762,14 +1764,26 @@ class PlanGenerationTestSuite fn.split(fn.col("g"), ";") } + functionTest("split using columns") { + fn.split(fn.col("g"), fn.col("g")) + } + functionTest("split with limit") { fn.split(fn.col("g"), ";", 10) } + functionTest("split with limit using columns") { + fn.split(fn.col("g"), lit(";"), fn.col("a")) + } + functionTest("substring") { fn.substring(fn.col("g"), 4, 5) } + functionTest("substring using columns") { + fn.substring(fn.col("g"), fn.col("a"), fn.col("b")) + } + functionTest("substring_index") { fn.substring_index(fn.col("g"), ";", 5) } @@ -2297,6 +2311,14 @@ class PlanGenerationTestSuite fn.timestamp_micros(fn.col("x")) } + temporalFunctionTest("timestamp_diff") { + fn.timestamp_diff("year", fn.col("t"), fn.col("t")) + } + + temporalFunctionTest("timestamp_add") { + fn.timestamp_add("week", fn.col("x"), 
fn.col("t")) + } + // Array of Long // Array of Long // Array of Array of Long @@ -2481,10 +2503,38 @@ class PlanGenerationTestSuite Collections.singletonMap("allowNumericLeadingZeros", "true")) } + functionTest("try_parse_json") { + fn.try_parse_json(fn.col("g")) + } + functionTest("to_json") { fn.to_json(fn.col("d"), Map(("timestampFormat", "dd/MM/yyyy"))) } + functionTest("parse_json") { + fn.parse_json(fn.col("g")) + } + + functionTest("is_variant_null") { + fn.is_variant_null(fn.parse_json(fn.col("g"))) + } + + functionTest("variant_get") { + fn.variant_get(fn.parse_json(fn.col("g")), "$", "int") + } + + functionTest("try_variant_get") { + fn.try_variant_get(fn.parse_json(fn.col("g")), "$", "int") + } + + functionTest("schema_of_variant") { + fn.schema_of_variant(fn.parse_json(fn.col("g"))) + } + + functionTest("schema_of_variant_agg") { + fn.schema_of_variant_agg(fn.parse_json(fn.col("g"))) + } + functionTest("size") { fn.size(fn.col("f")) } @@ -3191,34 +3241,12 @@ class PlanGenerationTestSuite } /* Extensions */ - test("relation extension deprecated") { - val input = proto.ExamplePluginRelation - .newBuilder() - .setInput(simple.plan.getRoot) - .build() - session.newDataFrame(com.google.protobuf.Any.pack(input)) - } - - test("expression extension deprecated") { - val extension = proto.ExamplePluginExpression - .newBuilder() - .setChild( - proto.Expression - .newBuilder() - .setUnresolvedAttribute(proto.Expression.UnresolvedAttribute - .newBuilder() - .setUnparsedIdentifier("id"))) - .setCustomField("abc") - .build() - simple.select(Column(com.google.protobuf.Any.pack(extension))) - } - test("relation extension") { val input = proto.ExamplePluginRelation .newBuilder() .setInput(simple.plan.getRoot) .build() - session.newDataFrame(com.google.protobuf.Any.pack(input).toByteArray) + session.newDataFrame(_.setExtension(com.google.protobuf.Any.pack(input))) } test("expression extension") { @@ -3232,7 +3260,7 @@ class PlanGenerationTestSuite .setUnparsedIdentifier("id"))) .setCustomField("abc") .build() - simple.select(Column.forExtension(com.google.protobuf.Any.pack(extension).toByteArray)) + simple.select(Column(_.setExtension(com.google.protobuf.Any.pack(extension)))) } test("crosstab") { diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLImplicitsTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLImplicitsTestSuite.scala index 3e4704b6ab8e0..57342e12fcb51 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLImplicitsTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SQLImplicitsTestSuite.scala @@ -85,6 +85,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(booleans) testImplicit(booleans.toSeq) testImplicit(booleans.toSeq)(newBooleanSeqEncoder) + testImplicit(booleans.toSeq)(newSequenceEncoder) testImplicit(booleans.toImmutableArraySeq) val bytes = Array(76.toByte, 59.toByte, 121.toByte) @@ -93,6 +94,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(bytes) testImplicit(bytes.toSeq) testImplicit(bytes.toSeq)(newByteSeqEncoder) + testImplicit(bytes.toSeq)(newSequenceEncoder) testImplicit(bytes.toImmutableArraySeq) val shorts = Array(21.toShort, (-213).toShort, 14876.toShort) @@ -101,6 +103,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(shorts) testImplicit(shorts.toSeq) testImplicit(shorts.toSeq)(newShortSeqEncoder) + 
testImplicit(shorts.toSeq)(newSequenceEncoder) testImplicit(shorts.toImmutableArraySeq) val ints = Array(4, 6, 5) @@ -109,6 +112,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(ints) testImplicit(ints.toSeq) testImplicit(ints.toSeq)(newIntSeqEncoder) + testImplicit(ints.toSeq)(newSequenceEncoder) testImplicit(ints.toImmutableArraySeq) val longs = Array(System.nanoTime(), System.currentTimeMillis()) @@ -117,6 +121,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(longs) testImplicit(longs.toSeq) testImplicit(longs.toSeq)(newLongSeqEncoder) + testImplicit(longs.toSeq)(newSequenceEncoder) testImplicit(longs.toImmutableArraySeq) val floats = Array(3f, 10.9f) @@ -125,6 +130,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(floats) testImplicit(floats.toSeq) testImplicit(floats.toSeq)(newFloatSeqEncoder) + testImplicit(floats.toSeq)(newSequenceEncoder) testImplicit(floats.toImmutableArraySeq) val doubles = Array(23.78d, -329.6d) @@ -133,6 +139,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(doubles) testImplicit(doubles.toSeq) testImplicit(doubles.toSeq)(newDoubleSeqEncoder) + testImplicit(doubles.toSeq)(newSequenceEncoder) testImplicit(doubles.toImmutableArraySeq) val strings = Array("foo", "baz", "bar") @@ -140,6 +147,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(strings) testImplicit(strings.toSeq) testImplicit(strings.toSeq)(newStringSeqEncoder) + testImplicit(strings.toSeq)(newSequenceEncoder) testImplicit(strings.toImmutableArraySeq) val myTypes = Array(MyType(12L, Math.E, Math.PI), MyType(0, 0, 0)) @@ -147,6 +155,7 @@ class SQLImplicitsTestSuite extends ConnectFunSuite with BeforeAndAfterAll { testImplicit(myTypes) testImplicit(myTypes.toSeq) testImplicit(myTypes.toSeq)(newProductSeqEncoder[MyType]) + testImplicit(myTypes.toSeq)(newSequenceEncoder) testImplicit(myTypes.toImmutableArraySeq) // Others. diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala index b967245d90c26..b28aa905c7a29 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/SparkSessionE2ESuite.scala @@ -26,7 +26,7 @@ import scala.util.{Failure, Success} import org.scalatest.concurrent.Eventually._ import org.apache.spark.SparkException -import org.apache.spark.sql.test.RemoteSparkSession +import org.apache.spark.sql.test.{ConnectFunSuite, RemoteSparkSession} import org.apache.spark.util.SparkThreadUtils.awaitResult /** @@ -34,7 +34,7 @@ import org.apache.spark.util.SparkThreadUtils.awaitResult * class, whether explicit or implicit, as it will trigger a UDF deserialization error during * Maven build/test. 
*/ -class SparkSessionE2ESuite extends RemoteSparkSession { +class SparkSessionE2ESuite extends ConnectFunSuite with RemoteSparkSession { test("interrupt all - background queries, foreground interrupt") { val session = spark @@ -108,7 +108,37 @@ class SparkSessionE2ESuite extends RemoteSparkSession { assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") } - test("interrupt tag") { + test("interrupt all - streaming queries") { + val q1 = spark.readStream + .format("rate") + .option("rowsPerSecond", 1) + .load() + .writeStream + .format("console") + .start() + + val q2 = spark.readStream + .format("rate") + .option("rowsPerSecond", 1) + .load() + .writeStream + .format("console") + .start() + + assert(q1.isActive) + assert(q2.isActive) + + val interrupted = spark.interruptAll() + + q1.awaitTermination(timeoutMs = 20 * 1000) + q2.awaitTermination(timeoutMs = 20 * 1000) + assert(!q1.isActive) + assert(!q2.isActive) + assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") + } + + // TODO(SPARK-48139): Re-enable `SparkSessionE2ESuite.interrupt tag` + ignore("interrupt tag") { val session = spark import session.implicits._ @@ -196,7 +226,7 @@ class SparkSessionE2ESuite extends RemoteSparkSession { // q2 and q3 should be cancelled interrupted.clear() - eventually(timeout(30.seconds), interval(1.seconds)) { + eventually(timeout(1.minute), interval(1.seconds)) { val ids = spark.interruptTag("two") interrupted ++= ids assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") @@ -213,7 +243,7 @@ class SparkSessionE2ESuite extends RemoteSparkSession { // q1 and q4 should be cancelled interrupted.clear() - eventually(timeout(30.seconds), interval(1.seconds)) { + eventually(timeout(1.minute), interval(1.seconds)) { val ids = spark.interruptTag("one") interrupted ++= ids assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") @@ -229,6 +259,53 @@ class SparkSessionE2ESuite extends RemoteSparkSession { assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") } + test("interrupt tag - streaming query") { + spark.addTag("foo") + val q1 = spark.readStream + .format("rate") + .option("rowsPerSecond", 1) + .load() + .writeStream + .format("console") + .start() + assert(spark.getTags() == Set("foo")) + + spark.addTag("bar") + val q2 = spark.readStream + .format("rate") + .option("rowsPerSecond", 1) + .load() + .writeStream + .format("console") + .start() + assert(spark.getTags() == Set("foo", "bar")) + + spark.clearTags() + + spark.addTag("zoo") + val q3 = spark.readStream + .format("rate") + .option("rowsPerSecond", 1) + .load() + .writeStream + .format("console") + .start() + assert(spark.getTags() == Set("zoo")) + + assert(q1.isActive) + assert(q2.isActive) + assert(q3.isActive) + + val interrupted = spark.interruptTag("foo") + + q1.awaitTermination(timeoutMs = 20 * 1000) + q2.awaitTermination(timeoutMs = 20 * 1000) + assert(!q1.isActive) + assert(!q2.isActive) + assert(q3.isActive) + assert(interrupted.length == 2, s"Interrupted operations: $interrupted.") + } + test("progress is available for the spark result") { val result = spark .range(10000) @@ -305,4 +382,43 @@ class SparkSessionE2ESuite extends RemoteSparkSession { .create() } } + + test("SPARK-47986: get or create after session changed") { + val remote = s"sc://localhost:$serverPort" + + SparkSession.clearDefaultSession() + SparkSession.clearActiveSession() + + val session1 = SparkSession + .builder() + .remote(remote) + .getOrCreate() + + assert(session1 eq 
SparkSession.getActiveSession.get) + assert(session1 eq SparkSession.getDefaultSession.get) + assert(session1.range(3).collect().length == 3) + + session1.client.hijackServerSideSessionIdForTesting("-testing") + + val e = intercept[SparkException] { + session1.range(3).analyze + } + + assert(e.getMessage.contains("[INVALID_HANDLE.SESSION_CHANGED]")) + assert(!session1.client.isSessionValid) + assert(SparkSession.getActiveSession.isEmpty) + assert(SparkSession.getDefaultSession.isEmpty) + + val session2 = SparkSession + .builder() + .remote(remote) + .getOrCreate() + + assert(session1 ne session2) + assert(session2.client.isSessionValid) + assert(session2 eq SparkSession.getActiveSession.get) + assert(session2 eq SparkSession.getDefaultSession.get) + assert(session2.range(3).collect().length == 3) + } + } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/StubbingTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/StubbingTestSuite.scala index b9c5888e5cb77..5bcb17672d6a9 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/StubbingTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/StubbingTestSuite.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql import org.apache.spark.sql.connect.client.ToStub -import org.apache.spark.sql.test.RemoteSparkSession +import org.apache.spark.sql.test.{ConnectFunSuite, RemoteSparkSession} -class StubbingTestSuite extends RemoteSparkSession { +class StubbingTestSuite extends ConnectFunSuite with RemoteSparkSession { private def eval[T](f: => T): T = f test("capture of to-be stubbed class") { diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UDFClassLoadingE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UDFClassLoadingE2ESuite.scala index a76e046db2e3a..1d8d164c9541c 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UDFClassLoadingE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UDFClassLoadingE2ESuite.scala @@ -22,10 +22,10 @@ import java.nio.file.{Files, Paths} import scala.util.Properties import org.apache.spark.sql.connect.common.ProtoDataTypes -import org.apache.spark.sql.expressions.ScalarUserDefinedFunction -import org.apache.spark.sql.test.RemoteSparkSession +import org.apache.spark.sql.expressions.ScalaUserDefinedFunction +import org.apache.spark.sql.test.{ConnectFunSuite, RemoteSparkSession} -class UDFClassLoadingE2ESuite extends RemoteSparkSession { +class UDFClassLoadingE2ESuite extends ConnectFunSuite with RemoteSparkSession { private val scalaVersion = Properties.versionNumberString .split("\\.") @@ -39,13 +39,14 @@ class UDFClassLoadingE2ESuite extends RemoteSparkSession { new File(s"src/test/resources/udf$scalaVersion.jar").toURI.toURL private def registerUdf(session: SparkSession): Unit = { - val udf = ScalarUserDefinedFunction( + val udf = ScalaUserDefinedFunction( serializedUdfPacket = udfByteArray, inputTypes = Seq(ProtoDataTypes.IntegerType), outputType = ProtoDataTypes.IntegerType, name = Some("dummyUdf"), nullable = true, - deterministic = true) + deterministic = true, + aggregate = false) session.registerUdf(udf.toProto) } diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala index f7ffe7aa12719..4032a9499c448 100644 --- 
a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/UserDefinedFunctionE2ETestSuite.scala @@ -26,14 +26,15 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.api.java.function._ import org.apache.spark.sql.api.java.UDF2 import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{PrimitiveIntEncoder, PrimitiveLongEncoder} -import org.apache.spark.sql.functions.{col, struct, udf} -import org.apache.spark.sql.test.QueryTest +import org.apache.spark.sql.expressions.Aggregator +import org.apache.spark.sql.functions.{col, struct, udaf, udf} +import org.apache.spark.sql.test.{QueryTest, RemoteSparkSession} import org.apache.spark.sql.types.IntegerType /** * All tests in this class requires client UDF defined in this test class synced with the server. */ -class UserDefinedFunctionE2ETestSuite extends QueryTest { +class UserDefinedFunctionE2ETestSuite extends QueryTest with RemoteSparkSession { test("Dataset typed filter") { val rows = spark.range(10).filter(n => n % 2 == 0).collectAsList() assert(rows == Arrays.asList[Long](0, 2, 4, 6, 8)) @@ -346,4 +347,47 @@ class UserDefinedFunctionE2ETestSuite extends QueryTest { val result = df.select(f($"id")).as[Long].head() assert(result == 1L) } + + test("UDAF custom Aggregator - primitive types") { + val session: SparkSession = spark + import session.implicits._ + val agg = new Aggregator[Long, Long, Long] { + override def zero: Long = 0L + override def reduce(b: Long, a: Long): Long = b + a + override def merge(b1: Long, b2: Long): Long = b1 + b2 + override def finish(reduction: Long): Long = reduction + override def bufferEncoder: Encoder[Long] = Encoders.scalaLong + override def outputEncoder: Encoder[Long] = Encoders.scalaLong + } + spark.udf.register("agg", udaf(agg)) + val result = spark.range(10).selectExpr("agg(id)").as[Long].head() + assert(result == 45) + } + + test("UDAF custom Aggregator - case class as input types") { + val session: SparkSession = spark + import session.implicits._ + val agg = new Aggregator[UdafTestInput, (Long, Long), Long] { + override def zero: (Long, Long) = (0L, 0L) + override def reduce(b: (Long, Long), a: UdafTestInput): (Long, Long) = + (b._1 + a.id, b._2 + a.extra) + override def merge(b1: (Long, Long), b2: (Long, Long)): (Long, Long) = + (b1._1 + b2._1, b1._2 + b2._2) + override def finish(reduction: (Long, Long)): Long = reduction._1 + reduction._2 + override def bufferEncoder: Encoder[(Long, Long)] = + Encoders.tuple(Encoders.scalaLong, Encoders.scalaLong) + override def outputEncoder: Encoder[Long] = Encoders.scalaLong + } + spark.udf.register("agg", udaf(agg)) + val result = spark + .range(10) + .withColumn("extra", col("id") * 2) + .as[UdafTestInput] + .selectExpr("agg(id, extra)") + .as[Long] + .head() + assert(result == 135) // 45 + 90 + } } + +case class UdafTestInput(id: Long, extra: Long) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala index 76958f055f2ef..d7977fbeb108f 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala @@ -25,13 +25,13 @@ import scala.util.Properties import org.apache.commons.io.output.ByteArrayOutputStream import 
org.scalatest.BeforeAndAfterEach -import org.apache.spark.sql.test.{IntegrationTestUtils, RemoteSparkSession} +import org.apache.spark.sql.test.{ConnectFunSuite, IntegrationTestUtils, RemoteSparkSession} import org.apache.spark.tags.AmmoniteTest import org.apache.spark.util.IvyTestUtils import org.apache.spark.util.MavenUtils.MavenCoordinate @AmmoniteTest -class ReplE2ESuite extends RemoteSparkSession with BeforeAndAfterEach { +class ReplE2ESuite extends ConnectFunSuite with RemoteSparkSession with BeforeAndAfterEach { private val executorService = Executors.newSingleThreadExecutor() private val TIMEOUT_SECONDS = 30 diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala index 0f383d007f295..7bf7673a7a121 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/CheckConnectJvmClientCompatibility.scala @@ -196,9 +196,6 @@ object CheckConnectJvmClientCompatibility { ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.COL_POS_KEY"), ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.DATASET_ID_KEY"), ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.curId"), - ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.observe"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.Observation"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.Observation$"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.ObservationListener"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.ObservationListener$"), ProblemFilters.exclude[Problem]("org.apache.spark.sql.Dataset.queryExecution"), @@ -210,7 +207,6 @@ object CheckConnectJvmClientCompatibility { // functions ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.unwrap_udt"), - ProblemFilters.exclude[Problem]("org.apache.spark.sql.functions.udaf"), // KeyValueGroupedDataset ProblemFilters.exclude[Problem]( @@ -304,6 +300,7 @@ object CheckConnectJvmClientCompatibility { // MergeIntoWriter ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.MergeIntoWriter"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.MergeIntoWriter$"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.WhenMatched"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.WhenMatched$"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.WhenNotMatched"), @@ -336,18 +333,28 @@ object CheckConnectJvmClientCompatibility { ProblemFilters.exclude[ReversedMissingMethodProblem]( "org.apache.spark.sql.SQLImplicits._sqlContext" // protected ), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.internal.SessionCleaner"), + + // private + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.internal.CleanupTask"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.internal.CleanupTaskWeakReference"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.internal.CleanupCachedRemoteRelation"), + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.internal.CleanupCachedRemoteRelation$"), // Catalyst Refactoring 
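Stepping back to the `Aggregator`-based UDAF support exercised in `UserDefinedFunctionE2ETestSuite` a few hunks above, a minimal registration sketch (assuming an existing Connect `SparkSession`; the names here are illustrative):

```scala
import org.apache.spark.sql.{Encoder, Encoders, SparkSession}
import org.apache.spark.sql.expressions.Aggregator
import org.apache.spark.sql.functions.udaf

object UdafSketch {
  // Sums Long inputs; buffer and output both use the plain Long encoder.
  private val sumAgg = new Aggregator[Long, Long, Long] {
    override def zero: Long = 0L
    override def reduce(b: Long, a: Long): Long = b + a
    override def merge(b1: Long, b2: Long): Long = b1 + b2
    override def finish(reduction: Long): Long = reduction
    override def bufferEncoder: Encoder[Long] = Encoders.scalaLong
    override def outputEncoder: Encoder[Long] = Encoders.scalaLong
  }

  def run(spark: SparkSession): Unit = {
    spark.udf.register("sum_agg", udaf(sumAgg))
    spark.range(10).selectExpr("sum_agg(id)").show() // single row: 45
  }
}
```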
ProblemFilters.exclude[Problem]("org.apache.spark.sql.catalyst.util.SparkCollectionUtils"), ProblemFilters.exclude[Problem]("org.apache.spark.sql.catalyst.util.SparkCollectionUtils$"), // New public APIs added in the client - // ScalarUserDefinedFunction + // ScalaUserDefinedFunction ProblemFilters .exclude[MissingClassProblem]( - "org.apache.spark.sql.expressions.ScalarUserDefinedFunction"), + "org.apache.spark.sql.expressions.ScalaUserDefinedFunction"), ProblemFilters.exclude[MissingClassProblem]( - "org.apache.spark.sql.expressions.ScalarUserDefinedFunction$"), + "org.apache.spark.sql.expressions.ScalaUserDefinedFunction$"), // New private API added in the client ProblemFilters @@ -357,6 +364,11 @@ object CheckConnectJvmClientCompatibility { .exclude[MissingClassProblem]( "org.apache.spark.sql.expressions.SparkConnectClosureCleaner$"), + // Column + // developer API + ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.sql.Column.apply"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.sql.Column.expr"), + // Dataset ProblemFilters.exclude[DirectMissingMethodProblem]( "org.apache.spark.sql.Dataset.plan" @@ -439,6 +451,9 @@ object CheckConnectJvmClientCompatibility { "org.apache.spark.sql.streaming.RemoteStreamingQuery"), ProblemFilters.exclude[MissingClassProblem]( "org.apache.spark.sql.streaming.RemoteStreamingQuery$"), + // Skip client side listener specific class + ProblemFilters.exclude[MissingClassProblem]( + "org.apache.spark.sql.streaming.StreamingQueryListenerBus"), // Encoders are in the wrong JAR ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.Encoders"), diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala index 55f962b2a52c8..46aeaeff43d2f 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala @@ -530,6 +530,25 @@ class SparkConnectClientSuite extends ConnectFunSuite with BeforeAndAfterEach { assert(reattachableIter.resultComplete) } + test("SPARK-48056: Client execute gets INVALID_HANDLE.SESSION_NOT_FOUND and proceeds") { + startDummyServer(0) + client = SparkConnectClient + .builder() + .connectionString(s"sc://localhost:${server.getPort}") + .enableReattachableExecute() + .build() + service.errorToThrowOnExecute = Some( + new StatusRuntimeException( + Status.INTERNAL.withDescription("INVALID_HANDLE.SESSION_NOT_FOUND"))) + + val plan = buildPlan("select * from range(1)") + val iter = client.execute(plan) + val reattachableIter = + ExecutePlanResponseReattachableIterator.fromIterator(iter) + reattachableIter.foreach(_ => ()) + assert(reattachableIter.resultComplete) + } + test("GRPC stub unary call throws error immediately") { // Spark Connect error retry handling depends on the error being returned from the unary // call immediately. 
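The `SESSION_NOT_FOUND` test above depends on the server surfacing the error class in the gRPC status description; a small sketch of the error shape being simulated, using standard grpc-java calls (the wrapper object is illustrative):

```scala
import io.grpc.{Status, StatusRuntimeException}

object SessionNotFoundError {
  // When the reattachable iterator sees this description before any result has
  // arrived, it proceeds and marks the result complete instead of retrying.
  def apply(): StatusRuntimeException =
    new StatusRuntimeException(
      Status.INTERNAL.withDescription("INVALID_HANDLE.SESSION_NOT_FOUND"))
}
```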
@@ -609,6 +628,8 @@ class DummySparkConnectService() extends SparkConnectServiceGrpc.SparkConnectSer private val inputArtifactRequests: mutable.ListBuffer[AddArtifactsRequest] = mutable.ListBuffer.empty + var errorToThrowOnExecute: Option[Throwable] = None + private[sql] def getAndClearLatestInputPlan(): proto.Plan = { val plan = inputPlan inputPlan = null @@ -624,6 +645,13 @@ class DummySparkConnectService() extends SparkConnectServiceGrpc.SparkConnectSer override def executePlan( request: ExecutePlanRequest, responseObserver: StreamObserver[ExecutePlanResponse]): Unit = { + if (errorToThrowOnExecute.isDefined) { + val error = errorToThrowOnExecute.get + errorToThrowOnExecute = None + responseObserver.onError(error) + return + } + // Reply with a dummy response using the same client ID val requestSessionId = request.getSessionId val operationId = if (request.hasOperationId) { diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala index 38712a0f1f633..e6009a967d156 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/ClientStreamingQuerySuite.scala @@ -33,11 +33,11 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, ForeachWriter, Row, SparkSession} import org.apache.spark.sql.functions.{col, lit, udf, window} import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryIdleEvent, QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent} -import org.apache.spark.sql.test.{IntegrationTestUtils, QueryTest, SQLHelper} +import org.apache.spark.sql.test.{IntegrationTestUtils, QueryTest, RemoteSparkSession} import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} import org.apache.spark.util.SparkFileUtils -class ClientStreamingQuerySuite extends QueryTest with SQLHelper with Logging { +class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with Logging { private val testDataPath = Paths .get( @@ -508,6 +508,33 @@ class ClientStreamingQuerySuite extends QueryTest with SQLHelper with Logging { assert(spark.streams.listListeners().length == 0) } + test("listener events") { + val listener = new MyListener() + spark.streams.addListener(listener) + + val q = spark.readStream + .format("rate") + .load() + .writeStream + .format("console") + .start() + + try { + q.processAllAvailable() + eventually(timeout(30.seconds)) { + assert(q.isActive) + assert(listener.start.length == 1) + assert(listener.progress.nonEmpty) + } + } finally { + q.stop() + eventually(timeout(30.seconds)) { + assert(!q.isActive) + assert(listener.terminate.nonEmpty) + } + } + } + test("foreachBatch") { // Starts a streaming query with a foreachBatch function, which writes batchId and row count // to a temp view. The test verifies that the view is populated with data. 
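The comment above describes the `foreachBatch` test that follows; for reference, this is roughly what such a query looks like from the client (a sketch assuming a Connect `SparkSession`; the handler is declared as a typed function value to select the Scala overload):

```scala
import org.apache.spark.sql.{DataFrame, SparkSession}

object ForeachBatchSketch {
  def run(spark: SparkSession): Unit = {
    // Runs on the server for every micro-batch of the query.
    val handleBatch: (DataFrame, Long) => Unit = (batch, batchId) =>
      println(s"batch $batchId contained ${batch.count()} rows")

    val query = spark.readStream
      .format("rate")
      .option("rowsPerSecond", 1)
      .load()
      .writeStream
      .foreachBatch(handleBatch)
      .start()

    query.processAllAvailable()
    query.stop()
  }
}
```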
@@ -543,6 +570,78 @@ class ClientStreamingQuerySuite extends QueryTest with SQLHelper with Logging { q.stop() } } + + abstract class EventCollector extends StreamingQueryListener { + protected def tablePostfix: String + + protected def handleOnQueryStarted(event: QueryStartedEvent): Unit = { + val df = spark.createDataFrame(Seq((event.json, 0))) + df.write.mode("append").saveAsTable(s"listener_start_events$tablePostfix") + } + + protected def handleOnQueryProgress(event: QueryProgressEvent): Unit = { + val df = spark.createDataFrame(Seq((event.json, 0))) + df.write.mode("append").saveAsTable(s"listener_progress_events$tablePostfix") + } + + protected def handleOnQueryTerminated(event: QueryTerminatedEvent): Unit = { + val df = spark.createDataFrame(Seq((event.json, 0))) + df.write.mode("append").saveAsTable(s"listener_terminated_events$tablePostfix") + } + } + + /** + * V1: Initial interface of StreamingQueryListener containing methods `onQueryStarted`, + * `onQueryProgress`, `onQueryTerminated`. It is prior to Spark 3.5. + */ + class EventCollectorV1 extends EventCollector { + override protected def tablePostfix: String = "_v1" + + override def onQueryStarted(event: QueryStartedEvent): Unit = handleOnQueryStarted(event) + + override def onQueryProgress(event: QueryProgressEvent): Unit = handleOnQueryProgress(event) + + override def onQueryTerminated(event: QueryTerminatedEvent): Unit = + handleOnQueryTerminated(event) + } + + /** + * V2: The interface after the method `onQueryIdle` is added. It is Spark 3.5+. + */ + class EventCollectorV2 extends EventCollector { + override protected def tablePostfix: String = "_v2" + + override def onQueryStarted(event: QueryStartedEvent): Unit = handleOnQueryStarted(event) + + override def onQueryProgress(event: QueryProgressEvent): Unit = handleOnQueryProgress(event) + + override def onQueryIdle(event: QueryIdleEvent): Unit = {} + + override def onQueryTerminated(event: QueryTerminatedEvent): Unit = + handleOnQueryTerminated(event) + } + + class MyListener extends StreamingQueryListener { + var start: Seq[String] = Seq.empty + var progress: Seq[String] = Seq.empty + var terminate: Seq[String] = Seq.empty + + override def onQueryStarted(event: QueryStartedEvent): Unit = { + start = start :+ event.json + } + + override def onQueryProgress(event: QueryProgressEvent): Unit = { + progress = progress :+ event.json + } + + override def onQueryIdle(event: QueryIdleEvent): Unit = { + // Do nothing + } + + override def onQueryTerminated(event: QueryTerminatedEvent): Unit = { + terminate = terminate :+ event.json + } + } } class TestForeachWriter[T] extends ForeachWriter[T] { @@ -570,58 +669,6 @@ case class TestClass(value: Int) { override def toString: String = value.toString } -abstract class EventCollector extends StreamingQueryListener { - private lazy val spark = SparkSession.builder().getOrCreate() - - protected def tablePostfix: String - - protected def handleOnQueryStarted(event: QueryStartedEvent): Unit = { - val df = spark.createDataFrame(Seq((event.json, 0))) - df.write.mode("append").saveAsTable(s"listener_start_events$tablePostfix") - } - - protected def handleOnQueryProgress(event: QueryProgressEvent): Unit = { - val df = spark.createDataFrame(Seq((event.json, 0))) - df.write.mode("append").saveAsTable(s"listener_progress_events$tablePostfix") - } - - protected def handleOnQueryTerminated(event: QueryTerminatedEvent): Unit = { - val df = spark.createDataFrame(Seq((event.json, 0))) - 
df.write.mode("append").saveAsTable(s"listener_terminated_events$tablePostfix") - } -} - -/** - * V1: Initial interface of StreamingQueryListener containing methods `onQueryStarted`, - * `onQueryProgress`, `onQueryTerminated`. It is prior to Spark 3.5. - */ -class EventCollectorV1 extends EventCollector { - override protected def tablePostfix: String = "_v1" - - override def onQueryStarted(event: QueryStartedEvent): Unit = handleOnQueryStarted(event) - - override def onQueryProgress(event: QueryProgressEvent): Unit = handleOnQueryProgress(event) - - override def onQueryTerminated(event: QueryTerminatedEvent): Unit = - handleOnQueryTerminated(event) -} - -/** - * V2: The interface after the method `onQueryIdle` is added. It is Spark 3.5+. - */ -class EventCollectorV2 extends EventCollector { - override protected def tablePostfix: String = "_v2" - - override def onQueryStarted(event: QueryStartedEvent): Unit = handleOnQueryStarted(event) - - override def onQueryProgress(event: QueryProgressEvent): Unit = handleOnQueryProgress(event) - - override def onQueryIdle(event: QueryIdleEvent): Unit = {} - - override def onQueryTerminated(event: QueryTerminatedEvent): Unit = - handleOnQueryTerminated(event) -} - class ForeachBatchFn(val viewName: String) extends VoidFunction2[DataFrame, java.lang.Long] with Serializable { diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateStreamingSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateStreamingSuite.scala index 2fab6e8e3c843..dc74463f1a25b 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateStreamingSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateStreamingSuite.scala @@ -25,14 +25,14 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Append -import org.apache.spark.sql.test.{QueryTest, SQLHelper} +import org.apache.spark.sql.test.{QueryTest, RemoteSparkSession} import org.apache.spark.sql.types.{StringType, StructField, StructType, TimestampType} case class ClickEvent(id: String, timestamp: Timestamp) case class ClickState(id: String, count: Int) -class FlatMapGroupsWithStateStreamingSuite extends QueryTest with SQLHelper { +class FlatMapGroupsWithStateStreamingSuite extends QueryTest with RemoteSparkSession { val flatMapGroupsWithStateSchema: StructType = StructType( Array(StructField("id", StringType), StructField("timestamp", TimestampType))) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/ConnectFunSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/ConnectFunSuite.scala index 8d69d91a34f7d..f40738b983b39 100755 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/ConnectFunSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/ConnectFunSuite.scala @@ -34,7 +34,7 @@ trait ConnectFunSuite extends AnyFunSuite { // scalastyle:ignore funsuite java.nio.file.Paths.get(sparkHome, first +: more: _*) } - protected val baseResourcePath: Path = { + protected def baseResourcePath: Path = { getWorkspaceFilePath( "connector", "connect", @@ -45,7 +45,7 @@ trait ConnectFunSuite extends AnyFunSuite { // scalastyle:ignore funsuite "resources").toAbsolutePath } - protected val commonResourcePath: Path = { + 
protected def commonResourcePath: Path = { getWorkspaceFilePath( "connector", "connect", diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/QueryTest.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/QueryTest.scala index 54fc97c50b3ec..8837c76b76aeb 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/QueryTest.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/QueryTest.scala @@ -21,11 +21,13 @@ import java.util.TimeZone import org.scalatest.Assertions -import org.apache.spark.sql.{DataFrame, Dataset, Row} +import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.util.SparkStringUtils.sideBySide import org.apache.spark.util.ArrayImplicits._ -abstract class QueryTest extends RemoteSparkSession { +abstract class QueryTest extends ConnectFunSuite with SQLHelper { + + def spark: SparkSession /** * Runs the plan and makes sure the answer matches the expected result. diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/RemoteSparkSession.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/RemoteSparkSession.scala index 300de6e9b0812..ecc84e8418013 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/RemoteSparkSession.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/RemoteSparkSession.scala @@ -23,7 +23,7 @@ import java.util.concurrent.TimeUnit import scala.concurrent.duration.FiniteDuration -import org.scalatest.BeforeAndAfterAll +import org.scalatest.{BeforeAndAfterAll, Suite} import org.apache.spark.SparkBuildInfo import org.apache.spark.sql.SparkSession @@ -204,7 +204,7 @@ object SparkConnectServerUtils { } } -trait RemoteSparkSession extends ConnectFunSuite with BeforeAndAfterAll { +trait RemoteSparkSession extends BeforeAndAfterAll { self: Suite => import SparkConnectServerUtils._ var spark: SparkSession = _ protected lazy val serverPort: Int = port diff --git a/connector/connect/common/src/main/protobuf/spark/connect/base.proto b/connector/connect/common/src/main/protobuf/spark/connect/base.proto index 49a33d3419b6f..33ed73836616a 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/base.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/base.proto @@ -381,6 +381,9 @@ message ExecutePlanResponse { // (Optional) Intermediate query progress reports. ExecutionProgress execution_progress = 18; + // Response for command that checkpoints a DataFrame. + CheckpointCommandResult checkpoint_command_result = 19; + // Support arbitrary result objects. google.protobuf.Any extension = 999; } @@ -434,6 +437,7 @@ message ExecutePlanResponse { string name = 1; repeated Expression.Literal values = 2; repeated string keys = 3; + int64 plan_id = 4; } message ResultComplete { @@ -1047,6 +1051,11 @@ message FetchErrorDetailsResponse { } } +message CheckpointCommandResult { + // (Required) The logical plan checkpointed. + CachedRemoteRelation relation = 1; +} + // Main interface for the SparkConnect service. 
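`CheckpointCommandResult` arrives as a new entry in the `ExecutePlanResponse` oneof; a sketch of how a client-side handler might pick it out (the accessor names follow what protoc generates for the field added above):

```scala
import org.apache.spark.connect.proto.ExecutePlanResponse

object CheckpointResultSketch {
  /** Returns the server-side relation id of a checkpointed DataFrame, if present. */
  def cachedRelationId(response: ExecutePlanResponse): Option[String] =
    if (response.hasCheckpointCommandResult) {
      Some(response.getCheckpointCommandResult.getRelation.getRelationId)
    } else {
      None
    }
}
```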
service SparkConnectService { diff --git a/connector/connect/common/src/main/protobuf/spark/connect/commands.proto b/connector/connect/common/src/main/protobuf/spark/connect/commands.proto index acff0a2089e95..0e0c55fa34f00 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/commands.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/commands.proto @@ -45,6 +45,8 @@ message Command { StreamingQueryListenerBusCommand streaming_query_listener_bus_command = 11; CommonInlineUserDefinedDataSource register_data_source = 12; CreateResourceProfileCommand create_resource_profile_command = 13; + CheckpointCommand checkpoint_command = 14; + RemoveCachedRemoteRelationCommand remove_cached_remote_relation_command = 15; // This field is used to mark extensions to the protocol. When plugins generate arbitrary // Commands they can add them here. During the planning the correct resolution is done. @@ -484,3 +486,21 @@ message CreateResourceProfileCommandResult { // (Required) Server-side generated resource profile id. int32 profile_id = 1; } + +// Command to remove `CachedRemoteRelation` +message RemoveCachedRemoteRelationCommand { + // (Required) The cached remote relation to be removed. + CachedRemoteRelation relation = 1; +} + +message CheckpointCommand { + // (Required) The logical plan to checkpoint. + Relation relation = 1; + + // (Required) Whether to checkpoint locally using a local temporary + // directory on the Spark Connect server (Spark Driver). + bool local = 2; + + // (Required) Whether to checkpoint this DataFrame immediately. + bool eager = 3; +} diff --git a/connector/connect/common/src/main/protobuf/spark/connect/common.proto b/connector/connect/common/src/main/protobuf/spark/connect/common.proto index da334bfd9ee8e..b2848370b01dc 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/common.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/common.proto @@ -81,3 +81,18 @@ message ResourceProfile { // (e.g., cores, memory, CPU) to its specific request. map task_resources = 2; } + +message Origin { + // (Required) Indicates the origin type. + oneof function { + PythonOrigin python_origin = 1; + } +} + +message PythonOrigin { + // (Required) Name of the origin, for example, the name of the function. + string fragment = 1; + + // (Required) Callsite to show to end users, for example, the stacktrace. + string call_site = 2; +} diff --git a/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto b/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto index 726ae5dd1c219..257634813e742 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/expressions.proto @@ -19,6 +19,7 @@ syntax = 'proto3'; import "google/protobuf/any.proto"; import "spark/connect/types.proto"; +import "spark/connect/common.proto"; package spark.connect; @@ -30,6 +31,7 @@ option go_package = "internal/generated"; // expressions in SQL appear. message Expression { + ExpressionCommon common = 18; oneof expr_type { Literal literal = 1; UnresolvedAttribute unresolved_attribute = 2; @@ -342,6 +344,11 @@ message Expression { } } +message ExpressionCommon { + // (Required) The origin information for this expression, such as the stacktrace. + Origin origin = 1; +} + message CommonInlineUserDefinedFunction { // (Required) Name of the user-defined function.
string function_name = 1; @@ -366,6 +373,8 @@ message PythonUDF { bytes command = 3; // (Required) Python version being used in the client. string python_ver = 4; + // (Optional) Additional includes for the Python UDF. + repeated string additional_includes = 5; } message ScalarScalaUDF { @@ -377,6 +386,8 @@ message ScalarScalaUDF { DataType outputType = 3; // (Required) True if the UDF can return null value bool nullable = 4; + // (Required) Indicates whether the UDF is an aggregate function. + bool aggregate = 5; } message JavaUDF { diff --git a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto index 5cbe6459d226b..04fe21086097c 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto @@ -108,10 +108,13 @@ message Unknown {} // Common metadata of all relations. message RelationCommon { // (Required) Shared relation metadata. - string source_info = 1; + string source_info = 1 [deprecated=true]; // (Optional) A per-client globally unique id for a given connect plan. optional int64 plan_id = 2; + + // (Optional) The origin information for this relation, such as the stacktrace. + Origin origin = 3; } // Relation that uses a SQL query to generate the output. @@ -468,7 +471,9 @@ message Sample { // (Optional) Whether to sample with replacement. optional bool with_replacement = 4; - // (Optional) The random seed. + // (Required) The random seed. + // This field is required to avoid generating mutable dataframes (see SPARK-48184 for details); + // however, it is still declared 'optional' here for backward compatibility. optional int64 seed = 5; // (Required) Explicitly sort the underlying plan to make the ordering deterministic or cache it. @@ -688,7 +693,9 @@ message StatSampleBy { // If a stratum is not specified, we treat its fraction as zero. repeated Fraction fractions = 3; - // (Optional) The random seed. + // (Required) The random seed. + // This field is required to avoid generating mutable dataframes (see SPARK-48184 for details); + // however, it is still declared 'optional' here for backward compatibility. optional int64 seed = 5; message Fraction { diff --git a/connector/connect/common/src/main/protobuf/spark/connect/types.proto b/connector/connect/common/src/main/protobuf/spark/connect/types.proto index 48f7385330c86..4f768f201575b 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/types.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/types.proto @@ -101,7 +101,7 @@ message DataType { message String { uint32 type_variation_reference = 1; - uint32 collation_id = 2; + string collation = 2; } message Binary { diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/ConnectProtoUtils.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/ConnectProtoUtils.scala new file mode 100644 index 0000000000000..053e03fc08e4b --- /dev/null +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/ConnectProtoUtils.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connect + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.connect.proto +import org.apache.spark.sql.connect.common.ProtoUtils + +/** + * Utility functions for parsing Spark Connect protocol buffers with a recursion limit. This is + * intended to be used by plugins, as they cannot use `ProtoUtils.parseWithRecursionLimit` due to + * the shading of the `com.google.protobuf` package. + */ +object ConnectProtoUtils { + @DeveloperApi + def parsePlanWithRecursionLimit(bytes: Array[Byte], recursionLimit: Int): proto.Plan = { + ProtoUtils.parseWithRecursionLimit(bytes, proto.Plan.parser(), recursionLimit) + } + + @DeveloperApi + def parseRelationWithRecursionLimit(bytes: Array[Byte], recursionLimit: Int): proto.Relation = { + ProtoUtils.parseWithRecursionLimit(bytes, proto.Relation.parser(), recursionLimit) + } + + @DeveloperApi + def parseCommandWithRecursionLimit(bytes: Array[Byte], recursionLimit: Int): proto.Command = { + ProtoUtils.parseWithRecursionLimit(bytes, proto.Command.parser(), recursionLimit) + } + + @DeveloperApi + def parseExpressionWithRecursionLimit( + bytes: Array[Byte], + recursionLimit: Int): proto.Expression = { + ProtoUtils.parseWithRecursionLimit(bytes, proto.Expression.parser(), recursionLimit) + } + + @DeveloperApi + def parseDataTypeWithRecursionLimit(bytes: Array[Byte], recursionLimit: Int): proto.DataType = { + ProtoUtils.parseWithRecursionLimit(bytes, proto.DataType.parser(), recursionLimit) + } +} diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ExecutePlanResponseReattachableIterator.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ExecutePlanResponseReattachableIterator.scala index 74f13272a3655..f3c13c9c2c4d8 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ExecutePlanResponseReattachableIterator.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ExecutePlanResponseReattachableIterator.scala @@ -42,7 +42,8 @@ import org.apache.spark.sql.connect.client.GrpcRetryHandler.RetryException * ReattachExecute request. ReattachExecute request is provided the responseId of last returned * ExecutePlanResponse on the iterator to return a new iterator from server that continues after * that. If the initial ExecutePlan did not even reach the server, and hence reattach fails with - * INVALID_HANDLE.OPERATION_NOT_FOUND, we attempt to retry ExecutePlan. + * INVALID_HANDLE.OPERATION_NOT_FOUND or INVALID_HANDLE.SESSION_NOT_FOUND, we attempt to retry + * ExecutePlan. * * In reattachable execute the server does buffer some responses in case the client needs to * backtrack. To let server release this buffer sooner, this iterator asynchronously sends @@ -66,7 +67,8 @@ class ExecutePlanResponseReattachableIterator( // Add operation id, if not present. 
// with operationId set by the client, the client can use it to try to reattach on error // even before getting the first response. If the operation in fact didn't even reach the - // server, that will end with INVALID_HANDLE.OPERATION_NOT_FOUND error. + // server, that will end with INVALID_HANDLE.OPERATION_NOT_FOUND or + // INVALID_HANDLE.SESSION_NOT_FOUND error. UUID.randomUUID.toString } @@ -234,10 +236,14 @@ class ExecutePlanResponseReattachableIterator( } catch { case ex: StatusRuntimeException if Option(StatusProto.fromThrowable(ex)) - .exists(_.getMessage.contains("INVALID_HANDLE.OPERATION_NOT_FOUND")) => + .exists(ex => { + ex.getMessage.contains("INVALID_HANDLE.OPERATION_NOT_FOUND") || + ex.getMessage.contains("INVALID_HANDLE.SESSION_NOT_FOUND") + }) => if (lastReturnedResponseId.isDefined) { throw new IllegalStateException( - "OPERATION_NOT_FOUND on the server but responses were already received from it.", + "OPERATION_NOT_FOUND/SESSION_NOT_FOUND on the server but responses were already " + + "received from it.", ex) } // Try a new ExecutePlan, and throw upstream for retry. diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcRetryHandler.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcRetryHandler.scala index 508dad3d748d2..7e0a356b9e493 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcRetryHandler.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcRetryHandler.scala @@ -22,7 +22,7 @@ import scala.util.control.NonFatal import io.grpc.stub.StreamObserver import org.apache.spark.internal.Logging -import org.apache.spark.internal.LogKey.{ERROR, POLICY, RETRY_COUNT, WAIT_TIME} +import org.apache.spark.internal.LogKeys.{ERROR, NUM_RETRY, POLICY, RETRY_WAIT_TIME} import org.apache.spark.internal.MDC private[sql] class GrpcRetryHandler( @@ -190,7 +190,7 @@ private[sql] object GrpcRetryHandler extends Logging { // retry exception is considered immediately retriable without any policies. 
logWarning( log"Non-Fatal error during RPC execution: ${MDC(ERROR, lastException)}, " + - log"retrying (currentRetryNum=${MDC(RETRY_COUNT, currentRetryNum)})") + log"retrying (currentRetryNum=${MDC(NUM_RETRY, currentRetryNum)})") return } @@ -200,8 +200,8 @@ private[sql] object GrpcRetryHandler extends Logging { if (time.isDefined) { logWarning( log"Non-Fatal error during RPC execution: ${MDC(ERROR, lastException)}, " + - log"retrying (wait=${MDC(WAIT_TIME, time.get.toMillis)} ms, " + - log"currentRetryNum=${MDC(RETRY_COUNT, currentRetryNum)}, " + + log"retrying (wait=${MDC(RETRY_WAIT_TIME, time.get.toMillis)} ms, " + + log"currentRetryNum=${MDC(NUM_RETRY, currentRetryNum)}, " + log"policy=${MDC(POLICY, policy.getName)}).") sleep(time.get.toMillis) return @@ -210,7 +210,7 @@ logWarning( log"Non-Fatal error during RPC execution: ${MDC(ERROR, lastException)}, " + - log"exceeded retries (currentRetryNum=${MDC(RETRY_COUNT, currentRetryNum)})") + log"exceeded retries (currentRetryNum=${MDC(NUM_RETRY, currentRetryNum)})") val error = new RetriesExceeded() exceptionList.foreach(error.addSuppressed) diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala index 29272c96132bc..42c3387335be9 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ResponseValidator.scala @@ -16,7 +16,10 @@ */ package org.apache.spark.sql.connect.client +import java.util.concurrent.atomic.AtomicBoolean + import com.google.protobuf.GeneratedMessageV3 +import io.grpc.{Status, StatusRuntimeException} import io.grpc.stub.StreamObserver import org.apache.spark.internal.Logging @@ -30,6 +33,12 @@ class ResponseValidator extends Logging { // do not use server-side streaming. private var serverSideSessionId: Option[String] = None + // Indicates whether the client and the client information on the server correspond to each other. + // This flag being false means that the server has restarted and lost the client information, or + // that there is a logic error in the code; in both cases, the user should establish a new connection + // to the server. Access to the value has to be synchronized since it can be shared. + private val isSessionActive: AtomicBoolean = new AtomicBoolean(true) + // Returns the server side session ID, used to send it back to the server in the follow-up // requests so the server can validate its session id against the previous requests. def getServerSideSessionId: Option[String] = serverSideSessionId @@ -42,8 +51,25 @@ class ResponseValidator extends Logging { serverSideSessionId = Some(serverSideSessionId.getOrElse("") + suffix) } + /** + * Returns true if the session is valid on both the client and the server. + */ + private[sql] def isSessionValid: Boolean = { + // An active session is considered valid.
+ isSessionActive.getAcquire + } + def verifyResponse[RespT <: GeneratedMessageV3](fn: => RespT): RespT = { - val response = fn + val response = + try { + fn + } catch { + case e: StatusRuntimeException + if e.getStatus.getCode == Status.Code.INTERNAL && + e.getMessage.contains("[INVALID_HANDLE.SESSION_CHANGED]") => + isSessionActive.setRelease(false) + throw e + } val field = response.getDescriptorForType.findFieldByName("server_side_session_id") // If the field does not exist, we ignore it. New / Old message might not contain it and this // behavior allows us to be compatible. @@ -54,6 +80,7 @@ serverSideSessionId match { case Some(id) => if (value != id) { + isSessionActive.setRelease(false) throw new IllegalStateException( s"Server side session ID changed from $id to $value") } diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala index d9d51c15a880b..7c3108fdb1b0e 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClient.scala @@ -39,7 +39,7 @@ import org.apache.spark.sql.connect.common.config.ConnectCommon */ private[sql] class SparkConnectClient( private[sql] val configuration: SparkConnectClient.Configuration, - private val channel: ManagedChannel) { + private[sql] val channel: ManagedChannel) { private val userContext: UserContext = configuration.userContext @@ -71,6 +71,17 @@ private[sql] class SparkConnectClient( stubState.responseValidator.hijackServerSideSessionIdForTesting(suffix) } + /** + * Returns true if the session is valid on both the client and the server. A session becomes + * invalid if the server side information about the client, e.g., session ID, does not + * correspond to the actual client state. + */ + private[sql] def isSessionValid: Boolean = { + // The last known state of the session is stored in `responseValidator`, because it is where the + // client gets responses from the server.
+ stubState.responseValidator.isSessionValid + } + private[sql] val artifactManager: ArtifactManager = { new ArtifactManager(configuration, sessionId, bstub, stub) } @@ -566,6 +577,13 @@ object SparkConnectClient { def grpcMaxMessageSize: Int = _configuration.grpcMaxMessageSize + def grpcMaxRecursionLimit(recursionLimit: Int): Builder = { + _configuration = _configuration.copy(grpcMaxRecursionLimit = recursionLimit) + this + } + + def grpcMaxRecursionLimit: Int = _configuration.grpcMaxRecursionLimit + def option(key: String, value: String): Builder = { _configuration = _configuration.copy(metadata = _configuration.metadata + ((key, value))) this @@ -703,7 +721,8 @@ object SparkConnectClient { useReattachableExecute: Boolean = true, interceptors: List[ClientInterceptor] = List.empty, sessionId: Option[String] = None, - grpcMaxMessageSize: Int = ConnectCommon.CONNECT_GRPC_MAX_MESSAGE_SIZE) { + grpcMaxMessageSize: Int = ConnectCommon.CONNECT_GRPC_MAX_MESSAGE_SIZE, + grpcMaxRecursionLimit: Int = ConnectCommon.CONNECT_GRPC_MARSHALLER_RECURSION_LIMIT) { def userContext: proto.UserContext = { val builder = proto.UserContext.newBuilder() diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala index 93d1075aea025..0905ee76c3f34 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkResult.scala @@ -27,10 +27,13 @@ import org.apache.arrow.vector.ipc.message.{ArrowMessage, ArrowRecordBatch} import org.apache.arrow.vector.types.pojo import org.apache.spark.connect.proto +import org.apache.spark.connect.proto.ExecutePlanResponse.ObservedMetrics +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, RowEncoder} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ProductEncoder, UnboundRowEncoder} +import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema import org.apache.spark.sql.connect.client.arrow.{AbstractMessageIterator, ArrowDeserializingIterator, ConcatenatingArrowStreamReader, MessageIterator} -import org.apache.spark.sql.connect.common.DataTypeProtoConverter +import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, LiteralValueProtoConverter} import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.util.ArrowUtils @@ -38,7 +41,8 @@ private[sql] class SparkResult[T]( responses: CloseableIterator[proto.ExecutePlanResponse], allocator: BufferAllocator, encoder: AgnosticEncoder[T], - timeZoneId: String) + timeZoneId: String, + setObservationMetricsOpt: Option[(Long, Map[String, Any]) => Unit] = None) extends AutoCloseable { self => case class StageInfo( @@ -79,6 +83,7 @@ private[sql] class SparkResult[T]( private[this] var arrowSchema: pojo.Schema = _ private[this] var nextResultIndex: Int = 0 private val resultMap = mutable.Map.empty[Int, (Long, Seq[ArrowMessage])] + private val observedMetrics = mutable.Map.empty[String, Row] private val cleanable = SparkResult.cleaner.register(this, new SparkResultCloseable(resultMap, responses)) @@ -117,6 +122,9 @@ private[sql] class SparkResult[T]( while (!stop && responses.hasNext) { val response = responses.next() + // Collect metrics for this response + observedMetrics ++= processObservedMetrics(response.getObservedMetricsList) + // Save and validate operationId if (opId 
== null) { opId = response.getOperationId @@ -198,6 +206,29 @@ private[sql] class SparkResult[T]( nonEmpty } + private def processObservedMetrics( + metrics: java.util.List[ObservedMetrics]): Iterable[(String, Row)] = { + metrics.asScala.map { metric => + assert(metric.getKeysCount == metric.getValuesCount) + var schema = new StructType() + val keys = mutable.ListBuffer.empty[String] + val values = mutable.ListBuffer.empty[Any] + (0 until metric.getKeysCount).map { i => + val key = metric.getKeys(i) + val value = LiteralValueProtoConverter.toCatalystValue(metric.getValues(i)) + schema = schema.add(key, LiteralValueProtoConverter.toDataType(value.getClass)) + keys += key + values += value + } + // If the metrics are registered by an Observation object, attach them and unblock any + // blocked thread. + setObservationMetricsOpt.foreach { setObservationMetrics => + setObservationMetrics(metric.getPlanId, keys.zip(values).toMap) + } + metric.getName -> new GenericRowWithSchema(values.toArray, schema) + } + } + /** * Returns the number of elements in the result. */ @@ -248,6 +279,15 @@ private[sql] class SparkResult[T]( result } + /** + * Returns all observed metrics in the result. + */ + def getObservedMetrics: Map[String, Row] = { + // We need to process all responses to get all metrics. + processResponses() + observedMetrics.toMap + } + /** * Returns an iterator over the contents of the result. */ diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala index 1f580a0ffc0a3..f63692717947a 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/DataTypeProtoConverter.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.connect.common import scala.jdk.CollectionConverters._ import org.apache.spark.connect.proto +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.types._ import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.SparkClassUtils @@ -80,7 +81,7 @@ object DataTypeProtoConverter { } private def toCatalystStringType(t: proto.DataType.String): StringType = - StringType(t.getCollationId) + StringType(if (t.getCollation.nonEmpty) t.getCollation else "UTF8_BINARY") private def toCatalystYearMonthIntervalType(t: proto.DataType.YearMonthInterval) = { (t.hasStartField, t.hasEndField) match { @@ -177,7 +178,11 @@ object DataTypeProtoConverter { case s: StringType => proto.DataType .newBuilder() - .setString(proto.DataType.String.newBuilder().setCollationId(s.collationId).build()) + .setString( + proto.DataType.String + .newBuilder() + .setCollation(CollationFactory.fetchCollation(s.collationId).collationName) + .build()) .build() case CharType(length) => diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala index ce42cc797bf38..1f3496fa89847 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/LiteralValueProtoConverter.scala @@ -204,7 +204,7 @@ object LiteralValueProtoConverter { def toLiteralProto(literal:
Any, dataType: DataType): proto.Expression.Literal = toLiteralProtoBuilder(literal, dataType).build() - private def toDataType(clz: Class[_]): DataType = clz match { + private[sql] def toDataType(clz: Class[_]): DataType = clz match { // primitive types case JShort.TYPE => ShortType case JInteger.TYPE => IntegerType diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala index 96bd06b01535e..af07ef11cdf30 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/ProtoUtils.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.connect.common -import com.google.protobuf.Message +import com.google.protobuf.{CodedInputStream, InvalidProtocolBufferException, Message, Parser} -private[connect] object ProtoUtils { +private[sql] object ProtoUtils { def abbreviate[T <: Message](message: T, maxStringSize: Int = 1024): T = { abbreviate[T](message, Map("STRING" -> maxStringSize)) } @@ -51,4 +51,25 @@ private[connect] object ProtoUtils { throw new IllegalArgumentException("Spark Connect tag cannot be an empty string.") } } + + def parseWithRecursionLimit[T <: Message]( + bytes: Array[Byte], + parser: Parser[T], + recursionLimit: Int): T = { + val cis = CodedInputStream.newInstance(bytes) + cis.setSizeLimit(Integer.MAX_VALUE) + cis.setRecursionLimit(recursionLimit) + val message = parser.parseFrom(cis) + try { + // If the last tag is 0, it means the message is correctly parsed. + // If the last tag is not 0, it means the message is not correctly + // parsed, and we should throw an exception. + cis.checkLastTagWas(0) + message + } catch { + case e: InvalidProtocolBufferException => + e.setUnfinishedMessage(message) + throw e + } + } } diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/config/ConnectCommon.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/config/ConnectCommon.scala index dca65cf905fc8..e244fd13595b2 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/config/ConnectCommon.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/common/config/ConnectCommon.scala @@ -18,5 +18,7 @@ package org.apache.spark.sql.connect.common.config private[sql] object ConnectCommon { val CONNECT_GRPC_BINDING_PORT: Int = 15002 - val CONNECT_GRPC_MAX_MESSAGE_SIZE: Int = 128 * 1024 * 1024; + val CONNECT_GRPC_PORT_MAX_RETRIES: Int = 0 + val CONNECT_GRPC_MAX_MESSAGE_SIZE: Int = 128 * 1024 * 1024 + val CONNECT_GRPC_MARSHALLER_RECURSION_LIMIT: Int = 1024 } diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain index 31e03b79eb987..8321eb8beb926 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, 
g, GCM, DEFAULT, )#0] +Project [static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary))) AS aes_decrypt(g, g, GCM, DEFAULT, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain index fc572e8fe7c67..1a721c372c106 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, DEFAULT, )#0] +Project [static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary))) AS aes_decrypt(g, g, g, DEFAULT, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain index c6c693013dd0a..0d87c8b40853a 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, g, )#0] +Project [static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary))) AS aes_decrypt(g, g, g, g, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain index 97bb528b84b3f..3afae44e97ddf 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, g, g)#0] +Project [static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary))) AS aes_decrypt(g, g, g, g, g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt.explain index 44084a8e60fb0..9f88193ce3e3f 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, GCM, DEFAULT, , )#0] +Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), cast( as binary))) AS aes_encrypt(g, g, GCM, DEFAULT, , )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode.explain index 29ccf0c1c833f..97163bf0f7c32 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, DEFAULT, , )#0] +Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), cast( as binary))) AS aes_encrypt(g, g, g, DEFAULT, , )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding.explain index 5591363426ab5..35fdd3df3e6b2 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, , )#0] +Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), cast( as binary))) AS aes_encrypt(g, g, g, g, , )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding_iv.explain 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding_iv.explain index 54b08d7bdb48e..0d566721e51d4 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding_iv.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding_iv.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, 0x434445, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, X'434445', )#0] +Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, 0x434445, cast( as binary))) AS aes_encrypt(g, g, g, g, X'434445', )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding_iv_aad.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding_iv_aad.explain index 024089170bc75..755332cca5edd 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding_iv_aad.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_encrypt_with_mode_padding_iv_aad.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, 0x434445, cast(g#0 as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, X'434445', g)#0] +Project [static_invoke(ExpressionImplUtils.aesEncrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, 0x434445, cast(g#0 as binary))) AS aes_encrypt(g, g, g, g, X'434445', g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_bit_position.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_bit_position.explain index 61a15dd4c945e..76b460ad4d043 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_bit_position.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_bit_position.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.BitmapExpressionUtils, LongType, bitmapBitPosition, id#0L, LongType, true, false, true) AS bitmap_bit_position(id)#0L] +Project [static_invoke(BitmapExpressionUtils.bitmapBitPosition(id#0L)) AS bitmap_bit_position(id)#0L] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_bucket_number.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_bucket_number.explain index 61a15dd4c945e..76b460ad4d043 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_bucket_number.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_bucket_number.explain @@ -1,2 
+1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.BitmapExpressionUtils, LongType, bitmapBitPosition, id#0L, LongType, true, false, true) AS bitmap_bit_position(id)#0L] +Project [static_invoke(BitmapExpressionUtils.bitmapBitPosition(id#0L)) AS bitmap_bit_position(id)#0L] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_count.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_count.explain index da43425c3ec04..c2783bff65eec 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_count.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_bitmap_count.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.BitmapExpressionUtils, LongType, bitmapCount, bytes#0, BinaryType, true, false, true) AS bitmap_count(bytes)#0L] +Project [static_invoke(BitmapExpressionUtils.bitmapCount(bytes#0)) AS bitmap_count(bytes)#0L] +- LocalRelation , [id#0L, bytes#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_count_if.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_count_if.explain index f2ada15eccb7d..a9fd2eeb669aa 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_count_if.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_count_if.explain @@ -1,3 +1,4 @@ -Aggregate [count(if ((_common_expr_0#0 = false)) null else _common_expr_0#0) AS count_if((a > 0))#0L] -+- Project [id#0L, a#0, b#0, d#0, e#0, f#0, g#0, (a#0 > 0) AS _common_expr_0#0] - +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] +Project [_aggregateexpression#0L AS count_if((a > 0))#0L] ++- Aggregate [count(if ((_common_expr_0#0 = false)) null else _common_expr_0#0) AS _aggregateexpression#0L] + +- Project [id#0L, a#0, b#0, d#0, e#0, f#0, g#0, (a#0 > 0) AS _common_expr_0#0] + +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_decode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_decode.explain index 165be9b9e12f1..c7f2e4cf9c769 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_decode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_decode.explain @@ -1,2 +1,2 @@ -Project [decode(cast(g#0 as binary), UTF-8, false) AS decode(g, UTF-8)#0] +Project [static_invoke(StringDecode.decode(cast(g#0 as binary), UTF-8, false, false)) AS decode(g, UTF-8)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_encode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_encode.explain index 2f65436059230..3f36f5e4451ba 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_encode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_encode.explain @@ -1,2 +1,2 @@ -Project [encode(g#0, UTF-8, false) AS encode(g, UTF-8)#0] +Project [static_invoke(Encode.encode(g#0, UTF-8, false, false)) AS encode(g, UTF-8)#0] +- LocalRelation , [id#0L, a#0, 
b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_is_variant_null.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_is_variant_null.explain new file mode 100644 index 0000000000000..e750021ce22bb --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_is_variant_null.explain @@ -0,0 +1,2 @@ +Project [static_invoke(VariantExpressionEvalUtils.isVariantNull(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)))) AS is_variant_null(parse_json(g))#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_lpad_binary.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_lpad_binary.explain index 4efc5a3709b6f..50b50a19a49ce 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_lpad_binary.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_lpad_binary.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.unsafe.types.ByteArray, BinaryType, lpad, bytes#0, 5, 0x0C0A0F0E, BinaryType, IntegerType, BinaryType, true, false, true) AS lpad(bytes, 5, X'0C0A0F0E')#0] +Project [static_invoke(ByteArray.lpad(bytes#0, 5, 0x0C0A0F0E)) AS lpad(bytes, 5, X'0C0A0F0E')#0] +- LocalRelation , [id#0L, bytes#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_json.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_json.explain new file mode 100644 index 0000000000000..cbcf803b39010 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_parse_json.explain @@ -0,0 +1,2 @@ +Project [static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)) AS parse_json(g)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain index 8e1df4a043575..1f17ca72867da 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_positive.explain @@ -1,2 +1,2 @@ -Project [positive(a#0) AS (+ a)#0] +Project [a#0 AS (+ a)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_rpad_binary.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_rpad_binary.explain index 10d77eef1cb65..5726552fe429d 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_rpad_binary.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_rpad_binary.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.unsafe.types.ByteArray, BinaryType, rpad, bytes#0, 5, 0x0B0A0B0E, BinaryType, IntegerType, BinaryType, true, false, true) AS rpad(bytes, 5, X'0B0A0B0E')#0] +Project [static_invoke(ByteArray.rpad(bytes#0, 5, 0x0B0A0B0E)) AS rpad(bytes, 5, X'0B0A0B0E')#0] +- LocalRelation , [id#0L, bytes#0] diff --git 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant.explain new file mode 100644 index 0000000000000..04b33fdd70678 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant.explain @@ -0,0 +1,2 @@ +Project [static_invoke(SchemaOfVariant.schemaOfVariant(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)))) AS schema_of_variant(parse_json(g))#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant_agg.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant_agg.explain new file mode 100644 index 0000000000000..18e8801bb2986 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_schema_of_variant_agg.explain @@ -0,0 +1,2 @@ +Aggregate [schema_of_variant_agg(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)), 0, 0) AS schema_of_variant_agg(parse_json(g))#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_using_columns.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_using_columns.explain new file mode 100644 index 0000000000000..2ce3052d7d75e --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_using_columns.explain @@ -0,0 +1,2 @@ +Project [split(g#0, g#0, -1) AS split(g, g, -1)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_with_limit_using_columns.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_with_limit_using_columns.explain new file mode 100644 index 0000000000000..2d16b9eed332d --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_split_with_limit_using_columns.explain @@ -0,0 +1,2 @@ +Project [split(g#0, ;, a#0) AS split(g, ;, a)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_substring_using_columns.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_substring_using_columns.explain new file mode 100644 index 0000000000000..3050d15d9754c --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_substring_using_columns.explain @@ -0,0 +1,2 @@ +Project [substring(g#0, a#0, cast(b#0 as int)) AS substring(g, a, b)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_substring_with_columns.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_substring_with_columns.explain new file mode 100644 index 0000000000000..fe07244fc9cec --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_substring_with_columns.explain @@ -0,0 +1,2 @@ +Project [substring(g#0, 4, 5) AS substring(g, 4, 5)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git 
a/connector/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_add.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_add.explain new file mode 100644 index 0000000000000..36dde1393cdb2 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_add.explain @@ -0,0 +1,2 @@ +Project [timestampadd(week, cast(x#0L as int), t#0, Some(America/Los_Angeles)) AS timestampadd(week, x, t)#0] ++- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_diff.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_diff.explain new file mode 100644 index 0000000000000..7a0a3ff8c53d3 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_timestamp_diff.explain @@ -0,0 +1,2 @@ +Project [timestampdiff(year, t#0, t#0, Some(America/Los_Angeles)) AS timestampdiff(year, t, t)#0L] ++- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_binary_with_format.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_binary_with_format.explain index b62ccccc0c15e..3017720acbafb 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_binary_with_format.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_binary_with_format.explain @@ -1,2 +1,2 @@ -Project [encode(g#0, UTF-8, false) AS to_binary(g, utf-8)#0] +Project [static_invoke(Encode.encode(g#0, UTF-8, false, false)) AS to_binary(g, utf-8)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt.explain index b45be28453089..8ab4b477bb557 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt.explain @@ -1,2 +1,2 @@ -Project [tryeval(staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true)) AS try_aes_decrypt(g, g, GCM, DEFAULT, )#0] +Project [tryeval(static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary)))) AS try_aes_decrypt(g, g, GCM, DEFAULT, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode.explain index 82b7ed1ea893e..e45fef8af254c 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode.explain @@ -1,2 +1,2 @@ -Project [tryeval(staticinvoke(class 
org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true)) AS try_aes_decrypt(g, g, g, DEFAULT, )#0] +Project [tryeval(static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary)))) AS try_aes_decrypt(g, g, g, DEFAULT, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode_padding.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode_padding.explain index 9087d743d941f..cdee84b92bc2a 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode_padding.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode_padding.explain @@ -1,2 +1,2 @@ -Project [tryeval(staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true)) AS try_aes_decrypt(g, g, g, g, )#0] +Project [tryeval(static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary)))) AS try_aes_decrypt(g, g, g, g, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode_padding_aad.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode_padding_aad.explain index 8854da9b423d0..b5eb4258b5250 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode_padding_aad.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_aes_decrypt_with_mode_padding_aad.explain @@ -1,2 +1,2 @@ -Project [tryeval(staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true)) AS try_aes_decrypt(g, g, g, g, g)#0] +Project [tryeval(static_invoke(ExpressionImplUtils.aesDecrypt(cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary)))) AS try_aes_decrypt(g, g, g, g, g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain new file mode 100644 index 0000000000000..826ec4fc81d83 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_parse_json.explain @@ -0,0 +1,2 @@ +Project [static_invoke(VariantExpressionEvalUtils.parseJson(g#0, false)) AS try_parse_json(g)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_variant_get.explain 
b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_variant_get.explain new file mode 100644 index 0000000000000..933fbff8e1f3d --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_variant_get.explain @@ -0,0 +1,2 @@ +Project [try_variant_get(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)), $, IntegerType, false, Some(America/Los_Angeles)) AS try_variant_get(parse_json(g), $)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_decode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_decode.explain index d612190396d2b..6111cc1374fb6 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_decode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_decode.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.UrlCodec$, StringType, decode, g#0, UTF-8, StringType, StringType, true, true, true) AS url_decode(g)#0] +Project [static_invoke(UrlCodec.decode(g#0, UTF-8)) AS url_decode(g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_encode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_encode.explain index bd2c63e19c609..871842d41ba4f 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_encode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_url_encode.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.UrlCodec$, StringType, encode, g#0, UTF-8, StringType, StringType, true, true, true) AS url_encode(g)#0] +Project [static_invoke(UrlCodec.encode(g#0, UTF-8)) AS url_encode(g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_variant_get.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_variant_get.explain new file mode 100644 index 0000000000000..2e0baf058f72a --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_variant_get.explain @@ -0,0 +1,2 @@ +Project [variant_get(static_invoke(VariantExpressionEvalUtils.parseJson(g#0, true)), $, IntegerType, true, Some(America/Los_Angeles)) AS variant_get(parse_json(g), $)#0] ++- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/streaming_table_API_with_options.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/streaming_table_API_with_options.explain index 2a20daaefa8c6..2cc166efa99ec 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/streaming_table_API_with_options.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/streaming_table_API_with_options.explain @@ -1,2 +1,2 @@ -SubqueryAlias primary.tempdb.myStreamingTable -+- StreamingRelationV2 primary.tempdb.myStreamingTable, org.apache.spark.sql.connector.catalog.InMemoryTable, [p1=v1, p2=v2], [id#0L], org.apache.spark.sql.connector.catalog.InMemoryCatalog, 
tempdb.myStreamingTable +~SubqueryAlias primary.tempdb.myStreamingTable ++- ~StreamingRelationV2 primary.tempdb.myStreamingTable, org.apache.spark.sql.connector.catalog.InMemoryTable, [p1=v1, p2=v2], [id#0L], org.apache.spark.sql.connector.catalog.InMemoryCatalog, tempdb.myStreamingTable diff --git a/connector/connect/common/src/test/resources/query-tests/queries/csv_from_dataset.json b/connector/connect/common/src/test/resources/query-tests/queries/csv_from_dataset.json index 33f6007ec68a1..e4b31258f984a 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/csv_from_dataset.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/csv_from_dataset.json @@ -18,7 +18,7 @@ "name": "c1", "dataType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "nullable": true diff --git a/connector/connect/common/src/test/resources/query-tests/queries/csv_from_dataset.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/csv_from_dataset.proto.bin index da4ad9bf9a4ed..c39243a10a8e4 100644 Binary files a/connector/connect/common/src/test/resources/query-tests/queries/csv_from_dataset.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/csv_from_dataset.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.json b/connector/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.json new file mode 100644 index 0000000000000..7ae72f8f88e5c --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.json @@ -0,0 +1,30 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "is_variant_null", + "arguments": [{ + "unresolvedFunction": { + "functionName": "parse_json", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.proto.bin new file mode 100644 index 0000000000000..4d3d2624609e7 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_is_variant_null.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.json b/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.json index adf8cabd97b1c..2a5a0ddd15f8a 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.json @@ -305,7 +305,7 @@ "array": { "elementType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "elements": [{ @@ -324,7 +324,7 @@ "array": { "elementType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "elements": [{ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin 
b/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin index d8b4407f6cfa2..359ddd61d8b74 100644 Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_lit_array.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_parse_json.json b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_json.json new file mode 100644 index 0000000000000..dfcf56c19223e --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_json.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "parse_json", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_parse_json.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_json.proto.bin new file mode 100644 index 0000000000000..a7187fa2c1af0 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_parse_json.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.json b/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.json new file mode 100644 index 0000000000000..c4ea467bc1a24 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.json @@ -0,0 +1,30 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "schema_of_variant", + "arguments": [{ + "unresolvedFunction": { + "functionName": "parse_json", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.proto.bin new file mode 100644 index 0000000000000..0971460bf4112 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.json b/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.json new file mode 100644 index 0000000000000..19bf62f70b20f --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.json @@ -0,0 +1,30 @@ +{ + "common": { + "planId": "1" + }, + "project": { + 
"input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "schema_of_variant_agg", + "arguments": [{ + "unresolvedFunction": { + "functionName": "parse_json", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.proto.bin new file mode 100644 index 0000000000000..68c872ef0d4d2 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_schema_of_variant_agg.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.json b/connector/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.json new file mode 100644 index 0000000000000..98ef0e54e6211 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.json @@ -0,0 +1,29 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "split", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.proto.bin new file mode 100644 index 0000000000000..a87702f83d1bd Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_split_using_columns.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.json b/connector/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.json new file mode 100644 index 0000000000000..138f9d70b2c85 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.json @@ -0,0 +1,33 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "split", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "literal": { + "string": ";" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }] + } + }] + } +} \ No newline at end of file diff --git 
a/connector/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.proto.bin new file mode 100644 index 0000000000000..04e24be40e9d8 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_split_with_limit_using_columns.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.json b/connector/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.json new file mode 100644 index 0000000000000..ba28b1c7f5700 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.json @@ -0,0 +1,33 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "substring", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "a" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "b" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.proto.bin new file mode 100644 index 0000000000000..f14b44ef5a501 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_substring_using_columns.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.json b/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.json new file mode 100644 index 0000000000000..8fd71bb36d85e --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.json @@ -0,0 +1,33 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "timestampadd", + "arguments": [{ + "literal": { + "string": "week" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "x" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "t" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.proto.bin new file mode 100644 index 0000000000000..5ab8ec531e073 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_add.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.json b/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.json new file mode 100644 index 0000000000000..635cbb45460e6 
--- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.json @@ -0,0 +1,33 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cd:date,t:timestamp,s:string,x:bigint,wt:struct\u003cstart:timestamp,end:timestamp\u003e\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "timestampdiff", + "arguments": [{ + "literal": { + "string": "year" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "t" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "t" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.proto.bin new file mode 100644 index 0000000000000..3a81fd8b318c0 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_timestamp_diff.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.json new file mode 100644 index 0000000000000..91177eb4a5857 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.json @@ -0,0 +1,25 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_parse_json", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.proto.bin new file mode 100644 index 0000000000000..cc1f159cfd78c Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_try_parse_json.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.json new file mode 100644 index 0000000000000..9a4a4e25f19e6 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.json @@ -0,0 +1,38 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_variant_get", + "arguments": [{ + "unresolvedFunction": { + "functionName": "parse_json", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }, { + "literal": { + "string": "$" + } + }, { + "literal": { + "string": "int" + } + }] + } + }] + } +} \ No newline at end of file diff --git 
a/connector/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.proto.bin new file mode 100644 index 0000000000000..b16bbf4c7a4e9 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_try_variant_get.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_typedLit.json b/connector/connect/common/src/test/resources/query-tests/queries/function_typedLit.json index 1e651f0455c7b..aaf3a91c4fe19 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/function_typedLit.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_typedLit.json @@ -200,7 +200,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -228,7 +228,7 @@ "name": "_1", "dataType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "nullable": true @@ -404,7 +404,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -417,7 +417,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -439,7 +439,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -461,7 +461,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -493,7 +493,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -511,7 +511,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -533,7 +533,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -576,7 +576,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -594,7 +594,7 @@ "name": "_1", "dataType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "nullable": true @@ -608,7 +608,7 @@ }, "valueType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueContainsNull": true @@ -640,7 +640,7 @@ "map": { "keyType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueType": { @@ -666,7 +666,7 @@ "name": "_1", "dataType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "nullable": true @@ -680,7 +680,7 @@ }, "valueType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "valueContainsNull": true @@ -700,7 +700,7 @@ }, "valueType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "keys": [{ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin index b3f61830bee0b..71640717c12ea 100644 Binary files a/connector/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/function_typedLit.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_variant_get.json b/connector/connect/common/src/test/resources/query-tests/queries/function_variant_get.json new file mode 100644 index 0000000000000..ab0acd29d505b --- /dev/null +++ 
b/connector/connect/common/src/test/resources/query-tests/queries/function_variant_get.json @@ -0,0 +1,38 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "variant_get", + "arguments": [{ + "unresolvedFunction": { + "functionName": "parse_json", + "arguments": [{ + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }, { + "literal": { + "string": "$" + } + }, { + "literal": { + "string": "int" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_variant_get.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_variant_get.proto.bin new file mode 100644 index 0000000000000..fe9b76bb97c4a Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_variant_get.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/json_from_dataset.json b/connector/connect/common/src/test/resources/query-tests/queries/json_from_dataset.json index 537c218952a42..f29245374e6e2 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/json_from_dataset.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/json_from_dataset.json @@ -18,7 +18,7 @@ "name": "c1", "dataType": { "string": { - "collationId": 0 + "collation": "UTF8_BINARY" } }, "nullable": true diff --git a/connector/connect/common/src/test/resources/query-tests/queries/json_from_dataset.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/json_from_dataset.proto.bin index 297ab2bf02622..1ce2e676ce30a 100644 Binary files a/connector/connect/common/src/test/resources/query-tests/queries/json_from_dataset.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/json_from_dataset.proto.bin differ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/select_collated_string.json b/connector/connect/common/src/test/resources/query-tests/queries/select_collated_string.json index 86595d46654c0..2ccad0345af62 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/select_collated_string.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/select_collated_string.json @@ -8,7 +8,7 @@ "planId": "0" }, "localRelation": { - "schema": "struct\u003cs:string collate UTF8_BINARY_LCASE\u003e" + "schema": "struct\u003cs:string collate UTF8_LCASE\u003e" } }, "expressions": [{ diff --git a/connector/connect/common/src/test/resources/query-tests/queries/select_collated_string.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/select_collated_string.proto.bin index 30d816526ccea..3708878a2de2d 100644 Binary files a/connector/connect/common/src/test/resources/query-tests/queries/select_collated_string.proto.bin and b/connector/connect/common/src/test/resources/query-tests/queries/select_collated_string.proto.bin differ diff --git a/connector/connect/docs/client-connection-string.md b/connector/connect/docs/client-connection-string.md index ebab7cbff4fc1..37b2956a5c44a 100644 --- a/connector/connect/docs/client-connection-string.md +++ 
b/connector/connect/docs/client-connection-string.md
@@ -22,8 +22,8 @@ cannot contain arbitrary characters. Configuration parameter are passed in the
 style of the HTTP URL Path Parameter Syntax. This is similar to the JDBC connection strings.
 The path component must be empty. All parameters are interpreted **case sensitive**.
 
-```shell
-sc://hostname:port/;param1=value;param2=value
+```text
+sc://host:port/;param1=value;param2=value
 ```
 
@@ -34,7 +34,7 @@ sc://hostname:port/;param1=value;param2=value
       <td>Examples</td>
     </tr>
     <tr>
-      <td>hostname</td>
+      <td>host</td>
       <td>String</td>
       <td>The hostname of the endpoint for Spark Connect. Since the endpoint
@@ -49,8 +49,8 @@ sc://hostname:port/;param1=value;param2=value
     <tr>
       <td>port</td>
       <td>Numeric</td>
-      <td>The portname to be used when connecting to the GRPC endpoint. The
+      <td>The port to be used when connecting to the GRPC endpoint. The
       default values is: 15002. Any valid port number can be used.</td>
       <td>15002<br/>443</td>
     </tr>
@@ -75,7 +75,7 @@ sc://hostname:port/;param1=value;param2=value
       <td>user_id</td>
       <td>String</td>
       <td>User ID to automatically set in the Spark Connect UserContext message.
-      This is necssary for the appropriate Spark Session management. This is an
+      This is necessary for the appropriate Spark Session management. This is an
      *optional* parameter and depending on the deployment this parameter might
      be automatically injected using other means.
@@ -99,9 +99,16 @@ sc://hostname:port/;param1=value;param2=value
       allows to provide this session ID to allow sharing Spark Sessions for the same users
       for example across multiple languages. The value must be provided in a valid
       UUID string format.
-      Default: A UUID generated randomly.
+      Default:<br/>
+      A UUID generated randomly.
       </td>
       <td>session_id=550e8400-e29b-41d4-a716-446655440000</td>
     </tr>
+    <tr>
+      <td>grpc_max_message_size</td>
+      <td>Numeric</td>
+      <td>Maximum message size allowed for gRPC messages in bytes.<br/>
+      Default:<br/>
+      128 * 1024 * 1024
+      </td>
+      <td>grpc_max_message_size=134217728</td>
+    </tr>
   </table>
 
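As a companion to the connection-string table above, the sketch below shows how a Connect Scala client might pass these path parameters, including the newly documented grpc_max_message_size. It is a minimal sketch only: the host, port, user_id, and session_id values are illustrative placeholders, and it assumes the Spark Connect Scala client (which exposes a builder-based remote(...) entry point) is on the classpath.

```scala
import org.apache.spark.sql.SparkSession

// Illustrative values only: host, port, user_id and session_id are placeholders.
// grpc_max_message_size=134217728 matches the documented example (128 * 1024 * 1024 bytes).
val connectionString =
  "sc://localhost:15002/;user_id=example_user" +
    ";session_id=550e8400-e29b-41d4-a716-446655440000" +
    ";grpc_max_message_size=134217728"

// Assumes the Connect client's SparkSession builder, which accepts a full
// connection string via remote(...).
val spark = SparkSession.builder().remote(connectionString).getOrCreate()
spark.range(5).show()
```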
## Examples diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala index e94e865873937..dc45684a75ebd 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala @@ -38,6 +38,14 @@ object Connect { .intConf .createWithDefault(ConnectCommon.CONNECT_GRPC_BINDING_PORT) + val CONNECT_GRPC_PORT_MAX_RETRIES = + buildStaticConf("spark.connect.grpc.port.maxRetries") + .doc("The max port retry attempts for the gRPC server binding." + + "By default, it's set to 0, and the server will fail fast in case of port conflicts.") + .version("4.0.0") + .intConf + .createWithDefault(ConnectCommon.CONNECT_GRPC_PORT_MAX_RETRIES) + val CONNECT_GRPC_INTERCEPTOR_CLASSES = buildStaticConf("spark.connect.grpc.interceptor.classes") .doc( @@ -73,7 +81,7 @@ object Connect { |""".stripMargin) .version("3.5.0") .intConf - .createWithDefault(1024) + .createWithDefault(ConnectCommon.CONNECT_GRPC_MARSHALLER_RECURSION_LIMIT) val CONNECT_SESSION_MANAGER_DEFAULT_SESSION_TIMEOUT = buildStaticConf("spark.connect.session.manager.defaultSessionTimeout") @@ -279,6 +287,7 @@ object Connect { .doc("Sets the maximum number of cached resolved logical plans in Spark Connect Session." + " If set to a value less or equal than zero will disable the plan cache.") .version("4.0.0") + .internal() .intConf .createWithDefault(5) @@ -289,6 +298,7 @@ object Connect { s" When false, the cache is disabled even if '${CONNECT_SESSION_PLAN_CACHE_SIZE.key}' is" + " greater than zero. The caching is best-effort and not guaranteed.") .version("4.0.0") + .internal() .booleanConf .createWithDefault(true) } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index d4709db081fc8..3e360372d5600 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -25,7 +25,7 @@ import io.grpc.stub.{ServerCallStreamObserver, StreamObserver} import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.connect.proto.ExecutePlanResponse import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS import org.apache.spark.sql.connect.common.ProtoUtils import org.apache.spark.sql.connect.config.Connect.{CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_DURATION, CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_SIZE, CONNECT_PROGRESS_REPORT_INTERVAL} diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala index 30a899a2ac136..9d83d93083dc4 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala @@ -27,7 +27,7 @@ import 
io.grpc.stub.StreamObserver import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.connect.proto import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey +import org.apache.spark.internal.LogKeys import org.apache.spark.sql.connect.config.Connect.CONNECT_EXECUTE_REATTACHABLE_OBSERVER_RETRY_BUFFER_SIZE import org.apache.spark.sql.connect.service.ExecuteHolder @@ -245,13 +245,13 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder: removeResponsesUntilIndex(lastProducedIndex) // scalastyle:off line.size.limit logInfo( - log"Release all for opId=${MDC(LogKey.OP_ID, executeHolder.operationId)}. Execution stats: " + - log"total=${MDC(LogKey.TOTAL_SIZE, totalSize)} " + - log"autoRemoved=${MDC(LogKey.CACHE_AUTO_REMOVED_SIZE, autoRemovedSize)} " + - log"cachedUntilConsumed=${MDC(LogKey.CACHE_UNTIL_HIGHEST_CONSUMED_SIZE, cachedSizeUntilHighestConsumed)} " + - log"cachedUntilProduced=${MDC(LogKey.CACHE_UNTIL_LAST_PRODUCED_SIZE, cachedSizeUntilLastProduced)} " + - log"maxCachedUntilConsumed=${MDC(LogKey.MAX_CACHE_UNTIL_HIGHEST_CONSUMED_SIZE, cachedSizeUntilHighestConsumed.max)} " + - log"maxCachedUntilProduced=${MDC(LogKey.MAX_CACHE_UNTIL_LAST_PRODUCED_SIZE, cachedSizeUntilLastProduced.max)}") + log"Release all for opId=${MDC(LogKeys.OP_ID, executeHolder.operationId)}. Execution stats: " + + log"total=${MDC(LogKeys.TOTAL, totalSize)} " + + log"autoRemoved=${MDC(LogKeys.CACHE_AUTO_REMOVED_SIZE, autoRemovedSize)} " + + log"cachedUntilConsumed=${MDC(LogKeys.CACHE_UNTIL_HIGHEST_CONSUMED_SIZE, cachedSizeUntilHighestConsumed)} " + + log"cachedUntilProduced=${MDC(LogKeys.CACHE_UNTIL_LAST_PRODUCED_SIZE, cachedSizeUntilLastProduced)} " + + log"maxCachedUntilConsumed=${MDC(LogKeys.MAX_CACHE_UNTIL_HIGHEST_CONSUMED_SIZE, cachedSizeUntilHighestConsumed.max)} " + + log"maxCachedUntilProduced=${MDC(LogKeys.MAX_CACHE_UNTIL_LAST_PRODUCED_SIZE, cachedSizeUntilLastProduced.max)}") // scalastyle:on line.size.limit } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala index 0a6d12cbb1918..4ef4f632204b3 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteThreadRunner.scala @@ -220,6 +220,7 @@ private[connect] class ExecuteThreadRunner(executeHolder: ExecuteHolder) extends .createObservedMetricsResponse( executeHolder.sessionHolder.sessionId, executeHolder.sessionHolder.serverSessionId, + executeHolder.request.getPlan.getRoot.getCommon.getPlanId, observedMetrics ++ accumulatedInPython)) } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala index 23390bf7aba8f..660951f229849 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala @@ -35,8 +35,9 @@ import org.apache.spark.sql.connect.config.Connect.CONNECT_GRPC_ARROW_MAX_BATCH_ import org.apache.spark.sql.connect.planner.SparkConnectPlanner import org.apache.spark.sql.connect.service.ExecuteHolder import 
org.apache.spark.sql.connect.utils.MetricGenerator -import org.apache.spark.sql.execution.{LocalTableScanExec, SQLExecution} +import org.apache.spark.sql.execution.{DoNotCleanup, LocalTableScanExec, RemoveShuffleFiles, SkipMigration, SQLExecution} import org.apache.spark.sql.execution.arrow.ArrowConverters +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.util.ThreadUtils @@ -58,11 +59,21 @@ private[execution] class SparkConnectPlanExecution(executeHolder: ExecuteHolder) } val planner = new SparkConnectPlanner(executeHolder) val tracker = executeHolder.eventsManager.createQueryPlanningTracker() + val conf = session.sessionState.conf + val shuffleCleanupMode = + if (conf.getConf(SQLConf.SHUFFLE_DEPENDENCY_FILE_CLEANUP_ENABLED)) { + RemoveShuffleFiles + } else if (conf.getConf(SQLConf.SHUFFLE_DEPENDENCY_SKIP_MIGRATION_ENABLED)) { + SkipMigration + } else { + DoNotCleanup + } val dataframe = Dataset.ofRows( sessionHolder.session, - planner.transformRelation(request.getPlan.getRoot), - tracker) + planner.transformRelation(request.getPlan.getRoot, cachePlan = true), + tracker, + shuffleCleanupMode) responseObserver.onNext(createSchemaResponse(request.getSessionId, dataframe.schema)) processAsArrowBatches(dataframe, responseObserver, executeHolder) responseObserver.onNext(MetricGenerator.createMetricsResponse(sessionHolder, dataframe)) @@ -253,8 +264,14 @@ private[execution] class SparkConnectPlanExecution(executeHolder: ExecuteHolder) name -> values } if (observedMetrics.nonEmpty) { - Some(SparkConnectPlanExecution - .createObservedMetricsResponse(sessionId, sessionHolder.serverSessionId, observedMetrics)) + val planId = executeHolder.request.getPlan.getRoot.getCommon.getPlanId + Some( + SparkConnectPlanExecution + .createObservedMetricsResponse( + sessionId, + sessionHolder.serverSessionId, + planId, + observedMetrics)) } else None } } @@ -263,11 +280,13 @@ object SparkConnectPlanExecution { def createObservedMetricsResponse( sessionId: String, serverSessionId: String, + planId: Long, metrics: Map[String, Seq[(Option[String], Any)]]): ExecutePlanResponse = { val observedMetrics = metrics.map { case (name, values) => val metrics = ExecutePlanResponse.ObservedMetrics .newBuilder() .setName(name) + .setPlanId(planId) values.foreach { case (key, value) => metrics.addValues(toLiteralProto(value)) key.foreach(metrics.addKeys) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 1ef4bbec3e039..eaeb1c775ddb6 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.connect.planner +import java.util.UUID + import scala.collection.mutable import scala.jdk.CollectionConverters._ import scala.util.Try @@ -30,19 +32,19 @@ import io.grpc.stub.StreamObserver import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.spark.{Partition, SparkEnv, TaskContext} -import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.api.python.{PythonEvalType, SimplePythonFunction} import org.apache.spark.connect.proto -import org.apache.spark.connect.proto.{CreateResourceProfileCommand, 
ExecutePlanResponse, SqlCommand, StreamingForeachFunction, StreamingQueryCommand, StreamingQueryCommandResult, StreamingQueryInstanceId, StreamingQueryManagerCommand, StreamingQueryManagerCommandResult, WriteStreamOperationStart, WriteStreamOperationStartResult} +import org.apache.spark.connect.proto.{CheckpointCommand, CreateResourceProfileCommand, ExecutePlanResponse, SqlCommand, StreamingForeachFunction, StreamingQueryCommand, StreamingQueryCommandResult, StreamingQueryInstanceId, StreamingQueryManagerCommand, StreamingQueryManagerCommandResult, WriteStreamOperationStart, WriteStreamOperationStartResult} import org.apache.spark.connect.proto.ExecutePlanResponse.SqlCommandResult import org.apache.spark.connect.proto.Parse.ParseFormat import org.apache.spark.connect.proto.StreamingQueryManagerCommandResult.StreamingQueryInstance import org.apache.spark.connect.proto.WriteStreamOperationStart.TriggerCase import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.SESSION_ID +import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, SESSION_ID} import org.apache.spark.ml.{functions => MLFunctions} import org.apache.spark.resource.{ExecutorResourceRequest, ResourceProfile, TaskResourceProfile, TaskResourceRequest} -import org.apache.spark.sql.{Column, Dataset, Encoders, ForeachWriter, Observation, RelationalGroupedDataset, SparkSession} +import org.apache.spark.sql.{withOrigin, Column, Dataset, Encoders, ForeachWriter, Observation, RelationalGroupedDataset, SparkSession} import org.apache.spark.sql.avro.{AvroDataToCatalyst, CatalystDataToAvro} import org.apache.spark.sql.catalyst.{expressions, AliasIdentifier, FunctionIdentifier, QueryPlanningTracker} import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, MultiAlias, NameParameterizedQuery, PosParameterizedQuery, UnresolvedAlias, UnresolvedAttribute, UnresolvedDataFrameStar, UnresolvedDeserializer, UnresolvedExtractValue, UnresolvedFunction, UnresolvedRegex, UnresolvedRelation, UnresolvedStar} @@ -55,6 +57,7 @@ import org.apache.spark.sql.catalyst.plans.{Cross, FullOuter, Inner, JoinType, L import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.{AppendColumns, CoGroup, CollectMetrics, CommandResult, Deduplicate, DeduplicateWithinWatermark, DeserializeToObject, Except, FlatMapGroupsWithState, Intersect, JoinWith, LocalRelation, LogicalGroupState, LogicalPlan, MapGroups, MapPartitions, Project, Sample, SerializeFromObject, Sort, SubqueryAlias, TypedFilter, Union, Unpivot, UnresolvedHint} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes +import org.apache.spark.sql.catalyst.trees.PySparkCurrentOrigin import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils} import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, ForeachWriterPacket, InvalidPlanInput, LiteralValueProtoConverter, StorageLevelProtoConverter, StreamingListenerPacket, UdfPacket} @@ -73,7 +76,7 @@ import org.apache.spark.sql.execution.python.{PythonForeachWriter, UserDefinedPy import org.apache.spark.sql.execution.stat.StatFunctions import org.apache.spark.sql.execution.streaming.GroupStateImpl.groupStateTimeoutFromString import org.apache.spark.sql.execution.streaming.StreamingQueryWrapper -import org.apache.spark.sql.expressions.{ReduceAggregator, SparkUserDefinedFunction} +import org.apache.spark.sql.expressions.{Aggregator, ReduceAggregator, SparkUserDefinedFunction, 
UserDefinedAggregator, UserDefinedFunction} import org.apache.spark.sql.internal.{CatalogImpl, TypedAggUtils} import org.apache.spark.sql.protobuf.{CatalystDataToProtobuf, ProtobufDataToCatalyst} import org.apache.spark.sql.streaming.{GroupStateTimeout, OutputMode, StreamingQuery, StreamingQueryListener, StreamingQueryProgress, Trigger} @@ -101,7 +104,9 @@ class SparkConnectPlanner( throw new IllegalArgumentException("executeHolder does not belong to sessionHolder") } - private[connect] def session: SparkSession = sessionHolder.session + @Since("4.0.0") + @DeveloperApi + def session: SparkSession = sessionHolder.session private[connect] def parser = session.sessionState.sqlParser @@ -125,6 +130,7 @@ class SparkConnectPlanner( * @return * The resolved logical plan. */ + @DeveloperApi def transformRelation(rel: proto.Relation): LogicalPlan = transformRelation(rel, cachePlan = false) @@ -138,6 +144,7 @@ class SparkConnectPlanner( * @return * The resolved logical plan. */ + @DeveloperApi def transformRelation(rel: proto.Relation, cachePlan: Boolean): LogicalPlan = { sessionHolder.usePlanCache(rel, cachePlan) { rel => val plan = rel.getRelTypeCase match { @@ -230,11 +237,6 @@ class SparkConnectPlanner( } } - @DeveloperApi - def transformRelation(bytes: Array[Byte]): LogicalPlan = { - transformRelation(proto.Relation.parseFrom(bytes)) - } - private def transformRelationPlugin(extension: ProtoAny): LogicalPlan = { SparkConnectPluginRegistry.relationRegistry // Lazily traverse the collection. @@ -1469,7 +1471,22 @@ class SparkConnectPlanner( * @return * Catalyst expression */ - def transformExpression(exp: proto.Expression): Expression = { + @DeveloperApi + def transformExpression(exp: proto.Expression): Expression = if (exp.hasCommon) { + try { + val origin = exp.getCommon.getOrigin + PySparkCurrentOrigin.set( + origin.getPythonOrigin.getFragment, + origin.getPythonOrigin.getCallSite) + withOrigin { doTransformExpression(exp) } + } finally { + PySparkCurrentOrigin.clear() + } + } else { + doTransformExpression(exp) + } + + private def doTransformExpression(exp: proto.Expression): Expression = { exp.getExprTypeCase match { case proto.Expression.ExprTypeCase.LITERAL => transformLiteral(exp.getLiteral) case proto.Expression.ExprTypeCase.UNRESOLVED_ATTRIBUTE => @@ -1510,11 +1527,6 @@ class SparkConnectPlanner( } } - @DeveloperApi - def transformExpression(bytes: Array[Byte]): Expression = { - transformExpression(proto.Expression.parseFrom(bytes)) - } - private def toNamedExpression(expr: Expression): NamedExpression = expr match { case named: NamedExpression => named case expr => UnresolvedAlias(expr) @@ -1603,7 +1615,7 @@ class SparkConnectPlanner( case proto.CommonInlineUserDefinedFunction.FunctionCase.PYTHON_UDF => transformPythonFuncExpression(fun) case proto.CommonInlineUserDefinedFunction.FunctionCase.SCALAR_SCALA_UDF => - transformScalarScalaUDF(fun) + transformScalaUDF(fun) case _ => throw InvalidPlanInput( s"Function with ID: ${fun.getFunctionCase.getNumber} is not supported") @@ -1632,14 +1644,14 @@ class SparkConnectPlanner( } private def unpackUdf(fun: proto.CommonInlineUserDefinedFunction): UdfPacket = { - unpackScalarScalaUDF[UdfPacket](fun.getScalarScalaUdf) + unpackScalaUDF[UdfPacket](fun.getScalarScalaUdf) } private def unpackForeachWriter(fun: proto.ScalarScalaUDF): ForeachWriterPacket = { - unpackScalarScalaUDF[ForeachWriterPacket](fun) + unpackScalaUDF[ForeachWriterPacket](fun) } - private def unpackScalarScalaUDF[T](fun: proto.ScalarScalaUDF): T = { + private def 
unpackScalaUDF[T](fun: proto.ScalarScalaUDF): T = { try { logDebug(s"Unpack using class loader: ${Utils.getContextOrSparkClassLoader}") Utils.deserialize[T](fun.getPayload.toByteArray, Utils.getContextOrSparkClassLoader) @@ -1662,39 +1674,56 @@ class SparkConnectPlanner( } /** - * Translates a Scalar Scala user-defined function from proto to the Catalyst expression. + * Translates a Scala user-defined function from proto to the Catalyst expression. * * @param fun - * Proto representation of the Scalar Scalar user-defined function. + * Proto representation of the Scala user-defined function. * @return * ScalaUDF. */ - private def transformScalarScalaUDF(fun: proto.CommonInlineUserDefinedFunction): ScalaUDF = { + private def transformScalaUDF(fun: proto.CommonInlineUserDefinedFunction): Expression = { val udf = fun.getScalarScalaUdf val udfPacket = unpackUdf(fun) - ScalaUDF( - function = udfPacket.function, - dataType = transformDataType(udf.getOutputType), - children = fun.getArgumentsList.asScala.map(transformExpression).toSeq, - inputEncoders = udfPacket.inputEncoders.map(e => Try(ExpressionEncoder(e)).toOption), - outputEncoder = Option(ExpressionEncoder(udfPacket.outputEncoder)), - udfName = Option(fun.getFunctionName), - nullable = udf.getNullable, - udfDeterministic = fun.getDeterministic) + if (udf.getAggregate) { + transformScalaFunction(fun) + .asInstanceOf[UserDefinedAggregator[Any, Any, Any]] + .scalaAggregator(fun.getArgumentsList.asScala.map(transformExpression).toSeq) + .toAggregateExpression() + } else { + ScalaUDF( + function = udfPacket.function, + dataType = transformDataType(udf.getOutputType), + children = fun.getArgumentsList.asScala.map(transformExpression).toSeq, + inputEncoders = udfPacket.inputEncoders.map(e => Try(ExpressionEncoder(e)).toOption), + outputEncoder = Option(ExpressionEncoder(udfPacket.outputEncoder)), + udfName = Option(fun.getFunctionName), + nullable = udf.getNullable, + udfDeterministic = fun.getDeterministic) + } } - private def transformScalarScalaFunction( - fun: proto.CommonInlineUserDefinedFunction): SparkUserDefinedFunction = { + private def transformScalaFunction( + fun: proto.CommonInlineUserDefinedFunction): UserDefinedFunction = { val udf = fun.getScalarScalaUdf val udfPacket = unpackUdf(fun) - SparkUserDefinedFunction( - f = udfPacket.function, - dataType = transformDataType(udf.getOutputType), - inputEncoders = udfPacket.inputEncoders.map(e => Try(ExpressionEncoder(e)).toOption), - outputEncoder = Option(ExpressionEncoder(udfPacket.outputEncoder)), - name = Option(fun.getFunctionName), - nullable = udf.getNullable, - deterministic = fun.getDeterministic) + if (udf.getAggregate) { + assert(udfPacket.inputEncoders.size == 1, "UDAF should have exactly one input encoder") + UserDefinedAggregator( + aggregator = udfPacket.function.asInstanceOf[Aggregator[Any, Any, Any]], + inputEncoder = ExpressionEncoder(udfPacket.inputEncoders.head), + name = Option(fun.getFunctionName), + nullable = udf.getNullable, + deterministic = fun.getDeterministic) + } else { + SparkUserDefinedFunction( + f = udfPacket.function, + dataType = transformDataType(udf.getOutputType), + inputEncoders = udfPacket.inputEncoders.map(e => Try(ExpressionEncoder(e)).toOption), + outputEncoder = Option(ExpressionEncoder(udfPacket.outputEncoder)), + name = Option(fun.getFunctionName), + nullable = udf.getNullable, + deterministic = fun.getDeterministic) + } } /** @@ -1735,8 +1764,10 @@ class SparkConnectPlanner( command = fun.getCommand.toByteArray.toImmutableArraySeq, // Empty 
environment variables envVars = Maps.newHashMap(), - pythonIncludes = sessionHolder.artifactManager.getPythonIncludes.asJava, pythonExec = pythonExec, + // Merge the user specified includes with the includes managed by the artifact manager. + pythonIncludes = (fun.getAdditionalIncludesList.asScala.toSeq ++ + sessionHolder.artifactManager.getPythonIncludes).asJava, pythonVer = fun.getPythonVer, // Empty broadcast variables broadcastVars = Lists.newArrayList(), @@ -1828,6 +1859,16 @@ class SparkConnectPlanner( new BloomFilterAggregate(children(0), children(1), children(2)) .toAggregateExpression()) + case "timestampdiff" if fun.getArgumentsCount == 3 => + val children = fun.getArgumentsList.asScala.map(transformExpression) + val unit = extractString(children(0), "unit") + Some(TimestampDiff(unit, children(1), children(2))) + + case "timestampadd" if fun.getArgumentsCount == 3 => + val children = fun.getArgumentsList.asScala.map(transformExpression) + val unit = extractString(children(0), "unit") + Some(TimestampAdd(unit, children(1), children(2))) + case "window" if Seq(2, 3, 4).contains(fun.getArgumentsCount) => val children = fun.getArgumentsList.asScala.map(transformExpression) val timeCol = children.head @@ -1973,11 +2014,6 @@ class SparkConnectPlanner( val children = fun.getArgumentsList.asScala.map(transformExpression) Some(NullIndex(children(0))) - case "timestampdiff" if fun.getArgumentsCount == 3 => - val children = fun.getArgumentsList.asScala.map(transformExpression) - val unit = extractString(children(0), "unit") - Some(TimestampDiff(unit, children(1), children(2))) - // ML-specific functions case "vector_to_array" if fun.getArgumentsCount == 2 => val expr = transformExpression(fun.getArguments(0)) @@ -2586,6 +2622,10 @@ class SparkConnectPlanner( handleCreateResourceProfileCommand( command.getCreateResourceProfileCommand, responseObserver) + case proto.Command.CommandTypeCase.CHECKPOINT_COMMAND => + handleCheckpointCommand(command.getCheckpointCommand, responseObserver) + case proto.Command.CommandTypeCase.REMOVE_CACHED_REMOTE_RELATION_COMMAND => + handleRemoveCachedRemoteRelationCommand(command.getRemoveCachedRemoteRelationCommand) case _ => throw new UnsupportedOperationException(s"$command not supported.") } @@ -2780,7 +2820,7 @@ class SparkConnectPlanner( case proto.CommonInlineUserDefinedFunction.FunctionCase.JAVA_UDF => handleRegisterJavaUDF(fun) case proto.CommonInlineUserDefinedFunction.FunctionCase.SCALAR_SCALA_UDF => - handleRegisterScalarScalaUDF(fun) + handleRegisterScalaUDF(fun) case _ => throw InvalidPlanInput( s"Function with ID: ${fun.getFunctionCase.getNumber} is not supported") @@ -2857,8 +2897,8 @@ class SparkConnectPlanner( } } - private def handleRegisterScalarScalaUDF(fun: proto.CommonInlineUserDefinedFunction): Unit = { - val udf = transformScalarScalaFunction(fun) + private def handleRegisterScalaUDF(fun: proto.CommonInlineUserDefinedFunction): Unit = { + val udf = transformScalaFunction(fun) session.udf.register(fun.getFunctionName, udf) } @@ -3140,7 +3180,11 @@ class SparkConnectPlanner( } // Register the new query so that its reference is cached and is stopped on session timeout. - SparkConnectService.streamingSessionManager.registerNewStreamingQuery(sessionHolder, query) + SparkConnectService.streamingSessionManager.registerNewStreamingQuery( + sessionHolder, + query, + executeHolder.sparkSessionTags, + executeHolder.operationId) // Register the runner with the query if Python foreachBatch is enabled. 
foreachBatchRunnerCleaner.foreach { cleaner => sessionHolder.streamingForeachBatchRunnerCleanerCache.registerCleanerForQuery( @@ -3205,7 +3249,9 @@ class SparkConnectPlanner( // Find the query in connect service level cache, otherwise check session's active streams. val query = SparkConnectService.streamingSessionManager - .getCachedQuery(id, runId, session) // Common case: query is cached in the cache. + // Common case: query is cached in the cache. + .getCachedQuery(id, runId, executeHolder.sparkSessionTags, session) + .map(_.query) .orElse { // Else try to find it in active streams. Mostly will not be found here either. Option(session.streams.get(id)) } match { @@ -3512,6 +3558,41 @@ class SparkConnectPlanner( .build()) } + private def handleCheckpointCommand( + checkpointCommand: CheckpointCommand, + responseObserver: StreamObserver[proto.ExecutePlanResponse]): Unit = { + val target = Dataset + .ofRows(session, transformRelation(checkpointCommand.getRelation)) + val checkpointed = target.checkpoint( + eager = checkpointCommand.getEager, + reliableCheckpoint = !checkpointCommand.getLocal) + + val dfId = UUID.randomUUID().toString + logInfo(log"Caching DataFrame with id ${MDC(DATAFRAME_ID, dfId)}") + sessionHolder.cacheDataFrameById(dfId, checkpointed) + + executeHolder.eventsManager.postFinished() + responseObserver.onNext( + proto.ExecutePlanResponse + .newBuilder() + .setSessionId(sessionId) + .setServerSideSessionId(sessionHolder.serverSessionId) + .setCheckpointCommandResult( + proto.CheckpointCommandResult + .newBuilder() + .setRelation(proto.CachedRemoteRelation.newBuilder().setRelationId(dfId).build()) + .build()) + .build()) + } + + private def handleRemoveCachedRemoteRelationCommand( + removeCachedRemoteRelationCommand: proto.RemoveCachedRemoteRelationCommand): Unit = { + val dfId = removeCachedRemoteRelationCommand.getRelation.getRelationId + logInfo(log"Removing DataFrame with id ${MDC(DATAFRAME_ID, dfId)} from the cache") + sessionHolder.removeCachedDataFrame(dfId) + executeHolder.eventsManager.postFinished() + } + private val emptyLocalRelation = LocalRelation( output = AttributeReference("value", StringType, false)() :: Nil, data = Seq.empty) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectStreamingQueryListenerHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectStreamingQueryListenerHandler.scala index 94f01026b7a5f..ce5aa0888ca53 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectStreamingQueryListenerHandler.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectStreamingQueryListenerHandler.scala @@ -24,7 +24,7 @@ import io.grpc.stub.StreamObserver import org.apache.spark.connect.proto.ExecutePlanResponse import org.apache.spark.connect.proto.StreamingQueryListenerBusCommand import org.apache.spark.connect.proto.StreamingQueryListenerEventsResult -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.connect.service.ExecuteHolder /** @@ -57,9 +57,10 @@ class SparkConnectStreamingQueryListenerHandler(executeHolder: ExecuteHolder) ex case StreamingQueryListenerBusCommand.CommandCase.ADD_LISTENER_BUS_LISTENER => listenerHolder.isServerSideListenerRegistered match { case true => - logWarning( - s"[SessionId: $sessionId][UserId: $userId][operationId: " + - s"${executeHolder.operationId}] Redundant 
server side listener added. Exiting.") + logWarning(log"[SessionId: ${MDC(LogKeys.SESSION_ID, sessionId)}]" + + log"[UserId: ${MDC(LogKeys.USER_ID, userId)}]" + + log"[operationId: ${MDC(LogKeys.OPERATION_HANDLE_ID, executeHolder.operationId)}] " + + log"Redundant server side listener added. Exiting.") return case false => // This transfers sending back the response to the client until @@ -83,29 +84,38 @@ class SparkConnectStreamingQueryListenerHandler(executeHolder: ExecuteHolder) ex } catch { case NonFatal(e) => logError( - s"[SessionId: $sessionId][UserId: $userId][operationId: " + - s"${executeHolder.operationId}] Error sending listener added response.", + log"[SessionId: ${MDC(LogKeys.SESSION_ID, sessionId)}]" + + log"[UserId: ${MDC(LogKeys.USER_ID, userId)}]" + + log"[operationId: " + + log"${MDC(LogKeys.OPERATION_HANDLE_ID, executeHolder.operationId)}] " + + log"Error sending listener added response.", e) listenerHolder.cleanUp() return } } - logInfo(s"[SessionId: $sessionId][UserId: $userId][operationId: " + - s"${executeHolder.operationId}] Server side listener added. Now blocking until " + - "all client side listeners are removed or there is error transmitting the event back.") + logInfo(log"[SessionId: ${MDC(LogKeys.SESSION_ID, sessionId)}]" + + log"[UserId: ${MDC(LogKeys.USER_ID, userId)}]" + + log"[operationId: ${MDC(LogKeys.OPERATION_HANDLE_ID, executeHolder.operationId)}] " + + log"Server side listener added. Now blocking until " + + log"all client side listeners are removed or there is error transmitting the event back.") // Block the handling thread, and have serverListener continuously send back new events listenerHolder.streamingQueryListenerLatch.await() - logInfo(s"[SessionId: $sessionId][UserId: $userId][operationId: " + - s"${executeHolder.operationId}] Server side listener long-running handling thread ended.") + logInfo( + log"[SessionId: ${MDC(LogKeys.SESSION_ID, sessionId)}]" + + log"[UserId: ${MDC(LogKeys.USER_ID, userId)}]" + + log"[operationId: ${MDC(LogKeys.OPERATION_HANDLE_ID, executeHolder.operationId)}] " + + log"Server side listener long-running handling thread ended.") case StreamingQueryListenerBusCommand.CommandCase.REMOVE_LISTENER_BUS_LISTENER => listenerHolder.isServerSideListenerRegistered match { case true => sessionHolder.streamingServersideListenerHolder.cleanUp() case false => - logWarning( - s"[SessionId: $sessionId][UserId: $userId][operationId: " + - s"${executeHolder.operationId}] No active server side listener bus listener " + - s"but received remove listener call. Exiting.") + logWarning(log"[SessionId: ${MDC(LogKeys.SESSION_ID, sessionId)}]" + + log"[UserId: ${MDC(LogKeys.USER_ID, userId)}]" + + log"[operationId: ${MDC(LogKeys.OPERATION_HANDLE_ID, executeHolder.operationId)}] " + + log"No active server side listener bus listener but received remove listener call. 
" + + log"Exiting.") return } case StreamingQueryListenerBusCommand.CommandCase.COMMAND_NOT_SET => diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala index ef5faac77e3e0..df883a5c86814 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingForeachBatchHelper.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.spark.SparkException import org.apache.spark.api.python.{PythonException, PythonWorkerUtils, SimplePythonFunction, SpecialLengths, StreamingPythonRunner} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{DATAFRAME_ID, QUERY_ID, RUN_ID, SESSION_ID} +import org.apache.spark.internal.LogKeys.{DATAFRAME_ID, QUERY_ID, RUN_ID, SESSION_ID} import org.apache.spark.sql.DataFrame import org.apache.spark.sql.connect.service.SessionHolder import org.apache.spark.sql.connect.service.SparkConnectService diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingQueryListenerHelper.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingQueryListenerHelper.scala index 74e9e32f208df..c342050a212ef 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingQueryListenerHelper.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/StreamingQueryListenerHelper.scala @@ -22,7 +22,7 @@ import java.io.EOFException import org.apache.spark.SparkException import org.apache.spark.api.python.{PythonException, PythonWorkerUtils, SimplePythonFunction, SpecialLengths, StreamingPythonRunner} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.FUNCTION_NAME +import org.apache.spark.internal.LogKeys.FUNCTION_NAME import org.apache.spark.sql.connect.service.{SessionHolder, SparkConnectService} import org.apache.spark.sql.streaming.StreamingQueryListener diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 3112d12bb0e67..ec7ebbe92d72e 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -296,7 +296,7 @@ private[connect] class ExecuteHolder( object ExecuteJobTag { private val prefix = "SparkConnect_OperationTag" - def apply(sessionId: String, userId: String, operationId: String): String = { + def apply(userId: String, sessionId: String, operationId: String): String = { s"${prefix}_" + s"User_${userId}_" + s"Session_${sessionId}_" + diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/LoggingInterceptor.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/LoggingInterceptor.scala index c82cadbd5f7ab..a071579692fb1 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/LoggingInterceptor.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/LoggingInterceptor.scala @@ -29,7 
+29,7 @@ import io.grpc.ServerCallHandler import io.grpc.ServerInterceptor import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{DESCRIPTION, MESSAGE} +import org.apache.spark.internal.LogKeys.{DESCRIPTION, MESSAGE} /** * A gRPC interceptor to log RPC requests and responses. It logs the protobufs as JSON. Useful for diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala index bb32ac1275fbe..681f7e29630ff 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala @@ -23,6 +23,7 @@ import java.util.concurrent.{ConcurrentHashMap, ConcurrentMap, TimeUnit} import javax.annotation.concurrent.GuardedBy import scala.collection.mutable +import scala.concurrent.{ExecutionContext, Future} import scala.jdk.CollectionConverters._ import scala.util.Try @@ -33,7 +34,7 @@ import org.apache.spark.{SparkEnv, SparkException, SparkSQLException} import org.apache.spark.api.python.PythonFunction.PythonAccumulator import org.apache.spark.connect.proto import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -105,8 +106,10 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio val eventManager: SessionEventsManager = SessionEventsManager(this, new SystemClock()) // Mapping from relation ID (passed to client) to runtime dataframe. Used for callbacks like - // foreachBatch() in Streaming. Lazy since most sessions don't need it. - private lazy val dataFrameCache: ConcurrentMap[String, DataFrame] = new ConcurrentHashMap() + // foreachBatch() in Streaming, and DataFrame.checkpoint API. Lazy since most sessions don't + // need it. + private[spark] lazy val dataFrameCache: ConcurrentMap[String, DataFrame] = + new ConcurrentHashMap() // Mapping from id to StreamingQueryListener. Used for methods like removeListener() in // StreamingQueryManager. 
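The checkpoint-related changes in this patch revolve around the per-session dataFrameCache noted above: the server keeps the materialized DataFrame and hands the client only an opaque relation ID (a UUID), which later plans can reference through CachedRemoteRelation and release with RemoveCachedRemoteRelation. The sketch below illustrates that bookkeeping in isolation; the RelationCache class and its method names are illustrative stand-ins and are not part of this patch.

import java.util.UUID
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.sql.DataFrame

// Minimal stand-in for the relation-ID-to-DataFrame bookkeeping kept per session.
class RelationCache {
  private val cache = new ConcurrentHashMap[String, DataFrame]()

  // Register a DataFrame under a fresh UUID and hand the id back to the caller,
  // similar in spirit to what handleCheckpointCommand does via cacheDataFrameById.
  def put(df: DataFrame): String = {
    val id = UUID.randomUUID().toString
    cache.put(id, df)
    id
  }

  // Look up a previously cached DataFrame when a plan references it by id.
  def get(id: String): Option[DataFrame] = Option(cache.get(id))

  // Drop the entry once the client asks for the cached relation to be removed.
  def remove(id: String): Unit = cache.remove(id)
}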
@@ -177,12 +180,14 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio */ private[service] def interruptAll(): Seq[String] = { val interruptedIds = new mutable.ArrayBuffer[String]() + val operationsIds = + SparkConnectService.streamingSessionManager.cleanupRunningQueries(this, blocking = false) executions.asScala.values.foreach { execute => if (execute.interrupt()) { interruptedIds += execute.operationId } } - interruptedIds.toSeq + interruptedIds.toSeq ++ operationsIds } /** @@ -192,6 +197,8 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio */ private[service] def interruptTag(tag: String): Seq[String] = { val interruptedIds = new mutable.ArrayBuffer[String]() + val queries = SparkConnectService.streamingSessionManager.getTaggedQuery(tag, session) + queries.foreach(q => Future(q.query.stop())(ExecutionContext.global)) executions.asScala.values.foreach { execute => if (execute.sparkSessionTags.contains(tag)) { if (execute.interrupt()) { @@ -199,7 +206,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio } } } - interruptedIds.toSeq + interruptedIds.toSeq ++ queries.map(_.operationId) } /** @@ -296,7 +303,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio // Clean up running streaming queries. // Note: there can be concurrent streaming queries being started. - SparkConnectService.streamingSessionManager.cleanupRunningQueries(this) + SparkConnectService.streamingSessionManager.cleanupRunningQueries(this, blocking = true) streamingForeachBatchRunnerCleanerCache.cleanUpAll() // Clean up any streaming workers. removeAllListeners() // removes all listener and stop python listener processes if necessary. diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectExecutionManager.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectExecutionManager.scala index 4fe7f3eceb81a..6681a5f509c6e 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectExecutionManager.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectExecutionManager.scala @@ -29,7 +29,7 @@ import com.google.common.cache.CacheBuilder import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.connect.proto -import org.apache.spark.internal.{Logging, LogKey, MDC} +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.connect.config.Connect.{CONNECT_EXECUTE_MANAGER_ABANDONED_TOMBSTONES_SIZE, CONNECT_EXECUTE_MANAGER_DETACHED_TIMEOUT, CONNECT_EXECUTE_MANAGER_MAINTENANCE_INTERVAL} import org.apache.spark.util.ThreadUtils @@ -95,7 +95,7 @@ private[connect] class SparkConnectExecutionManager() extends Logging { sessionHolder.addExecuteHolder(executeHolder) executions.put(executeHolder.key, executeHolder) lastExecutionTimeMs = None - logInfo(log"ExecuteHolder ${MDC(LogKey.EXECUTE_KEY, executeHolder.key)} is created.") + logInfo(log"ExecuteHolder ${MDC(LogKeys.EXECUTE_KEY, executeHolder.key)} is created.") } schedulePeriodicChecks() // Starts the maintenance thread if it hasn't started. 
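The SessionHolder changes above distinguish a blocking stop of a streaming query (used when the whole session is released) from a fire-and-forget stop scheduled on ExecutionContext.global (used by interruptAll and interruptTag so the interrupt call can return promptly). A minimal sketch of that pattern follows; the QueryStopper object is an illustrative name, not something introduced by this patch.

import scala.concurrent.{ExecutionContext, Future}

import org.apache.spark.sql.streaming.StreamingQuery

// Illustrative helper: stop a streaming query either synchronously or on a background thread.
object QueryStopper {
  def stop(query: StreamingQuery, blocking: Boolean): Unit = {
    if (blocking) {
      // Waits until stop() returns; suitable when tearing down the whole session.
      query.stop()
    } else {
      // Hands the potentially slow stop() call to the global execution context so the
      // caller (e.g. an interrupt handler) does not block on it.
      Future(query.stop())(ExecutionContext.global)
    }
  }
}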
@@ -122,7 +122,7 @@ private[connect] class SparkConnectExecutionManager() extends Logging { if (executions.isEmpty) { lastExecutionTimeMs = Some(System.currentTimeMillis()) } - logInfo(log"ExecuteHolder ${MDC(LogKey.EXECUTE_KEY, key)} is removed.") + logInfo(log"ExecuteHolder ${MDC(LogKeys.EXECUTE_KEY, key)} is removed.") } // close the execution outside the lock executeHolder.foreach { e => @@ -147,7 +147,7 @@ private[connect] class SparkConnectExecutionManager() extends Logging { sessionExecutionHolders.foreach { case (_, executeHolder) => val info = executeHolder.getExecuteInfo logInfo( - log"Execution ${MDC(LogKey.EXECUTE_INFO, info)} removed in removeSessionExecutions.") + log"Execution ${MDC(LogKeys.EXECUTE_INFO, info)} removed in removeSessionExecutions.") removeExecuteHolder(executeHolder.key, abandoned = true) } } @@ -202,7 +202,7 @@ private[connect] class SparkConnectExecutionManager() extends Logging { val interval = SparkEnv.get.conf.get(CONNECT_EXECUTE_MANAGER_MAINTENANCE_INTERVAL) logInfo( log"Starting thread for cleanup of abandoned executions every " + - log"${MDC(LogKey.INTERVAL, interval)} ms") + log"${MDC(LogKeys.INTERVAL, interval)} ms") scheduledExecutor = Some(Executors.newSingleThreadScheduledExecutor()) scheduledExecutor.get.scheduleAtFixedRate( () => { @@ -242,7 +242,7 @@ private[connect] class SparkConnectExecutionManager() extends Logging { toRemove.foreach { executeHolder => val info = executeHolder.getExecuteInfo logInfo( - log"Found execution ${MDC(LogKey.EXECUTE_INFO, info)} that was abandoned " + + log"Found execution ${MDC(LogKeys.EXECUTE_INFO, info)} that was abandoned " + log"and expired and will be removed.") removeExecuteHolder(executeHolder.key, abandoned = true) } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala index c1b1bacba3b6d..90759c00ccfca 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectInterceptorRegistry.scala @@ -46,8 +46,14 @@ object SparkConnectInterceptorRegistry { * @param sb */ def chainInterceptors(sb: NettyServerBuilder): Unit = { + chainInterceptors(sb, createConfiguredInterceptors()) + } + + def chainInterceptors( + sb: NettyServerBuilder, + additionalInterceptors: Seq[ServerInterceptor]): Unit = { interceptorChain.foreach(i => sb.intercept(i())) - createConfiguredInterceptors().foreach(sb.intercept(_)) + additionalInterceptors.foreach(sb.intercept(_)) } // Type used to identify the closure responsible to instantiate a ServerInterceptor. 
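The chainInterceptors refactor above lets callers supply a precomputed list of interceptors instead of re-reading them from the configuration. A minimal sketch of chaining gRPC server interceptors onto a NettyServerBuilder follows; the MethodNameLoggingInterceptor and InterceptorChaining names are illustrative and not part of this patch.

import io.grpc.{Metadata, ServerCall, ServerCallHandler, ServerInterceptor}
import io.grpc.netty.NettyServerBuilder

// Illustrative interceptor that records the gRPC method name before delegating the call.
class MethodNameLoggingInterceptor extends ServerInterceptor {
  override def interceptCall[ReqT, RespT](
      call: ServerCall[ReqT, RespT],
      headers: Metadata,
      next: ServerCallHandler[ReqT, RespT]): ServerCall.Listener[ReqT] = {
    println(s"RPC: ${call.getMethodDescriptor.getFullMethodName}")
    next.startCall(call, headers)
  }
}

object InterceptorChaining {
  // Attach every interceptor to the builder, in order, before the server is built.
  def chain(sb: NettyServerBuilder, interceptors: Seq[ServerInterceptor]): NettyServerBuilder = {
    interceptors.foreach(sb.intercept(_))
    sb
  }
}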
diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectServer.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectServer.scala index c55600886a393..4f05ea927e12b 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectServer.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectServer.scala @@ -17,12 +17,8 @@ package org.apache.spark.sql.connect.service -import java.net.InetSocketAddress - -import scala.jdk.CollectionConverters._ - import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{HOST, PORT} +import org.apache.spark.internal.LogKeys.{HOST, PORT} import org.apache.spark.sql.SparkSession /** @@ -36,12 +32,10 @@ object SparkConnectServer extends Logging { try { try { SparkConnectService.start(session.sparkContext) - SparkConnectService.server.getListenSockets.asScala.foreach { sa => - val isa = sa.asInstanceOf[InetSocketAddress] - logInfo( - log"Spark Connect server started at: " + - log"${MDC(HOST, isa.getAddress.getHostAddress)}:${MDC(PORT, isa.getPort)}") - } + val isa = SparkConnectService.bindingAddress + logInfo( + log"Spark Connect server started at: " + + log"${MDC(HOST, isa.getAddress.getHostAddress)}:${MDC(PORT, isa.getPort)}") } catch { case e: Exception => logError("Error starting Spark Connect server", e) @@ -49,8 +43,10 @@ object SparkConnectServer extends Logging { } SparkConnectService.server.awaitTermination() } finally { + if (SparkConnectService.started) { + SparkConnectService.stop() + } session.stop() - SparkConnectService.uiTab.foreach(_.detach()) } } } diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala index 4b35971286ddf..e9c92f8d007ea 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala @@ -31,18 +31,20 @@ import io.grpc.protobuf.services.ProtoReflectionService import io.grpc.stub.StreamObserver import org.apache.commons.lang3.StringUtils -import org.apache.spark.{SparkContext, SparkEnv} +import org.apache.spark.{SparkConf, SparkContext, SparkEnv} import org.apache.spark.connect.proto import org.apache.spark.connect.proto.{AddArtifactsRequest, AddArtifactsResponse, SparkConnectServiceGrpc} import org.apache.spark.connect.proto.SparkConnectServiceGrpc.AsyncService import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.HOST +import org.apache.spark.internal.LogKeys.HOST import org.apache.spark.internal.config.UI.UI_ENABLED -import org.apache.spark.sql.connect.config.Connect.{CONNECT_GRPC_BINDING_ADDRESS, CONNECT_GRPC_BINDING_PORT, CONNECT_GRPC_MARSHALLER_RECURSION_LIMIT, CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE} +import org.apache.spark.scheduler.{LiveListenerBus, SparkListenerEvent} +import org.apache.spark.sql.connect.config.Connect.{CONNECT_GRPC_BINDING_ADDRESS, CONNECT_GRPC_BINDING_PORT, CONNECT_GRPC_MARSHALLER_RECURSION_LIMIT, CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE, CONNECT_GRPC_PORT_MAX_RETRIES} import org.apache.spark.sql.connect.execution.ConnectProgressExecutionListener import org.apache.spark.sql.connect.ui.{SparkConnectServerAppStatusStore, SparkConnectServerListener, 
SparkConnectServerTab} import org.apache.spark.sql.connect.utils.ErrorUtils import org.apache.spark.status.ElementTrackingStore +import org.apache.spark.util.Utils /** * The SparkConnectService implementation. @@ -283,10 +285,12 @@ class SparkConnectService(debug: Boolean) extends AsyncService with BindableServ object SparkConnectService extends Logging { private[connect] var server: Server = _ + private[connect] var bindingAddress: InetSocketAddress = _ private[connect] var uiTab: Option[SparkConnectServerTab] = None private[connect] var listener: SparkConnectServerListener = _ private[connect] var executionListener: Option[ConnectProgressExecutionListener] = None + private[connect] var listenerBus: LiveListenerBus = _ // For testing purpose, it's package level private. private[connect] def localPort: Int = { @@ -296,6 +300,10 @@ object SparkConnectService extends Logging { server.getPort } + private[connect] def hostAddress: String = { + Utils.localCanonicalHostName() + } + private[connect] lazy val executionManager = new SparkConnectExecutionManager() private[connect] lazy val sessionManager = new SparkConnectSessionManager() @@ -303,6 +311,10 @@ object SparkConnectService extends Logging { private[connect] val streamingSessionManager = new SparkConnectStreamingQueryCache() + // Package level private for testing purpose. + @volatile private[connect] var started = false + @volatile private[connect] var stopped = false + /** * Based on the userId and sessionId, find or create a new SparkSession. */ @@ -315,6 +327,13 @@ object SparkConnectService extends Logging { previoslyObservedSessionId) } + // For testing + private[spark] def getOrCreateIsolatedSession( + userId: String, + sessionId: String): SessionHolder = { + getOrCreateIsolatedSession(userId, sessionId, None) + } + /** * If there are no executions, return Left with System.currentTimeMillis of last active * execution. Otherwise return Right with list of ExecuteInfo of all executions. @@ -336,6 +355,7 @@ object SparkConnectService extends Logging { // Add the execution listener needed for query progress. 
executionListener = Some(new ConnectProgressExecutionListener) sc.addSparkListener(executionListener.get) + listenerBus = sc.listenerBus } /** @@ -344,35 +364,75 @@ object SparkConnectService extends Logging { private def startGRPCService(): Unit = { val debugMode = SparkEnv.get.conf.getBoolean("spark.connect.grpc.debug.enabled", true) val bindAddress = SparkEnv.get.conf.get(CONNECT_GRPC_BINDING_ADDRESS) - val port = SparkEnv.get.conf.get(CONNECT_GRPC_BINDING_PORT) - val sb = bindAddress match { - case Some(hostname) => - logInfo(log"start GRPC service at: ${MDC(HOST, hostname)}") - NettyServerBuilder.forAddress(new InetSocketAddress(hostname, port)) - case _ => NettyServerBuilder.forPort(port) + val startPort = SparkEnv.get.conf.get(CONNECT_GRPC_BINDING_PORT) + val sparkConnectService = new SparkConnectService(debugMode) + val protoReflectionService = + if (debugMode) Some(ProtoReflectionService.newInstance()) else None + val configuredInterceptors = SparkConnectInterceptorRegistry.createConfiguredInterceptors() + + val startServiceFn = (port: Int) => { + val sb = bindAddress match { + case Some(hostname) => + logInfo(log"start GRPC service at: ${MDC(HOST, hostname)}") + NettyServerBuilder.forAddress(new InetSocketAddress(hostname, port)) + case _ => NettyServerBuilder.forPort(port) + } + sb.maxInboundMessageSize(SparkEnv.get.conf.get(CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE).toInt) + .addService(sparkConnectService) + + // Add all registered interceptors to the server builder. + SparkConnectInterceptorRegistry.chainInterceptors(sb, configuredInterceptors) + + // If debug mode is configured, load the ProtoReflection service so that tools like + // grpcurl can introspect the API for debugging. + protoReflectionService.foreach(service => sb.addService(service)) + + server = sb.build + server.start() + + // It will throw an IllegalStateException if you want to access the binding address + // while the server is in a terminated state, so record the actual binding address + // immediately after the server starts. + // There should only be one address, get the actual binding address + // of the server according the `server.port()` + bindingAddress = server.getListenSockets.asScala + .find(_.isInstanceOf[InetSocketAddress]) + .get + .asInstanceOf[InetSocketAddress] + + (server, server.getPort) } - sb.maxInboundMessageSize(SparkEnv.get.conf.get(CONNECT_GRPC_MAX_INBOUND_MESSAGE_SIZE).toInt) - .addService(new SparkConnectService(debugMode)) - - // Add all registered interceptors to the server builder. - SparkConnectInterceptorRegistry.chainInterceptors(sb) - // If debug mode is configured, load the ProtoReflection service so that tools like - // grpcurl can introspect the API for debugging. 
- if (debugMode) { - sb.addService(ProtoReflectionService.newInstance()) - } - server = sb.build - server.start() + val maxRetries: Int = SparkEnv.get.conf.get(CONNECT_GRPC_PORT_MAX_RETRIES) + Utils.startServiceOnPort[Server](startPort, startServiceFn, maxRetries, getClass.getName) } // Starts the service - def start(sc: SparkContext): Unit = { + def start(sc: SparkContext): Unit = synchronized { + if (started) { + logWarning("The Spark Connect service has already started.") + return + } + startGRPCService() createListenerAndUI(sc) + + started = true + stopped = false + postSparkConnectServiceStarted(sc) } - def stop(timeout: Option[Long] = None, unit: Option[TimeUnit] = None): Unit = { + def stop(timeout: Option[Long] = None, unit: Option[TimeUnit] = None): Unit = synchronized { + if (stopped) { + logWarning("The Spark Connect service has already been stopped.") + return + } + + if (!started) { + throw new IllegalStateException( + "Attempting to stop the Spark Connect service that has not been started.") + } + if (server != null) { if (timeout.isDefined && unit.isDefined) { server.shutdown() @@ -385,6 +445,57 @@ object SparkConnectService extends Logging { executionManager.shutdown() sessionManager.shutdown() uiTab.foreach(_.detach()) + + started = false + stopped = true + postSparkConnectServiceEnd() + } + + /** + * Post the event that the Spark Connect service has started. This is expected to be called only + * once after the service is ready. + */ + private def postSparkConnectServiceStarted(sc: SparkContext): Unit = { + postServiceEvent(isa => + SparkListenerConnectServiceStarted( + hostAddress, + isa.getPort, + sc.conf, + System.currentTimeMillis())) + } + + /** + * Post the event that the Spark Connect service is offline. + */ + private[connect] def postSparkConnectServiceEnd(): Unit = { + postServiceEvent(isa => + SparkListenerConnectServiceEnd(hostAddress, isa.getPort, System.currentTimeMillis())) + } + + /** + * Post the event to the Spark listener bus. To deliver the event to the listeners, the listener + * bus must be active in this time. + */ + private def postServiceEvent(eventBuilder: InetSocketAddress => SparkListenerEvent): Unit = { + // Sanity checks + if (server == null) { + logWarning( + "The Spark Connect event was dropped because the server bus has not been created and set.") + return + } + + if (bindingAddress == null) { + logWarning( + "The Spark Connect event was dropped because the internal server address is not set.") + return + } + + if (listenerBus == null) { + logWarning("The Spark Connect event was dropped because the listener bus has not been set.") + return + } + + listenerBus.post(eventBuilder(bindingAddress)) } def extractErrorMessage(st: Throwable): String = { @@ -400,3 +511,38 @@ object SparkConnectService extends Logging { } } } + +/** + * The event is sent after the Spark Connect service has started and is ready to receive the + * inbound requests. + * + * @param hostAddress: + * The host address of the started Spark Connect service. + * @param bindingPort: + * The binding port of the started Spark Connect service. + * @param sparkConf: + * The SparkConf of the active SparkContext that associated with the service. + * @param eventTime: + * The time in ms when the event was generated. + */ +case class SparkListenerConnectServiceStarted( + hostAddress: String, + bindingPort: Int, + sparkConf: SparkConf, + eventTime: Long) + extends SparkListenerEvent + +/** + * The event is sent to inform that Spark Connect service has already been shutdown. 
This event + * indicates the end of the service, and any in-processing requests or upcoming requests are not + * guaranteed to be handled properly by the service. + * + * @param hostAddress: + * The host address of the Spark Connect service. + * @param bindingPort: + * The binding port of the Spark Connect service. + * @param eventTime: + * The time in ms when the event was generated. + */ +case class SparkListenerConnectServiceEnd(hostAddress: String, bindingPort: Int, eventTime: Long) + extends SparkListenerEvent diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectSessionManager.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectSessionManager.scala index 1a34964932ef2..edaaa640bf12e 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectSessionManager.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectSessionManager.scala @@ -30,7 +30,7 @@ import com.google.common.cache.CacheBuilder import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{INTERVAL, SESSION_HOLD_INFO} +import org.apache.spark.internal.LogKeys.{INTERVAL, SESSION_HOLD_INFO} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connect.config.Connect.{CONNECT_SESSION_MANAGER_CLOSED_SESSIONS_TOMBSTONES_SIZE, CONNECT_SESSION_MANAGER_DEFAULT_SESSION_TIMEOUT, CONNECT_SESSION_MANAGER_MAINTENANCE_INTERVAL} import org.apache.spark.util.ThreadUtils diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCache.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCache.scala index 4c9b3baa689b3..03719ddd87419 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCache.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCache.scala @@ -23,11 +23,12 @@ import java.util.concurrent.TimeUnit import javax.annotation.concurrent.GuardedBy import scala.collection.mutable +import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.{Duration, DurationInt, FiniteDuration} import scala.util.control.NonFatal import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{DURATION, NEW_VALUE, OLD_VALUE, QUERY_CACHE_VALUE, QUERY_ID, SESSION_ID} +import org.apache.spark.internal.LogKeys.{DURATION, NEW_VALUE, OLD_VALUE, QUERY_CACHE_VALUE, QUERY_ID, SESSION_ID} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.streaming.StreamingQuery import org.apache.spark.util.{Clock, SystemClock, ThreadUtils} @@ -55,16 +56,28 @@ private[connect] class SparkConnectStreamingQueryCache( import SparkConnectStreamingQueryCache._ - def registerNewStreamingQuery(sessionHolder: SessionHolder, query: StreamingQuery): Unit = { - queryCacheLock.synchronized { + def registerNewStreamingQuery( + sessionHolder: SessionHolder, + query: StreamingQuery, + tags: Set[String], + operationId: String): Unit = queryCacheLock.synchronized { + taggedQueriesLock.synchronized { val value = QueryCacheValue( userId = sessionHolder.userId, sessionId = sessionHolder.sessionId, session = sessionHolder.session, query = query, + operationId = operationId, expiresAtMs = None) - 
queryCache.put(QueryCacheKey(query.id.toString, query.runId.toString), value) match { + val queryKey = QueryCacheKey(query.id.toString, query.runId.toString) + tags.foreach { tag => + taggedQueries + .getOrElseUpdate(tag, new mutable.ArrayBuffer[QueryCacheKey]) + .addOne(queryKey) + } + + queryCache.put(queryKey, value) match { case Some(existing) => // Query is being replace. Not really expected. logWarning(log"Replacing existing query in the cache (unexpected). " + log"Query Id: ${MDC(QUERY_ID, query.id)}.Existing value ${MDC(OLD_VALUE, existing)}, " + @@ -80,7 +93,7 @@ private[connect] class SparkConnectStreamingQueryCache( } /** - * Returns [[StreamingQuery]] if it is cached and session matches the cached query. It ensures + * Returns [[QueryCacheValue]] if it is cached and session matches the cached query. It ensures * the session associated with it matches the session passed into the call. If the query is * inactive (i.e. it has a cache expiry time set), this access extends its expiry time. So if a * client keeps accessing a query, it stays in the cache. @@ -88,8 +101,35 @@ private[connect] class SparkConnectStreamingQueryCache( def getCachedQuery( queryId: String, runId: String, - session: SparkSession): Option[StreamingQuery] = { - val key = QueryCacheKey(queryId, runId) + tags: Set[String], + session: SparkSession): Option[QueryCacheValue] = { + taggedQueriesLock.synchronized { + val key = QueryCacheKey(queryId, runId) + val result = getCachedQuery(QueryCacheKey(queryId, runId), session) + tags.foreach { tag => + taggedQueries.getOrElseUpdate(tag, new mutable.ArrayBuffer[QueryCacheKey]).addOne(key) + } + result + } + } + + /** + * Similar with [[getCachedQuery]] but it gets queries tagged previously. + */ + def getTaggedQuery(tag: String, session: SparkSession): Seq[QueryCacheValue] = { + taggedQueriesLock.synchronized { + taggedQueries + .get(tag) + .map { k => + k.flatMap(getCachedQuery(_, session)).toSeq + } + .getOrElse(Seq.empty[QueryCacheValue]) + } + } + + private def getCachedQuery( + key: QueryCacheKey, + session: SparkSession): Option[QueryCacheValue] = { queryCacheLock.synchronized { queryCache.get(key).flatMap { v => if (v.session == session) { @@ -98,7 +138,7 @@ private[connect] class SparkConnectStreamingQueryCache( val expiresAtMs = clock.getTimeMillis() + stoppedQueryInactivityTimeout.toMillis queryCache.put(key, v.copy(expiresAtMs = Some(expiresAtMs))) } - Some(v.query) + Some(v) } else None // Should be rare, may be client is trying access from a different session. } } @@ -109,7 +149,10 @@ private[connect] class SparkConnectStreamingQueryCache( * the queryCache. This is used when session is expired and we need to cleanup resources of that * session. 
*/ - def cleanupRunningQueries(sessionHolder: SessionHolder): Unit = { + def cleanupRunningQueries( + sessionHolder: SessionHolder, + blocking: Boolean = true): Seq[String] = { + val operationIds = new mutable.ArrayBuffer[String]() for ((k, v) <- queryCache) { if (v.userId.equals(sessionHolder.userId) && v.sessionId.equals(sessionHolder.sessionId)) { if (v.query.isActive && Option(v.session.streams.get(k.queryId)).nonEmpty) { @@ -117,7 +160,12 @@ private[connect] class SparkConnectStreamingQueryCache( log"Stopping the query with id ${MDC(QUERY_ID, k.queryId)} " + log"since the session has timed out") try { - v.query.stop() + if (blocking) { + v.query.stop() + } else { + Future(v.query.stop())(ExecutionContext.global) + } + operationIds.addOne(v.operationId) } catch { case NonFatal(ex) => logWarning( @@ -128,6 +176,7 @@ private[connect] class SparkConnectStreamingQueryCache( } } } + operationIds.toSeq } // Visible for testing @@ -146,6 +195,10 @@ private[connect] class SparkConnectStreamingQueryCache( private val queryCache = new mutable.HashMap[QueryCacheKey, QueryCacheValue] private val queryCacheLock = new Object + @GuardedBy("queryCacheLock") + private val taggedQueries = new mutable.HashMap[String, mutable.ArrayBuffer[QueryCacheKey]] + private val taggedQueriesLock = new Object + @GuardedBy("queryCacheLock") private var scheduledExecutor: Option[ScheduledExecutorService] = None @@ -176,7 +229,7 @@ private[connect] class SparkConnectStreamingQueryCache( * - Update status of query if it is inactive. Sets an expiry time for such queries * - Drop expired queries from the cache. */ - private def periodicMaintenance(): Unit = { + private def periodicMaintenance(): Unit = taggedQueriesLock.synchronized { queryCacheLock.synchronized { val nowMs = clock.getTimeMillis() @@ -212,6 +265,18 @@ private[connect] class SparkConnectStreamingQueryCache( } } } + + taggedQueries.toArray.foreach { case (key, value) => + value.zipWithIndex.toArray.foreach { case (queryKey, i) => + if (queryCache.contains(queryKey)) { + value.remove(i) + } + } + + if (value.isEmpty) { + taggedQueries.remove(key) + } + } } } } @@ -225,6 +290,7 @@ private[connect] object SparkConnectStreamingQueryCache { sessionId: String, session: SparkSession, // Holds the reference to the session. query: StreamingQuery, // Holds the reference to the query. + operationId: String, expiresAtMs: Option[Long] = None // Expiry time for a stopped query. 
) { override def toString(): String = diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/ui/SparkConnectServerListener.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/ui/SparkConnectServerListener.scala index a1bbab7dbdbc2..65db08be7f904 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/ui/SparkConnectServerListener.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/ui/SparkConnectServerListener.scala @@ -22,7 +22,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SparkConf, SparkContext, SparkEnv} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{OP_ID, SESSION_ID} +import org.apache.spark.internal.LogKeys.{OP_ID, SESSION_ID} import org.apache.spark.internal.config.Status.LIVE_ENTITY_UPDATE_PERIOD import org.apache.spark.scheduler._ import org.apache.spark.sql.connect.config.Connect.{CONNECT_UI_SESSION_LIMIT, CONNECT_UI_STATEMENT_LIMIT} diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala index b1bfe71930fb1..355048cf30363 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/ErrorUtils.scala @@ -35,11 +35,11 @@ import org.apache.commons.lang3.exception.ExceptionUtils import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods -import org.apache.spark.{SparkEnv, SparkException, SparkThrowable} +import org.apache.spark.{QueryContextType, SparkEnv, SparkException, SparkThrowable} import org.apache.spark.api.python.PythonException import org.apache.spark.connect.proto.FetchErrorDetailsResponse import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{OP_TYPE, SESSION_ID, USER_ID} +import org.apache.spark.internal.LogKeys.{OP_TYPE, SESSION_ID, USER_ID} import org.apache.spark.sql.connect.config.Connect import org.apache.spark.sql.connect.service.{ExecuteEventsManager, SessionHolder, SessionKey, SparkConnectService} import org.apache.spark.sql.internal.SQLConf @@ -118,15 +118,27 @@ private[connect] object ErrorUtils extends Logging { sparkThrowableBuilder.setErrorClass(sparkThrowable.getErrorClass) } for (queryCtx <- sparkThrowable.getQueryContext) { - sparkThrowableBuilder.addQueryContexts( - FetchErrorDetailsResponse.QueryContext - .newBuilder() + val builder = FetchErrorDetailsResponse.QueryContext + .newBuilder() + val context = if (queryCtx.contextType() == QueryContextType.SQL) { + builder + .setContextType(FetchErrorDetailsResponse.QueryContext.ContextType.SQL) .setObjectType(queryCtx.objectType()) .setObjectName(queryCtx.objectName()) .setStartIndex(queryCtx.startIndex()) .setStopIndex(queryCtx.stopIndex()) .setFragment(queryCtx.fragment()) - .build()) + .setSummary(queryCtx.summary()) + .build() + } else { + builder + .setContextType(FetchErrorDetailsResponse.QueryContext.ContextType.DATAFRAME) + .setFragment(queryCtx.fragment()) + .setCallSite(queryCtx.callSite()) + .setSummary(queryCtx.summary()) + .build() + } + sparkThrowableBuilder.addQueryContexts(context) } if (sparkThrowable.getSqlState != null) { sparkThrowableBuilder.setSqlState(sparkThrowable.getSqlState) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/MetricGenerator.scala 
b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/MetricGenerator.scala index e2e4128311871..d76bec5454abb 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/MetricGenerator.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/utils/MetricGenerator.scala @@ -70,6 +70,7 @@ private[connect] object MetricGenerator extends AdaptiveSparkPlanHelper { .newBuilder() .setName(p.nodeName) .setPlanId(p.id) + .setParent(parentId) .putAllExecutionMetrics(mv.asJava) .build() Seq(mo) ++ transformChildren(p) diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala index cc9decb4c98bc..264c6aa70ae2e 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ProtoToParsedPlanTestSuite.scala @@ -26,7 +26,7 @@ import scala.util.{Failure, Success, Try} import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.connect.proto -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.{catalog, QueryPlanningTracker} import org.apache.spark.sql.catalyst.analysis.{caseSensitiveResolution, Analyzer, FunctionRegistry, Resolver, TableFunctionRegistry} @@ -126,6 +126,7 @@ class ProtoToParsedPlanTestSuite Connect.CONNECT_EXTENSIONS_EXPRESSION_CLASSES.key, "org.apache.spark.sql.connect.plugin.ExampleExpressionPlugin") .set(org.apache.spark.sql.internal.SQLConf.ANSI_ENABLED.key, false.toString) + .set(org.apache.spark.sql.internal.SQLConf.USE_COMMON_EXPR_ID_FOR_ALIAS.key, false.toString) } protected val suiteBaseResourcePath = commonResourcePath.resolve("query-tests") diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/dsl/package.scala similarity index 95% rename from connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala rename to connector/connect/server/src/test/scala/org/apache/spark/sql/connect/dsl/package.scala index 6aadb6c34b779..3edb63ee8e815 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/dsl/package.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/dsl/package.scala @@ -332,6 +332,21 @@ package object dsl { def sql(sqlText: String): Relation = { Relation.newBuilder().setSql(SQL.newBuilder().setQuery(sqlText)).build() } + + def table(name: String): Relation = { + proto.Relation + .newBuilder() + .setRead( + proto.Read + .newBuilder() + .setNamedTable( + proto.Read.NamedTable + .newBuilder() + .setUnparsedIdentifier(name) + .build()) + .build()) + .build() + } } implicit class DslNAFunctions(val logicalPlan: Relation) { @@ -513,6 +528,25 @@ package object dsl { freqItems(cols.toArray, support) def freqItems(cols: Seq[String]): Relation = freqItems(cols, 0.01) + + def sampleBy(col: String, fractions: Map[Any, Double], seed: Long): Relation = { + Relation + .newBuilder() + .setSampleBy( + StatSampleBy + .newBuilder() + .setInput(logicalPlan) + .addAllFractions(fractions.toSeq.map { case (k, v) => + StatSampleBy.Fraction + .newBuilder() + .setStratum(toLiteralProto(k)) + .setFraction(v) + .build() + 
}.asJava) + .setSeed(seed) + .build()) + .build() + } } def select(exprs: Expression*): Relation = { @@ -587,6 +621,10 @@ package object dsl { .build() } + def filter(condition: Expression): Relation = { + where(condition) + } + def deduplicate(colNames: Seq[String]): Relation = Relation .newBuilder() @@ -641,6 +679,10 @@ package object dsl { join(otherPlan, joinType, usingColumns, None) } + def crossJoin(otherPlan: Relation): Relation = { + join(otherPlan, JoinType.JOIN_TYPE_CROSS, Seq(), None) + } + private def join( otherPlan: Relation, joinType: JoinType = JoinType.JOIN_TYPE_INNER, @@ -663,7 +705,7 @@ package object dsl { def as(alias: String): Relation = { Relation - .newBuilder(logicalPlan) + .newBuilder() .setSubqueryAlias(SubqueryAlias.newBuilder().setAlias(alias).setInput(logicalPlan)) .build() } @@ -693,9 +735,10 @@ package object dsl { .setNullOrdering(Expression.SortOrder.NullOrdering.SORT_NULLS_FIRST) .setDirection(Expression.SortOrder.SortDirection.SORT_DIRECTION_ASCENDING) .setChild( - Expression.newBuilder + Expression + .newBuilder() .setUnresolvedAttribute( - Expression.UnresolvedAttribute.newBuilder.setUnparsedIdentifier(col).build()) + Expression.UnresolvedAttribute.newBuilder().setUnparsedIdentifier(col).build()) .build()) .build() } @@ -992,7 +1035,13 @@ package object dsl { WithColumnsRenamed .newBuilder() .setInput(logicalPlan) - .putAllRenameColumnsMap(renameColumnsMap.asJava)) + .addAllRenames(renameColumnsMap.toSeq.map { case (k, v) => + WithColumnsRenamed.Rename + .newBuilder() + .setColName(k) + .setNewColName(v) + .build() + }.asJava)) .build() } diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala index dfada825df47d..70da1f0a2a1d0 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerSuite.scala @@ -20,11 +20,9 @@ package org.apache.spark.sql.connect.planner import scala.jdk.CollectionConverters._ import com.google.protobuf.ByteString -import io.grpc.stub.StreamObserver import org.apache.spark.SparkFunSuite import org.apache.spark.connect.proto -import org.apache.spark.connect.proto.ExecutePlanResponse import org.apache.spark.connect.proto.Expression.{Alias, ExpressionString, UnresolvedStar} import org.apache.spark.sql.{AnalysisException, Dataset, Row} import org.apache.spark.sql.catalyst.InternalRow @@ -34,7 +32,7 @@ import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.connect.common.InvalidPlanInput import org.apache.spark.sql.connect.common.LiteralValueProtoConverter.toLiteralProto -import org.apache.spark.sql.connect.service.{ExecuteHolder, ExecuteStatus, SessionHolder, SessionStatus, SparkConnectService} +import org.apache.spark.sql.connect.service.SessionHolder import org.apache.spark.sql.execution.arrow.ArrowConverters import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} @@ -45,21 +43,12 @@ import org.apache.spark.unsafe.types.UTF8String * test cases. 
*/ trait SparkConnectPlanTest extends SharedSparkSession { - - class MockObserver extends StreamObserver[proto.ExecutePlanResponse] { - override def onNext(value: ExecutePlanResponse): Unit = {} - override def onError(t: Throwable): Unit = {} - override def onCompleted(): Unit = {} - } - def transform(rel: proto.Relation): logical.LogicalPlan = { - new SparkConnectPlanner(SessionHolder.forTesting(spark)).transformRelation(rel) + SparkConnectPlannerTestUtils.transform(spark, rel) } def transform(cmd: proto.Command): Unit = { - val executeHolder = buildExecutePlanHolder(cmd) - new SparkConnectPlanner(executeHolder) - .process(cmd, new MockObserver()) + SparkConnectPlannerTestUtils.transform(spark, cmd) } def readRel: proto.Relation = @@ -114,29 +103,6 @@ trait SparkConnectPlanTest extends SharedSparkSession { localRelationBuilder.setData(ByteString.copyFrom(bytes)) proto.Relation.newBuilder().setLocalRelation(localRelationBuilder.build()).build() } - - def buildExecutePlanHolder(command: proto.Command): ExecuteHolder = { - val sessionHolder = SessionHolder.forTesting(spark) - sessionHolder.eventManager.status_(SessionStatus.Started) - - val context = proto.UserContext - .newBuilder() - .setUserId(sessionHolder.userId) - .build() - val plan = proto.Plan - .newBuilder() - .setCommand(command) - .build() - val request = proto.ExecutePlanRequest - .newBuilder() - .setPlan(plan) - .setSessionId(sessionHolder.sessionId) - .setUserContext(context) - .build() - val executeHolder = SparkConnectService.executionManager.createExecuteHolder(request) - executeHolder.eventsManager.status_(ExecuteStatus.Started) - executeHolder - } } /** diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerTestUtils.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerTestUtils.scala new file mode 100644 index 0000000000000..c9d282af2e5ea --- /dev/null +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectPlannerTestUtils.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.connect.planner + +import io.grpc.stub.StreamObserver + +import org.apache.spark.connect.proto +import org.apache.spark.connect.proto.ExecutePlanResponse +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connect.service.{ExecuteHolder, ExecuteStatus, SessionHolder, SessionStatus, SparkConnectService} + +object SparkConnectPlannerTestUtils { + def transform(spark: SparkSession, relation: proto.Relation): LogicalPlan = { + new SparkConnectPlanner(SessionHolder.forTesting(spark)).transformRelation(relation) + } + + def transform(spark: SparkSession, command: proto.Command): Unit = { + val executeHolder = buildExecutePlanHolder(spark, command) + new SparkConnectPlanner(executeHolder).process(command, new MockObserver()) + } + + private def buildExecutePlanHolder( + spark: SparkSession, + command: proto.Command): ExecuteHolder = { + val sessionHolder = SessionHolder.forTesting(spark) + sessionHolder.eventManager.status_(SessionStatus.Started) + + val context = proto.UserContext + .newBuilder() + .setUserId(sessionHolder.userId) + .build() + val plan = proto.Plan + .newBuilder() + .setCommand(command) + .build() + val request = proto.ExecutePlanRequest + .newBuilder() + .setPlan(plan) + .setSessionId(sessionHolder.sessionId) + .setUserContext(context) + .build() + + val executeHolder = SparkConnectService.executionManager.createExecuteHolder(request) + executeHolder.eventsManager.status_(ExecuteStatus.Started) + executeHolder + } + + private class MockObserver extends StreamObserver[proto.ExecutePlanResponse] { + override def onNext(value: ExecutePlanResponse): Unit = {} + override def onError(t: Throwable): Unit = {} + override def onCompleted(): Unit = {} + } +} diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala index 632e2308fc76b..7e862bcfc533f 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectProtoSuite.scala @@ -1036,7 +1036,7 @@ class SparkConnectProtoSuite extends PlanTest with SparkConnectPlanTest { } test("SPARK-47144: Collated string") { - Seq("UTF8_BINARY", "UTF8_BINARY_LCASE", "UNICODE", "UNICODE_CI").map(collationName => + Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI").map(collationName => Seq( s"select 'abc' collate $collationName", s"select collation('abc' collate $collationName)").map(query => diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala index af18fca9dd216..71ca0f44af680 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectServiceSuite.scala @@ -826,7 +826,9 @@ class SparkConnectServiceSuite when(restartedQuery.runId).thenReturn(DEFAULT_UUID) SparkConnectService.streamingSessionManager.registerNewStreamingQuery( SparkConnectService.getOrCreateIsolatedSession("c1", sessionId, None), - restartedQuery) + restartedQuery, + Set.empty[String], + "") f(verifyEvents) } } diff 
--git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala index 37c7fe25097c4..c234b4f068bc9 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/planner/SparkConnectWithSessionExtensionSuite.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.parser.{CompoundBody, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.connect.service.SessionHolder import org.apache.spark.sql.types.{DataType, StructType} @@ -54,6 +54,9 @@ class SparkConnectWithSessionExtensionSuite extends SparkFunSuite { override def parseQuery(sqlText: String): LogicalPlan = delegate.parseQuery(sqlText) + + override def parseScript(sqlScriptText: String): CompoundBody = + delegate.parseScript(sqlScriptText) } test("Parse table name with test parser") { diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/plugin/SparkConnectPluginRegistrySuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/plugin/SparkConnectPluginRegistrySuite.scala index a213a36168e8d..512cdad62b921 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/plugin/SparkConnectPluginRegistrySuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/plugin/SparkConnectPluginRegistrySuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.connect.proto.Relation import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.expressions.{Alias, Expression} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connect.ConnectProtoUtils import org.apache.spark.sql.connect.common.InvalidPlanInput import org.apache.spark.sql.connect.config.Connect import org.apache.spark.sql.connect.planner.{SparkConnectPlanner, SparkConnectPlanTest} @@ -68,7 +69,10 @@ class ExampleRelationPlugin extends RelationPlugin { return Optional.empty() } val plugin = rel.unpack(classOf[proto.ExamplePluginRelation]) - Optional.of(planner.transformRelation(plugin.getInput.toByteArray)) + val input = ConnectProtoUtils.parseRelationWithRecursionLimit( + plugin.getInput.toByteArray, + recursionLimit = 1024) + Optional.of(planner.transformRelation(input)) } } @@ -81,8 +85,10 @@ class ExampleExpressionPlugin extends ExpressionPlugin { return Optional.empty() } val exp = rel.unpack(classOf[proto.ExamplePluginExpression]) - Optional.of( - Alias(planner.transformExpression(exp.getChild.toByteArray), exp.getCustomField)()) + val child = ConnectProtoUtils.parseExpressionWithRecursionLimit( + exp.getChild.toByteArray, + recursionLimit = 1024) + Optional.of(Alias(planner.transformExpression(child), exp.getCustomField)()) } } @@ -198,9 +204,7 @@ class SparkConnectPluginRegistrySuite extends SharedSparkSession with SparkConne .build())) .build() - val executeHolder = buildExecutePlanHolder(plan) - new SparkConnectPlanner(executeHolder) - .process(plan, new MockObserver()) + transform(plan) 
assert(spark.sparkContext.getLocalProperty("testingProperty").equals("Martin")) } } diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectServiceInternalServerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectServiceInternalServerSuite.scala new file mode 100644 index 0000000000000..3240b33f3f090 --- /dev/null +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectServiceInternalServerSuite.scala @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect.service + +import java.net.ServerSocket +import java.util.concurrent.CopyOnWriteArrayList +import java.util.concurrent.Semaphore + +import scala.collection.mutable + +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite} +import org.apache.spark.internal.config._ +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent} +import org.apache.spark.sql.connect.SparkConnectPlugin +import org.apache.spark.sql.connect.config.Connect.{CONNECT_GRPC_BINDING_PORT, CONNECT_GRPC_PORT_MAX_RETRIES} +import org.apache.spark.util.Utils + +class SparkConnectServiceInternalServerSuite extends SparkFunSuite with LocalSparkContext { + + override def afterEach(): Unit = { + super.afterEach() + SparkConnectServiceLifeCycleListener.reset() + } + + test("The SparkConnectService will retry using different ports in case of conflicts") { + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + sc = new SparkContext(conf) + + // 1. By default there is no retry, the SparkConnectService will fail to start + // if the port is already in use. + val startPort = 15002 + withSparkEnvConfs((CONNECT_GRPC_BINDING_PORT.key, startPort.toString)) { + withPortOccupied(startPort, startPort) { + val portConflicts = intercept[Throwable] { + SparkConnectService.start(sc) + } + portConflicts.printStackTrace() + assert(Utils.isBindCollision(portConflicts)) + } + } + + // 2. Enable the port retry, the SparkConnectService will retry using different ports + // until it finds an available port before reaching the maximum number of retries. + withSparkEnvConfs( + (CONNECT_GRPC_BINDING_PORT.key, startPort.toString), + (CONNECT_GRPC_PORT_MAX_RETRIES.key, "3")) { + // 15002, 15003, 15004 occupied + withPortOccupied(startPort, startPort + 2) { + SparkConnectService.start(sc) + assert(SparkConnectService.started) + assert(SparkConnectService.server.getPort == startPort + 3) // 15005 available + SparkConnectService.stop() + } + } + + // 3. 
It will fail if not able to find an available port + // before reaching the maximum number of retries. + withSparkEnvConfs( + (CONNECT_GRPC_BINDING_PORT.key, startPort.toString), + (CONNECT_GRPC_PORT_MAX_RETRIES.key, "1")) { + // 15002, 15003 occupied but only retried on 15003 and reach the maximum number of retries + withPortOccupied(startPort, startPort + 1) { + val portConflicts = intercept[Throwable] { + SparkConnectService.start(sc) + } + portConflicts.printStackTrace() + assert(Utils.isBindCollision(portConflicts)) + } + } + + // 4. The value of port will be validated before the service starts + Seq( + (CONNECT_GRPC_BINDING_PORT.key, (1024 - 1).toString), + (CONNECT_GRPC_BINDING_PORT.key, (65535 + 1).toString)).foreach(conf => { + withSparkEnvConfs(conf) { + val invalidPort = intercept[IllegalArgumentException] { + SparkConnectService.start(sc) + } + assert( + invalidPort.getMessage.contains( + "requirement failed: startPort should be between 1024 and 65535 (inclusive)," + + " or 0 for a random free port.")) + } + }) + } + + test("The SparkConnectService will post events for each pair of start and stop") { + // Future validations when listener receive the `SparkListenerConnectServiceStarted` event + val startedEventValidations: CopyOnWriteArrayList[(String, Boolean)] = + new CopyOnWriteArrayList[(String, Boolean)]() + val startedEventSignal = new Semaphore(0) + SparkConnectServiceLifeCycleListener.checksOnServiceStartedEvent = Some( + Seq( + _ => { + startedEventSignal.release() + startedEventValidations.add( + ("The listener should receive the `SparkListenerConnectServiceStarted` event.", true)) + }, + _ => { + startedEventValidations.add( + ( + "The server should has already been started" + + " if the listener receives the `SparkListenerConnectServiceStarted` event.", + SparkConnectService.started && + !SparkConnectService.stopped && + SparkConnectService.server != null)) + }, + serviceStarted => { + startedEventValidations.add( + ( + "The SparkConnectService should post it's address " + + "by the `SparkListenerConnectServiceStarted` event", + serviceStarted.bindingPort == SparkConnectService.server.getPort && + serviceStarted.hostAddress == SparkConnectService.hostAddress)) + })) + + // Future validations when listener receive the `SparkListenerConnectServiceEnd` event + val endEventValidations: CopyOnWriteArrayList[(String, Boolean)] = + new CopyOnWriteArrayList[(String, Boolean)]() + val endEventSignal = new Semaphore(0) + SparkConnectServiceLifeCycleListener.checksOnServiceEndEvent = Some( + Seq( + _ => { + endEventSignal.release() + startedEventValidations.add( + ("The listener should receive the `SparkListenerConnectServiceEnd` event.", true)) + }, + _ => { + endEventValidations.add( + ( + "The server has already been stopped" + + " if the listener receives the `SparkListenerConnectServiceEnd` event.", + SparkConnectService.stopped && + !SparkConnectService.started && + SparkConnectService.server.isShutdown)) + }, + serviceEnd => { + endEventValidations.add( + ( + "The SparkConnectService should post it's address " + + "by the `SparkListenerConnectServiceEnd` event", + serviceEnd.bindingPort == SparkConnectService.server.getPort && + serviceEnd.hostAddress == SparkConnectService.hostAddress)) + })) + + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + sc = new SparkContext(conf) + + val listenerInstance = new SparkConnectServiceLifeCycleListener() + sc.addSparkListener(listenerInstance) + + // Start the 
+ // to receive the `SparkListenerConnectServiceStarted` event.
+ SparkConnectService.start(sc)
+ startedEventSignal.acquire()
+ // Now the listener should have already received the `SparkListenerConnectServiceStarted` event.
+
+ // The internal server of SparkConnectService should have
+ // already been created and started at this point.
+ assert(SparkConnectService.started && SparkConnectService.server != null)
+
+ // The event `SparkListenerConnectServiceStarted` should be posted
+ // during the startup of the SparkConnectService.
+ assert(listenerInstance.serviceStartedEvents.size() == 1)
+ // The server should have already been started when the listener receives the event,
+ // and the server address should be the same as the address of the service
+ startedEventValidations.forEach { case (msg, validated) =>
+ assert(validated, msg)
+ }
+ // In the meantime, no end event should be posted
+ assert(listenerInstance.serviceEndEvents.size() == 0)
+
+ // The listener is able to get the SparkConf from the event
+ val event = listenerInstance.serviceStartedEvents.get(0)
+ assert(event.sparkConf != null)
+ val sparkConf = event.sparkConf
+ assert(sparkConf.contains("spark.driver.host"))
+ assert(sparkConf.contains("spark.app.id"))
+
+ // Try to start an already started SparkConnectService
+ SparkConnectService.start(sc)
+ // The listener should still receive only one started event,
+ // because the server has not been stopped yet and the service start is not duplicated
+ assert(listenerInstance.serviceStartedEvents.size() == 1)
+
+ // Stop the SparkConnectService
+ SparkConnectService.stop()
+ assert(SparkConnectService.stopped)
+ // The listener should receive the `SparkListenerConnectServiceEnd` event
+ endEventSignal.acquire()
+
+ // The event `SparkListenerConnectServiceEnd` should be posted and received by the listener
+ assert(listenerInstance.serviceEndEvents.size() == 1)
+ // The server should have already been stopped when the listener receives the event,
+ // and the server address should be the same as the address of the service
+ endEventValidations.forEach { case (msg, validated) =>
+ assert(validated, msg)
+ }
+
+ // Try to stop an already stopped SparkConnectService
+ SparkConnectService.stop()
+ // The listener should still receive only one end event,
+ // i.e. no duplicate `SparkListenerConnectServiceEnd` event is posted
+ assert(listenerInstance.serviceEndEvents.size() == 1)
+ }
+
+ test("SparkConnectPlugin will post started and end events that can be received by listeners") {
+ // Future validations for when the listener receives the `SparkListenerConnectServiceStarted` event
+ val startedEventSignal = new Semaphore(0)
+ SparkConnectServiceLifeCycleListener.checksOnServiceStartedEvent = Some(Seq(_ => {
+ startedEventSignal.release()
+ }))
+
+ // Future validations for when the listener receives the `SparkListenerConnectServiceEnd` event
+ val endEventSignal = new Semaphore(0)
+ SparkConnectServiceLifeCycleListener.checksOnServiceEndEvent = Some(Seq(_ => {
+ endEventSignal.release()
+ }))
+
+ val conf = new SparkConf()
+ .setAppName(getClass().getName())
+ // Start the SparkConnectService from the SparkConnectPlugin
+ .set(PLUGINS, Seq(classOf[SparkConnectPlugin].getName()))
+ // In this case, the listener needs to be registered via the configuration;
+ // otherwise, the listener will not be able to receive the events posted during
+ // the initialization of the SparkConnectPlugin
+ .set(EXTRA_LISTENERS, Seq(classOf[SparkConnectServiceLifeCycleListener].getName()))
+ .set(SparkLauncher.SPARK_MASTER, "local[1]")
+
+ // Create the SparkContext, initialize the SparkConnectPlugin and start the SparkConnectService
+ sc = new SparkContext(conf)
+
+ val listenerInstance = SparkConnectServiceLifeCycleListener.currentInstance
+ assert(listenerInstance != null)
+ // The internal server of SparkConnectService should have
+ // already been created and started during the initialization of the SparkConnectPlugin.
+ assert(SparkConnectService.started && SparkConnectService.server != null)
+ // The event `SparkListenerConnectServiceStarted` should be posted and received by the listener
+ startedEventSignal.acquire()
+ // Only one `SparkListenerConnectServiceStarted` event should be received by the listener
+ assert(listenerInstance.serviceStartedEvents.size() == 1)
+
+ // Stop the SparkContext; the SparkConnectService will be stopped during the shutdown of the
+ // SparkConnectPlugin and the event will be posted to the listener via the active ListenerBus.
+ // This requires that the ListenerBus still accepts events while the SparkPlugins have not been shut down.
+ sc.stop()
+ assert(SparkConnectService.stopped)
+ // The listener should receive the `SparkListenerConnectServiceEnd` event
+ endEventSignal.acquire()
+ }
+
+ def withPortOccupied(startPort: Int, endPort: Int)(f: => Unit): Unit = {
+ val startedServers = new mutable.ArrayBuffer[ServerSocket]()
+ try {
+ for (toBeOccupiedPort <- startPort to endPort) {
+ val server = new ServerSocket(toBeOccupiedPort)
+ startedServers += server
+ }
+ f
+ } finally {
+ startedServers.foreach(server => {
+ try {
+ server.close()
+ } catch {
+ case _: Throwable =>
+ }
+ })
+ }
+ }
+}
+
+private class SparkConnectServiceLifeCycleListener extends SparkListener {
+
+ SparkConnectServiceLifeCycleListener.currentInstance = this
+
+ val serviceStartedEvents: CopyOnWriteArrayList[SparkListenerConnectServiceStarted] =
+ new CopyOnWriteArrayList[SparkListenerConnectServiceStarted]()
+ val serviceEndEvents: CopyOnWriteArrayList[SparkListenerConnectServiceEnd] =
+ new CopyOnWriteArrayList[SparkListenerConnectServiceEnd]()
+
+ override def onOtherEvent(event: SparkListenerEvent): Unit = {
+ event match {
+ case serviceStarted: SparkListenerConnectServiceStarted =>
+ serviceStartedEvents.add(serviceStarted)
+ SparkConnectServiceLifeCycleListener.checksOnServiceStartedEvent.foreach { checks =>
+ checks.foreach(_(serviceStarted))
+ }
+ case serviceEnd: SparkListenerConnectServiceEnd =>
+ serviceEndEvents.add(serviceEnd)
+ SparkConnectServiceLifeCycleListener.checksOnServiceEndEvent.foreach { checks =>
+ checks.foreach(_(serviceEnd))
+ }
+ }
+ }
+}
+
+private object SparkConnectServiceLifeCycleListener {
+
+ var currentInstance: SparkConnectServiceLifeCycleListener = _
+ var checksOnServiceStartedEvent: Option[Seq[(SparkListenerConnectServiceStarted) => Unit]] =
+ None
+ var checksOnServiceEndEvent: Option[Seq[(SparkListenerConnectServiceEnd) => Unit]] = None
+
+ def reset(): Unit = {
+ currentInstance = null
+ checksOnServiceStartedEvent = None
+ checksOnServiceEndEvent = None
+ }
+}
diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCacheSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCacheSuite.scala
index ed3da2c0f7156..512a0a80c4a91 100644
--- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCacheSuite.scala
+++ 
b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectStreamingQueryCacheSuite.scala @@ -67,7 +67,7 @@ class SparkConnectStreamingQueryCacheSuite extends SparkFunSuite with MockitoSug // Register the query. - sessionMgr.registerNewStreamingQuery(sessionHolder, mockQuery) + sessionMgr.registerNewStreamingQuery(sessionHolder, mockQuery, Set.empty[String], "") sessionMgr.getCachedValue(queryId, runId) match { case Some(v) => @@ -78,9 +78,14 @@ class SparkConnectStreamingQueryCacheSuite extends SparkFunSuite with MockitoSug } // Verify query is returned only with the correct session, not with a different session. - assert(sessionMgr.getCachedQuery(queryId, runId, mock[SparkSession]).isEmpty) + assert( + sessionMgr.getCachedQuery(queryId, runId, Set.empty[String], mock[SparkSession]).isEmpty) // Query is returned when correct session is used - assert(sessionMgr.getCachedQuery(queryId, runId, mockSession).contains(mockQuery)) + assert( + sessionMgr + .getCachedQuery(queryId, runId, Set.empty[String], mockSession) + .map(_.query) + .contains(mockQuery)) // Cleanup the query and verify if stop() method has been called. when(mockQuery.isActive).thenReturn(false) @@ -99,7 +104,11 @@ class SparkConnectStreamingQueryCacheSuite extends SparkFunSuite with MockitoSug clock.advance(30.seconds.toMillis) // Access the query. This should advance expiry time by 30 seconds. - assert(sessionMgr.getCachedQuery(queryId, runId, mockSession).contains(mockQuery)) + assert( + sessionMgr + .getCachedQuery(queryId, runId, Set.empty[String], mockSession) + .map(_.query) + .contains(mockQuery)) val expiresAtMs = sessionMgr.getCachedValue(queryId, runId).get.expiresAtMs.get assert(expiresAtMs == prevExpiryTimeMs + 30.seconds.toMillis) @@ -112,7 +121,7 @@ class SparkConnectStreamingQueryCacheSuite extends SparkFunSuite with MockitoSug when(restartedQuery.isActive).thenReturn(true) when(mockStreamingQueryManager.get(queryId)).thenReturn(restartedQuery) - sessionMgr.registerNewStreamingQuery(sessionHolder, restartedQuery) + sessionMgr.registerNewStreamingQuery(sessionHolder, restartedQuery, Set.empty[String], "") // Both queries should existing in the cache. 
assert(sessionMgr.getCachedValue(queryId, runId).map(_.query).contains(mockQuery)) diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ui/SparkConnectServerListenerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ui/SparkConnectServerListenerSuite.scala index 3b75c37b2aa00..c9c110dd1e626 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ui/SparkConnectServerListenerSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/ui/SparkConnectServerListenerSuite.scala @@ -37,7 +37,7 @@ class SparkConnectServerListenerSuite private var kvstore: ElementTrackingStore = _ - private val jobTag = ExecuteJobTag("sessionId", "userId", "operationId") + private val jobTag = ExecuteJobTag("userId", "sessionId", "operationId") after { if (kvstore != null) { @@ -174,7 +174,7 @@ class SparkConnectServerListenerSuite SparkListenerJobStart(0, System.currentTimeMillis(), Nil, createProperties)) listener.onOtherEvent( SparkListenerConnectSessionClosed("sessionId", "userId", System.currentTimeMillis())) - val exec = statusStore.getExecution(ExecuteJobTag("sessionId", "userId", "operationId")) + val exec = statusStore.getExecution(ExecuteJobTag("userId", "sessionId", "operationId")) assert(exec.isDefined) assert(exec.get.jobId === Seq("0")) assert(exec.get.sqlExecId === Set("0")) @@ -190,7 +190,7 @@ class SparkConnectServerListenerSuite listener.onOtherEvent(SparkListenerConnectSessionClosed(unknownSession, "userId", 0)) listener.onOtherEvent( SparkListenerConnectOperationStarted( - ExecuteJobTag("sessionId", "userId", "operationId"), + ExecuteJobTag("userId", "sessionId", "operationId"), "operationId", System.currentTimeMillis(), unknownSession, diff --git a/connector/docker-integration-tests/README.md b/connector/docker-integration-tests/README.md index 0192947bdbf90..03d3fe706a606 100644 --- a/connector/docker-integration-tests/README.md +++ b/connector/docker-integration-tests/README.md @@ -45,7 +45,7 @@ the container bootstrapping. To run an individual Docker integration test, use t Besides the default Docker images, the integration tests can be run with custom Docker images. 
For example, - ORACLE_DOCKER_IMAGE_NAME=gvenzl/oracle-free:23.3-slim-faststart ./build/sbt -Pdocker-integration-tests "docker-integration-tests/testOnly *OracleIntegrationSuite" + ORACLE_DOCKER_IMAGE_NAME=gvenzl/oracle-free:23.4-slim-faststart ./build/sbt -Pdocker-integration-tests "docker-integration-tests/testOnly *OracleIntegrationSuite" The following environment variables can be used to specify the custom Docker images for different databases: diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index bb7647c72491a..9003c2190be22 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -39,7 +39,7 @@ com.google.guava guava - 33.0.0-jre + 33.1.0-jre test diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala index cedb33d491fbc..72b2ac8074f4a 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala @@ -21,11 +21,11 @@ import java.math.BigDecimal import java.sql.{Connection, Date, Timestamp} import java.util.Properties -import org.scalatest.time.SpanSugar._ - import org.apache.spark.sql.{Row, SaveMode} +import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ -import org.apache.spark.sql.types.{BooleanType, ByteType, ShortType, StructType} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{ByteType, ShortType, StructType} import org.apache.spark.tags.DockerTest /** @@ -40,8 +40,6 @@ import org.apache.spark.tags.DockerTest class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DB2DatabaseOnDocker - override val connectionTimeout = timeout(3.minutes) - override def dataPreparation(conn: Connection): Unit = { conn.prepareStatement("CREATE TABLE tbl (x INTEGER, y VARCHAR(8))").executeUpdate() conn.prepareStatement("INSERT INTO tbl VALUES (42,'fred')").executeUpdate() @@ -64,6 +62,20 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { .executeUpdate() conn.prepareStatement("INSERT INTO strings VALUES ('the', 'quick', 'brown', BLOB('fox')," + "'Kathy')").executeUpdate() + + conn.prepareStatement("CREATE TABLE booleans (a BOOLEAN)").executeUpdate() + conn.prepareStatement("INSERT INTO booleans VALUES (true)").executeUpdate() + // VARGRAPHIC + conn.prepareStatement("CREATE TABLE graphics (a GRAPHIC(16), b VARGRAPHIC(16))") + .executeUpdate() + conn.prepareStatement("INSERT INTO graphics VALUES ('a', 'b')").executeUpdate() + // CHAR(n) FOR BIT DATA + conn.prepareStatement("CREATE TABLE binarys (" + + "a CHAR(10) FOR BIT DATA, b VARCHAR(10) FOR BIT DATA, c BINARY(10), d VARBINARY(10))") + .executeUpdate() + conn.prepareStatement("INSERT INTO binarys VALUES (" + + "'ABC', 'ABC', BINARY('ABC', 10), VARBINARY('ABC', 10))") + .executeUpdate() } test("Basic test") { @@ -77,32 +89,44 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { } test("Numeric types") { - val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) - val rows = df.collect() - assert(rows.length == 1) - val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types.length == 10) - assert(types(0).equals("class java.lang.Integer")) - 
assert(types(1).equals("class java.lang.Integer")) - assert(types(2).equals("class java.lang.Long")) - assert(types(3).equals("class java.math.BigDecimal")) - assert(types(4).equals("class java.lang.Double")) - assert(types(5).equals("class java.lang.Double")) - assert(types(6).equals("class java.lang.Float")) - assert(types(7).equals("class java.math.BigDecimal")) - assert(types(8).equals("class java.math.BigDecimal")) - assert(types(9).equals("class java.math.BigDecimal")) - assert(rows(0).getInt(0) == 17) - assert(rows(0).getInt(1) == 77777) - assert(rows(0).getLong(2) == 922337203685477580L) - val bd = new BigDecimal("123456745.56789012345000000000") - assert(rows(0).getAs[BigDecimal](3).equals(bd)) - assert(rows(0).getDouble(4) == 42.75) - assert(rows(0).getDouble(5) == 5.4E-70) - assert(rows(0).getFloat(6) == 3.4028234663852886e+38) - assert(rows(0).getDecimal(7) == new BigDecimal("4.299900000000000000")) - assert(rows(0).getDecimal(8) == new BigDecimal("99999999999999990000.000000000000000000")) - assert(rows(0).getDecimal(9) == new BigDecimal("1234567891234567.123456789123456789")) + Seq(true, false).foreach { legacy => + withSQLConf(SQLConf.LEGACY_DB2_TIMESTAMP_MAPPING_ENABLED.key -> legacy.toString) { + val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties) + val rows = df.collect() + assert(rows.length == 1) + val types = rows(0).toSeq.map(x => x.getClass.toString) + assert(types.length == 10) + if (legacy) { + assert(types(0).equals("class java.lang.Integer")) + } else { + assert(types(0).equals("class java.lang.Short")) + } + assert(types(1).equals("class java.lang.Integer")) + assert(types(2).equals("class java.lang.Long")) + assert(types(3).equals("class java.math.BigDecimal")) + assert(types(4).equals("class java.lang.Double")) + assert(types(5).equals("class java.lang.Double")) + assert(types(6).equals("class java.lang.Float")) + assert(types(7).equals("class java.math.BigDecimal")) + assert(types(8).equals("class java.math.BigDecimal")) + assert(types(9).equals("class java.math.BigDecimal")) + if (legacy) { + assert(rows(0).getInt(0) == 17) + } else { + assert(rows(0).getShort(0) == 17) + } + assert(rows(0).getInt(1) == 77777) + assert(rows(0).getLong(2) == 922337203685477580L) + val bd = new BigDecimal("123456745.56789012345000000000") + assert(rows(0).getAs[BigDecimal](3).equals(bd)) + assert(rows(0).getDouble(4) == 42.75) + assert(rows(0).getDouble(5) == 5.4E-70) + assert(rows(0).getFloat(6) == 3.4028234663852886e+38) + assert(rows(0).getDecimal(7) == new BigDecimal("4.299900000000000000")) + assert(rows(0).getDecimal(8) == new BigDecimal("99999999999999990000.000000000000000000")) + assert(rows(0).getDecimal(9) == new BigDecimal("1234567891234567.123456789123456789")) + } + } } test("Date types") { @@ -150,13 +174,12 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { df3.write.jdbc(jdbcUrl, "stringscopy", new Properties) // spark types that does not have exact matching db2 table types. 
val df4 = sqlContext.createDataFrame( - sparkContext.parallelize(Seq(Row("1".toShort, "20".toByte, true))), - new StructType().add("c1", ShortType).add("b", ByteType).add("c3", BooleanType)) + sparkContext.parallelize(Seq(Row("1".toShort, "20".toByte))), + new StructType().add("c1", ShortType).add("b", ByteType)) df4.write.jdbc(jdbcUrl, "otherscopy", new Properties) val rows = sqlContext.read.jdbc(jdbcUrl, "otherscopy", new Properties).collect() - assert(rows(0).getInt(0) == 1) - assert(rows(0).getInt(1) == 20) - assert(rows(0).getString(2) == "1") + assert(rows(0).getShort(0) == 1) + assert(rows(0).getShort(1) == 20) } test("query JDBC option") { @@ -224,4 +247,37 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite { assert(actual === expected) } + + test("SPARK-48269: boolean type") { + val df = sqlContext.read.jdbc(jdbcUrl, "booleans", new Properties) + checkAnswer(df, Row(true)) + Seq(true, false).foreach { legacy => + withSQLConf(SQLConf.LEGACY_DB2_BOOLEAN_MAPPING_ENABLED.key -> legacy.toString) { + val tbl = "booleanscopy" + legacy + df.write.jdbc(jdbcUrl, tbl, new Properties) + if (legacy) { + checkAnswer(sqlContext.read.jdbc(jdbcUrl, tbl, new Properties), Row("1")) + } else { + checkAnswer(sqlContext.read.jdbc(jdbcUrl, tbl, new Properties), Row(true)) + } + } + } + } + + test("SPARK-48269: GRAPHIC types") { + val df = sqlContext.read.jdbc(jdbcUrl, "graphics", new Properties) + checkAnswer(df, Row("a".padTo(16, ' '), "b")) + // the padding happens in the source not because of reading as char type + assert(!df.schema.exists { + _.metadata.contains(CharVarcharUtils.CHAR_VARCHAR_TYPE_STRING_METADATA_KEY) }) + } + + test("SPARK-48269: binary types") { + val df = sqlContext.read.jdbc(jdbcUrl, "binarys", new Properties) + checkAnswer(df, Row( + "ABC".padTo(10, ' ').getBytes, + "ABC".getBytes, + "ABC".getBytes ++ Array.fill(7)(0), + "ABC".getBytes)) + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala index abb683c064955..4899de2b2a14c 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2KrbIntegrationSuite.scala @@ -24,7 +24,6 @@ import javax.security.auth.login.Configuration import com.github.dockerjava.api.model.{AccessMode, Bind, ContainerConfig, HostConfig, Volume} import org.apache.hadoop.security.{SecurityUtil, UserGroupInformation} import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod.KERBEROS -import org.scalatest.time.SpanSugar._ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.execution.datasources.jdbc.connection.{DB2ConnectionProvider, SecureConnectionProvider} @@ -68,8 +67,6 @@ class DB2KrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { } } - override val connectionTimeout = timeout(3.minutes) - override protected def setAuthentication(keytabFile: String, principal: String): Unit = { val config = new SecureConnectionProvider.JDBCConfiguration( Configuration.getConfiguration, "JaasClient", keytabFile, principal, true) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala index 
fc095c5f5b310..8d17e0b4e36e6 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DockerJDBCIntegrationSuite.scala @@ -36,7 +36,7 @@ import com.github.dockerjava.zerodep.ZerodepDockerHttpClient import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.SpanSugar._ -import org.apache.spark.internal.LogKey.{CLASS_NAME, CONTAINER, STATUS} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, CONTAINER, STATUS} import org.apache.spark.internal.MDC import org.apache.spark.sql.QueryTest import org.apache.spark.sql.test.SharedSparkSession @@ -115,7 +115,7 @@ abstract class DockerJDBCIntegrationSuite protected val startContainerTimeout: Long = timeStringAsSeconds(sys.props.getOrElse("spark.test.docker.startContainerTimeout", "5min")) protected val connectionTimeout: PatienceConfiguration.Timeout = { - val timeoutStr = sys.props.getOrElse("spark.test.docker.conn", "5min") + val timeoutStr = sys.props.getOrElse("spark.test.docker.connectionTimeout", "5min") timeout(timeStringAsSeconds(timeoutStr).seconds) } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala index 6825c001f7670..efb2fa09f6a3f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MariaDBKrbIntegrationSuite.scala @@ -25,9 +25,9 @@ import org.apache.spark.sql.execution.datasources.jdbc.connection.SecureConnecti import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version (e.g., mariadb:10.5.12): + * To run this test suite for a specific version (e.g., mariadb:10.5.25): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.12 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 MARIADB_DOCKER_IMAGE_NAME=mariadb:10.5.25 * ./build/sbt -Pdocker-integration-tests * "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.MariaDBKrbIntegrationSuite" * }}} @@ -38,7 +38,7 @@ class MariaDBKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val keytabFileName = "mariadb.keytab" override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("MARIADB_DOCKER_IMAGE_NAME", "mariadb:10.5.12") + override val imageName = sys.env.getOrElse("MARIADB_DOCKER_IMAGE_NAME", "mariadb:10.5.25") override val env = Map( "MYSQL_ROOT_PASSWORD" -> "rootpass" ) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSQLServerDatabaseOnDocker.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSQLServerDatabaseOnDocker.scala new file mode 100644 index 0000000000000..61530f713eb86 --- /dev/null +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSQLServerDatabaseOnDocker.scala @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.jdbc + +class MsSQLServerDatabaseOnDocker extends DatabaseOnDocker { + override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", + "mcr.microsoft.com/mssql/server:2022-CU12-GDR1-ubuntu-22.04") + override val env = Map( + "SA_PASSWORD" -> "Sapass123", + "ACCEPT_EULA" -> "Y" + ) + override val usesIpc = false + override val jdbcPort: Int = 1433 + + override def getJdbcUrl(ip: String, port: Int): String = + s"jdbc:sqlserver://$ip:$port;user=sa;password=Sapass123;" + + "encrypt=true;trustServerCertificate=true" +} diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 8bceb9506e850..623f404339e9e 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -19,12 +19,15 @@ package org.apache.spark.sql.jdbc import java.math.BigDecimal import java.sql.{Connection, Date, Timestamp} +import java.time.LocalDateTime import java.util.Properties +import org.apache.spark.SparkSQLException import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{BinaryType, DecimalType} import org.apache.spark.tags.DockerTest /** @@ -38,19 +41,7 @@ import org.apache.spark.tags.DockerTest */ @DockerTest class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", - "mcr.microsoft.com/mssql/server:2019-CU13-ubuntu-20.04") - override val env = Map( - "SA_PASSWORD" -> "Sapass123", - "ACCEPT_EULA" -> "Y" - ) - override val usesIpc = false - override val jdbcPort: Int = 1433 - - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:sqlserver://$ip:$port;user=sa;password=Sapass123;" - } + override val db = new MsSQLServerDatabaseOnDocker override def dataPreparation(conn: Connection): Unit = { conn.prepareStatement("CREATE TABLE tbl (x INT, y VARCHAR (50))").executeUpdate() @@ -150,6 +141,11 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { """ |INSERT INTO bits VALUES (1, 2, 1) """.stripMargin).executeUpdate() + conn.prepareStatement( + """CREATE TABLE test_rowversion (myKey int PRIMARY KEY,myValue int, RV rowversion)""") + .executeUpdate() + conn.prepareStatement("""INSERT INTO test_rowversion (myKey, myValue) VALUES (1, 0)""") + .executeUpdate() } test("Basic test") { @@ -227,24 +223,43 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { test("Date types") { withDefaultTimeZone(UTC) { - val 
df = spark.read.jdbc(jdbcUrl, "dates", new Properties) - val rows = df.collect() - assert(rows.length == 1) - val row = rows(0) - val types = row.toSeq.map(x => x.getClass.toString) - assert(types.length == 6) - assert(types(0).equals("class java.sql.Date")) - assert(types(1).equals("class java.sql.Timestamp")) - assert(types(2).equals("class java.sql.Timestamp")) - assert(types(3).equals("class java.lang.String")) - assert(types(4).equals("class java.sql.Timestamp")) - assert(types(5).equals("class java.sql.Timestamp")) - assert(row.getAs[Date](0).equals(Date.valueOf("1991-11-09"))) - assert(row.getAs[Timestamp](1).equals(Timestamp.valueOf("1999-01-01 13:23:35.0"))) - assert(row.getAs[Timestamp](2).equals(Timestamp.valueOf("9999-12-31 23:59:59.0"))) - assert(row.getString(3).equals("1901-05-09 23:59:59.0000000 +14:00")) - assert(row.getAs[Timestamp](4).equals(Timestamp.valueOf("1996-01-01 23:24:00.0"))) - assert(row.getAs[Timestamp](5).equals(Timestamp.valueOf("1970-01-01 13:31:24.0"))) + Seq(true, false).foreach { ntz => + Seq(true, false).foreach { legacy => + withSQLConf( + SQLConf.LEGACY_MSSQLSERVER_DATETIMEOFFSET_MAPPING_ENABLED.key -> legacy.toString) { + val df = spark.read + .option("preferTimestampNTZ", ntz) + .jdbc(jdbcUrl, "dates", new Properties) + checkAnswer(df, Row( + Date.valueOf("1991-11-09"), + if (ntz) { + LocalDateTime.of(1999, 1, 1, 13, 23, 35) + } else { + Timestamp.valueOf("1999-01-01 13:23:35") + }, + if (ntz) { + LocalDateTime.of(9999, 12, 31, 23, 59, 59) + } else { + Timestamp.valueOf("9999-12-31 23:59:59") + }, + if (legacy) { + "1901-05-09 23:59:59.0000000 +14:00" + } else { + Timestamp.valueOf("1901-05-09 09:59:59") + }, + if (ntz) { + LocalDateTime.of(1996, 1, 1, 23, 24, 0) + } else { + Timestamp.valueOf("1996-01-01 23:24:00") + }, + if (ntz) { + LocalDateTime.of(1970, 1, 1, 13, 31, 24) + } else { + Timestamp.valueOf("1970-01-01 13:31:24") + })) + } + } + } } } @@ -287,93 +302,96 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { } test("SPARK-33813: MsSqlServerDialect should support spatial types") { - val df = spark.read.jdbc(jdbcUrl, "spatials", new Properties) - val rows = df.collect() - assert(rows.length == 1) - val row = rows(0) - val types = row.toSeq.map(x => x.getClass.toString) - assert(types.length == 10) - assert(types(0) == "class [B") - assert(row.getAs[Array[Byte]](0) === - Array(0, 0, 0, 0, 1, 15, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, - 16, 64, 0, 0, 0, 0, 0, 0, 28, 64, 0, 0, 0, 0, 0, 0, 4, 64)) - assert(types(1) == "class [B") - assert(row.getAs[Array[Byte]](1) === - Array[Byte](0, 0, 0, 0, 1, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -16, 63, 0, 0, 0, 0, 0, 0, -16, -65, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, - 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 2)) - assert(types(2) == "class [B") - assert(row.getAs[Array[Byte]](2) === - Array[Byte](0, 0, 0, 0, 2, 4, 5, 0, 0, 0, -12, -3, -44, 120, -23, -106, - 94, -64, -35, 36, 6, -127, -107, -45, 71, 64, -125, -64, -54, -95, 69, - -106, 94, -64, 80, -115, -105, 110, 18, -45, 71, 64, -125, -64, -54, - -95, 69, -106, 94, -64, 78, 98, 16, 88, 57, -44, 71, 64, -12, -3, -44, - 120, -23, -106, 94, -64, 78, 98, 16, 88, 57, -44, 71, 64, -12, -3, -44, - 120, -23, -106, 94, -64, -35, 36, 6, -127, -107, -45, 71, 64, 1, 0, 0, - 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 8)) - assert(types(3) == "class [B") - assert(row.getAs[Array[Byte]](3) === - Array[Byte](-26, 16, 0, 0, 2, 4, 5, 0, 0, 0, 
-35, 36, 6, -127, -107, -45, - 71, 64, -12, -3, -44, 120, -23, -106, 94, -64, 80, -115, -105, 110, 18, - -45, 71, 64, -125, -64, -54, -95, 69, -106, 94, -64, 78, 98, 16, 88, 57, - -44, 71, 64, -125, -64, -54, -95, 69, -106, 94, -64, 78, 98, 16, 88, 57, - -44, 71, 64, -12, -3, -44, 120, -23, -106, 94, -64, -35, 36, 6, -127, -107, - -45, 71, 64, -12, -3, -44, 120, -23, -106, 94, -64, 1, 0, 0, 0, 3, 0, 0, - 0, 0, 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 9, 2, 0, 0, 0, 3, 1)) - assert(types(5) == "class [B") - assert(row.getAs[Array[Byte]](4) === - Array[Byte](0, 0, 0, 0, 1, 4, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, -64, 0, 0, - 0, 0, 0, 0, 52, -64, 0, 0, 0, 0, 0, 0, 52, -64, 0, 0, 0, 0, 0, 0, 52, 64, - 0, 0, 0, 0, 0, 0, 52, 64, 0, 0, 0, 0, 0, 0, 52, 64, 0, 0, 0, 0, 0, 0, 52, - 64, 0, 0, 0, 0, 0, 0, 52, -64, 0, 0, 0, 0, 0, 0, 52, -64, 0, 0, 0, 0, 0, - 0, 52, -64, 0, 0, 0, 0, 0, 0, 36, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 36, -64, 0, 0, 0, 0, 0, 0, 36, 64, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, - 0, 2, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 3)) - assert(types(6) === "class [B") - assert(row.getAs[Array[Byte]](5) === - Array[Byte](-26, 16, 0, 0, 2, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, -128, 71, 64, 51, - 51, 51, 51, 51, -109, 94, -64, 0, 0, 0, 0, 0, -128, 71, 64, 51, 51, 51, 51, - 51, -109, 94, 64, 0, 0, 0, 0, 0, -128, 72, 64, -51, -52, -52, -52, -52, 108, - 95, 64, 0, 0, 0, 0, 0, 0, 67, 64, 0, 0, 0, 0, 0, 64, 94, 64, 0, 0, 0, 0, 0, - -128, 71, 64, 51, 51, 51, 51, 51, -109, 94, -64, 1, 0, 0, 0, 1, 0, 0, 0, 0, - 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 10)) - assert(types(6) === "class [B") - assert(row.getAs[Array[Byte]](6) === - Array[Byte](0, 0, 0, 0, 1, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, - 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, 28, 64, 0, 0, 0, 0, 0, 0, 32, 64, 0, 0, 0, 0, - 0, 0, -8, -1, 0, 0, 0, 0, 0, 0, 35, 64, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, - 0, 0, 3, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, - 0, 0, 0, 0, 1, 0, 0, 0, 1)) - assert(types(6) === "class [B") - assert(row.getAs[Array[Byte]](7) === - Array[Byte](0, 0, 0, 0, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, - 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, - 0, 0, 0, 0, -16, 63, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 3, 0, 0, 0, - -1, -1, -1, -1, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 2)) - assert(types(6) === "class [B") - assert(row.getAs[Array[Byte]](8) === - Array[Byte](0, 0, 0, 0, 1, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, - 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, -64, 0, 0, 0, - 0, 0, 0, 0, -64, 0, 0, 0, 0, 0, 0, 0, -64, 0, 0, 0, 0, 0, 0, 0, -64, 0, 0, - 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, - 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 8, 64, 0, - 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, 8, 64, 0, - 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, -16, 63, - 0, 0, 0, 0, 0, 0, -16, 63, 2, 0, 0, 0, 2, 0, 0, 0, 0, 2, 5, 0, 0, 0, 3, 0, - 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 1, 0, 0, 0, 3)) - assert(types(6) === "class [B") - assert(row.getAs[Array[Byte]](9) === - Array[Byte](0, 0, 0, 0, 1, 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, - 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 8, 64, 0, 
0, 0, 0, 0, 0, 20, 64, 0, 0, - 0, 0, 0, 0, -16, -65, 0, 0, 0, 0, 0, 0, -16, -65, 0, 0, 0, 0, 0, 0, -16, -65, - 0, 0, 0, 0, 0, 0, 20, -64, 0, 0, 0, 0, 0, 0, 20, -64, 0, 0, 0, 0, 0, 0, 20, - -64, 0, 0, 0, 0, 0, 0, 20, -64, 0, 0, 0, 0, 0, 0, -16, -65, 0, 0, 0, 0, 0, 0, - -16, -65, 0, 0, 0, 0, 0, 0, -16, -65, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, - 0, 3, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, - 0, 0, 0, 1, 0, 0, 0, 3)) + Seq("true", "false").foreach { legacy => + val df = spark.read.jdbc(jdbcUrl, "spatials", new Properties) + val rows = df.collect() + assert(rows.length == 1) + val row = rows(0) + val types = row.toSeq.map(x => x.getClass.toString) + assert(types.length == 10) + assert(types(0) == "class [B") + assert(row.getAs[Array[Byte]](0) === + Array(0, 0, 0, 0, 1, 15, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, + 16, 64, 0, 0, 0, 0, 0, 0, 28, 64, 0, 0, 0, 0, 0, 0, 4, 64)) + assert(types(1) == "class [B") + assert(row.getAs[Array[Byte]](1) === + Array[Byte](0, 0, 0, 0, 1, 4, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -16, 63, 0, 0, 0, 0, 0, 0, -16, -65, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, + 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 2)) + assert(types(2) == "class [B") + assert(row.getAs[Array[Byte]](2) === + Array[Byte](0, 0, 0, 0, 2, 4, 5, 0, 0, 0, -12, -3, -44, 120, -23, -106, + 94, -64, -35, 36, 6, -127, -107, -45, 71, 64, -125, -64, -54, -95, 69, + -106, 94, -64, 80, -115, -105, 110, 18, -45, 71, 64, -125, -64, -54, + -95, 69, -106, 94, -64, 78, 98, 16, 88, 57, -44, 71, 64, -12, -3, -44, + 120, -23, -106, 94, -64, 78, 98, 16, 88, 57, -44, 71, 64, -12, -3, -44, + 120, -23, -106, 94, -64, -35, 36, 6, -127, -107, -45, 71, 64, 1, 0, 0, + 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 8)) + assert(types(3) == "class [B") + assert(row.getAs[Array[Byte]](3) === + Array[Byte](-26, 16, 0, 0, 2, 4, 5, 0, 0, 0, -35, 36, 6, -127, -107, -45, + 71, 64, -12, -3, -44, 120, -23, -106, 94, -64, 80, -115, -105, 110, 18, + -45, 71, 64, -125, -64, -54, -95, 69, -106, 94, -64, 78, 98, 16, 88, 57, + -44, 71, 64, -125, -64, -54, -95, 69, -106, 94, -64, 78, 98, 16, 88, 57, + -44, 71, 64, -12, -3, -44, 120, -23, -106, 94, -64, -35, 36, 6, -127, -107, + -45, 71, 64, -12, -3, -44, 120, -23, -106, 94, -64, 1, 0, 0, 0, 3, 0, 0, + 0, 0, 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 9, 2, 0, 0, 0, 3, 1)) + assert(types(5) == "class [B") + assert(row.getAs[Array[Byte]](4) === + Array[Byte](0, 0, 0, 0, 1, 4, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 52, -64, 0, 0, + 0, 0, 0, 0, 52, -64, 0, 0, 0, 0, 0, 0, 52, -64, 0, 0, 0, 0, 0, 0, 52, 64, + 0, 0, 0, 0, 0, 0, 52, 64, 0, 0, 0, 0, 0, 0, 52, 64, 0, 0, 0, 0, 0, 0, 52, + 64, 0, 0, 0, 0, 0, 0, 52, -64, 0, 0, 0, 0, 0, 0, 52, -64, 0, 0, 0, 0, 0, + 0, 52, -64, 0, 0, 0, 0, 0, 0, 36, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 36, -64, 0, 0, 0, 0, 0, 0, 36, 64, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, + 0, 2, 0, 0, 0, 0, 0, 5, 0, 0, 0, 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 3)) + assert(types(6) === "class [B") + assert(row.getAs[Array[Byte]](5) === + Array[Byte](-26, 16, 0, 0, 2, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, -128, 71, 64, 51, + 51, 51, 51, 51, -109, 94, -64, 0, 0, 0, 0, 0, -128, 71, 64, 51, 51, 51, 51, + 51, -109, 94, 64, 0, 0, 0, 0, 0, -128, 72, 64, -51, -52, -52, -52, -52, 108, + 95, 64, 0, 0, 0, 0, 0, 0, 67, 64, 0, 0, 0, 0, 0, 64, 94, 64, 0, 0, 0, 0, 0, + -128, 71, 64, 51, 51, 51, 51, 51, -109, 94, 
-64, 1, 0, 0, 0, 1, 0, 0, 0, 0, + 1, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 10)) + assert(types(6) === "class [B") + assert(row.getAs[Array[Byte]](6) === + Array[Byte](0, 0, 0, 0, 1, 5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, + 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, 28, 64, 0, 0, 0, 0, 0, 0, 32, 64, 0, 0, 0, 0, + 0, 0, -8, -1, 0, 0, 0, 0, 0, 0, 35, 64, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, + 0, 0, 3, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 0, 0, 0, 1, 0, 0, 0, 1)) + assert(types(6) === "class [B") + assert(row.getAs[Array[Byte]](7) === + Array[Byte](0, 0, 0, 0, 1, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, + 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, + 0, 0, 0, 0, -16, 63, 2, 0, 0, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 3, 0, 0, 0, + -1, -1, -1, -1, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 2)) + assert(types(6) === "class [B") + assert(row.getAs[Array[Byte]](8) === + Array[Byte](0, 0, 0, 0, 1, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, + 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, -64, 0, 0, 0, + 0, 0, 0, 0, -64, 0, 0, 0, 0, 0, 0, 0, -64, 0, 0, 0, 0, 0, 0, 0, -64, 0, 0, + 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0, 0, 0, 64, 0, 0, + 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 8, 64, 0, + 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, 8, 64, 0, + 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, -16, 63, + 0, 0, 0, 0, 0, 0, -16, 63, 2, 0, 0, 0, 2, 0, 0, 0, 0, 2, 5, 0, 0, 0, 3, 0, + 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, + 1, 0, 0, 0, 3)) + assert(types(6) === "class [B") + assert(row.getAs[Array[Byte]](9) === + Array[Byte](0, 0, 0, 0, 1, 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, -16, 63, 0, 0, 0, + 0, 0, 0, -16, 63, 0, 0, 0, 0, 0, 0, 8, 64, 0, 0, 0, 0, 0, 0, 20, 64, 0, 0, + 0, 0, 0, 0, -16, -65, 0, 0, 0, 0, 0, 0, -16, -65, 0, 0, 0, 0, 0, 0, -16, -65, + 0, 0, 0, 0, 0, 0, 20, -64, 0, 0, 0, 0, 0, 0, 20, -64, 0, 0, 0, 0, 0, 0, 20, + -64, 0, 0, 0, 0, 0, 0, 20, -64, 0, 0, 0, 0, 0, 0, -16, -65, 0, 0, 0, 0, 0, 0, + -16, -65, 0, 0, 0, 0, 0, 0, -16, -65, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 0, + 0, 3, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, + 0, 0, 0, 1, 0, 0, 0, 3)) + } } test("SPARK-38889: MsSqlServerDialect should handle boolean filter push down") { @@ -437,4 +455,42 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { .load() assert(df.collect().toSet === expectedResult) } + + test("SPARK-47938: Fix 'Cannot find data type BYTE' in SQL Server") { + spark.sql("select cast(1 as byte) as c0") + .write + .jdbc(jdbcUrl, "test_byte", new Properties) + val df = spark.read.jdbc(jdbcUrl, "test_byte", new Properties) + checkAnswer(df, Row(1.toShort)) + } + + test("SPARK-47945: money types") { + val df = spark.read.format("jdbc") + .option("url", jdbcUrl) + .option("prepareQuery", "DECLARE @mymoney_sm SMALLMONEY = 3148.29, @mymoney MONEY = 3148.29 ") + .option("query", "SELECT @mymoney_sm as smallmoney, @mymoney as money") + .load() + checkAnswer(df, Row(BigDecimal.valueOf(3148.29), BigDecimal.valueOf(3148.29))) + assert(df.schema.fields(0).dataType === DecimalType(10, 4)) + assert(df.schema.fields(1).dataType === DecimalType(19, 4)) + } + + test("SPARK-47945: rowversion") { + val df = spark.read.jdbc(jdbcUrl, "test_rowversion", new Properties) + 
assert(df.schema.fields(2).dataType === BinaryType) + } + + test("SPARK-47945: sql_variant") { + checkError( + exception = intercept[SparkSQLException] { + spark.read.format("jdbc") + .option("url", jdbcUrl) + .option("prepareQuery", + "DECLARE @myvariant1 SQL_VARIANT = 1, @myvariant2 SQL_VARIANT = 'test'") + .option("query", "SELECT @myvariant1 as variant1, @myvariant2 as variant2") + .load() + }, + errorClass = "UNRECOGNIZED_SQL_TYPE", + parameters = Map("typeName" -> "sql_variant", "jdbcType" -> "-156")) + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLDatabaseOnDocker.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLDatabaseOnDocker.scala index 568eb5f109731..570a81ac3947f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLDatabaseOnDocker.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLDatabaseOnDocker.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.jdbc class MySQLDatabaseOnDocker extends DatabaseOnDocker { - override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:8.3.0") + override val imageName = sys.env.getOrElse("MYSQL_DOCKER_IMAGE_NAME", "mysql:8.4.0") override val env = Map( "MYSQL_ROOT_PASSWORD" -> "rootpass" ) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 684cec37c1703..e6cca2ac9cd0a 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -74,9 +74,9 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { .executeUpdate() conn.prepareStatement("CREATE TABLE dates (d DATE, t TIME, dt DATETIME, ts TIMESTAMP, " - + "yr YEAR)").executeUpdate() - conn.prepareStatement("INSERT INTO dates VALUES ('1991-11-09', '13:31:24', " - + "'1996-01-01 01:23:45', '2009-02-13 23:31:30', '2001')").executeUpdate() + + "yr YEAR, t1 TIME(3))").executeUpdate() + conn.prepareStatement("INSERT INTO dates VALUES ('1991-11-09', '13:31:24.123', " + + "'1996-01-01 01:23:45', '2009-02-13 23:31:30', '2001', '13:31:24.123')").executeUpdate() // TODO: Test locale conversion for strings. 
conn.prepareStatement("CREATE TABLE strings (a CHAR(10), b VARCHAR(10), c TINYTEXT, " @@ -185,21 +185,13 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { test("Date types") { withDefaultTimeZone(UTC) { val df = sqlContext.read.jdbc(jdbcUrl, "dates", new Properties) - val rows = df.collect() - assert(rows.length == 1) - val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types.length == 5) - assert(types(0).equals("class java.sql.Date")) - assert(types(1).equals("class java.sql.Timestamp")) - assert(types(2).equals("class java.sql.Timestamp")) - assert(types(3).equals("class java.sql.Timestamp")) - assert(types(4).equals("class java.sql.Date")) - assert(rows(0).getAs[Date](0).equals(Date.valueOf("1991-11-09"))) - assert( - rows(0).getAs[Timestamp](1) === Timestamp.valueOf("1970-01-01 13:31:24")) - assert(rows(0).getAs[Timestamp](2).equals(Timestamp.valueOf("1996-01-01 01:23:45"))) - assert(rows(0).getAs[Timestamp](3).equals(Timestamp.valueOf("2009-02-13 23:31:30"))) - assert(rows(0).getAs[Date](4).equals(Date.valueOf("2001-01-01"))) + checkAnswer(df, Row( + Date.valueOf("1991-11-09"), + Timestamp.valueOf("1970-01-01 13:31:24"), + Timestamp.valueOf("1996-01-01 01:23:45"), + Timestamp.valueOf("2009-02-13 23:31:30"), + Date.valueOf("2001-01-01"), + Timestamp.valueOf("1970-01-01 13:31:24.123"))) } val df = spark.read.format("jdbc") .option("url", jdbcUrl) @@ -218,7 +210,8 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { LocalDateTime.of(1970, 1, 1, 13, 31, 24), LocalDateTime.of(1996, 1, 1, 1, 23, 45), Timestamp.valueOf("2009-02-13 23:31:30"), - Date.valueOf("2001-01-01"))) + Date.valueOf("2001-01-01"), + LocalDateTime.of(1970, 1, 1, 13, 31, 24, 123000000))) } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleDatabaseOnDocker.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleDatabaseOnDocker.scala index bfbcf5b533d73..dd6bbf0af8a33 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleDatabaseOnDocker.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleDatabaseOnDocker.scala @@ -17,16 +17,11 @@ package org.apache.spark.sql.jdbc -import java.io.{File, PrintWriter} - -import com.github.dockerjava.api.model._ - import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils class OracleDatabaseOnDocker extends DatabaseOnDocker with Logging { lazy override val imageName = - sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-free:23.3-slim") + sys.env.getOrElse("ORACLE_DOCKER_IMAGE_NAME", "gvenzl/oracle-free:23.4-slim") val oracle_password = "Th1s1sThe0racle#Pass" override val env = Map( "ORACLE_PWD" -> oracle_password, // oracle images uses this @@ -38,30 +33,4 @@ class OracleDatabaseOnDocker extends DatabaseOnDocker with Logging { override def getJdbcUrl(ip: String, port: Int): String = { s"jdbc:oracle:thin:system/$oracle_password@//$ip:$port/freepdb1" } - - override def beforeContainerStart( - hostConfigBuilder: HostConfig, - containerConfigBuilder: ContainerConfig): Unit = { - try { - val dir = Utils.createTempDir() - val writer = new PrintWriter(new File(dir, "install.sql")) - // SPARK-46592: gvenzl/oracle-free occasionally fails to start with the following error: - // 'ORA-04021: timeout occurred while waiting to lock object', when initializing the - // SYSTEM user. 
This is due to the fact that the default DDL_LOCK_TIMEOUT is 0, which - // means that the lock will no wait. We set the timeout to 30 seconds to try again. - // TODO: This workaround should be removed once the issue is fixed in the image. - // https://github.com/gvenzl/oci-oracle-free/issues/35 - writer.write("ALTER SESSION SET DDL_LOCK_TIMEOUT = 30;\n") - writer.write(s"""ALTER USER SYSTEM IDENTIFIED BY "$oracle_password";""") - writer.close() - val newBind = new Bind( - dir.getAbsolutePath, - new Volume("/docker-entrypoint-initdb.d"), - AccessMode.DEFAULT) - hostConfigBuilder.withBinds(hostConfigBuilder.getBinds :+ newBind: _*) - } catch { - case e: Exception => - logWarning("Failed to create install.sql file", e) - } - } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 496498e5455b4..2b2596289548c 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -22,8 +22,6 @@ import java.sql.{Connection, Date, Timestamp} import java.time.{Duration, Period} import java.util.{Properties, TimeZone} -import org.scalatest.time.SpanSugar._ - import org.apache.spark.sql.{DataFrame, Row, SaveMode} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ @@ -56,11 +54,11 @@ import org.apache.spark.tags.DockerTest * A sequence of commands to build the Oracle Database Free container image: * $ git clone https://github.com/oracle/docker-images.git * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles - * $ ./buildContainerImage.sh -v 23.2.0 -f - * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:23.2.0-free + * $ ./buildContainerImage.sh -v 23.4.0 -f + * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:23.4.0-free * - * This procedure has been validated with Oracle Database Free version 23.2.0, - * and with Oracle Express Edition versions 18.4.0 and 21.3.0 + * This procedure has been validated with Oracle Database Free version 23.4.0, + * and with Oracle Express Edition versions 18.4.0 and 21.4.0 */ @DockerTest class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSparkSession { @@ -68,8 +66,6 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark override val db = new OracleDatabaseOnDocker - override val connectionTimeout = timeout(7.minutes) - private val rsOfTsWithTimezone = Seq( Row(BigDecimal.valueOf(1), new Timestamp(944046000000L)), Row(BigDecimal.valueOf(2), new Timestamp(944078400000L)) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala index 1cd8a77e8442e..12a71dbd7c7f8 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresIntegrationSuite.scala @@ -18,20 +18,23 @@ package org.apache.spark.sql.jdbc import java.math.{BigDecimal => JBigDecimal} -import java.sql.{Connection, Date, Timestamp} +import java.sql.{Connection, Date, SQLException, Timestamp} import java.text.SimpleDateFormat 
import java.time.LocalDateTime import java.util.Properties -import org.apache.spark.sql.{Column, Row} +import org.apache.spark.SparkException +import org.apache.spark.sql.{Column, DataFrame, Row} import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version (e.g., postgres:16.2): + * To run this test suite for a specific version (e.g., postgres:16.3-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.2 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.3-alpine * ./build/sbt -Pdocker-integration-tests * "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.PostgresIntegrationSuite" * }}} @@ -39,7 +42,7 @@ import org.apache.spark.tags.DockerTest @DockerTest class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.2-alpine") + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.3-alpine") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) @@ -71,7 +74,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { + "'((100.3, 40.2), (20.198, 83.1), (500.821, 311.38))', '<500, 200, 100>', '16/B374D848', " + "'ab', 'efg', '2021-02-02', '1 minute', '00:11:22:33:44:55', " + "'00:11:22:33:44:55:66:77', 12.3456, '10:20:10,14,15', 1E+37, " - + "'17:22:31', '2016-08-12 10:22:31.949271', 'cat:AB & dog:CD', " + + "'17:22:31.123', '2016-08-12 10:22:31.949271', 'cat:AB & dog:CD', " + "'dog and cat and fox', '10:20:10,14,15', 'id10')" ).executeUpdate() conn.prepareStatement("INSERT INTO bar VALUES (null, null, null, null, null, " @@ -280,7 +283,7 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getDecimal(33) == new JBigDecimal("12.3456")) assert(rows(0).getString(34) == "10:20:10,14,15") assert(rows(0).getFloat(35) == 1E+37F) - assert(rows(0).getTimestamp(36) == Timestamp.valueOf("1970-01-01 17:22:31.0")) + assert(rows(0).getTimestamp(36) == Timestamp.valueOf("1970-01-01 17:22:31.123")) assert(rows(0).getTimestamp(37) == Timestamp.valueOf("2016-08-12 10:22:31.949271")) assert(rows(0).getString(38) == "'cat':AB & 'dog':CD") assert(rows(0).getString(39) == "'and' 'cat' 'dog' 'fox'") @@ -314,11 +317,13 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { test("SPARK-47390: Convert TIMESTAMP/TIME WITH TIME ZONE regardless of preferTimestampNTZ") { Seq(true, false).foreach { prefer => - val rows = sqlContext.read + val df = sqlContext.read .option("preferTimestampNTZ", prefer) .jdbc(jdbcUrl, "ts_with_timezone", new Properties) - .collect() - rows.head.toSeq.tail.foreach(c => assert(c.isInstanceOf[java.sql.Timestamp])) + checkAnswer(df, Row( + 1, + DateTimeUtils.toJavaTimestamp(1471022551949271L), + DateTimeUtils.toJavaTimestamp(62551949000L))) } } @@ -554,4 +559,74 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationSuite { .option("query", "SELECT 1::oid, 'bar'::regclass, 'integer'::regtype").load() checkAnswer(df, Row(1, "bar", "integer")) } + + test("SPARK-47886: special number values") { + def toDF(qry: String): DataFrame = { + spark.read.format("jdbc") + .option("url", jdbcUrl) + .option("query", qry) + .load() + } + checkAnswer( + toDF("SELECT 
'NaN'::float8 c1, 'infinity'::float8 c2, '-infinity'::float8 c3"), + Row(Double.NaN, Double.PositiveInfinity, Double.NegativeInfinity)) + checkAnswer( + toDF("SELECT 'NaN'::float4 c1, 'infinity'::float4 c2, '-infinity'::float4 c3"), + Row(Float.NaN, Float.PositiveInfinity, Float.NegativeInfinity) + ) + + Seq("NaN", "infinity", "-infinity").foreach { v => + val df = toDF(s"SELECT '$v'::numeric c1") + val e = intercept[SparkException](df.collect()) + checkError(e, null) + val cause = e.getCause.asInstanceOf[SQLException] + assert(cause.getMessage.contains("Bad value for type BigDecimal")) + assert(cause.getSQLState === "22003") + } + } + + test("SPARK-48387: Timestamp write as timestamp with time zone") { + val df = spark.sql("select TIMESTAMP '2018-11-17 13:33:33' as col0") + // write timestamps for preparation + withSQLConf(SQLConf.LEGACY_POSTGRES_DATETIME_MAPPING_ENABLED.key -> "false") { + // write timestamp as timestamp with time zone + df.write.jdbc(jdbcUrl, "ts_with_timezone_copy_false", new Properties) + } + withSQLConf(SQLConf.LEGACY_POSTGRES_DATETIME_MAPPING_ENABLED.key -> "true") { + // write timestamp as timestamp without time zone + df.write.jdbc(jdbcUrl, "ts_with_timezone_copy_true", new Properties) + } + + // read timestamps for test + withSQLConf(SQLConf.LEGACY_POSTGRES_DATETIME_MAPPING_ENABLED.key -> "true") { + val df1 = spark.read.option("preferTimestampNTZ", false) + .jdbc(jdbcUrl, "ts_with_timezone_copy_false", new Properties) + checkAnswer(df1, Row(Timestamp.valueOf("2018-11-17 13:33:33"))) + val df2 = spark.read.option("preferTimestampNTZ", true) + .jdbc(jdbcUrl, "ts_with_timezone_copy_false", new Properties) + checkAnswer(df2, Row(LocalDateTime.of(2018, 11, 17, 13, 33, 33))) + + val df3 = spark.read.option("preferTimestampNTZ", false) + .jdbc(jdbcUrl, "ts_with_timezone_copy_true", new Properties) + checkAnswer(df3, Row(Timestamp.valueOf("2018-11-17 13:33:33"))) + val df4 = spark.read.option("preferTimestampNTZ", true) + .jdbc(jdbcUrl, "ts_with_timezone_copy_true", new Properties) + checkAnswer(df4, Row(LocalDateTime.of(2018, 11, 17, 13, 33, 33))) + } + withSQLConf(SQLConf.LEGACY_POSTGRES_DATETIME_MAPPING_ENABLED.key -> "false") { + Seq("true", "false").foreach { prefer => + val prop = new Properties + prop.setProperty("preferTimestampNTZ", prefer) + val dfCopy = spark.read.jdbc(jdbcUrl, "ts_with_timezone_copy_false", prop) + checkAnswer(dfCopy, Row(Timestamp.valueOf("2018-11-17 13:33:33"))) + } + + val df5 = spark.read.option("preferTimestampNTZ", false) + .jdbc(jdbcUrl, "ts_with_timezone_copy_true", new Properties) + checkAnswer(df5, Row(Timestamp.valueOf("2018-11-17 13:33:33"))) + val df6 = spark.read.option("preferTimestampNTZ", true) + .jdbc(jdbcUrl, "ts_with_timezone_copy_true", new Properties) + checkAnswer(df6, Row(LocalDateTime.of(2018, 11, 17, 13, 33, 33))) + } + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala index d08be3b5f40e3..af1cd464ad5fe 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/PostgresKrbIntegrationSuite.scala @@ -25,9 +25,9 @@ import org.apache.spark.sql.execution.datasources.jdbc.connection.SecureConnecti import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version 
(e.g., postgres:16.2): + * To run this test suite for a specific version (e.g., postgres:16.3-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.2 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.3-alpine * ./build/sbt -Pdocker-integration-tests * "docker-integration-tests/testOnly *PostgresKrbIntegrationSuite" * }}} @@ -38,7 +38,7 @@ class PostgresKrbIntegrationSuite extends DockerKrbJDBCIntegrationSuite { override protected val keytabFileName = "postgres.keytab" override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.2") + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.3-alpine") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala index 7ae03e974845b..8b27e9cb0e0a3 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/GeneratedSubquerySuite.scala @@ -28,9 +28,9 @@ import org.apache.spark.tags.DockerTest /** * This suite is used to generate subqueries, and test Spark against Postgres. - * To run this test suite for a specific version (e.g., postgres:16.2): + * To run this test suite for a specific version (e.g., postgres:16.3-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.2 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.3-alpine * ./build/sbt -Pdocker-integration-tests * "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.GeneratedSubquerySuite" * }}} @@ -39,7 +39,7 @@ import org.apache.spark.tags.DockerTest class GeneratedSubquerySuite extends DockerJDBCIntegrationSuite with QueryGeneratorHelper { override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.2-alpine") + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.3-alpine") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala index f2a7e14cfc4b9..de28e16b325ce 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/querytest/PostgreSQLQueryTestSuite.scala @@ -30,9 +30,9 @@ import org.apache.spark.tags.DockerTest * confidence, and you won't have to manually verify the golden files generated with your test. * 2. 
Add this line to your .sql file: --ONLY_IF spark * - * Note: To run this test suite for a specific version (e.g., postgres:16.2): + * Note: To run this test suite for a specific version (e.g., postgres:16.3-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.2 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.3-alpine * ./build/sbt -Pdocker-integration-tests * "testOnly org.apache.spark.sql.jdbc.PostgreSQLQueryTestSuite" * }}} @@ -45,7 +45,7 @@ class PostgreSQLQueryTestSuite extends CrossDbmsQueryTestSuite { protected val customInputFilePath: String = new File(inputFilePath, "subquery").getAbsolutePath override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.2-alpine") + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.3-alpine") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala index 6c1b7fdd1be5a..57129e9d846f6 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DB2IntegrationSuite.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql.jdbc.v2 import java.sql.Connection import java.util.Locale -import org.scalatest.time.SpanSugar._ - import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog @@ -52,7 +50,6 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override val catalogName: String = "db2" override val namespaceOpt: Option[String] = Some("DB2INST1") override val db = new DB2DatabaseOnDocker - override val connectionTimeout = timeout(3.minutes) override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.db2", classOf[JDBCTableCatalog].getName) @@ -65,6 +62,12 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { connection.prepareStatement( "CREATE TABLE employee (dept INTEGER, name VARCHAR(10), salary DECIMAL(20, 2), bonus DOUBLE)") .executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col VARCHAR(50) + |) + """.stripMargin + ).executeUpdate() } override def testUpdateColumnType(tbl: String): Unit = { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala index 72edfc9f1bf1c..60345257f2dc4 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/DockerJDBCIntegrationV2Suite.scala @@ -38,6 +38,25 @@ abstract class DockerJDBCIntegrationV2Suite extends DockerJDBCIntegrationSuite { .executeUpdate() connection.prepareStatement("INSERT INTO employee VALUES (6, 'jen', 12000, 1200)") .executeUpdate() + + connection.prepareStatement("INSERT INTO pattern_testing_table " + + "VALUES ('special_character_quote''_present')") + .executeUpdate() + connection.prepareStatement("INSERT 
INTO pattern_testing_table " + + "VALUES ('special_character_quote_not_present')") + .executeUpdate() + connection.prepareStatement("INSERT INTO pattern_testing_table " + + "VALUES ('special_character_percent%_present')") + .executeUpdate() + connection.prepareStatement("INSERT INTO pattern_testing_table " + + "VALUES ('special_character_percent_not_present')") + .executeUpdate() + connection.prepareStatement("INSERT INTO pattern_testing_table " + + "VALUES ('special_character_underscore_present')") + .executeUpdate() + connection.prepareStatement("INSERT INTO pattern_testing_table " + + "VALUES ('special_character_underscorenot_present')") + .executeUpdate() } def tablePreparation(connection: Connection): Unit diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index 0dc3a39f4db5d..9ddd79fb257d8 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -19,12 +19,10 @@ package org.apache.spark.sql.jdbc.v2 import java.sql.Connection -import org.scalatest.time.SpanSugar._ - import org.apache.spark.{SparkConf, SparkSQLFeatureNotSupportedException} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.jdbc.DatabaseOnDocker +import org.apache.spark.sql.jdbc.MsSQLServerDatabaseOnDocker import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest @@ -60,19 +58,7 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD "scan with aggregate push-down: REGR_SXY without DISTINCT") override val catalogName: String = "mssql" - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", - "mcr.microsoft.com/mssql/server:2019-CU13-ubuntu-20.04") - override val env = Map( - "SA_PASSWORD" -> "Sapass123", - "ACCEPT_EULA" -> "Y" - ) - override val usesIpc = false - override val jdbcPort: Int = 1433 - - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:sqlserver://$ip:$port;user=sa;password=Sapass123;" - } + override val db = new MsSQLServerDatabaseOnDocker override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.mssql", classOf[JDBCTableCatalog].getName) @@ -80,12 +66,16 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD .set("spark.sql.catalog.mssql.pushDownAggregate", "true") .set("spark.sql.catalog.mssql.pushDownLimit", "true") - override val connectionTimeout = timeout(7.minutes) - override def tablePreparation(connection: Connection): Unit = { connection.prepareStatement( "CREATE TABLE employee (dept INT, name VARCHAR(32), salary NUMERIC(20, 2), bonus FLOAT)") .executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col VARCHAR(50) + |) + """.stripMargin + ).executeUpdate() } override def notSupportsTableComment: Boolean = true @@ -143,4 +133,17 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD "WHERE (dept > 1 AND ((name LIKE 'am%') = (name LIKE '%y')))") assert(df3.collect().length == 3) } + + test("SPARK-47994: SQLServer does not support 1 or 0 as boolean type in CASE WHEN filter") 
{ + val df = sql( + s""" + |WITH tbl AS ( + |SELECT CASE + |WHEN e.dept = 1 THEN 'first' WHEN e.dept = 2 THEN 'second' ELSE 'third' END + |AS deptString FROM $catalogName.employee as e) + |SELECT * FROM tbl + |WHERE deptString = 'first' + |""".stripMargin) + assert(df.collect().length == 2) + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala index 4bdc80dedfbe1..e010a0caf13fa 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerNamespaceSuite.scala @@ -21,7 +21,7 @@ import java.sql.Connection import scala.jdk.CollectionConverters._ -import org.apache.spark.sql.jdbc.{DatabaseOnDocker, DockerJDBCIntegrationSuite} +import org.apache.spark.sql.jdbc.{DockerJDBCIntegrationSuite, MsSQLServerDatabaseOnDocker} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.tags.DockerTest @@ -35,20 +35,7 @@ import org.apache.spark.tags.DockerTest */ @DockerTest class MsSqlServerNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { - override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("MSSQLSERVER_DOCKER_IMAGE_NAME", - "mcr.microsoft.com/mssql/server:2019-CU13-ubuntu-20.04") - override val env = Map( - "SA_PASSWORD" -> "Sapass123", - "ACCEPT_EULA" -> "Y" - ) - override val usesIpc = false - override val jdbcPort: Int = 1433 - - override def getJdbcUrl(ip: String, port: Int): String = - s"jdbc:sqlserver://$ip:$port;user=sa;password=Sapass123;" - } - + override val db = new MsSQLServerDatabaseOnDocker val map = new CaseInsensitiveStringMap( Map("url" -> db.getJdbcUrl(dockerIp, externalPort), "driver" -> "com.microsoft.sqlserver.jdbc.SQLServerDriver").asJava) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala index 4997d335fda6b..d5478e664221d 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLIntegrationSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.jdbc.v2 import java.sql.{Connection, SQLFeatureNotSupportedException} -import org.scalatest.time.SpanSugar._ - import org.apache.spark.{SparkConf, SparkSQLFeatureNotSupportedException} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog @@ -68,8 +66,6 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest .set("spark.sql.catalog.mysql.pushDownLimit", "true") .set("spark.sql.catalog.mysql.pushDownOffset", "true") - override val connectionTimeout = timeout(7.minutes) - private var mySQLVersion = -1 override def tablePreparation(connection: Connection): Unit = { @@ -77,6 +73,12 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest connection.prepareStatement( "CREATE TABLE employee (dept INT, name VARCHAR(32), salary DECIMAL(20, 2)," + " bonus DOUBLE)").executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col 
LONGTEXT + |) + """.stripMargin + ).executeUpdate() } override def testUpdateColumnType(tbl: String): Unit = { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala index d2a7aa7758263..2b607fccd1710 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MySQLNamespaceSuite.scala @@ -40,7 +40,7 @@ class MySQLNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespac val map = new CaseInsensitiveStringMap( Map("url" -> db.getJdbcUrl(dockerIp, externalPort), - "driver" -> "com.mysql.jdbc.Driver").asJava) + "driver" -> "com.mysql.cj.jdbc.Driver").asJava) catalog.initialize("mysql", map) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 0aa2905f93b85..342fb4bb38e60 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -20,10 +20,8 @@ package org.apache.spark.sql.jdbc.v2 import java.sql.Connection import java.util.Locale -import org.scalatest.time.SpanSugar._ - import org.apache.spark.{SparkConf, SparkRuntimeException} -import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.util.CharVarcharUtils.CHAR_VARCHAR_TYPE_STRING_METADATA_KEY import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.OracleDatabaseOnDocker @@ -50,12 +48,12 @@ import org.apache.spark.tags.DockerTest * * A sequence of commands to build the Oracle Database Free container image: * $ git clone https://github.com/oracle/docker-images.git - * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles - * $ ./buildContainerImage.sh -v 23.2.0 -f - * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:23.2.0-free + * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles + * $ ./buildContainerImage.sh -v 23.4.0 -f + * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:23.4.0-free * - * This procedure has been validated with Oracle Database Free version 23.2.0, - * and with Oracle Express Edition versions 18.4.0 and 21.3.0 + * This procedure has been validated with Oracle Database Free version 23.4.0, + * and with Oracle Express Edition versions 18.4.0 and 21.4.0 */ @DockerTest class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { @@ -91,12 +89,16 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes .set("spark.sql.catalog.oracle.pushDownLimit", "true") .set("spark.sql.catalog.oracle.pushDownOffset", "true") - override val connectionTimeout = timeout(7.minutes) - override def tablePreparation(connection: Connection): Unit = { connection.prepareStatement( "CREATE TABLE employee (dept NUMBER(32), name VARCHAR2(32), salary NUMBER(20, 2)," + " bonus BINARY_DOUBLE)").executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col VARCHAR(50) + |) + """.stripMargin + ).executeUpdate() } override def 
testUpdateColumnType(tbl: String): Unit = { @@ -142,4 +144,13 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes ) } } + + test("SPARK-47879: Use VARCHAR2 instead of VARCHAR") { + val tableName = catalogName + ".t1" + withTable(tableName) { + sql(s"CREATE TABLE $tableName(c1 varchar(10), c2 char(3))") + sql(s"INSERT INTO $tableName SELECT 'Eason' as c1, 'Y' as c2") + checkAnswer(sql(s"SELECT * FROM $tableName"), Seq(Row("Eason", "Y "))) + } + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala index 05f38102d4101..48f8282e58804 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleNamespaceSuite.scala @@ -46,11 +46,11 @@ import org.apache.spark.tags.DockerTest * A sequence of commands to build the Oracle Database Free container image: * $ git clone https://github.com/oracle/docker-images.git * $ cd docker-images/OracleDatabase/SingleInstance/dockerfiles - * $ ./buildContainerImage.sh -v 23.2.0 -f - * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:23.2.0-free + * $ ./buildContainerImage.sh -v 23.4.0 -f + * $ export ORACLE_DOCKER_IMAGE_NAME=oracle/database:23.4.0-free * - * This procedure has been validated with Oracle Database Free version 23.2.0, - * and with Oracle Express Edition versions 18.4.0 and 21.3.0 + * This procedure has been validated with Oracle Database Free version 23.4.0, + * and with Oracle Express Edition versions 18.4.0 and 21.4.0 */ @DockerTest class OracleNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 1f09c2fd3fc59..7c439d449d86f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -28,9 +28,9 @@ import org.apache.spark.sql.types._ import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version (e.g., postgres:16.2) + * To run this test suite for a specific version (e.g., postgres:16.3-alpine) * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.2 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.3-alpine * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresIntegrationSuite" * }}} */ @@ -38,7 +38,7 @@ import org.apache.spark.tags.DockerTest class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { override val catalogName: String = "postgresql" override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.2-alpine") + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.3-alpine") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) @@ -59,6 +59,12 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT connection.prepareStatement( "CREATE TABLE employee (dept INTEGER, name VARCHAR(32), salary 
NUMERIC(20, 2)," + " bonus double precision)").executeUpdate() + connection.prepareStatement( + s"""CREATE TABLE pattern_testing_table ( + |pattern_testing_col VARCHAR(50) + |) + """.stripMargin + ).executeUpdate() } override def testUpdateColumnType(tbl: String): Unit = { diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala index 838de5acab0df..8a2d0ded84381 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresNamespaceSuite.scala @@ -26,16 +26,16 @@ import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.tags.DockerTest /** - * To run this test suite for a specific version (e.g., postgres:16.2): + * To run this test suite for a specific version (e.g., postgres:16.3-alpine): * {{{ - * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.2 + * ENABLE_DOCKER_INTEGRATION_TESTS=1 POSTGRES_DOCKER_IMAGE_NAME=postgres:16.3-alpine * ./build/sbt -Pdocker-integration-tests "testOnly *v2.PostgresNamespaceSuite" * }}} */ @DockerTest class PostgresNamespaceSuite extends DockerJDBCIntegrationSuite with V2JDBCNamespaceTest { override val db = new DatabaseOnDocker { - override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.2-alpine") + override val imageName = sys.env.getOrElse("POSTGRES_DOCKER_IMAGE_NAME", "postgres:16.3-alpine") override val env = Map( "POSTGRES_PASSWORD" -> "rootpass" ) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index c80fbfc748dd1..88ba00a8a1aea 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -20,9 +20,8 @@ package org.apache.spark.sql.jdbc.v2 import org.apache.logging.log4j.Level import org.apache.spark.sql.{AnalysisException, DataFrame} -import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NoSuchIndexException, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NoSuchIndexException} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sample, Sort} -import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.connector.catalog.{Catalogs, Identifier, TableCatalog} import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.NullOrdering @@ -84,6 +83,19 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu def testCreateTableWithProperty(tbl: String): Unit = {} + private def checkErrorFailedJDBC( + e: AnalysisException, + errorClass: String, + tbl: String): Unit = { + checkErrorMatchPVals( + exception = e, + errorClass = errorClass, + parameters = Map( + "url" -> "jdbc:.*", + "tableName" -> s"`$tbl`") + ) + } + test("SPARK-33034: ALTER TABLE ... 
add new columns") { withTable(s"$catalogName.alt_table") { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING)") @@ -107,7 +119,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu exception = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.alt_table ADD COLUMNS (C3 DOUBLE)") }, - errorClass = "FIELDS_ALREADY_EXISTS", + errorClass = "FIELD_ALREADY_EXISTS", parameters = Map( "op" -> "add", "fieldNames" -> "`C3`", @@ -122,9 +134,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val e = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.not_existing_table ADD COLUMNS (C4 STRING)") } - checkErrorTableNotFound(e, s"`$catalogName`.`not_existing_table`", - ExpectedContext(s"$catalogName.not_existing_table", 12, - 11 + s"$catalogName.not_existing_table".length)) + checkErrorFailedJDBC(e, "FAILED_JDBC.LOAD_TABLE", "not_existing_table") } test("SPARK-33034: ALTER TABLE ... drop column") { @@ -146,9 +156,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val e = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.not_existing_table DROP COLUMN C1") } - checkErrorTableNotFound(e, s"`$catalogName`.`not_existing_table`", - ExpectedContext(s"$catalogName.not_existing_table", 12, - 11 + s"$catalogName.not_existing_table".length)) + checkErrorFailedJDBC(e, "FAILED_JDBC.LOAD_TABLE", "not_existing_table") } test("SPARK-33034: ALTER TABLE ... update column type") { @@ -164,9 +172,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val e = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.not_existing_table ALTER COLUMN id TYPE DOUBLE") } - checkErrorTableNotFound(e, s"`$catalogName`.`not_existing_table`", - ExpectedContext(s"$catalogName.not_existing_table", 12, - 11 + s"$catalogName.not_existing_table".length)) + checkErrorFailedJDBC(e, "FAILED_JDBC.LOAD_TABLE", "not_existing_table") } test("SPARK-33034: ALTER TABLE ... rename column") { @@ -179,7 +185,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu exception = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.alt_table RENAME COLUMN ID1 TO ID2") }, - errorClass = "FIELDS_ALREADY_EXISTS", + errorClass = "FIELD_ALREADY_EXISTS", parameters = Map( "op" -> "rename", "fieldNames" -> "`ID2`", @@ -194,11 +200,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val e = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.not_existing_table RENAME COLUMN ID TO C") } - checkErrorTableNotFound(e, - UnresolvedAttribute.parseAttributeName(s"$catalogName.not_existing_table") - .map(part => quoteIdentifier(part)).mkString("."), - ExpectedContext(s"$catalogName.not_existing_table", 12, - 11 + s"$catalogName.not_existing_table".length)) + checkErrorFailedJDBC(e, "FAILED_JDBC.LOAD_TABLE", "not_existing_table") } test("SPARK-33034: ALTER TABLE ... 
update column nullability") { @@ -209,9 +211,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val e = intercept[AnalysisException] { sql(s"ALTER TABLE $catalogName.not_existing_table ALTER COLUMN ID DROP NOT NULL") } - checkErrorTableNotFound(e, s"`$catalogName`.`not_existing_table`", - ExpectedContext(s"$catalogName.not_existing_table", 12, - 11 + s"$catalogName.not_existing_table".length)) + checkErrorFailedJDBC(e, "FAILED_JDBC.LOAD_TABLE", "not_existing_table") } test("CREATE TABLE with table comment") { @@ -233,7 +233,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu val e = intercept[AnalysisException] { sql(s"CREATE TABLE $catalogName.new_table (i INT) TBLPROPERTIES('a'='1')") } - assert(e.getErrorClass == "FAILED_JDBC.UNCLASSIFIED") + checkErrorFailedJDBC(e, "FAILED_JDBC.CREATE_TABLE", "new_table") testCreateTableWithProperty(s"$catalogName.new_table") } } @@ -359,6 +359,235 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu assert(scan.schema.names.sameElements(Seq(col))) } + test("SPARK-48172: Test CONTAINS") { + val df1 = spark.sql( + s""" + |SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE contains(pattern_testing_col, 'quote\\'')""".stripMargin) + df1.explain("formatted") + val rows1 = df1.collect() + assert(rows1.length === 1) + assert(rows1(0).getString(0) === "special_character_quote'_present") + + val df2 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE contains(pattern_testing_col, 'percent%')""".stripMargin) + val rows2 = df2.collect() + assert(rows2.length === 1) + assert(rows2(0).getString(0) === "special_character_percent%_present") + + val df3 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE contains(pattern_testing_col, 'underscore_')""".stripMargin) + val rows3 = df3.collect() + assert(rows3.length === 1) + assert(rows3(0).getString(0) === "special_character_underscore_present") + + val df4 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE contains(pattern_testing_col, 'character') + |ORDER BY pattern_testing_col""".stripMargin) + val rows4 = df4.collect() + assert(rows4.length === 6) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") + } + + test("SPARK-48172: Test ENDSWITH") { + val df1 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE endswith(pattern_testing_col, 'quote\\'_present')""".stripMargin) + val rows1 = df1.collect() + assert(rows1.length === 1) + assert(rows1(0).getString(0) === "special_character_quote'_present") + + val df2 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE endswith(pattern_testing_col, 'percent%_present')""".stripMargin) + val rows2 = df2.collect() + assert(rows2.length === 1) + assert(rows2(0).getString(0) === "special_character_percent%_present") + + val df3 = spark. 
+ sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE endswith(pattern_testing_col, 'underscore_present')""".stripMargin) + val rows3 = df3.collect() + assert(rows3.length === 1) + assert(rows3(0).getString(0) === "special_character_underscore_present") + + val df4 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE endswith(pattern_testing_col, 'present') + |ORDER BY pattern_testing_col""".stripMargin) + val rows4 = df4.collect() + assert(rows4.length === 6) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") + } + + test("SPARK-48172: Test STARTSWITH") { + val df1 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE startswith(pattern_testing_col, 'special_character_quote\\'')""".stripMargin) + val rows1 = df1.collect() + assert(rows1.length === 1) + assert(rows1(0).getString(0) === "special_character_quote'_present") + + val df2 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE startswith(pattern_testing_col, 'special_character_percent%')""".stripMargin) + val rows2 = df2.collect() + assert(rows2.length === 1) + assert(rows2(0).getString(0) === "special_character_percent%_present") + + val df3 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE startswith(pattern_testing_col, 'special_character_underscore_')""".stripMargin) + val rows3 = df3.collect() + assert(rows3.length === 1) + assert(rows3(0).getString(0) === "special_character_underscore_present") + + val df4 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE startswith(pattern_testing_col, 'special_character') + |ORDER BY pattern_testing_col""".stripMargin) + val rows4 = df4.collect() + assert(rows4.length === 6) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") + } + + test("SPARK-48172: Test LIKE") { + // this one should map to contains + val df1 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE '%quote\\'%'""".stripMargin) + val rows1 = df1.collect() + assert(rows1.length === 1) + assert(rows1(0).getString(0) === "special_character_quote'_present") + + val df2 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE '%percent\\%%'""".stripMargin) + val rows2 = df2.collect() + assert(rows2.length === 1) + assert(rows2(0).getString(0) === "special_character_percent%_present") + + val df3 = spark. 
+ sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE '%underscore\\_%'""".stripMargin) + val rows3 = df3.collect() + assert(rows3.length === 1) + assert(rows3(0).getString(0) === "special_character_underscore_present") + + val df4 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE '%character%' + |ORDER BY pattern_testing_col""".stripMargin) + val rows4 = df4.collect() + assert(rows4.length === 6) + assert(rows4(0).getString(0) === "special_character_percent%_present") + assert(rows4(1).getString(0) === "special_character_percent_not_present") + assert(rows4(2).getString(0) === "special_character_quote'_present") + assert(rows4(3).getString(0) === "special_character_quote_not_present") + assert(rows4(4).getString(0) === "special_character_underscore_present") + assert(rows4(5).getString(0) === "special_character_underscorenot_present") + + // map to startsWith + // this one should map to contains + val df5 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE 'special_character_quote\\'%'""".stripMargin) + val rows5 = df5.collect() + assert(rows5.length === 1) + assert(rows5(0).getString(0) === "special_character_quote'_present") + + val df6 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE 'special_character_percent\\%%'""".stripMargin) + val rows6 = df6.collect() + assert(rows6.length === 1) + assert(rows6(0).getString(0) === "special_character_percent%_present") + + val df7 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE 'special_character_underscore\\_%'""".stripMargin) + val rows7 = df7.collect() + assert(rows7.length === 1) + assert(rows7(0).getString(0) === "special_character_underscore_present") + + val df8 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE 'special_character%' + |ORDER BY pattern_testing_col""".stripMargin) + val rows8 = df8.collect() + assert(rows8.length === 6) + assert(rows8(0).getString(0) === "special_character_percent%_present") + assert(rows8(1).getString(0) === "special_character_percent_not_present") + assert(rows8(2).getString(0) === "special_character_quote'_present") + assert(rows8(3).getString(0) === "special_character_quote_not_present") + assert(rows8(4).getString(0) === "special_character_underscore_present") + assert(rows8(5).getString(0) === "special_character_underscorenot_present") + // map to endsWith + // this one should map to contains + val df9 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE '%quote\\'_present'""".stripMargin) + val rows9 = df9.collect() + assert(rows9.length === 1) + assert(rows9(0).getString(0) === "special_character_quote'_present") + + val df10 = spark.sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE '%percent\\%_present'""".stripMargin) + val rows10 = df10.collect() + assert(rows10.length === 1) + assert(rows10(0).getString(0) === "special_character_percent%_present") + + val df11 = spark. 
+ sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE '%underscore\\_present'""".stripMargin) + val rows11 = df11.collect() + assert(rows11.length === 1) + assert(rows11(0).getString(0) === "special_character_underscore_present") + + val df12 = spark. + sql( + s"""SELECT * FROM $catalogAndNamespace.${caseConvert("pattern_testing_table")} + |WHERE pattern_testing_col LIKE '%present' ORDER BY pattern_testing_col""".stripMargin) + val rows12 = df12.collect() + assert(rows12.length === 6) + assert(rows12(0).getString(0) === "special_character_percent%_present") + assert(rows12(1).getString(0) === "special_character_percent_not_present") + assert(rows12(2).getString(0) === "special_character_quote'_present") + assert(rows12(3).getString(0) === "special_character_quote_not_present") + assert(rows12(4).getString(0) === "special_character_underscore_present") + assert(rows12(5).getString(0) === "special_character_underscorenot_present") + } + test("SPARK-37038: Test TABLESAMPLE") { if (supportsTableSample) { withTable(s"$catalogName.new_table") { diff --git a/connector/kafka-0-10-sql/src/main/resources/error/kafka-error-conditions.json b/connector/kafka-0-10-sql/src/main/resources/error/kafka-error-conditions.json index a7b22e1370fd8..2fa44d7bd66a6 100644 --- a/connector/kafka-0-10-sql/src/main/resources/error/kafka-error-conditions.json +++ b/connector/kafka-0-10-sql/src/main/resources/error/kafka-error-conditions.json @@ -23,6 +23,13 @@ "latest offset: , end offset: " ] }, + "KAFKA_START_OFFSET_DOES_NOT_MATCH_ASSIGNED" : { + "message" : [ + "Partitions specified for Kafka start offsets don't match what are assigned. Maybe topic partitions are created ", + "or deleted while the query is running. 
Use -1 for latest, -2 for earliest.", + "Specified: <specifiedPartitions> Assigned: <assignedPartitions>" + ] + }, "KAFKA_DATA_LOSS" : { "message" : [ "Some data may have been lost because they are not available in Kafka any more;", diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchPartitionReader.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchPartitionReader.scala index d3fe3264afe14..cb1c7055483b3 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchPartitionReader.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaBatchPartitionReader.scala @@ -21,7 +21,7 @@ import java.{util => ju} import org.apache.spark.TaskContext import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.connector.metric.CustomTaskMetric diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala index e5e22243a5826..10bdbb1d9d447 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaContinuousStream.scala @@ -25,7 +25,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.spark.TaskContext import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ERROR, OFFSETS, TIP} +import org.apache.spark.internal.LogKeys.{ERROR, OFFSETS, TIP} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.connector.read.InputPartition diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaExceptions.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaExceptions.scala index 735184db3c1af..13a68e72269f0 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaExceptions.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaExceptions.scala @@ -27,8 +27,8 @@ private object KafkaExceptionsHelper { val errorClassesJsonReader: ErrorClassesJsonReader = new ErrorClassesJsonReader( // Note that though we call them "error classes" here, the proper name is "error conditions", - // hence why the name of the JSON file different. We will address this inconsistency as part - // of this ticket: https://issues.apache.org/jira/browse/SPARK-47429 + // hence why the name of the JSON file is different. 
We will address this inconsistency as + // part of this ticket: https://issues.apache.org/jira/browse/SPARK-47429 Seq(getClass.getClassLoader.getResource("error/kafka-error-conditions.json"))) } @@ -97,7 +97,7 @@ object KafkaExceptions { "startOffset" -> startOffset.toString, "endOffset" -> endOffset.toString, "topicPartition" -> topicPartition.toString, - "groupId" -> groupId), + "groupId" -> Option(groupId).getOrElse("null")), cause = cause) } @@ -155,6 +155,16 @@ object KafkaExceptions { "prevOffset" -> prevOffset.toString, "newOffset" -> newOffset.toString)) } + + def startOffsetDoesNotMatchAssigned( + specifiedPartitions: Set[TopicPartition], + assignedPartitions: Set[TopicPartition]): KafkaIllegalStateException = { + new KafkaIllegalStateException( + errorClass = "KAFKA_START_OFFSET_DOES_NOT_MATCH_ASSIGNED", + messageParameters = Map( + "specifiedPartitions" -> specifiedPartitions.toString, + "assignedPartitions" -> assignedPartitions.toString)) + } } /** diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala index 3313d42d1a30e..c79da13017b97 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchStream.scala @@ -26,7 +26,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkEnv import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ERROR, OFFSETS, TIP} +import org.apache.spark.internal.LogKeys.{ERROR, OFFSETS, TIP} import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory} diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala index 5ed8576e88888..bb4f14686f976 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala @@ -31,7 +31,7 @@ import org.apache.kafka.common.requests.OffsetFetchResponse import org.apache.spark.SparkEnv import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{OFFSETS, RETRY_COUNT, TOPIC_PARTITION_OFFSET} +import org.apache.spark.internal.LogKeys.{NUM_RETRY, OFFSETS, TOPIC_PARTITION_OFFSET} import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.kafka010.KafkaSourceProvider.StrategyOnNoMatchStartingOffset @@ -120,10 +120,9 @@ private[kafka010] class KafkaOffsetReaderAdmin( isStartingOffsets: Boolean): Map[TopicPartition, Long] = { def validateTopicPartitions(partitions: Set[TopicPartition], partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - assert(partitions == partitionOffsets.keySet, - "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + - "Use -1 for latest, -2 for earliest.\n" + - s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") + if (partitions != partitionOffsets.keySet) { + throw KafkaExceptions.startOffsetDoesNotMatchAssigned(partitionOffsets.keySet, 
partitions) + } logDebug(s"Assigned partitions: $partitions. Seeking to $partitionOffsets") partitionOffsets } @@ -536,7 +535,7 @@ private[kafka010] class KafkaOffsetReaderAdmin( case NonFatal(e) => lastException = e logWarning( - log"Error in attempt ${MDC(RETRY_COUNT, attempt)} getting Kafka offsets: ", e) + log"Error in attempt ${MDC(NUM_RETRY, attempt)} getting Kafka offsets: ", e) attempt += 1 Thread.sleep(offsetFetchAttemptIntervalMs) resetAdmin() diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala index 34d44fdf10591..fa53d6373176e 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala @@ -28,7 +28,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkEnv import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{OFFSETS, RETRY_COUNT, TOPIC_PARTITION_OFFSET} +import org.apache.spark.internal.LogKeys.{NUM_RETRY, OFFSETS, TOPIC_PARTITION_OFFSET} import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.kafka010.KafkaSourceProvider.StrategyOnNoMatchStartingOffset @@ -142,10 +142,9 @@ private[kafka010] class KafkaOffsetReaderConsumer( isStartingOffsets: Boolean): Map[TopicPartition, Long] = { def validateTopicPartitions(partitions: Set[TopicPartition], partitionOffsets: Map[TopicPartition, Long]): Map[TopicPartition, Long] = { - assert(partitions == partitionOffsets.keySet, - "If startingOffsets contains specific offsets, you must specify all TopicPartitions.\n" + - "Use -1 for latest, -2 for earliest.\n" + - s"Specified: ${partitionOffsets.keySet} Assigned: ${partitions}") + if (partitions != partitionOffsets.keySet) { + throw KafkaExceptions.startOffsetDoesNotMatchAssigned(partitionOffsets.keySet, partitions) + } logDebug(s"Partitions assigned to consumer: $partitions. 
Seeking to $partitionOffsets") partitionOffsets } @@ -613,7 +612,7 @@ private[kafka010] class KafkaOffsetReaderConsumer( case NonFatal(e) => lastException = e logWarning( - log"Error in attempt ${MDC(RETRY_COUNT, attempt)} getting Kafka offsets: ", e) + log"Error in attempt ${MDC(NUM_RETRY, attempt)} getting Kafka offsets: ", e) attempt += 1 Thread.sleep(offsetFetchAttemptIntervalMs) resetConsumer() diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala index 97b866067ea88..b77bb94aaf46f 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.kafka010 import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.TOPIC_PARTITIONS +import org.apache.spark.internal.LogKeys.TOPIC_PARTITIONS import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSink.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSink.scala index 5a75682f54f9a..fb473e71d5a75 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSink.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSink.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.kafka010 import java.{util => ju} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.BATCH_ID +import org.apache.spark.internal.LogKeys.BATCH_ID import org.apache.spark.sql.DataFrame import org.apache.spark.sql.execution.streaming.Sink diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala index b0ab469690e2a..d43b22d9de922 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSource.scala @@ -23,7 +23,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkContext import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ERROR, FROM_OFFSET, OFFSETS, TIP, TOPIC_PARTITIONS, UNTIL_OFFSET} +import org.apache.spark.internal.LogKeys.{ERROR, FROM_OFFSET, OFFSETS, TIP, TOPIC_PARTITIONS, UNTIL_OFFSET} import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql._ diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceRDD.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceRDD.scala index 4eb73e6d39f02..bc7f8b6b44f90 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceRDD.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaSourceRDD.scala @@ -22,7 +22,7 @@ import java.{util => ju} import org.apache.kafka.clients.consumer.ConsumerRecord import org.apache.spark.{Partition, SparkContext, TaskContext} -import org.apache.spark.internal.LogKey.{FROM_OFFSET, PARTITION_ID, TOPIC} +import 
org.apache.spark.internal.LogKeys.{FROM_OFFSET, PARTITION_ID, TOPIC} import org.apache.spark.internal.MDC import org.apache.spark.rdd.RDD import org.apache.spark.sql.kafka010.consumer.KafkaDataConsumer diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/FetchedDataPool.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/FetchedDataPool.scala index 981aa71bf9479..9f68cb6fd0882 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/FetchedDataPool.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/FetchedDataPool.scala @@ -27,7 +27,7 @@ import org.apache.kafka.clients.consumer.ConsumerRecord import org.apache.spark.SparkConf import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{DATA, KEY} +import org.apache.spark.internal.LogKeys.{DATA, KEY} import org.apache.spark.sql.kafka010.{FETCHED_DATA_CACHE_EVICTOR_THREAD_RUN_INTERVAL, FETCHED_DATA_CACHE_TIMEOUT} import org.apache.spark.sql.kafka010.consumer.KafkaDataConsumer.{AvailableOffsetRange, CacheKey, UNKNOWN_OFFSET} import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/InternalKafkaConsumerPool.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/InternalKafkaConsumerPool.scala index 661fe731b97b9..edd5121cfbeee 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/InternalKafkaConsumerPool.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/InternalKafkaConsumerPool.scala @@ -183,8 +183,8 @@ private[consumer] object InternalKafkaConsumerPool { setMaxTotal(-1) // Set minimum evictable idle time which will be referred from evictor thread - setMinEvictableIdleTime(Duration.ofMillis(minEvictableIdleTimeMillis)) - setSoftMinEvictableIdleTime(BaseObjectPoolConfig.DEFAULT_SOFT_MIN_EVICTABLE_IDLE_DURATION) + setMinEvictableIdleDuration(Duration.ofMillis(minEvictableIdleTimeMillis)) + setSoftMinEvictableIdleDuration(BaseObjectPoolConfig.DEFAULT_SOFT_MIN_EVICTABLE_IDLE_DURATION) // evictor thread will run test with ten idle objects setTimeBetweenEvictionRuns(Duration.ofMillis(evictorThreadRunIntervalMillis)) diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala index 72ceebb700d69..ceb9d96660ae3 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/consumer/KafkaDataConsumer.scala @@ -31,7 +31,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.{Logging, MDC, MessageWithContext} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.kafka010.{KafkaConfigUpdater, KafkaTokenUtil} import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS import org.apache.spark.sql.kafka010.KafkaExceptions @@ -393,9 +393,9 @@ private[kafka010] class KafkaDataConsumer( val walTime = System.nanoTime() - startTimestampNano logInfo(log"From Kafka 
${MDC(CONSUMER, kafkaMeta)} read " + - log"${MDC(TOTAL_RECORDS_READ, totalRecordsRead)} records through " + - log"${MDC(KAFKA_PULLS_COUNT, numPolls)} polls " + - log"(polled out ${MDC(KAFKA_RECORDS_PULLED_COUNT, numRecordsPolled)} records), " + + log"${MDC(NUM_RECORDS_READ, totalRecordsRead)} records through " + + log"${MDC(NUM_KAFKA_PULLS, numPolls)} polls " + + log"(polled out ${MDC(NUM_KAFKA_RECORDS_PULLED, numRecordsPolled)} records), " + log"taking ${MDC(TOTAL_TIME_READ, totalTimeReadNanos / NANOS_PER_MILLIS.toDouble)} ms, " + log"during time span of ${MDC(TIME, walTime / NANOS_PER_MILLIS.toDouble)} ms." ) diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/producer/CachedKafkaProducer.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/producer/CachedKafkaProducer.scala index afd426694d7b0..c3457cf8982d9 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/producer/CachedKafkaProducer.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/producer/CachedKafkaProducer.scala @@ -24,7 +24,7 @@ import scala.util.control.NonFatal import org.apache.kafka.clients.producer.KafkaProducer import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PRODUCER_ID +import org.apache.spark.internal.LogKeys.PRODUCER_ID private[kafka010] class CachedKafkaProducer( val cacheKey: Seq[(String, Object)], diff --git a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/producer/InternalKafkaProducerPool.scala b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/producer/InternalKafkaProducerPool.scala index f35023d744b63..79e0a91dd8968 100644 --- a/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/producer/InternalKafkaProducerPool.scala +++ b/connector/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/producer/InternalKafkaProducerPool.scala @@ -28,7 +28,7 @@ import org.apache.kafka.clients.producer.KafkaProducer import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PRODUCER_ID +import org.apache.spark.internal.LogKeys.PRODUCER_ID import org.apache.spark.kafka010.{KafkaConfigUpdater, KafkaRedactionUtil} import org.apache.spark.sql.kafka010.{PRODUCER_CACHE_EVICTOR_THREAD_RUN_INTERVAL, PRODUCER_CACHE_TIMEOUT} import org.apache.spark.util.{Clock, ShutdownHookManager, SystemClock, ThreadUtils, Utils} diff --git a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala index 691e81f02a8c9..320485a79e59d 100644 --- a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala +++ b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaOffsetReaderSuite.scala @@ -135,6 +135,31 @@ class KafkaOffsetReaderSuite extends QueryTest with SharedSparkSession with Kafk KafkaOffsetRange(tp, 2, LATEST, None)).sortBy(_.topicPartition.toString)) } + testWithAllOffsetFetchingSQLConf( + "SPARK-48383: START_OFFSET_DOES_NOT_MATCH_ASSIGNED error class" + ) { + val topic = newTopic() + testUtils.createTopic(topic, partitions = 3) + val reader = createKafkaReader(topic, minPartitions = Some(4)) + + // There are three topic partitions, but we only include two in offsets. 
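// Editorial sketch, not part of the patch above: the KAFKA_START_OFFSET_DOES_NOT_MATCH_ASSIGNED
// condition exercised by this test can also be reached from the user-facing Kafka source options by
// listing only a subset of a topic's partitions in `startingOffsets`. The topic name and bootstrap
// servers below are illustrative assumptions; -2 means earliest and -1 means latest, as the new
// error message notes.
//
//   spark.readStream
//     .format("kafka")
//     .option("kafka.bootstrap.servers", "localhost:9092")
//     .option("subscribe", "topic-with-three-partitions")
//     .option("startingOffsets", """{"topic-with-three-partitions":{"0":-2,"1":-2}}""")  // partition 2 omitted
//     .load()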
+ val tp1 = new TopicPartition(topic, 0) + val tp2 = new TopicPartition(topic, 1) + val startingOffsets = SpecificOffsetRangeLimit(Map(tp1 -> EARLIEST, tp2 -> EARLIEST)) + val endingOffsets = SpecificOffsetRangeLimit(Map(tp1 -> LATEST, tp2 -> 3)) + + val ex = intercept[KafkaIllegalStateException] { + reader.getOffsetRangesFromUnresolvedOffsets(startingOffsets, endingOffsets) + } + checkError( + exception = ex, + errorClass = "KAFKA_START_OFFSET_DOES_NOT_MATCH_ASSIGNED", + parameters = Map( + "specifiedPartitions" -> "Set\\(.*,.*\\)", + "assignedPartitions" -> "Set\\(.*,.*,.*\\)"), + matchPVals = true) + } + testWithAllOffsetFetchingSQLConf("SPARK-30656: getOffsetRangesFromUnresolvedOffsets - " + "multiple topic partitions") { val topic = newTopic() diff --git a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index 068e3423cd26c..0737658e65256 100644 --- a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -52,7 +52,7 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey +import org.apache.spark.internal.LogKeys import org.apache.spark.kafka010.KafkaTokenUtil import org.apache.spark.util.{SecurityUtils, ShutdownHookManager, Utils} import org.apache.spark.util.ArrayImplicits._ @@ -70,7 +70,7 @@ class KafkaTestUtils( private val JAVA_AUTH_CONFIG = "java.security.auth.login.config" private val localHostNameForURI = Utils.localHostNameForURI() - logInfo(log"Local host name is ${MDC(LogKey.URI, localHostNameForURI)}") + logInfo(log"Local host name is ${MDC(LogKeys.URI, localHostNameForURI)}") // MiniKDC uses canonical host name on host part, hence we need to provide canonical host name // on the 'host' part of the principal. 
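The hunks above and below all apply the same mechanical migration: the singular `org.apache.spark.internal.LogKey` object becomes `LogKeys`, and log messages are built with the `log` interpolator plus `MDC(...)` wrappers so each interpolated value carries a structured key. A minimal sketch of the resulting pattern, using only imports and overloads that appear elsewhere in this patch; the class and method names below are illustrative and not part of the change:

```scala
import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.internal.LogKeys.{ERROR, TOPIC}

// Illustrative component; any class that mixes in Logging can use the pattern.
class TopicMonitor extends Logging {
  def reportFailure(topic: String, e: Exception): Unit = {
    // Each value is wrapped in MDC(<LogKeys entry>, value) so that structured
    // logging backends can index the message by key, and the throwable is
    // passed as a separate argument, as in the KinesisCheckpointer hunk below.
    logWarning(log"Failed to process topic ${MDC(TOPIC, topic)}: " +
      log"${MDC(ERROR, e.getMessage)}", e)
  }
}
```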
@@ -333,7 +333,7 @@ class KafkaTestUtils( Utils.deleteRecursively(new File(f)) } catch { case e: IOException if Utils.isWindows => - logWarning(log"${MDC(LogKey.ERROR, e.getMessage)}") + logWarning(log"${MDC(LogKeys.ERROR, e.getMessage)}") } } @@ -654,13 +654,13 @@ class KafkaTestUtils( Utils.deleteRecursively(snapshotDir) } catch { case e: IOException if Utils.isWindows => - logWarning(log"${MDC(LogKey.ERROR, e.getMessage)}") + logWarning(log"${MDC(LogKeys.ERROR, e.getMessage)}") } try { Utils.deleteRecursively(logDir) } catch { case e: IOException if Utils.isWindows => - logWarning(log"${MDC(LogKey.ERROR, e.getMessage)}") + logWarning(log"${MDC(LogKeys.ERROR, e.getMessage)}") } System.clearProperty(ZOOKEEPER_AUTH_PROVIDER) } diff --git a/connector/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaDelegationTokenProvider.scala b/connector/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaDelegationTokenProvider.scala index d0bcf90babc13..3616f93659fbb 100644 --- a/connector/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaDelegationTokenProvider.scala +++ b/connector/kafka-0-10-token-provider/src/main/scala/org/apache/spark/kafka010/KafkaDelegationTokenProvider.scala @@ -25,7 +25,7 @@ import org.apache.kafka.common.security.auth.SecurityProtocol.{SASL_PLAINTEXT, S import org.apache.spark.SparkConf import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CLUSTER_ID, SERVICE_NAME} +import org.apache.spark.internal.LogKeys.{CLUSTER_ID, SERVICE_NAME} import org.apache.spark.security.HadoopDelegationTokenProvider private[spark] class KafkaDelegationTokenProvider diff --git a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/ConsumerStrategy.scala b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/ConsumerStrategy.scala index 2bc2acf9aaf91..2320f1908da5a 100644 --- a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/ConsumerStrategy.scala +++ b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/ConsumerStrategy.scala @@ -26,7 +26,7 @@ import org.apache.kafka.clients.consumer._ import org.apache.kafka.common.TopicPartition import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CONFIG +import org.apache.spark.internal.LogKeys.CONFIG import org.apache.spark.kafka010.KafkaConfigUpdater /** diff --git a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala index 86ee208496263..cefaa3de182a5 100644 --- a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala +++ b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/DirectKafkaInputDStream.scala @@ -28,7 +28,7 @@ import org.apache.kafka.clients.consumer._ import org.apache.kafka.common.TopicPartition import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{OFFSET, TIME, TOPIC_PARTITION, TOPIC_PARTITION_OFFSET_RANGE} +import org.apache.spark.internal.LogKeys.{OFFSET, TIME, TOPIC_PARTITION, TOPIC_PARTITION_OFFSET_RANGE} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{StreamingContext, Time} import org.apache.spark.streaming.dstream._ diff --git a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaDataConsumer.scala 
b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaDataConsumer.scala index 91df53c9e06bb..75b046430ef50 100644 --- a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaDataConsumer.scala +++ b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaDataConsumer.scala @@ -27,7 +27,7 @@ import org.apache.kafka.common.{KafkaException, TopicPartition} import org.apache.spark.TaskContext import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.kafka010.KafkaConfigUpdater private[kafka010] sealed trait KafkaDataConsumer[K, V] { diff --git a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala index 5bc89864cf0af..2637034766574 100644 --- a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala +++ b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaRDD.scala @@ -24,7 +24,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{FROM_OFFSET, PARTITION_ID, TOPIC, UNTIL_OFFSET} +import org.apache.spark.internal.LogKeys.{FROM_OFFSET, PARTITION_ID, TOPIC, UNTIL_OFFSET} import org.apache.spark.internal.config.Network._ import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.rdd.RDD diff --git a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaUtils.scala b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaUtils.scala index f3e4c45b3aa99..d15e5e25f561d 100644 --- a/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaUtils.scala +++ b/connector/kafka-0-10/src/main/scala/org/apache/spark/streaming/kafka010/KafkaUtils.scala @@ -25,7 +25,7 @@ import org.apache.kafka.common.TopicPartition import org.apache.spark.SparkContext import org.apache.spark.api.java.{ JavaRDD, JavaSparkContext } import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CONFIG, GROUP_ID} +import org.apache.spark.internal.LogKeys.{CONFIG, GROUP_ID} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.api.java.{ JavaInputDStream, JavaStreamingContext } diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala index 6b0c091534b78..b391203b4b968 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala @@ -30,7 +30,7 @@ import com.amazonaws.services.kinesis.model._ import org.apache.spark._ import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ERROR, RETRY_COUNT} +import org.apache.spark.internal.LogKeys.{ERROR, NUM_RETRY} import org.apache.spark.rdd.{BlockRDD, BlockRDDPartition} import org.apache.spark.storage.BlockId import org.apache.spark.util.NextIterator @@ -279,7 +279,7 @@ class KinesisSequenceRangeIterator( t match { case ptee: ProvisionedThroughputExceededException => 
logWarning(log"Error while ${MDC(ERROR, message)} " + - log"[attempt = ${MDC(RETRY_COUNT, retryCount + 1)}]", ptee) + log"[attempt = ${MDC(NUM_RETRY, retryCount + 1)}]", ptee) case e: Throwable => throw new SparkException(s"Error while $message", e) } diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala index f6740999ce1c7..c52eeca1e48a1 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisCheckpointer.scala @@ -23,7 +23,7 @@ import scala.util.control.NonFatal import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorCheckpointer import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{SHARD_ID, WORKER_URL} +import org.apache.spark.internal.LogKeys.{SHARD_ID, WORKER_URL} import org.apache.spark.streaming.Duration import org.apache.spark.streaming.util.RecurringTimer import org.apache.spark.util.{Clock, SystemClock} @@ -103,7 +103,7 @@ private[kinesis] class KinesisCheckpointer( } } catch { case NonFatal(e) => - logWarning(s"Failed to checkpoint shardId $shardId to DynamoDB.", e) + logWarning(log"Failed to checkpoint shardId ${MDC(SHARD_ID, shardId)} to DynamoDB.", e) } } diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala index 47b03c2b75376..953817e625e48 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisReceiver.scala @@ -29,7 +29,7 @@ import com.amazonaws.services.kinesis.metrics.interfaces.MetricsLevel import com.amazonaws.services.kinesis.model.Record import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.WORKER_URL +import org.apache.spark.internal.LogKeys.WORKER_URL import org.apache.spark.storage.{StorageLevel, StreamBlockId} import org.apache.spark.streaming.Duration import org.apache.spark.streaming.kinesis.KinesisInitialPositions.AtTimestamp diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala index 8424dde7d9c40..aaafb3215d031 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala @@ -27,7 +27,7 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.ShutdownReason import com.amazonaws.services.kinesis.model.Record import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{REASON, RETRY_INTERVAL, SHARD_ID, WORKER_URL} +import org.apache.spark.internal.LogKeys.{REASON, RETRY_INTERVAL, SHARD_ID, WORKER_URL} /** * Kinesis-specific implementation of the Kinesis Client Library (KCL) IRecordProcessor. 
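The `KinesisSequenceRangeIterator` hunk above sits inside a retry helper: a `ProvisionedThroughputExceededException` is treated as transient throttling (the attempt number is logged and the call retried), while any other error is rethrown wrapped in a `SparkException`. A rough standalone sketch of that control flow; the helper name, retry limit, and backoff below are assumptions for illustration, not values from the patch:

```scala
import com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException

import org.apache.spark.SparkException

object RetryExample {
  // Hypothetical helper mirroring the branch structure in the hunk above:
  // throttling errors are retried with a short backoff; anything else (or an
  // exhausted retry budget) is rethrown wrapped in SparkException.
  def retryOnThrottle[T](message: String, maxRetries: Int = 3)(body: => T): T = {
    var attempt = 0
    while (true) {
      try {
        return body
      } catch {
        case _: ProvisionedThroughputExceededException if attempt < maxRetries =>
          // The patched code logs the attempt number here via MDC(NUM_RETRY, ...).
          attempt += 1
          Thread.sleep(100L * attempt)
        case e: Throwable =>
          throw new SparkException(s"Error while $message", e)
      }
    }
    throw new IllegalStateException("unreachable")
  }
}
```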
diff --git a/connector/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisTestUtils.scala b/connector/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisTestUtils.scala index cd4c61396a12f..652822c5fdc97 100644 --- a/connector/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisTestUtils.scala +++ b/connector/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisTestUtils.scala @@ -34,7 +34,7 @@ import com.amazonaws.services.kinesis.{AmazonKinesis, AmazonKinesisClient} import com.amazonaws.services.kinesis.model._ import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{STREAM_NAME, TABLE_NAME} +import org.apache.spark.internal.LogKeys.{STREAM_NAME, TABLE_NAME} /** * Shared utility methods for performing Kinesis tests that actually transfer data. diff --git a/connector/profiler/README.md b/connector/profiler/README.md index 527f8b487d4d4..d928a47cab7d2 100644 --- a/connector/profiler/README.md +++ b/connector/profiler/README.md @@ -23,7 +23,7 @@ Code profiling is currently only supported for To get maximum profiling information set the following jvm options for the executor : ``` - -XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+PreserveFramePointer +spark.executor.extraJavaOptions=-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints -XX:+PreserveFramePointer ``` For more information on async_profiler see the [Async Profiler Manual](https://krzysztofslusarski.github.io/2022/12/12/async-manual.html) diff --git a/connector/profiler/pom.xml b/connector/profiler/pom.xml index 933a74edc0a94..6b254dbae128c 100644 --- a/connector/profiler/pom.xml +++ b/connector/profiler/pom.xml @@ -31,6 +31,9 @@ jar Spark Profiler + + Enables code profiling of executors based on the async profiler.
+ https://spark.apache.org/ @@ -44,7 +47,8 @@ me.bechberger ap-loader-all - 3.0-8 + ${ap-loader.version} + provided diff --git a/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala b/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala index 15ffbbd9d730c..20b6db5221fa9 100644 --- a/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala +++ b/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorJVMProfiler.scala @@ -25,8 +25,8 @@ import org.apache.hadoop.fs.{FileSystem, FSDataOutputStream, Path} import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.LogKey.PATH import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.util.ThreadUtils diff --git a/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorProfilerPlugin.scala b/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorProfilerPlugin.scala index fb9abfe59aa78..b6b6221277968 100644 --- a/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorProfilerPlugin.scala +++ b/connector/profiler/src/main/scala/org/apache/spark/executor/profiler/ExecutorProfilerPlugin.scala @@ -23,8 +23,8 @@ import scala.util.Random import org.apache.spark.SparkConf import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, PluginContext, SparkPlugin} -import org.apache.spark.internal.LogKey.EXECUTOR_ID import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.EXECUTOR_ID /** diff --git a/connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java b/connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java index 019ee08e09188..48c61e80d6655 100644 --- a/connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java +++ b/connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java @@ -11,9 +11,6 @@ import info.ganglia.gmetric4j.gmetric.GMetricType; import info.ganglia.gmetric4j.gmetric.GangliaException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.util.Collections; import java.util.Map; import java.util.Set; @@ -22,6 +19,11 @@ import java.util.concurrent.TimeUnit; import java.util.regex.Pattern; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; + import static com.codahale.metrics.MetricRegistry.name; import static com.codahale.metrics.MetricAttribute.*; @@ -201,7 +203,7 @@ public GangliaReporter build(GMetric... 
gmetrics) { } } - private static final Logger LOGGER = LoggerFactory.getLogger(GangliaReporter.class); + private static final SparkLogger LOGGER = SparkLoggerFactory.getLogger(GangliaReporter.class); private final GMetric gmetric; private final GMetric[] gmetrics; @@ -292,7 +294,8 @@ private void reportTimer(String name, Timer timer) { reportMetered(sanitizedName, timer, group, "calls"); } catch (GangliaException e) { - LOGGER.warn("Unable to report timer {}", sanitizedName, e); + LOGGER.warn("Unable to report timer {}", e, + MDC.of(LogKeys.METRIC_NAME$.MODULE$, sanitizedName)); } } @@ -302,7 +305,8 @@ private void reportMeter(String name, Meter meter) { try { reportMetered(sanitizedName, meter, group, "events"); } catch (GangliaException e) { - LOGGER.warn("Unable to report meter {}", name, e); + LOGGER.warn("Unable to report meter {}", e, + MDC.of(LogKeys.METRIC_NAME$.MODULE$, name)); } } @@ -333,7 +337,8 @@ private void reportHistogram(String name, Histogram histogram) { announceIfEnabled(P99, sanitizedName, group, snapshot.get99thPercentile(), ""); announceIfEnabled(P999, sanitizedName, group, snapshot.get999thPercentile(), ""); } catch (GangliaException e) { - LOGGER.warn("Unable to report histogram {}", sanitizedName, e); + LOGGER.warn("Unable to report histogram {}", e, + MDC.of(LogKeys.METRIC_NAME$.MODULE$, sanitizedName)); } } @@ -343,7 +348,8 @@ private void reportCounter(String name, Counter counter) { try { announce(prefix(sanitizedName, COUNT.getCode()), group, Long.toString(counter.getCount()), GMetricType.DOUBLE, ""); } catch (GangliaException e) { - LOGGER.warn("Unable to report counter {}", name, e); + LOGGER.warn("Unable to report counter {}", e, + MDC.of(LogKeys.METRIC_NAME$.MODULE$, name)); } } @@ -356,7 +362,8 @@ private void reportGauge(String name, Gauge gauge) { try { announce(name(prefix, sanitizedName), group, value, type, ""); } catch (GangliaException e) { - LOGGER.warn("Unable to report gauge {}", name, e); + LOGGER.warn("Unable to report gauge {}", e, + MDC.of(LogKeys.METRIC_NAME$.MODULE$, name)); } } diff --git a/core/benchmarks/CoalescedRDDBenchmark-jdk21-results.txt b/core/benchmarks/CoalescedRDDBenchmark-jdk21-results.txt index e24325c07ff7c..2c1c69ac42db7 100644 --- a/core/benchmarks/CoalescedRDDBenchmark-jdk21-results.txt +++ b/core/benchmarks/CoalescedRDDBenchmark-jdk21-results.txt @@ -2,39 +2,39 @@ Coalesced RDD , large scale ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Coalesced RDD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Coalesce Num Partitions: 100 Num Hosts: 1 253 256 4 0.4 2529.6 1.0X -Coalesce Num Partitions: 100 Num Hosts: 5 117 121 4 0.9 1166.9 2.2X -Coalesce Num Partitions: 100 Num Hosts: 10 110 111 2 0.9 1097.0 2.3X -Coalesce Num Partitions: 100 Num Hosts: 20 92 96 4 1.1 915.7 2.8X -Coalesce Num Partitions: 100 Num Hosts: 40 89 91 3 1.1 889.1 2.8X -Coalesce Num Partitions: 100 Num Hosts: 80 87 89 2 1.1 869.9 2.9X -Coalesce Num Partitions: 500 Num Hosts: 1 871 880 11 0.1 8714.5 0.3X -Coalesce Num Partitions: 500 Num Hosts: 5 255 260 7 0.4 2552.2 1.0X -Coalesce Num Partitions: 500 Num Hosts: 10 178 179 1 0.6 1780.8 1.4X -Coalesce Num Partitions: 500 Num Hosts: 20 146 148 3 0.7 
1460.5 1.7X -Coalesce Num Partitions: 500 Num Hosts: 40 114 117 3 0.9 1135.5 2.2X -Coalesce Num Partitions: 500 Num Hosts: 80 106 108 2 0.9 1058.1 2.4X -Coalesce Num Partitions: 1000 Num Hosts: 1 1651 1672 31 0.1 16508.4 0.2X -Coalesce Num Partitions: 1000 Num Hosts: 5 420 425 7 0.2 4198.6 0.6X -Coalesce Num Partitions: 1000 Num Hosts: 10 256 256 0 0.4 2558.1 1.0X -Coalesce Num Partitions: 1000 Num Hosts: 20 188 189 1 0.5 1877.5 1.3X -Coalesce Num Partitions: 1000 Num Hosts: 40 137 139 1 0.7 1369.9 1.8X -Coalesce Num Partitions: 1000 Num Hosts: 80 118 124 9 0.8 1182.9 2.1X -Coalesce Num Partitions: 5000 Num Hosts: 1 7631 7716 127 0.0 76309.6 0.0X -Coalesce Num Partitions: 5000 Num Hosts: 5 1854 1867 12 0.1 18541.2 0.1X -Coalesce Num Partitions: 5000 Num Hosts: 10 993 999 5 0.1 9928.0 0.3X -Coalesce Num Partitions: 5000 Num Hosts: 20 546 549 3 0.2 5457.1 0.5X -Coalesce Num Partitions: 5000 Num Hosts: 40 336 337 2 0.3 3360.1 0.8X -Coalesce Num Partitions: 5000 Num Hosts: 80 217 221 4 0.5 2171.0 1.2X -Coalesce Num Partitions: 10000 Num Hosts: 1 14258 14329 65 0.0 142581.8 0.0X -Coalesce Num Partitions: 10000 Num Hosts: 5 3579 3594 14 0.0 35793.0 0.1X -Coalesce Num Partitions: 10000 Num Hosts: 10 1813 1824 14 0.1 18134.3 0.1X -Coalesce Num Partitions: 10000 Num Hosts: 20 965 967 4 0.1 9647.8 0.3X -Coalesce Num Partitions: 10000 Num Hosts: 40 535 540 5 0.2 5348.5 0.5X -Coalesce Num Partitions: 10000 Num Hosts: 80 314 315 1 0.3 3142.6 0.8X +Coalesce Num Partitions: 100 Num Hosts: 1 269 278 10 0.4 2693.1 1.0X +Coalesce Num Partitions: 100 Num Hosts: 5 135 140 5 0.7 1345.0 2.0X +Coalesce Num Partitions: 100 Num Hosts: 10 109 115 6 0.9 1091.6 2.5X +Coalesce Num Partitions: 100 Num Hosts: 20 100 101 1 1.0 999.4 2.7X +Coalesce Num Partitions: 100 Num Hosts: 40 96 98 3 1.0 961.5 2.8X +Coalesce Num Partitions: 100 Num Hosts: 80 93 100 9 1.1 933.4 2.9X +Coalesce Num Partitions: 500 Num Hosts: 1 875 902 28 0.1 8754.7 0.3X +Coalesce Num Partitions: 500 Num Hosts: 5 262 265 3 0.4 2619.9 1.0X +Coalesce Num Partitions: 500 Num Hosts: 10 179 182 4 0.6 1792.2 1.5X +Coalesce Num Partitions: 500 Num Hosts: 20 136 139 6 0.7 1357.2 2.0X +Coalesce Num Partitions: 500 Num Hosts: 40 115 116 1 0.9 1145.7 2.4X +Coalesce Num Partitions: 500 Num Hosts: 80 105 110 7 1.0 1047.2 2.6X +Coalesce Num Partitions: 1000 Num Hosts: 1 1655 1656 2 0.1 16546.0 0.2X +Coalesce Num Partitions: 1000 Num Hosts: 5 425 428 4 0.2 4251.4 0.6X +Coalesce Num Partitions: 1000 Num Hosts: 10 263 267 3 0.4 2634.4 1.0X +Coalesce Num Partitions: 1000 Num Hosts: 20 182 188 9 0.5 1822.5 1.5X +Coalesce Num Partitions: 1000 Num Hosts: 40 142 143 1 0.7 1424.5 1.9X +Coalesce Num Partitions: 1000 Num Hosts: 80 123 131 8 0.8 1226.2 2.2X +Coalesce Num Partitions: 5000 Num Hosts: 1 7484 7491 10 0.0 74836.3 0.0X +Coalesce Num Partitions: 5000 Num Hosts: 5 1873 1880 11 0.1 18725.7 0.1X +Coalesce Num Partitions: 5000 Num Hosts: 10 995 1005 11 0.1 9950.1 0.3X +Coalesce Num Partitions: 5000 Num Hosts: 20 557 561 6 0.2 5570.9 0.5X +Coalesce Num Partitions: 5000 Num Hosts: 40 337 341 4 0.3 3369.7 0.8X +Coalesce Num Partitions: 5000 Num Hosts: 80 222 223 1 0.5 2222.0 1.2X +Coalesce Num Partitions: 10000 Num Hosts: 1 14102 14133 45 0.0 141020.7 0.0X +Coalesce Num Partitions: 10000 Num Hosts: 5 3681 3702 31 0.0 36811.9 0.1X +Coalesce Num Partitions: 10000 Num Hosts: 10 1871 1884 22 0.1 18706.8 0.1X +Coalesce Num Partitions: 10000 Num Hosts: 20 998 1004 5 0.1 9980.9 0.3X +Coalesce Num Partitions: 10000 Num Hosts: 40 570 575 5 0.2 5696.7 0.5X +Coalesce Num Partitions: 
10000 Num Hosts: 80 345 346 2 0.3 3447.7 0.8X diff --git a/core/benchmarks/CoalescedRDDBenchmark-results.txt b/core/benchmarks/CoalescedRDDBenchmark-results.txt index 2f1280b3817df..aba428c1729e7 100644 --- a/core/benchmarks/CoalescedRDDBenchmark-results.txt +++ b/core/benchmarks/CoalescedRDDBenchmark-results.txt @@ -2,39 +2,39 @@ Coalesced RDD , large scale ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Coalesced RDD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Coalesce Num Partitions: 100 Num Hosts: 1 135 137 2 0.7 1352.2 1.0X -Coalesce Num Partitions: 100 Num Hosts: 5 108 109 0 0.9 1083.3 1.2X -Coalesce Num Partitions: 100 Num Hosts: 10 92 101 9 1.1 923.1 1.5X -Coalesce Num Partitions: 100 Num Hosts: 20 92 93 1 1.1 920.9 1.5X -Coalesce Num Partitions: 100 Num Hosts: 40 90 99 12 1.1 903.6 1.5X -Coalesce Num Partitions: 100 Num Hosts: 80 89 98 9 1.1 888.8 1.5X -Coalesce Num Partitions: 500 Num Hosts: 1 313 319 5 0.3 3127.5 0.4X -Coalesce Num Partitions: 500 Num Hosts: 5 135 137 2 0.7 1346.4 1.0X -Coalesce Num Partitions: 500 Num Hosts: 10 113 120 9 0.9 1130.4 1.2X -Coalesce Num Partitions: 500 Num Hosts: 20 102 109 11 1.0 1016.9 1.3X -Coalesce Num Partitions: 500 Num Hosts: 40 95 97 2 1.1 948.9 1.4X -Coalesce Num Partitions: 500 Num Hosts: 80 94 95 1 1.1 938.5 1.4X -Coalesce Num Partitions: 1000 Num Hosts: 1 528 529 1 0.2 5279.9 0.3X -Coalesce Num Partitions: 1000 Num Hosts: 5 193 195 3 0.5 1925.2 0.7X -Coalesce Num Partitions: 1000 Num Hosts: 10 134 135 0 0.7 1343.9 1.0X -Coalesce Num Partitions: 1000 Num Hosts: 20 112 115 4 0.9 1122.3 1.2X -Coalesce Num Partitions: 1000 Num Hosts: 40 104 105 2 1.0 1039.0 1.3X -Coalesce Num Partitions: 1000 Num Hosts: 80 99 100 2 1.0 986.8 1.4X -Coalesce Num Partitions: 5000 Num Hosts: 1 2332 2376 49 0.0 23322.6 0.1X -Coalesce Num Partitions: 5000 Num Hosts: 5 727 733 8 0.1 7269.1 0.2X -Coalesce Num Partitions: 5000 Num Hosts: 10 408 410 4 0.2 4082.9 0.3X -Coalesce Num Partitions: 5000 Num Hosts: 20 255 259 4 0.4 2548.7 0.5X -Coalesce Num Partitions: 5000 Num Hosts: 40 176 180 4 0.6 1764.4 0.8X -Coalesce Num Partitions: 5000 Num Hosts: 80 135 143 10 0.7 1351.9 1.0X -Coalesce Num Partitions: 10000 Num Hosts: 1 4249 4278 26 0.0 42489.3 0.0X -Coalesce Num Partitions: 10000 Num Hosts: 5 1512 1517 4 0.1 15121.1 0.1X -Coalesce Num Partitions: 10000 Num Hosts: 10 764 769 10 0.1 7636.8 0.2X -Coalesce Num Partitions: 10000 Num Hosts: 20 435 438 2 0.2 4352.9 0.3X -Coalesce Num Partitions: 10000 Num Hosts: 40 268 272 4 0.4 2678.4 0.5X -Coalesce Num Partitions: 10000 Num Hosts: 80 186 190 5 0.5 1860.7 0.7X +Coalesce Num Partitions: 100 Num Hosts: 1 134 143 8 0.7 1343.4 1.0X +Coalesce Num Partitions: 100 Num Hosts: 5 96 97 2 1.0 962.0 1.4X +Coalesce Num Partitions: 100 Num Hosts: 10 90 92 3 1.1 898.7 1.5X +Coalesce Num Partitions: 100 Num Hosts: 20 90 91 1 1.1 898.9 1.5X +Coalesce Num Partitions: 100 Num Hosts: 40 96 100 4 1.0 957.0 1.4X +Coalesce Num Partitions: 100 Num Hosts: 80 87 93 9 1.2 866.3 1.6X +Coalesce Num Partitions: 500 Num Hosts: 1 312 314 3 0.3 3115.2 0.4X +Coalesce Num Partitions: 500 Num Hosts: 5 135 136 1 0.7 1352.9 1.0X +Coalesce Num Partitions: 500 Num Hosts: 10 110 111 1 0.9 1103.7 1.2X 
+Coalesce Num Partitions: 500 Num Hosts: 20 103 109 9 1.0 1034.3 1.3X +Coalesce Num Partitions: 500 Num Hosts: 40 95 97 2 1.1 948.3 1.4X +Coalesce Num Partitions: 500 Num Hosts: 80 90 93 3 1.1 899.9 1.5X +Coalesce Num Partitions: 1000 Num Hosts: 1 527 535 13 0.2 5267.7 0.3X +Coalesce Num Partitions: 1000 Num Hosts: 5 179 180 1 0.6 1788.4 0.8X +Coalesce Num Partitions: 1000 Num Hosts: 10 132 138 8 0.8 1321.6 1.0X +Coalesce Num Partitions: 1000 Num Hosts: 20 116 122 5 0.9 1157.1 1.2X +Coalesce Num Partitions: 1000 Num Hosts: 40 99 104 7 1.0 988.1 1.4X +Coalesce Num Partitions: 1000 Num Hosts: 80 95 97 2 1.1 948.9 1.4X +Coalesce Num Partitions: 5000 Num Hosts: 1 2326 2336 10 0.0 23263.2 0.1X +Coalesce Num Partitions: 5000 Num Hosts: 5 735 743 7 0.1 7351.3 0.2X +Coalesce Num Partitions: 5000 Num Hosts: 10 400 405 6 0.2 4002.9 0.3X +Coalesce Num Partitions: 5000 Num Hosts: 20 263 266 2 0.4 2631.4 0.5X +Coalesce Num Partitions: 5000 Num Hosts: 40 175 180 7 0.6 1746.4 0.8X +Coalesce Num Partitions: 5000 Num Hosts: 80 139 141 1 0.7 1389.1 1.0X +Coalesce Num Partitions: 10000 Num Hosts: 1 4250 4263 21 0.0 42497.5 0.0X +Coalesce Num Partitions: 10000 Num Hosts: 5 1508 1512 6 0.1 15082.5 0.1X +Coalesce Num Partitions: 10000 Num Hosts: 10 765 770 7 0.1 7645.5 0.2X +Coalesce Num Partitions: 10000 Num Hosts: 20 420 424 4 0.2 4198.5 0.3X +Coalesce Num Partitions: 10000 Num Hosts: 40 277 279 3 0.4 2768.6 0.5X +Coalesce Num Partitions: 10000 Num Hosts: 80 187 189 2 0.5 1868.9 0.7X diff --git a/core/benchmarks/KryoBenchmark-jdk21-results.txt b/core/benchmarks/KryoBenchmark-jdk21-results.txt index 9910ed6e6e162..8488d15602228 100644 --- a/core/benchmarks/KryoBenchmark-jdk21-results.txt +++ b/core/benchmarks/KryoBenchmark-jdk21-results.txt @@ -2,27 +2,27 @@ Benchmark Kryo Unsafe vs safe Serialization ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark Kryo Unsafe vs safe Serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -basicTypes: Int with unsafe:true 164 166 1 6.1 164.4 1.0X -basicTypes: Long with unsafe:true 180 181 0 5.5 180.3 0.9X -basicTypes: Float with unsafe:true 185 188 2 5.4 184.7 0.9X -basicTypes: Double with unsafe:true 187 188 1 5.3 187.1 0.9X -Array: Int with unsafe:true 1 1 0 761.6 1.3 125.2X -Array: Long with unsafe:true 2 2 0 481.6 2.1 79.2X -Array: Float with unsafe:true 1 1 0 749.8 1.3 123.3X -Array: Double with unsafe:true 2 2 0 483.7 2.1 79.5X -Map of string->Double with unsafe:true 27 27 1 37.2 26.9 6.1X -basicTypes: Int with unsafe:false 212 214 2 4.7 212.4 0.8X -basicTypes: Long with unsafe:false 232 233 1 4.3 232.1 0.7X -basicTypes: Float with unsafe:false 226 227 1 4.4 226.1 0.7X -basicTypes: Double with unsafe:false 222 223 1 4.5 221.6 0.7X -Array: Int with unsafe:false 13 13 0 79.8 12.5 13.1X -Array: Long with unsafe:false 22 23 1 45.7 21.9 7.5X -Array: Float with unsafe:false 6 6 0 172.2 5.8 28.3X -Array: Double with unsafe:false 15 15 0 65.2 15.3 10.7X -Map of string->Double with unsafe:false 29 31 1 34.7 28.9 5.7X +basicTypes: Int with unsafe:true 173 174 1 5.8 172.9 1.0X +basicTypes: Long with unsafe:true 178 179 1 5.6 177.7 1.0X +basicTypes: Float with unsafe:true 180 182 1 5.6 179.6 1.0X +basicTypes: Double 
with unsafe:true 183 191 15 5.5 182.8 0.9X +Array: Int with unsafe:true 1 1 0 753.9 1.3 130.3X +Array: Long with unsafe:true 2 2 0 486.0 2.1 84.0X +Array: Float with unsafe:true 1 1 0 759.7 1.3 131.3X +Array: Double with unsafe:true 2 2 0 473.8 2.1 81.9X +Map of string->Double with unsafe:true 27 27 1 37.5 26.7 6.5X +basicTypes: Int with unsafe:false 204 205 1 4.9 203.7 0.8X +basicTypes: Long with unsafe:false 229 230 1 4.4 229.1 0.8X +basicTypes: Float with unsafe:false 208 209 1 4.8 208.1 0.8X +basicTypes: Double with unsafe:false 204 205 2 4.9 203.7 0.8X +Array: Int with unsafe:false 13 13 0 79.4 12.6 13.7X +Array: Long with unsafe:false 21 21 0 47.8 20.9 8.3X +Array: Float with unsafe:false 6 6 0 168.3 5.9 29.1X +Array: Double with unsafe:false 11 12 0 87.2 11.5 15.1X +Map of string->Double with unsafe:false 30 31 0 32.9 30.4 5.7X diff --git a/core/benchmarks/KryoBenchmark-results.txt b/core/benchmarks/KryoBenchmark-results.txt index 7391c558b1942..5c35cc6affc1f 100644 --- a/core/benchmarks/KryoBenchmark-results.txt +++ b/core/benchmarks/KryoBenchmark-results.txt @@ -2,27 +2,27 @@ Benchmark Kryo Unsafe vs safe Serialization ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark Kryo Unsafe vs safe Serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -basicTypes: Int with unsafe:true 172 173 0 5.8 171.7 1.0X -basicTypes: Long with unsafe:true 186 195 14 5.4 185.8 0.9X -basicTypes: Float with unsafe:true 194 201 11 5.2 193.5 0.9X -basicTypes: Double with unsafe:true 192 194 1 5.2 191.8 0.9X -Array: Int with unsafe:true 1 1 0 725.9 1.4 124.6X -Array: Long with unsafe:true 2 2 0 484.4 2.1 83.2X -Array: Float with unsafe:true 1 1 0 728.4 1.4 125.1X -Array: Double with unsafe:true 2 2 0 483.6 2.1 83.0X -Map of string->Double with unsafe:true 26 26 0 39.2 25.5 6.7X -basicTypes: Int with unsafe:false 206 207 1 4.9 205.7 0.8X -basicTypes: Long with unsafe:false 228 229 1 4.4 227.6 0.8X -basicTypes: Float with unsafe:false 225 226 0 4.4 225.1 0.8X -basicTypes: Double with unsafe:false 217 218 1 4.6 217.2 0.8X -Array: Int with unsafe:false 14 14 0 72.2 13.8 12.4X -Array: Long with unsafe:false 20 21 1 49.5 20.2 8.5X -Array: Float with unsafe:false 6 6 0 169.3 5.9 29.1X -Array: Double with unsafe:false 9 9 0 107.7 9.3 18.5X -Map of string->Double with unsafe:false 27 28 0 36.7 27.3 6.3X +basicTypes: Int with unsafe:true 171 172 1 5.9 170.5 1.0X +basicTypes: Long with unsafe:true 185 187 2 5.4 185.5 0.9X +basicTypes: Float with unsafe:true 190 192 1 5.3 190.0 0.9X +basicTypes: Double with unsafe:true 199 200 1 5.0 198.8 0.9X +Array: Int with unsafe:true 1 1 0 751.1 1.3 128.1X +Array: Long with unsafe:true 2 2 0 483.9 2.1 82.5X +Array: Float with unsafe:true 1 1 0 734.7 1.4 125.3X +Array: Double with unsafe:true 2 2 0 478.2 2.1 81.6X +Map of string->Double with unsafe:true 26 26 0 38.8 25.8 6.6X +basicTypes: Int with unsafe:false 207 209 1 4.8 207.5 0.8X +basicTypes: Long with unsafe:false 239 241 2 4.2 239.1 0.7X +basicTypes: Float with unsafe:false 213 213 1 4.7 212.6 0.8X +basicTypes: Double with unsafe:false 224 226 1 4.5 224.2 0.8X +Array: Int with unsafe:false 14 14 0 73.5 13.6 12.5X +Array: Long with unsafe:false 21 21 0 47.8 
20.9 8.1X +Array: Float with unsafe:false 6 6 0 169.9 5.9 29.0X +Array: Double with unsafe:false 10 10 0 101.4 9.9 17.3X +Map of string->Double with unsafe:false 28 28 0 35.9 27.9 6.1X diff --git a/core/benchmarks/KryoIteratorBenchmark-jdk21-results.txt b/core/benchmarks/KryoIteratorBenchmark-jdk21-results.txt index 5ca8793bc3772..da82b05fd59e6 100644 --- a/core/benchmarks/KryoIteratorBenchmark-jdk21-results.txt +++ b/core/benchmarks/KryoIteratorBenchmark-jdk21-results.txt @@ -2,27 +2,27 @@ Benchmark of kryo asIterator on deserialization stream ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark of kryo asIterator on deserialization stream: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------------- -Colletion of int with 1 elements, useIterator: true 6 6 0 1.7 597.3 1.0X -Colletion of int with 10 elements, useIterator: true 13 14 0 0.7 1347.0 0.4X -Colletion of int with 100 elements, useIterator: true 85 86 1 0.1 8454.2 0.1X -Colletion of string with 1 elements, useIterator: true 8 8 1 1.3 754.5 0.8X -Colletion of string with 10 elements, useIterator: true 22 22 0 0.5 2176.6 0.3X -Colletion of string with 100 elements, useIterator: true 161 161 1 0.1 16078.6 0.0X -Colletion of Array[int] with 1 elements, useIterator: true 7 8 0 1.4 731.1 0.8X -Colletion of Array[int] with 10 elements, useIterator: true 20 20 1 0.5 1970.7 0.3X -Colletion of Array[int] with 100 elements, useIterator: true 148 150 1 0.1 14839.8 0.0X -Colletion of int with 1 elements, useIterator: false 6 7 1 1.6 608.2 1.0X -Colletion of int with 10 elements, useIterator: false 13 14 0 0.7 1337.8 0.4X -Colletion of int with 100 elements, useIterator: false 83 84 0 0.1 8349.0 0.1X -Colletion of string with 1 elements, useIterator: false 7 8 0 1.4 725.7 0.8X -Colletion of string with 10 elements, useIterator: false 21 22 0 0.5 2149.3 0.3X -Colletion of string with 100 elements, useIterator: false 160 161 1 0.1 16031.1 0.0X -Colletion of Array[int] with 1 elements, useIterator: false 7 7 0 1.4 711.9 0.8X -Colletion of Array[int] with 10 elements, useIterator: false 19 19 1 0.5 1891.2 0.3X -Colletion of Array[int] with 100 elements, useIterator: false 141 142 1 0.1 14076.4 0.0X +Colletion of int with 1 elements, useIterator: true 6 6 0 1.6 618.2 1.0X +Colletion of int with 10 elements, useIterator: true 14 15 0 0.7 1444.0 0.4X +Colletion of int with 100 elements, useIterator: true 92 92 1 0.1 9168.2 0.1X +Colletion of string with 1 elements, useIterator: true 8 8 0 1.3 777.9 0.8X +Colletion of string with 10 elements, useIterator: true 22 23 0 0.5 2221.0 0.3X +Colletion of string with 100 elements, useIterator: true 166 167 1 0.1 16617.2 0.0X +Colletion of Array[int] with 1 elements, useIterator: true 7 8 0 1.4 730.2 0.8X +Colletion of Array[int] with 10 elements, useIterator: true 20 20 0 0.5 1967.8 0.3X +Colletion of Array[int] with 100 elements, useIterator: true 145 146 1 0.1 14469.9 0.0X +Colletion of int with 1 elements, useIterator: false 7 7 0 1.5 653.6 0.9X +Colletion of int with 10 elements, useIterator: false 15 16 0 0.7 1528.7 0.4X +Colletion of int with 100 elements, useIterator: false 98 98 1 0.1 9755.3 0.1X +Colletion of string with 1 elements, 
useIterator: false 7 7 0 1.4 718.5 0.9X +Colletion of string with 10 elements, useIterator: false 21 22 2 0.5 2093.0 0.3X +Colletion of string with 100 elements, useIterator: false 157 157 1 0.1 15666.5 0.0X +Colletion of Array[int] with 1 elements, useIterator: false 7 7 0 1.4 698.1 0.9X +Colletion of Array[int] with 10 elements, useIterator: false 18 19 0 0.5 1831.7 0.3X +Colletion of Array[int] with 100 elements, useIterator: false 134 135 0 0.1 13430.8 0.0X diff --git a/core/benchmarks/KryoIteratorBenchmark-results.txt b/core/benchmarks/KryoIteratorBenchmark-results.txt index fb6073c50f767..e2cbfb871e7ba 100644 --- a/core/benchmarks/KryoIteratorBenchmark-results.txt +++ b/core/benchmarks/KryoIteratorBenchmark-results.txt @@ -2,27 +2,27 @@ Benchmark of kryo asIterator on deserialization stream ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark of kryo asIterator on deserialization stream: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------------- -Colletion of int with 1 elements, useIterator: true 6 6 0 1.6 620.2 1.0X -Colletion of int with 10 elements, useIterator: true 13 13 0 0.8 1312.1 0.5X -Colletion of int with 100 elements, useIterator: true 78 79 0 0.1 7833.8 0.1X -Colletion of string with 1 elements, useIterator: true 7 8 0 1.3 746.6 0.8X -Colletion of string with 10 elements, useIterator: true 22 23 0 0.4 2235.0 0.3X -Colletion of string with 100 elements, useIterator: true 166 168 3 0.1 16582.0 0.0X -Colletion of Array[int] with 1 elements, useIterator: true 7 8 0 1.4 723.4 0.9X -Colletion of Array[int] with 10 elements, useIterator: true 19 19 0 0.5 1882.2 0.3X -Colletion of Array[int] with 100 elements, useIterator: true 143 144 1 0.1 14251.0 0.0X -Colletion of int with 1 elements, useIterator: false 6 6 0 1.6 611.0 1.0X -Colletion of int with 10 elements, useIterator: false 13 14 0 0.7 1336.8 0.5X -Colletion of int with 100 elements, useIterator: false 83 84 1 0.1 8336.2 0.1X -Colletion of string with 1 elements, useIterator: false 7 8 0 1.4 730.6 0.8X -Colletion of string with 10 elements, useIterator: false 21 21 0 0.5 2094.0 0.3X -Colletion of string with 100 elements, useIterator: false 173 179 18 0.1 17327.1 0.0X -Colletion of Array[int] with 1 elements, useIterator: false 7 7 0 1.4 691.7 0.9X -Colletion of Array[int] with 10 elements, useIterator: false 19 19 0 0.5 1858.8 0.3X -Colletion of Array[int] with 100 elements, useIterator: false 138 139 1 0.1 13756.6 0.0X +Colletion of int with 1 elements, useIterator: true 6 6 1 1.6 611.4 1.0X +Colletion of int with 10 elements, useIterator: true 14 15 0 0.7 1443.2 0.4X +Colletion of int with 100 elements, useIterator: true 93 94 0 0.1 9331.3 0.1X +Colletion of string with 1 elements, useIterator: true 8 8 0 1.3 753.6 0.8X +Colletion of string with 10 elements, useIterator: true 22 22 0 0.5 2150.8 0.3X +Colletion of string with 100 elements, useIterator: true 163 164 1 0.1 16325.7 0.0X +Colletion of Array[int] with 1 elements, useIterator: true 7 8 0 1.3 741.1 0.8X +Colletion of Array[int] with 10 elements, useIterator: true 20 20 0 0.5 1989.2 0.3X +Colletion of Array[int] with 100 elements, useIterator: true 147 147 1 0.1 14659.2 0.0X +Colletion of int with 
1 elements, useIterator: false 6 6 0 1.7 597.3 1.0X +Colletion of int with 10 elements, useIterator: false 13 14 0 0.8 1323.4 0.5X +Colletion of int with 100 elements, useIterator: false 83 84 3 0.1 8272.9 0.1X +Colletion of string with 1 elements, useIterator: false 7 7 0 1.4 714.5 0.9X +Colletion of string with 10 elements, useIterator: false 21 22 1 0.5 2146.0 0.3X +Colletion of string with 100 elements, useIterator: false 157 157 0 0.1 15690.5 0.0X +Colletion of Array[int] with 1 elements, useIterator: false 7 7 0 1.5 668.5 0.9X +Colletion of Array[int] with 10 elements, useIterator: false 18 18 0 0.6 1802.1 0.3X +Colletion of Array[int] with 100 elements, useIterator: false 134 135 1 0.1 13393.9 0.0X diff --git a/core/benchmarks/KryoSerializerBenchmark-jdk21-results.txt b/core/benchmarks/KryoSerializerBenchmark-jdk21-results.txt index 9434b9c3484aa..bb234fd6a13b3 100644 --- a/core/benchmarks/KryoSerializerBenchmark-jdk21-results.txt +++ b/core/benchmarks/KryoSerializerBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ Benchmark KryoPool vs old"pool of 1" implementation ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark KryoPool vs old"pool of 1" implementation: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -KryoPool:true 3927 5573 1781 0.0 7853845.5 1.0X -KryoPool:false 6170 7745 1218 0.0 12340812.7 0.6X +KryoPool:true 3937 5805 1793 0.0 7874251.0 1.0X +KryoPool:false 6392 8372 1513 0.0 12783860.9 0.6X diff --git a/core/benchmarks/KryoSerializerBenchmark-results.txt b/core/benchmarks/KryoSerializerBenchmark-results.txt index a9f20882f7929..79b87a83b34fa 100644 --- a/core/benchmarks/KryoSerializerBenchmark-results.txt +++ b/core/benchmarks/KryoSerializerBenchmark-results.txt @@ -2,11 +2,11 @@ Benchmark KryoPool vs old"pool of 1" implementation ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark KryoPool vs old"pool of 1" implementation: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -KryoPool:true 3881 5584 1898 0.0 7761840.3 1.0X -KryoPool:false 5852 7519 1549 0.0 11704736.8 0.7X +KryoPool:true 4180 6016 1956 0.0 8359077.2 1.0X +KryoPool:false 6204 8232 1447 0.0 12408361.0 0.7X diff --git a/core/benchmarks/LZFBenchmark-jdk21-results.txt b/core/benchmarks/LZFBenchmark-jdk21-results.txt new file mode 100644 index 0000000000000..e1566f201a1f6 --- /dev/null +++ b/core/benchmarks/LZFBenchmark-jdk21-results.txt @@ -0,0 +1,19 @@ +================================================================================================ +Benchmark LZFCompressionCodec +================================================================================================ + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Compress small objects: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+-------------------------------------------------------------------------------------------------------------------------------- +Compression 256000000 int values in parallel 598 600 2 428.2 2.3 1.0X +Compression 256000000 int values single-threaded 568 570 2 451.0 2.2 1.1X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Compress large objects: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +Compression 1024 array values in 1 threads 39 45 5 0.0 38475.4 1.0X +Compression 1024 array values single-threaded 32 33 1 0.0 31154.5 1.2X + + diff --git a/core/benchmarks/LZFBenchmark-results.txt b/core/benchmarks/LZFBenchmark-results.txt new file mode 100644 index 0000000000000..facc67f9cf4a8 --- /dev/null +++ b/core/benchmarks/LZFBenchmark-results.txt @@ -0,0 +1,19 @@ +================================================================================================ +Benchmark LZFCompressionCodec +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Compress small objects: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +Compression 256000000 int values in parallel 602 612 6 425.1 2.4 1.0X +Compression 256000000 int values single-threaded 610 617 5 419.8 2.4 1.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Compress large objects: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +Compression 1024 array values in 1 threads 35 43 6 0.0 33806.8 1.0X +Compression 1024 array values single-threaded 32 32 0 0.0 30990.4 1.1X + + diff --git a/core/benchmarks/MapStatusesConvertBenchmark-jdk21-results.txt b/core/benchmarks/MapStatusesConvertBenchmark-jdk21-results.txt index 502d10c1c58ca..a15442496b244 100644 --- a/core/benchmarks/MapStatusesConvertBenchmark-jdk21-results.txt +++ b/core/benchmarks/MapStatusesConvertBenchmark-jdk21-results.txt @@ -2,12 +2,12 @@ MapStatuses Convert Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor MapStatuses Convert: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Num Maps: 50000 Fetch partitions:500 708 715 8 0.0 707870326.0 1.0X -Num Maps: 50000 Fetch partitions:1000 1610 1623 12 0.0 1610312472.0 0.4X -Num Maps: 50000 Fetch partitions:1500 2443 2461 23 0.0 2442675908.0 0.3X +Num Maps: 50000 Fetch partitions:500 674 685 12 0.0 673772738.0 1.0X +Num Maps: 50000 Fetch partitions:1000 1579 1590 12 0.0 1579383970.0 0.4X +Num Maps: 50000 Fetch partitions:1500 2435 2472 37 0.0 2434530380.0 0.3X diff --git a/core/benchmarks/MapStatusesConvertBenchmark-results.txt b/core/benchmarks/MapStatusesConvertBenchmark-results.txt index 
9fe4175bb5d9e..b9f36af4a6531 100644 --- a/core/benchmarks/MapStatusesConvertBenchmark-results.txt +++ b/core/benchmarks/MapStatusesConvertBenchmark-results.txt @@ -2,12 +2,12 @@ MapStatuses Convert Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor MapStatuses Convert: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Num Maps: 50000 Fetch partitions:500 775 778 5 0.0 774980756.0 1.0X -Num Maps: 50000 Fetch partitions:1000 1765 1765 1 0.0 1765011999.0 0.4X -Num Maps: 50000 Fetch partitions:1500 2671 2682 15 0.0 2671372452.0 0.3X +Num Maps: 50000 Fetch partitions:500 703 716 11 0.0 703103575.0 1.0X +Num Maps: 50000 Fetch partitions:1000 1707 1723 14 0.0 1707060398.0 0.4X +Num Maps: 50000 Fetch partitions:1500 2626 2638 14 0.0 2625981097.0 0.3X diff --git a/core/benchmarks/MapStatusesSerDeserBenchmark-jdk21-results.txt b/core/benchmarks/MapStatusesSerDeserBenchmark-jdk21-results.txt index 8c4230316d00b..f4846ce8b0fb3 100644 --- a/core/benchmarks/MapStatusesSerDeserBenchmark-jdk21-results.txt +++ b/core/benchmarks/MapStatusesSerDeserBenchmark-jdk21-results.txt @@ -1,64 +1,64 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 10 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 83 86 5 2.4 417.0 1.0X -Deserialization 139 147 13 1.4 694.0 0.6X +Serialization 84 88 7 2.4 419.8 1.0X +Deserialization 140 148 8 1.4 700.9 0.6X -Compressed Serialized MapStatus sizes: 428.0 B +Compressed Serialized MapStatus sizes: 427.0 B Compressed Serialized Broadcast MapStatus sizes: 2.5 MiB -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 10 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Serialization 77 79 3 2.6 384.9 1.0X -Deserialization 138 146 13 1.4 690.0 0.6X +Serialization 80 81 2 2.5 400.1 1.0X +Deserialization 139 146 6 1.4 694.8 0.6X Compressed Serialized MapStatus sizes: 2.5 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 100 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Serialization 157 162 8 1.3 782.9 1.0X -Deserialization 155 160 13 1.3 774.7 1.0X +Serialization 153 158 11 1.3 767.3 1.0X +Deserialization 159 165 5 1.3 794.1 1.0X Compressed Serialized MapStatus sizes: 442.0 B Compressed Serialized Broadcast MapStatus sizes: 13.6 MiB -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 100 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Serialization 143 144 1 1.4 714.5 1.0X -Deserialization 154 158 9 1.3 772.0 0.9X +Serialization 140 143 4 1.4 701.0 1.0X +Deserialization 154 159 7 1.3 771.3 0.9X Compressed Serialized MapStatus sizes: 13.6 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 1000 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Serialization 713 736 38 0.3 3564.0 1.0X -Deserialization 314 337 16 0.6 1571.5 2.3X +Serialization 707 726 21 0.3 3533.4 1.0X +Deserialization 316 342 17 0.6 1582.2 2.2X -Compressed Serialized MapStatus sizes: 572.0 B +Compressed Serialized MapStatus sizes: 570.0 B Compressed Serialized Broadcast MapStatus sizes: 122.3 MiB -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 1000 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Serialization 570 575 6 0.4 2850.0 1.0X -Deserialization 322 345 18 0.6 1611.9 1.8X +Serialization 566 569 5 0.4 2828.3 1.0X +Deserialization 324 343 19 0.6 1617.8 1.7X Compressed Serialized MapStatus sizes: 122.3 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B diff --git a/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt b/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt index 3f55d4c405a35..3b0b9b756d32b 100644 --- a/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt +++ b/core/benchmarks/MapStatusesSerDeserBenchmark-results.txt @@ -1,64 +1,64 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 10 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Serialization 85 91 5 2.3 427.2 1.0X -Deserialization 141 142 2 1.4 703.2 0.6X +Serialization 89 95 4 2.2 445.9 1.0X +Deserialization 138 145 6 1.4 689.8 0.6X -Compressed Serialized MapStatus sizes: 428.0 B +Compressed Serialized MapStatus sizes: 427.0 B Compressed Serialized Broadcast MapStatus sizes: 2.5 MiB -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 10 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Serialization 77 78 1 2.6 386.1 1.0X -Deserialization 140 145 9 1.4 698.5 0.6X +Serialization 80 83 2 2.5 
399.4 1.0X +Deserialization 134 145 10 1.5 671.0 0.6X Compressed Serialized MapStatus sizes: 2.5 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 100 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Serialization 156 161 5 1.3 777.6 1.0X -Deserialization 157 165 13 1.3 786.6 1.0X +Serialization 159 163 7 1.3 793.4 1.0X +Deserialization 155 163 8 1.3 776.8 1.0X Compressed Serialized MapStatus sizes: 442.0 B Compressed Serialized Broadcast MapStatus sizes: 13.6 MiB -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 100 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Serialization 145 147 3 1.4 725.2 1.0X -Deserialization 156 160 5 1.3 779.4 0.9X +Serialization 144 147 1 1.4 718.8 1.0X +Deserialization 154 159 4 1.3 770.4 0.9X Compressed Serialized MapStatus sizes: 13.6 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 1000 blocks w/ broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Serialization 721 746 27 0.3 3603.9 1.0X -Deserialization 330 348 15 0.6 1651.7 2.2X +Serialization 710 713 4 0.3 3549.7 1.0X +Deserialization 346 355 7 0.6 1730.4 2.1X -Compressed Serialized MapStatus sizes: 571.0 B +Compressed Serialized MapStatus sizes: 569.0 B Compressed Serialized Broadcast MapStatus sizes: 122.3 MiB -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200000 MapOutputs, 1000 blocks w/o broadcast: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Serialization 575 581 8 0.3 2875.6 1.0X -Deserialization 326 341 10 0.6 1630.7 1.8X +Serialization 557 564 5 0.4 2783.1 1.0X +Deserialization 325 337 10 0.6 1626.5 1.7X Compressed Serialized MapStatus sizes: 122.3 MiB Compressed Serialized Broadcast MapStatus sizes: 0.0 B diff --git a/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt b/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt index 2a6bd778fc8a2..8da4b4953cad0 100644 --- a/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt +++ b/core/benchmarks/PersistenceEngineBenchmark-jdk21-results.txt @@ -2,24 +2,17 @@ PersistenceEngineBenchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 1000 Workers: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -ZooKeeperPersistenceEngine with JavaSerializer 5036 5232 229 0.0 5035730.1 1.0X -ZooKeeperPersistenceEngine with KryoSerializer 4038 4053 16 0.0 4038447.8 1.2X -FileSystemPersistenceEngine with JavaSerializer 2902 2906 5 0.0 2902453.3 1.7X -FileSystemPersistenceEngine with JavaSerializer (lz4) 816 829 19 0.0 816173.1 6.2X -FileSystemPersistenceEngine with JavaSerializer (lzf) 755 780 33 0.0 755209.0 6.7X -FileSystemPersistenceEngine with JavaSerializer (snappy) 814 832 16 0.0 813672.5 6.2X -FileSystemPersistenceEngine with JavaSerializer (zstd) 987 1014 45 0.0 986834.7 5.1X -FileSystemPersistenceEngine with KryoSerializer 687 698 14 0.0 687313.5 7.3X -FileSystemPersistenceEngine with KryoSerializer (lz4) 590 599 15 0.0 589867.9 8.5X -FileSystemPersistenceEngine with KryoSerializer (lzf) 915 922 9 0.0 915432.2 5.5X -FileSystemPersistenceEngine with KryoSerializer (snappy) 768 795 37 0.0 768494.4 6.6X -FileSystemPersistenceEngine with KryoSerializer (zstd) 898 950 45 0.0 898118.6 5.6X -RocksDBPersistenceEngine with JavaSerializer 299 299 0 0.0 298800.0 16.9X -RocksDBPersistenceEngine with KryoSerializer 112 113 1 0.0 111779.6 45.1X -BlackHolePersistenceEngine 0 0 0 5.5 180.3 27924.2X +ZooKeeperPersistenceEngine with JavaSerializer 6876 7518 567 0.0 6875910.8 1.0X +FileSystemPersistenceEngine with JavaSerializer 2973 3015 55 0.0 2973365.8 2.3X +FileSystemPersistenceEngine with JavaSerializer (lz4) 813 836 26 0.0 813019.4 8.5X +FileSystemPersistenceEngine with JavaSerializer (lzf) 756 772 21 0.0 755574.7 9.1X +FileSystemPersistenceEngine with JavaSerializer (snappy) 755 788 46 0.0 754897.8 9.1X +FileSystemPersistenceEngine with JavaSerializer (zstd) 930 964 35 0.0 930157.5 7.4X +RocksDBPersistenceEngine with JavaSerializer 302 305 3 0.0 302099.4 22.8X +BlackHolePersistenceEngine 0 0 0 5.9 168.2 40871.1X diff --git a/core/benchmarks/PersistenceEngineBenchmark-results.txt b/core/benchmarks/PersistenceEngineBenchmark-results.txt index da1838608de1c..f927e3c57aa33 100644 --- a/core/benchmarks/PersistenceEngineBenchmark-results.txt +++ b/core/benchmarks/PersistenceEngineBenchmark-results.txt @@ -2,24 +2,17 @@ PersistenceEngineBenchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 1000 Workers: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -ZooKeeperPersistenceEngine with JavaSerializer 5192 5309 116 0.0 5192160.2 1.0X -ZooKeeperPersistenceEngine with KryoSerializer 4056 4059 5 0.0 4055626.8 1.3X -FileSystemPersistenceEngine with JavaSerializer 2926 2934 8 0.0 2926383.4 1.8X -FileSystemPersistenceEngine with JavaSerializer (lz4) 820 827 11 0.0 820359.8 6.3X -FileSystemPersistenceEngine with JavaSerializer (lzf) 772 781 9 0.0 772349.1 6.7X -FileSystemPersistenceEngine with JavaSerializer (snappy) 802 812 10 0.0 801815.8 6.5X -FileSystemPersistenceEngine with JavaSerializer (zstd) 972 994 31 0.0 972042.3 5.3X -FileSystemPersistenceEngine with KryoSerializer 708 726 15 0.0 707927.8 7.3X -FileSystemPersistenceEngine with KryoSerializer (lz4) 584 596 11 0.0 
583999.8 8.9X -FileSystemPersistenceEngine with KryoSerializer (lzf) 880 896 14 0.0 880189.2 5.9X -FileSystemPersistenceEngine with KryoSerializer (snappy) 772 821 46 0.0 772130.1 6.7X -FileSystemPersistenceEngine with KryoSerializer (zstd) 906 928 29 0.0 905578.7 5.7X -RocksDBPersistenceEngine with JavaSerializer 302 302 0 0.0 301664.5 17.2X -RocksDBPersistenceEngine with KryoSerializer 109 111 2 0.0 108979.5 47.6X -BlackHolePersistenceEngine 0 0 0 6.3 158.3 32800.5X +ZooKeeperPersistenceEngine with JavaSerializer 6080 6179 119 0.0 6079694.4 1.0X +FileSystemPersistenceEngine with JavaSerializer 3011 3060 42 0.0 3011244.1 2.0X +FileSystemPersistenceEngine with JavaSerializer (lz4) 799 827 28 0.0 799357.3 7.6X +FileSystemPersistenceEngine with JavaSerializer (lzf) 800 839 35 0.0 800038.9 7.6X +FileSystemPersistenceEngine with JavaSerializer (snappy) 786 797 11 0.0 785847.0 7.7X +FileSystemPersistenceEngine with JavaSerializer (zstd) 1025 1028 3 0.0 1024806.3 5.9X +RocksDBPersistenceEngine with JavaSerializer 309 311 3 0.0 308522.6 19.7X +BlackHolePersistenceEngine 0 0 0 5.8 173.5 35032.8X diff --git a/core/benchmarks/PropertiesCloneBenchmark-jdk21-results.txt b/core/benchmarks/PropertiesCloneBenchmark-jdk21-results.txt index 24d94cd0ca462..06701357609a5 100644 --- a/core/benchmarks/PropertiesCloneBenchmark-jdk21-results.txt +++ b/core/benchmarks/PropertiesCloneBenchmark-jdk21-results.txt @@ -2,39 +2,39 @@ Properties Cloning ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Empty Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.3 3226.0 1.0X -Utils.cloneProperties 0 0 0 34.5 29.0 111.2X +SerializationUtils.clone 0 0 0 0.3 3356.0 1.0X +Utils.cloneProperties 0 0 0 34.5 29.0 115.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor System Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.0 142517.0 1.0X -Utils.cloneProperties 0 0 0 0.4 2705.0 52.7X +SerializationUtils.clone 0 0 0 0.0 160560.0 1.0X +Utils.cloneProperties 0 0 0 0.3 3085.0 52.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Small Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.0 243556.0 1.0X -Utils.cloneProperties 0 0 0 0.4 2615.0 93.1X +SerializationUtils.clone 0 0 0 0.0 253143.0 1.0X +Utils.cloneProperties 0 0 0 0.3 3696.0 68.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Medium Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 1 1 0 0.0 860141.0 1.0X -Utils.cloneProperties 0 0 0 0.1 15128.0 56.9X +SerializationUtils.clone 1 1 0 0.0 907866.0 1.0X +Utils.cloneProperties 0 0 0 0.0 20328.0 44.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Large Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 2 2 0 0.0 1604435.0 1.0X -Utils.cloneProperties 0 0 0 0.0 30647.0 52.4X +SerializationUtils.clone 2 2 0 0.0 1699706.0 1.0X +Utils.cloneProperties 0 0 0 0.0 40867.0 41.6X diff --git a/core/benchmarks/PropertiesCloneBenchmark-results.txt b/core/benchmarks/PropertiesCloneBenchmark-results.txt index b4a712748306d..13c241f53d182 100644 --- a/core/benchmarks/PropertiesCloneBenchmark-results.txt +++ b/core/benchmarks/PropertiesCloneBenchmark-results.txt @@ -2,39 +2,39 @@ Properties Cloning ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Empty Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.3 3246.0 1.0X -Utils.cloneProperties 0 0 0 34.5 29.0 111.9X +SerializationUtils.clone 0 0 0 0.3 3617.0 1.0X +Utils.cloneProperties 0 0 0 34.5 29.0 124.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor System Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.0 156702.0 1.0X -Utils.cloneProperties 0 0 0 0.4 2694.0 58.2X +SerializationUtils.clone 0 0 0 0.0 148347.0 1.0X +Utils.cloneProperties 0 0 0 0.4 2815.0 52.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Small Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 0 0 0 0.0 268443.0 1.0X -Utils.cloneProperties 0 0 0 0.3 3637.0 73.8X +SerializationUtils.clone 0 0 0 0.0 280142.0 1.0X +Utils.cloneProperties 0 0 0 0.3 3686.0 76.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Medium Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 1 1 0 0.0 890198.0 1.0X -Utils.cloneProperties 0 0 0 0.1 19807.0 44.9X +SerializationUtils.clone 1 1 0 0.0 941434.0 1.0X +Utils.cloneProperties 0 0 0 0.0 20278.0 46.4X 
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Large Properties: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SerializationUtils.clone 2 2 0 0.0 1638110.0 1.0X -Utils.cloneProperties 0 0 0 0.0 40827.0 40.1X +SerializationUtils.clone 2 2 0 0.0 1737205.0 1.0X +Utils.cloneProperties 0 0 0 0.0 40696.0 42.7X diff --git a/core/benchmarks/XORShiftRandomBenchmark-jdk21-results.txt b/core/benchmarks/XORShiftRandomBenchmark-jdk21-results.txt index 609f39000d8ae..6f1f333363e96 100644 --- a/core/benchmarks/XORShiftRandomBenchmark-jdk21-results.txt +++ b/core/benchmarks/XORShiftRandomBenchmark-jdk21-results.txt @@ -2,43 +2,43 @@ Pseudo random ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor nextInt: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 451 451 0 222.0 4.5 1.0X +java.util.Random 451 451 0 221.9 4.5 1.0X XORShiftRandom 185 185 0 539.4 1.9 2.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor nextLong: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 897 897 0 111.5 9.0 1.0X -XORShiftRandom 371 371 0 269.7 3.7 2.4X +java.util.Random 894 899 7 111.8 8.9 1.0X +XORShiftRandom 371 372 2 269.4 3.7 2.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor nextDouble: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 902 902 0 110.9 9.0 1.0X -XORShiftRandom 371 372 1 269.5 3.7 2.4X +java.util.Random 900 900 0 111.1 9.0 1.0X +XORShiftRandom 371 371 0 269.6 3.7 2.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor nextGaussian: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 3389 3391 3 29.5 33.9 1.0X -XORShiftRandom 2501 2504 2 40.0 25.0 1.4X +java.util.Random 3373 3374 2 29.7 33.7 1.0X +XORShiftRandom 2454 2460 11 40.8 24.5 1.4X ================================================================================================ hash seed ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash seed: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -XORShiftRandom.hashSeed 1 1 0 12974.4 0.1 1.0X +XORShiftRandom.hashSeed 1 1 0 12974.9 0.1 1.0X diff --git a/core/benchmarks/XORShiftRandomBenchmark-results.txt b/core/benchmarks/XORShiftRandomBenchmark-results.txt index 4c5dc35208ae6..9701c0ca237bf 100644 --- a/core/benchmarks/XORShiftRandomBenchmark-results.txt +++ b/core/benchmarks/XORShiftRandomBenchmark-results.txt @@ -2,43 +2,43 @@ Pseudo random ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor nextInt: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 442 442 0 226.5 4.4 1.0X -XORShiftRandom 185 185 0 539.5 1.9 2.4X +java.util.Random 442 442 0 226.4 4.4 1.0X +XORShiftRandom 185 185 0 539.4 1.9 2.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor nextLong: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 909 910 1 110.0 9.1 1.0X -XORShiftRandom 371 371 0 269.7 3.7 2.5X +java.util.Random 913 913 0 109.6 9.1 1.0X +XORShiftRandom 371 372 1 269.6 3.7 2.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor nextDouble: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 905 905 0 110.5 9.0 1.0X -XORShiftRandom 371 371 1 269.7 3.7 2.4X +java.util.Random 906 906 0 110.4 9.1 1.0X +XORShiftRandom 371 371 1 269.6 3.7 2.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor nextGaussian: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java.util.Random 4069 4071 2 24.6 40.7 1.0X -XORShiftRandom 2983 2991 8 33.5 29.8 1.4X +java.util.Random 4170 4171 1 24.0 41.7 1.0X +XORShiftRandom 2993 2996 3 33.4 29.9 1.4X ================================================================================================ hash seed ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash seed: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -XORShiftRandom.hashSeed 1 1 0 12961.6 0.1 1.0X +XORShiftRandom.hashSeed 1 1 0 12975.0 0.1 1.0X diff --git a/core/benchmarks/ZStandardBenchmark-jdk21-results.txt b/core/benchmarks/ZStandardBenchmark-jdk21-results.txt index 
de19d9149a0bd..4c986ca0e4e0a 100644 --- a/core/benchmarks/ZStandardBenchmark-jdk21-results.txt +++ b/core/benchmarks/ZStandardBenchmark-jdk21-results.txt @@ -2,48 +2,48 @@ Benchmark ZStandardCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Compression 10000 times at level 1 without buffer pool 670 901 260 0.0 66991.5 1.0X -Compression 10000 times at level 2 without buffer pool 888 894 10 0.0 88794.6 0.8X -Compression 10000 times at level 3 without buffer pool 998 1001 3 0.0 99815.9 0.7X -Compression 10000 times at level 1 with buffer pool 941 941 1 0.0 94077.1 0.7X -Compression 10000 times at level 2 with buffer pool 977 978 1 0.0 97697.2 0.7X -Compression 10000 times at level 3 with buffer pool 1096 1096 0 0.0 109596.6 0.6X +Compression 10000 times at level 1 without buffer pool 649 748 156 0.0 64921.9 1.0X +Compression 10000 times at level 2 without buffer pool 689 689 0 0.0 68927.1 0.9X +Compression 10000 times at level 3 without buffer pool 782 782 0 0.0 78180.6 0.8X +Compression 10000 times at level 1 with buffer pool 580 582 2 0.0 57976.0 1.1X +Compression 10000 times at level 2 with buffer pool 614 618 4 0.0 61395.3 1.1X +Compression 10000 times at level 3 with buffer pool 725 734 11 0.0 72535.5 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------ -Decompression 10000 times from level 1 without buffer pool 824 825 0 0.0 82440.7 1.0X -Decompression 10000 times from level 2 without buffer pool 826 827 1 0.0 82582.0 1.0X -Decompression 10000 times from level 3 without buffer pool 828 829 0 0.0 82846.0 1.0X -Decompression 10000 times from level 1 with buffer pool 751 751 0 0.0 75054.5 1.1X -Decompression 10000 times from level 2 with buffer pool 752 752 0 0.0 75223.2 1.1X -Decompression 10000 times from level 3 with buffer pool 751 752 0 0.0 75135.7 1.1X +Decompression 10000 times from level 1 without buffer pool 831 832 1 0.0 83114.9 1.0X +Decompression 10000 times from level 2 without buffer pool 834 835 1 0.0 83372.7 1.0X +Decompression 10000 times from level 3 without buffer pool 831 832 1 0.0 83092.3 1.0X +Decompression 10000 times from level 1 with buffer pool 759 760 1 0.0 75870.2 1.1X +Decompression 10000 times from level 2 with buffer pool 759 760 1 0.0 75877.3 1.1X +Decompression 10000 times from level 3 with buffer pool 759 759 0 0.0 75874.5 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Parallel Compression at level 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parallel Compression with 0 workers 48 
50 1 0.0 376677.0 1.0X -Parallel Compression with 1 workers 42 43 2 0.0 329346.1 1.1X -Parallel Compression with 2 workers 39 41 2 0.0 304580.2 1.2X -Parallel Compression with 4 workers 38 39 1 0.0 297897.8 1.3X -Parallel Compression with 8 workers 41 42 1 0.0 317437.0 1.2X -Parallel Compression with 16 workers 45 47 1 0.0 351974.0 1.1X +Parallel Compression with 0 workers 48 50 1 0.0 376632.9 1.0X +Parallel Compression with 1 workers 35 37 2 0.0 272066.6 1.4X +Parallel Compression with 2 workers 34 38 2 0.0 263055.3 1.4X +Parallel Compression with 4 workers 37 39 2 0.0 286835.7 1.3X +Parallel Compression with 8 workers 38 40 1 0.0 299961.3 1.3X +Parallel Compression with 16 workers 43 45 1 0.0 335272.5 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Parallel Compression at level 9: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parallel Compression with 0 workers 160 163 6 0.0 1248450.7 1.0X -Parallel Compression with 1 workers 198 198 1 0.0 1543157.9 0.8X -Parallel Compression with 2 workers 116 127 19 0.0 908815.6 1.4X -Parallel Compression with 4 workers 111 114 2 0.0 863559.0 1.4X -Parallel Compression with 8 workers 114 119 2 0.0 892995.2 1.4X -Parallel Compression with 16 workers 116 119 2 0.0 906657.5 1.4X +Parallel Compression with 0 workers 157 158 1 0.0 1224138.2 1.0X +Parallel Compression with 1 workers 187 188 1 0.0 1463264.4 0.8X +Parallel Compression with 2 workers 111 115 6 0.0 863722.6 1.4X +Parallel Compression with 4 workers 105 109 2 0.0 822422.6 1.5X +Parallel Compression with 8 workers 110 114 2 0.0 862852.1 1.4X +Parallel Compression with 16 workers 111 115 2 0.0 870311.3 1.4X diff --git a/core/benchmarks/ZStandardBenchmark-results.txt b/core/benchmarks/ZStandardBenchmark-results.txt index cbe95071f1806..5569f27bb0169 100644 --- a/core/benchmarks/ZStandardBenchmark-results.txt +++ b/core/benchmarks/ZStandardBenchmark-results.txt @@ -2,48 +2,48 @@ Benchmark ZStandardCompressionCodec ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Compression 10000 times at level 1 without buffer pool 674 680 8 0.0 67377.6 1.0X -Compression 10000 times at level 2 without buffer pool 712 713 1 0.0 71200.8 0.9X -Compression 10000 times at level 3 without buffer pool 822 828 5 0.0 82220.5 0.8X -Compression 10000 times at level 1 with buffer pool 591 592 1 0.0 59114.1 1.1X -Compression 10000 times at level 2 with buffer pool 627 628 1 0.0 62672.1 1.1X -Compression 10000 times at level 3 with buffer pool 748 750 2 0.0 74829.4 0.9X +Compression 10000 times at level 1 without buffer pool 656 660 3 0.0 65632.5 1.0X +Compression 10000 times at level 2 without buffer pool 695 696 1 0.0 69509.7 0.9X +Compression 10000 times at level 3 without buffer pool 803 807 7 0.0 80258.4 0.8X +Compression 10000 times at level 1 with buffer pool 584 586 2 0.0 58381.4 1.1X +Compression 10000 times at level 
2 with buffer pool 615 616 1 0.0 61463.0 1.1X +Compression 10000 times at level 3 with buffer pool 743 743 0 0.0 74310.9 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark ZStandardCompressionCodec: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------ -Decompression 10000 times from level 1 without buffer pool 608 610 2 0.0 60825.6 1.0X -Decompression 10000 times from level 2 without buffer pool 607 608 1 0.0 60673.5 1.0X -Decompression 10000 times from level 3 without buffer pool 607 607 0 0.0 60683.0 1.0X -Decompression 10000 times from level 1 with buffer pool 564 565 1 0.0 56405.9 1.1X -Decompression 10000 times from level 2 with buffer pool 563 564 1 0.0 56319.2 1.1X -Decompression 10000 times from level 3 with buffer pool 564 565 0 0.0 56394.2 1.1X +Decompression 10000 times from level 1 without buffer pool 620 621 1 0.0 61972.9 1.0X +Decompression 10000 times from level 2 without buffer pool 622 622 1 0.0 62168.8 1.0X +Decompression 10000 times from level 3 without buffer pool 621 622 1 0.0 62130.0 1.0X +Decompression 10000 times from level 1 with buffer pool 549 550 0 0.0 54939.0 1.1X +Decompression 10000 times from level 2 with buffer pool 550 550 0 0.0 54963.5 1.1X +Decompression 10000 times from level 3 with buffer pool 549 550 1 0.0 54927.7 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Parallel Compression at level 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parallel Compression with 0 workers 51 52 1 0.0 395223.4 1.0X -Parallel Compression with 1 workers 43 46 5 0.0 338033.5 1.2X -Parallel Compression with 2 workers 42 44 1 0.0 325731.2 1.2X -Parallel Compression with 4 workers 42 44 1 0.0 325162.3 1.2X -Parallel Compression with 8 workers 44 46 1 0.0 342514.8 1.2X -Parallel Compression with 16 workers 48 50 1 0.0 375817.4 1.1X +Parallel Compression with 0 workers 47 48 1 0.0 365666.1 1.0X +Parallel Compression with 1 workers 34 36 3 0.0 268562.3 1.4X +Parallel Compression with 2 workers 32 35 2 0.0 251265.1 1.5X +Parallel Compression with 4 workers 35 38 1 0.0 273574.1 1.3X +Parallel Compression with 8 workers 37 40 1 0.0 288217.8 1.3X +Parallel Compression with 16 workers 42 44 1 0.0 330318.7 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Parallel Compression at level 9: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parallel Compression with 0 workers 163 164 1 0.0 1270581.2 1.0X -Parallel Compression with 1 workers 198 200 2 0.0 1548312.0 0.8X -Parallel Compression with 2 workers 117 122 5 0.0 914281.7 1.4X -Parallel Compression with 4 workers 112 115 2 0.0 872458.1 1.5X -Parallel Compression with 8 workers 116 120 2 0.0 909773.7 1.4X -Parallel Compression with 16 workers 116 122 8 0.0 908698.0 1.4X +Parallel Compression with 0 workers 155 157 2 0.0 1214057.2 1.0X 
+Parallel Compression with 1 workers 192 193 2 0.0 1499524.2 0.8X +Parallel Compression with 2 workers 112 119 9 0.0 871848.8 1.4X +Parallel Compression with 4 workers 106 109 2 0.0 830699.8 1.5X +Parallel Compression with 8 workers 111 114 2 0.0 870700.3 1.4X +Parallel Compression with 16 workers 112 114 2 0.0 873315.6 1.4X diff --git a/core/pom.xml b/core/pom.xml index 6468f500db046..adb1b3034b427 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -399,7 +399,7 @@ net.razorvine pickle - 1.3 + 1.5 net.sf.py4j @@ -547,6 +547,8 @@ org.eclipse.jetty:jetty-server com.google.guava:guava com.google.protobuf:* + diff --git a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java index 441587cf7350e..4e251a1c2901b 100644 --- a/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java +++ b/core/src/main/java/org/apache/spark/io/NioBufferedFileInputStream.java @@ -1,9 +1,12 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java b/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java index 33dfa44229064..5e9f1b78273a5 100644 --- a/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java +++ b/core/src/main/java/org/apache/spark/io/ReadAheadInputStream.java @@ -1,9 +1,12 @@ /* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -13,13 +16,6 @@ */ package org.apache.spark.io; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; -import org.apache.spark.util.ThreadUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.annotation.concurrent.GuardedBy; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; @@ -30,6 +26,16 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; +import javax.annotation.concurrent.GuardedBy; + +import com.google.common.base.Preconditions; +import com.google.common.base.Throwables; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; +import org.apache.spark.util.ThreadUtils; /** * {@link InputStream} implementation which asynchronously reads ahead from the underlying input @@ -42,7 +48,8 @@ */ public class ReadAheadInputStream extends InputStream { - private static final Logger logger = LoggerFactory.getLogger(ReadAheadInputStream.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ReadAheadInputStream.class); private ReentrantLock stateChangeLock = new ReentrantLock(); @@ -205,7 +212,7 @@ private void closeUnderlyingInputStreamIfNecessary() { try { underlyingInputStream.close(); } catch (IOException e) { - logger.warn(e.getMessage(), e); + logger.warn("{}", e, MDC.of(LogKeys.ERROR$.MODULE$, e.getMessage())); } } } diff --git a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java index 83352611770fd..fe798e40a6ad7 100644 --- a/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java +++ b/core/src/main/java/org/apache/spark/memory/TaskMemoryManager.java @@ -18,6 +18,7 @@ package org.apache.spark.memory; import javax.annotation.concurrent.GuardedBy; +import java.io.InterruptedIOException; import java.io.IOException; import java.nio.channels.ClosedByInterruptException; import java.util.Arrays; @@ -29,9 +30,11 @@ import java.util.TreeMap; import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.unsafe.memory.MemoryBlock; import org.apache.spark.util.Utils; @@ -58,7 +61,7 @@ */ public class TaskMemoryManager { - private static final Logger logger = LoggerFactory.getLogger(TaskMemoryManager.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(TaskMemoryManager.class); /** The number of bits used to address the page table. */ private static final int PAGE_NUMBER_BITS = 13; @@ -242,12 +245,14 @@ private long trySpillAndAcquire( cList.remove(idx); return 0; } - } catch (ClosedByInterruptException e) { + } catch (ClosedByInterruptException | InterruptedIOException e) { // This called by user to kill a task (e.g: speculative task). 
- logger.error("error while calling spill() on " + consumerToSpill, e); + logger.error("error while calling spill() on {}", e, + MDC.of(LogKeys.MEMORY_CONSUMER$.MODULE$, consumerToSpill)); throw new RuntimeException(e.getMessage()); } catch (IOException e) { - logger.error("error while calling spill() on " + consumerToSpill, e); + logger.error("error while calling spill() on {}", e, + MDC.of(LogKeys.MEMORY_CONSUMER$.MODULE$, consumerToSpill)); // checkstyle.off: RegexpSinglelineJava throw new SparkOutOfMemoryError("error while calling spill() on " + consumerToSpill + " : " + e.getMessage()); @@ -270,24 +275,29 @@ public void releaseExecutionMemory(long size, MemoryConsumer consumer) { * Dump the memory usage of all consumers. */ public void showMemoryUsage() { - logger.info("Memory used in task " + taskAttemptId); + logger.info("Memory used in task {}", + MDC.of(LogKeys.TASK_ATTEMPT_ID$.MODULE$, taskAttemptId)); synchronized (this) { long memoryAccountedForByConsumers = 0; for (MemoryConsumer c: consumers) { long totalMemUsage = c.getUsed(); memoryAccountedForByConsumers += totalMemUsage; if (totalMemUsage > 0) { - logger.info("Acquired by " + c + ": " + Utils.bytesToString(totalMemUsage)); + logger.info("Acquired by {}: {}", + MDC.of(LogKeys.MEMORY_CONSUMER$.MODULE$, c), + MDC.of(LogKeys.MEMORY_SIZE$.MODULE$, Utils.bytesToString(totalMemUsage))); } } long memoryNotAccountedFor = memoryManager.getExecutionMemoryUsageForTask(taskAttemptId) - memoryAccountedForByConsumers; logger.info( "{} bytes of memory were used by task {} but are not associated with specific consumers", - memoryNotAccountedFor, taskAttemptId); + MDC.of(LogKeys.MEMORY_SIZE$.MODULE$, memoryNotAccountedFor), + MDC.of(LogKeys.TASK_ATTEMPT_ID$.MODULE$, taskAttemptId)); logger.info( "{} bytes of memory are used for execution and {} bytes of memory are used for storage", - memoryManager.executionMemoryUsed(), memoryManager.storageMemoryUsed()); + MDC.of(LogKeys.EXECUTION_MEMORY_SIZE$.MODULE$, memoryManager.executionMemoryUsed()), + MDC.of(LogKeys.STORAGE_MEMORY_SIZE$.MODULE$, memoryManager.storageMemoryUsed())); } } @@ -333,7 +343,8 @@ public MemoryBlock allocatePage(long size, MemoryConsumer consumer) { try { page = memoryManager.tungstenMemoryAllocator().allocate(acquired); } catch (OutOfMemoryError e) { - logger.warn("Failed to allocate a page ({} bytes), try again.", acquired); + logger.warn("Failed to allocate a page ({} bytes), try again.", + MDC.of(LogKeys.PAGE_SIZE$.MODULE$, acquired)); // there is no enough memory actually, it means the actual free memory is smaller than // MemoryManager thought, we should keep the acquired memory. 
synchronized (this) { diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java index d067c870acc9e..86f7d5143eff5 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java @@ -33,9 +33,11 @@ import scala.collection.Iterator; import com.google.common.io.Closeables; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.Partitioner; import org.apache.spark.ShuffleDependency; import org.apache.spark.SparkConf; @@ -81,7 +83,8 @@ final class BypassMergeSortShuffleWriter extends ShuffleWriter implements ShuffleChecksumSupport { - private static final Logger logger = LoggerFactory.getLogger(BypassMergeSortShuffleWriter.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(BypassMergeSortShuffleWriter.class); private final int fileBufferSize; private final boolean transferToEnabled; @@ -223,7 +226,8 @@ private long[] writePartitionedData(ShuffleMapOutputWriter mapOutputWriter) thro writePartitionedDataWithStream(file, writer); } if (!file.delete()) { - logger.error("Unable to delete file for partition {}", i); + logger.error("Unable to delete file for partition {}", + MDC.of(LogKeys.PARTITION_ID$.MODULE$, i)); } } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java index b097089282ce3..f96513f1b1097 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java @@ -23,17 +23,19 @@ import java.util.LinkedList; import java.util.zip.Checksum; -import org.apache.spark.SparkException; import scala.Tuple2; import com.google.common.annotations.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.spark.SparkConf; +import org.apache.spark.SparkException; import org.apache.spark.TaskContext; import org.apache.spark.executor.ShuffleWriteMetrics; import org.apache.spark.internal.config.package$; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.memory.MemoryConsumer; import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; @@ -70,7 +72,8 @@ */ final class ShuffleExternalSorter extends MemoryConsumer implements ShuffleChecksumSupport { - private static final Logger logger = LoggerFactory.getLogger(ShuffleExternalSorter.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(ShuffleExternalSorter.class); @VisibleForTesting static final int DISK_WRITE_BUFFER_SIZE = 1024 * 1024; @@ -159,11 +162,11 @@ private void writeSortedFile(boolean isFinalFile) { if (!isFinalFile) { logger.info( "Task {} on Thread {} spilling sort data of {} to disk ({} {} so far)", - taskContext.taskAttemptId(), - Thread.currentThread().getId(), - Utils.bytesToString(getMemoryUsage()), - spills.size(), - spills.size() != 1 ? 
" times" : " time"); + MDC.of(LogKeys.TASK_ATTEMPT_ID$.MODULE$, taskContext.taskAttemptId()), + MDC.of(LogKeys.THREAD_ID$.MODULE$, Thread.currentThread().getId()), + MDC.of(LogKeys.MEMORY_SIZE$.MODULE$, Utils.bytesToString(getMemoryUsage())), + MDC.of(LogKeys.NUM_SPILL_INFOS$.MODULE$, spills.size()), + MDC.of(LogKeys.SPILL_TIMES$.MODULE$, spills.size() != 1 ? "times" : "time")); } // This call performs the actual sort. @@ -349,7 +352,8 @@ public void cleanupResources() { } for (SpillInfo spill : spills) { if (spill.file.exists() && !spill.file.delete()) { - logger.error("Unable to delete spill file {}", spill.file.getPath()); + logger.error("Unable to delete spill file {}", + MDC.of(LogKeys.PATH$.MODULE$, spill.file.getPath())); } } } @@ -416,8 +420,8 @@ public void insertRecord(Object recordBase, long recordOffset, int length, int p // for tests assert(inMemSorter != null); if (inMemSorter.numRecords() >= numElementsForSpillThreshold) { - logger.info("Spilling data because number of spilledRecords crossed the threshold " + - numElementsForSpillThreshold); + logger.info("Spilling data because number of spilledRecords crossed the threshold {}" + + MDC.of(LogKeys.NUM_ELEMENTS_SPILL_THRESHOLD$.MODULE$, numElementsForSpillThreshold)); spill(); } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java index f5949d6ae7a5b..13fd18c0942b1 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java @@ -35,12 +35,14 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.io.ByteStreams; import com.google.common.io.Closeables; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.spark.*; import org.apache.spark.annotation.Private; import org.apache.spark.internal.config.package$; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.io.CompressionCodec; import org.apache.spark.io.CompressionCodec$; import org.apache.spark.io.NioBufferedFileInputStream; @@ -66,7 +68,7 @@ @Private public class UnsafeShuffleWriter extends ShuffleWriter { - private static final Logger logger = LoggerFactory.getLogger(UnsafeShuffleWriter.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(UnsafeShuffleWriter.class); private static final ClassTag OBJECT_CLASS_TAG = ClassTag$.MODULE$.Object(); @@ -226,7 +228,8 @@ void closeAndWriteOutput() throws IOException { sorter = null; for (SpillInfo spill : spills) { if (spill.file.exists() && !spill.file.delete()) { - logger.error("Error while deleting spill file {}", spill.file.getPath()); + logger.error("Error while deleting spill file {}", + MDC.of(LogKeys.PATH$.MODULE$, spill.file.getPath())); } } } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java index eb4d9d9abc8e3..38f0a60f8b0dd 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleExecutorComponents.java @@ -17,6 +17,7 @@ package org.apache.spark.shuffle.sort.io; +import java.util.Collections; import java.util.Map; 
import java.util.Optional; @@ -56,7 +57,10 @@ public void initializeExecutor(String appId, String execId, Map if (blockManager == null) { throw new IllegalStateException("No blockManager available from the SparkEnv."); } - blockResolver = new IndexShuffleBlockResolver(sparkConf, blockManager); + blockResolver = + new IndexShuffleBlockResolver( + sparkConf, blockManager, Collections.emptyMap() /* Shouldn't be accessed */ + ); } @Override diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java index efe508d1361c7..c0b9018c770a0 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java @@ -26,10 +26,11 @@ import java.nio.channels.WritableByteChannel; import java.util.Optional; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import org.apache.spark.SparkConf; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.shuffle.api.ShuffleMapOutputWriter; import org.apache.spark.shuffle.api.ShufflePartitionWriter; import org.apache.spark.shuffle.api.WritableByteChannelWrapper; @@ -44,8 +45,8 @@ */ public class LocalDiskShuffleMapOutputWriter implements ShuffleMapOutputWriter { - private static final Logger log = - LoggerFactory.getLogger(LocalDiskShuffleMapOutputWriter.class); + private static final SparkLogger log = + SparkLoggerFactory.getLogger(LocalDiskShuffleMapOutputWriter.class); private final int shuffleId; private final long mapId; @@ -73,7 +74,7 @@ public LocalDiskShuffleMapOutputWriter( this.blockResolver = blockResolver; this.bufferSize = (int) (long) sparkConf.get( - package$.MODULE$.SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE()) * 1024; + package$.MODULE$.SHUFFLE_LOCAL_DISK_FILE_OUTPUT_BUFFER_SIZE()) * 1024; this.partitionLengths = new long[numPartitions]; this.outputFile = blockResolver.getDataFile(shuffleId, mapId); this.outputTempFile = null; @@ -123,7 +124,8 @@ public MapOutputCommitMessage commitAllPartitions(long[] checksums) throws IOExc public void abort(Throwable error) throws IOException { cleanUp(); if (outputTempFile != null && outputTempFile.exists() && !outputTempFile.delete()) { - log.warn("Failed to delete temporary shuffle file at {}", outputTempFile.getAbsolutePath()); + log.warn("Failed to delete temporary shuffle file at {}", + MDC.of(LogKeys.PATH$.MODULE$, outputTempFile.getAbsolutePath())); } } diff --git a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java index 35c5efc77f6f2..2a8e15cd09ccf 100644 --- a/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java +++ b/core/src/main/java/org/apache/spark/unsafe/map/BytesToBytesMap.java @@ -25,11 +25,13 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.io.Closeables; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.spark.SparkEnv; import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.MDC; import org.apache.spark.memory.MemoryConsumer; import 
org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; @@ -66,7 +68,7 @@ */ public final class BytesToBytesMap extends MemoryConsumer { - private static final Logger logger = LoggerFactory.getLogger(BytesToBytesMap.class); + private static final SparkLogger logger = SparkLoggerFactory.getLogger(BytesToBytesMap.class); private static final HashMapGrowthStrategy growthStrategy = HashMapGrowthStrategy.DOUBLING; @@ -392,7 +394,8 @@ private void handleFailedDelete() { // remove the spill file from disk File file = spillWriters.removeFirst().getFile(); if (file != null && file.exists() && !file.delete()) { - logger.error("Was unable to delete spill file {}", file.getAbsolutePath()); + logger.error("Was unable to delete spill file {}", + MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath())); } } } @@ -893,7 +896,8 @@ public void free() { File file = spillWriters.removeFirst().getFile(); if (file != null && file.exists()) { if (!file.delete()) { - logger.error("Was unable to delete spill file {}", file.getAbsolutePath()); + logger.error("Was unable to delete spill file {}", + MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath())); } } } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 2f9e1a9f45460..af421e903ba3f 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -28,11 +28,13 @@ import com.google.common.annotations.VisibleForTesting; import org.apache.commons.io.IOUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.spark.TaskContext; import org.apache.spark.executor.ShuffleWriteMetrics; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.MDC; import org.apache.spark.memory.MemoryConsumer; import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; @@ -50,7 +52,8 @@ */ public final class UnsafeExternalSorter extends MemoryConsumer { - private static final Logger logger = LoggerFactory.getLogger(UnsafeExternalSorter.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(UnsafeExternalSorter.class); @Nullable private final PrefixComparator prefixComparator; @@ -217,10 +220,10 @@ public long spill(long size, MemoryConsumer trigger) throws IOException { } logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", - Thread.currentThread().getId(), - Utils.bytesToString(getMemoryUsage()), - spillWriters.size(), - spillWriters.size() > 1 ? " times" : " time"); + MDC.of(LogKeys.THREAD_ID$.MODULE$, Thread.currentThread().getId()), + MDC.of(LogKeys.MEMORY_SIZE$.MODULE$, Utils.bytesToString(getMemoryUsage())), + MDC.of(LogKeys.NUM_SPILL_WRITERS$.MODULE$, spillWriters.size()), + MDC.of(LogKeys.SPILL_TIMES$.MODULE$, spillWriters.size() > 1 ? 
"times" : "time")); ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); @@ -335,7 +338,8 @@ private void deleteSpillFiles() { File file = spill.getFile(); if (file != null && file.exists()) { if (!file.delete()) { - logger.error("Was unable to delete spill file {}", file.getAbsolutePath()); + logger.error("Was unable to delete spill file {}", + MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath())); } } } @@ -476,8 +480,8 @@ public void insertRecord( assert(inMemSorter != null); if (inMemSorter.numRecords() >= numElementsForSpillThreshold) { - logger.info("Spilling data because number of spilledRecords crossed the threshold " + - numElementsForSpillThreshold); + logger.info("Spilling data because number of spilledRecords crossed the threshold {}", + MDC.of(LogKeys.NUM_ELEMENTS_SPILL_THRESHOLD$.MODULE$, numElementsForSpillThreshold)); spill(); } diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java index cf29835b2ce89..0693f8cb1a808 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java @@ -23,13 +23,13 @@ import org.apache.spark.TaskContext; import org.apache.spark.internal.config.package$; import org.apache.spark.internal.config.ConfigEntry; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.io.NioBufferedFileInputStream; import org.apache.spark.io.ReadAheadInputStream; import org.apache.spark.serializer.SerializerManager; import org.apache.spark.storage.BlockId; import org.apache.spark.unsafe.Platform; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.*; @@ -38,7 +38,8 @@ * of the file format). */ public final class UnsafeSorterSpillReader extends UnsafeSorterIterator implements Closeable { - private static final Logger logger = LoggerFactory.getLogger(UnsafeSorterSpillReader.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(UnsafeSorterSpillReader.class); public static final int MAX_BUFFER_SIZE_BYTES = 16777216; // 16 mb private InputStream in; diff --git a/core/src/main/resources/META-INF/LICENSE b/core/src/main/resources/META-INF/LICENSE new file mode 100644 index 0000000000000..8dbc84b910d41 --- /dev/null +++ b/core/src/main/resources/META-INF/LICENSE @@ -0,0 +1,240 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +------------------------------------------------------------------------------------ +This product bundles various third-party components under other open source licenses. +This section summarizes those components and their licenses. See licenses/ +for text of these licenses. 
+ + +Apache Software Foundation License 2.0 +-------------------------------------- +com.google.guava:guava +com.google.protobuf:protobuf-java +core/src/main/java/org/apache/spark/util/collection/TimSort.java +core/src/main/resources/org/apache/spark/ui/static/bootstrap* +core/src/main/resources/org/apache/spark/ui/static/vis* +core/src/main/resources/org/apache/spark/ui/static/d3-flamegraph.min.js +core/src/main/resources/org/apache/spark/ui/static/d3-flamegraph.css +org.eclipse.jetty:jetty-client +org.eclipse.jetty:jetty-http +org.eclipse.jetty:jetty-io +org.eclipse.jetty:jetty-plus +org.eclipse.jetty:jetty-proxy +org.eclipse.jetty:jetty-security +org.eclipse.jetty:jetty-server +org.eclipse.jetty:jetty-servlet +org.eclipse.jetty:jetty-servlets +org.eclipse.jetty:jetty-util + +MIT License +----------- +core/src/main/resources/org/apache/spark/ui/static/dagre-d3.min.js +core/src/main/resources/org/apache/spark/ui/static/*dataTables* +core/src/main/resources/org/apache/spark/ui/static/graphlib-dot.min.js +core/src/main/resources/org/apache/spark/ui/static/jquery* +core/src/main/resources/org/apache/spark/ui/static/sorttable.js + +ISC License +----------- +core/src/main/resources/org/apache/spark/ui/static/d3.min.js diff --git a/core/src/main/resources/META-INF/NOTICE b/core/src/main/resources/META-INF/NOTICE new file mode 100644 index 0000000000000..5514b6dc7999b --- /dev/null +++ b/core/src/main/resources/META-INF/NOTICE @@ -0,0 +1,29 @@ +Apache Spark - Core +Copyright 2014 and onwards The Apache Software Foundation. + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This module also bundles software with extra NOTICE as following: + +=== NOTICE FOR Jetty === +Notices for Eclipse Jetty +========================= +This content is produced and maintained by the Eclipse Jetty project. + +Cryptography +------------ +Content may contain encryption software. The country in which you are currently +may have restrictions on the import, possession, and use, and/or re-export to +another country, of encryption software. BEFORE using any encryption software, +please check the country's laws, regulations and policies concerning the import, +possession, or use, and re-export of encryption software, to see if this is +permitted. + +The UnixCrypt.java code implements the one way cryptography used by +Unix systems for simple password protection. Copyright 1996 Aki Yoshida, +modified April 2001 by Iris Van den Broeke, Daniel Deville. +Permission to use, copy, modify and distribute UnixCrypt +for non-commercial or commercial purposes and without fee is +granted provided that the copyright notice appears in all copies. 
+=== END OF NOTICE FOR Jetty === diff --git a/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala b/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala index 942242107e22f..adce6c3f5ffdb 100644 --- a/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala +++ b/core/src/main/scala/org/apache/spark/BarrierCoordinator.scala @@ -23,7 +23,8 @@ import java.util.function.Consumer import scala.collection.mutable.{ArrayBuffer, HashSet} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rpc.{RpcCallContext, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerStageCompleted} import org.apache.spark.util.ThreadUtils @@ -161,7 +162,8 @@ private[spark] class BarrierCoordinator( s"${request.numTasks} from Task $taskId, previously it was $numTasks.") // Check whether the epoch from the barrier tasks matches current barrierEpoch. - logInfo(s"Current barrier epoch for $barrierId is $barrierEpoch.") + logInfo(log"Current barrier epoch for ${MDC(BARRIER_ID, barrierId)}" + + log" is ${MDC(BARRIER_EPOCH, barrierEpoch)}.") if (epoch != barrierEpoch) { requester.sendFailure(new SparkException(s"The request to sync of $barrierId with " + s"barrier epoch $barrierEpoch has already finished. Maybe task $taskId is not " + @@ -176,14 +178,17 @@ private[spark] class BarrierCoordinator( // Add the requester to array of RPCCallContexts pending for reply. requesters += requester messages(request.partitionId) = request.message - logInfo(s"Barrier sync epoch $barrierEpoch from $barrierId received update from Task " + - s"$taskId, current progress: ${requesters.size}/$numTasks.") + logInfo(log"Barrier sync epoch ${MDC(BARRIER_EPOCH, barrierEpoch)}" + + log" from ${MDC(BARRIER_ID, barrierId)} received update from Task" + + log" ${MDC(TASK_ID, taskId)}, current progress:" + + log" ${MDC(REQUESTER_SIZE, requesters.size)}/${MDC(NUM_REQUEST_SYNC_TASK, numTasks)}.") if (requesters.size == numTasks) { requesters.foreach(_.reply(messages.clone())) // Finished current barrier() call successfully, clean up ContextBarrierState and // increase the barrier epoch. - logInfo(s"Barrier sync epoch $barrierEpoch from $barrierId received all updates from " + - s"tasks, finished successfully.") + logInfo(log"Barrier sync epoch ${MDC(BARRIER_EPOCH, barrierEpoch)}" + + log" from ${MDC(BARRIER_ID, barrierId)} received all updates from" + + log" tasks, finished successfully.") barrierEpoch += 1 requesters.clear() requestMethods.clear() diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index e083ece918b63..c8d6000cd6282 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -26,7 +26,8 @@ import scala.util.{Failure, Success => ScalaSuccess, Try} import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.executor.TaskMetrics -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC, MessageWithContext} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.source.Source import org.apache.spark.resource.ResourceInformation @@ -56,19 +57,27 @@ class BarrierTaskContext private[spark] ( // with the driver side epoch. 
private var barrierEpoch = 0 + private def logProgressInfo(msg: MessageWithContext, startTime: Option[Long]): Unit = { + val waitMsg = startTime.fold(log"")(st => log", waited " + + log"for ${MDC(TOTAL_TIME, System.currentTimeMillis() - st)} ms,") + logInfo(log"Task ${MDC(TASK_ATTEMPT_ID, taskAttemptId())}" + + log" from Stage ${MDC(STAGE_ID, stageId())}" + + log"(Attempt ${MDC(STAGE_ATTEMPT, stageAttemptNumber())}) " + + msg + waitMsg + + log" current barrier epoch is ${MDC(BARRIER_EPOCH, barrierEpoch)}.") + } + private def runBarrier(message: String, requestMethod: RequestMethod.Value): Array[String] = { - logInfo(s"Task ${taskAttemptId()} from Stage ${stageId()}(Attempt ${stageAttemptNumber()}) " + - s"has entered the global sync, current barrier epoch is $barrierEpoch.") + logProgressInfo(log"has entered the global sync", None) logTrace("Current callSite: " + Utils.getCallSite()) val startTime = System.currentTimeMillis() val timerTask = new TimerTask { override def run(): Unit = { - logInfo(s"Task ${taskAttemptId()} from Stage ${stageId()}(Attempt " + - s"${stageAttemptNumber()}) waiting " + - s"under the global sync since $startTime, has been waiting for " + - s"${MILLISECONDS.toSeconds(System.currentTimeMillis() - startTime)} seconds, " + - s"current barrier epoch is $barrierEpoch.") + logProgressInfo( + log"waiting under the global sync since ${MDC(TIME, startTime)}", + Some(startTime) + ) } } // Log the update of global sync every 1 minute. @@ -104,17 +113,11 @@ class BarrierTaskContext private[spark] ( val messages = abortableRpcFuture.future.value.get.get barrierEpoch += 1 - logInfo(s"Task ${taskAttemptId()} from Stage ${stageId()}(Attempt ${stageAttemptNumber()}) " + - s"finished global sync successfully, waited for " + - s"${MILLISECONDS.toSeconds(System.currentTimeMillis() - startTime)} seconds, " + - s"current barrier epoch is $barrierEpoch.") + logProgressInfo(log"finished global sync successfully", Some(startTime)) messages } catch { case e: SparkException => - logInfo(s"Task ${taskAttemptId()} from Stage ${stageId()}(Attempt " + - s"${stageAttemptNumber()}) failed to perform global sync, waited for " + - s"${MILLISECONDS.toSeconds(System.currentTimeMillis() - startTime)} seconds, " + - s"current barrier epoch is $barrierEpoch.") + logProgressInfo(log"failed to perform global sync", Some(startTime)) throw e } finally { timerTask.cancel() diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index c16a84c13187b..fb56389cde77e 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -25,7 +25,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ACCUMULATOR_ID, BROADCAST_ID, LISTENER, RDD_ID, SHUFFLE_ID} +import org.apache.spark.internal.LogKeys.{ACCUMULATOR_ID, BROADCAST_ID, LISTENER, RDD_ID, SHUFFLE_ID} import org.apache.spark.internal.config._ import org.apache.spark.rdd.{RDD, ReliableRDDCheckpointData} import org.apache.spark.scheduler.SparkListener diff --git a/core/src/main/scala/org/apache/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index 3b5bb6792928e..3b7c7778e26ce 100644 --- a/core/src/main/scala/org/apache/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -24,7 +24,8 @@ import scala.reflect.ClassTag import 
org.roaringbitmap.RoaringBitmap import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rdd.RDD import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.{ShuffleHandle, ShuffleWriteProcessor} @@ -211,10 +212,13 @@ class ShuffleDependency[K: ClassTag, V: ClassTag, C: ClassTag]( // This may crash the driver with an OOM error. if (numPartitions.toLong * partitioner.numPartitions.toLong > (1L << 30)) { logWarning( - s"The number of shuffle blocks (${numPartitions.toLong * partitioner.numPartitions.toLong})" + - s" for shuffleId ${shuffleId} for ${_rdd} with ${numPartitions} partitions" + - " is possibly too large, which could cause the driver to crash with an out-of-memory" + - " error. Consider decreasing the number of partitions in this shuffle stage." + log"The number of shuffle blocks " + + log"(${MDC(NUM_PARTITIONS, numPartitions.toLong * partitioner.numPartitions.toLong)})" + + log" for shuffleId ${MDC(SHUFFLE_ID, shuffleId)} " + + log"for ${MDC(RDD_DESCRIPTION, _rdd)} " + + log"with ${MDC(NUM_PARTITIONS2, numPartitions)} partitions" + + log" is possibly too large, which could cause the driver to crash with an out-of-memory" + + log" error. Consider decreasing the number of partitions in this shuffle stage." ) } diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index 94927caff1d78..1fe02eec3a072 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -26,7 +26,8 @@ import scala.util.control.NonFatal import com.codahale.metrics.{Counter, Gauge, MetricRegistry} -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.DECOMMISSION_ENABLED import org.apache.spark.internal.config.Tests.TEST_DYNAMIC_ALLOCATION_SCHEDULE_ENABLED @@ -205,11 +206,13 @@ private[spark] class ExecutorAllocationManager( throw new SparkException( s"s${DYN_ALLOCATION_SUSTAINED_SCHEDULER_BACKLOG_TIMEOUT.key} must be > 0!") } + val shuffleTrackingEnabled = conf.get(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED) + val shuffleDecommissionEnabled = decommissionEnabled && + conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED) if (!conf.get(config.SHUFFLE_SERVICE_ENABLED) && !reliableShuffleStorage) { - if (conf.get(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED)) { + if (shuffleTrackingEnabled) { logInfo("Dynamic allocation is enabled without a shuffle service.") - } else if (decommissionEnabled && - conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED)) { + } else if (shuffleDecommissionEnabled) { logInfo("Shuffle data decommission is enabled without a shuffle service.") } else if (!testing) { throw new SparkException("Dynamic allocation of executors requires one of the " + @@ -223,6 +226,12 @@ private[spark] class ExecutorAllocationManager( } } + if (shuffleTrackingEnabled && (shuffleDecommissionEnabled || reliableShuffleStorage)) { + logWarning("You are enabling both shuffle tracking and another DA-supported mechanism, " + + "which will cause idle executors not to be released in a timely manner; " + + "please check the configurations.") + } + if (executorAllocationRatio
> 1.0 || executorAllocationRatio <= 0.0) { throw new SparkException( s"${DYN_ALLOCATION_EXECUTOR_ALLOCATION_RATIO.key} must be > 0 and <= 1.0") @@ -445,10 +454,12 @@ private[spark] class ExecutorAllocationManager( val delta = targetNum.delta totalDelta += delta if (delta > 0) { - val executorsString = "executor" + { if (delta > 1) "s" else "" } - logInfo(s"Requesting $delta new $executorsString because tasks are backlogged " + - s"(new desired total will be ${numExecutorsTargetPerResourceProfileId(rpId)} " + - s"for resource profile id: ${rpId})") + val executorsString = log" new executor" + { if (delta > 1) log"s" else log"" } + logInfo(log"Requesting ${MDC(TARGET_NUM_EXECUTOR_DELTA, delta)}" + + executorsString + log" because tasks are backlogged " + + log"(new desired total will be" + + log" ${MDC(TARGET_NUM_EXECUTOR, numExecutorsTargetPerResourceProfileId(rpId))} " + + log"for resource profile id: ${MDC(RESOURCE_PROFILE_ID, rpId)})") numExecutorsToAddPerResourceProfileId(rpId) = if (delta == numExecutorsToAddPerResourceProfileId(rpId)) { numExecutorsToAddPerResourceProfileId(rpId) * 2 @@ -542,8 +553,8 @@ private[spark] class ExecutorAllocationManager( if (testing) { throw new SparkException("ResourceProfile Id was UNKNOWN, this is not expected") } - logWarning(s"Not removing executor $executorIdToBeRemoved because the " + - "ResourceProfile was UNKNOWN!") + logWarning(log"Not removing executor ${MDC(EXECUTOR_IDS, executorIdToBeRemoved)} " + + log"because the ResourceProfile was UNKNOWN!") } else { // get the running total as we remove or initialize it to the count - pendingRemoval val newExecutorTotal = numExecutorsTotalPerRpId.getOrElseUpdate(rpId, @@ -603,11 +614,13 @@ private[spark] class ExecutorAllocationManager( } else { executorMonitor.executorsKilled(executorsRemoved.toSeq) } - logInfo(s"Executors ${executorsRemoved.mkString(",")} removed due to idle timeout.") + logInfo(log"Executors ${MDC(EXECUTOR_IDS, executorsRemoved.mkString(","))}" + + log"removed due to idle timeout.") executorsRemoved.toSeq } else { - logWarning(s"Unable to reach the cluster manager to kill executor/s " + - s"${executorIdsToBeRemoved.mkString(",")} or no executor eligible to kill!") + logWarning(log"Unable to reach the cluster manager to kill executor/s " + + log"${MDC(EXECUTOR_IDS, executorIdsToBeRemoved.mkString(","))} " + + log"or no executor eligible to kill!") Seq.empty[String] } } @@ -870,8 +883,9 @@ private[spark] class ExecutorAllocationManager( // really complete and no tasks left resourceProfileIdToStageAttempt(rpForStage.head) -= stageAttempt } else { - logWarning(s"Should have exactly one resource profile for stage $stageAttempt," + - s" but have $rpForStage") + logWarning(log"Should have exactly one resource profile for stage " + + log"${MDC(STAGE_ATTEMPT, stageAttempt)}, but have " + + log"${MDC(RESOURCE_PROFILE_ID, rpForStage)}") } } } diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala index 5999040894ae5..92aea5959aab7 100644 --- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala +++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala @@ -23,7 +23,8 @@ import scala.collection.mutable.{HashMap, Map} import scala.concurrent.Future import org.apache.spark.executor.ExecutorMetrics -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.Network import 
org.apache.spark.rpc.{IsolatedThreadSafeRpcEndpoint, RpcCallContext, RpcEnv} import org.apache.spark.scheduler._ @@ -159,7 +160,8 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock) // Because Executor will sleep several seconds before sending the first "Heartbeat", this // case rarely happens. However, if it really happens, log it and ask the executor to // register itself again. - logWarning(s"Dropping $heartbeat because TaskScheduler is not ready yet") + logWarning(log"Dropping ${MDC(HEARTBEAT, heartbeat)} " + + log"because TaskScheduler is not ready yet") context.reply(HeartbeatResponse(reregisterBlockManager)) } } @@ -210,8 +212,10 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock) val now = clock.getTimeMillis() for ((executorId, lastSeenMs) <- executorLastSeen) { if (now - lastSeenMs > executorTimeoutMs) { - logWarning(s"Removing executor $executorId with no recent heartbeats: " + - s"${now - lastSeenMs} ms exceeds timeout $executorTimeoutMs ms") + logWarning(log"Removing executor ${MDC(EXECUTOR_ID, executorId)} " + + log"with no recent heartbeats: " + + log"${MDC(TIME_UNITS, now - lastSeenMs)} ms exceeds timeout " + + log"${MDC(EXECUTOR_TIMEOUT, executorTimeoutMs)} ms") // Asynchronously kill the executor to avoid blocking the current thread killExecutorThread.submit(new Runnable { override def run(): Unit = Utils.tryLogNonFatalError { diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 48569eb713793..a660bccd2e68f 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -34,8 +34,8 @@ import org.apache.commons.io.output.{ByteArrayOutputStream => ApacheByteArrayOut import org.roaringbitmap.RoaringBitmap import org.apache.spark.broadcast.{Broadcast, BroadcastManager} -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.{Logging, MDC, MessageWithContext} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.io.CompressionCodec import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv} @@ -44,7 +44,6 @@ import org.apache.spark.shuffle.MetadataFetchFailedException import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockId, ShuffleMergedBlockId} import org.apache.spark.util._ import org.apache.spark.util.ArrayImplicits._ -import org.apache.spark.util.collection.OpenHashMap import org.apache.spark.util.io.{ChunkedByteBuffer, ChunkedByteBufferOutputStream} /** @@ -153,17 +152,22 @@ private class ShuffleStatus( /** * Mapping from a mapId to the mapIndex, this is required to reduce the searching overhead within * the function updateMapOutput(mapId, bmAddress). + * + * Exposed for testing. */ - private[this] val mapIdToMapIndex = new OpenHashMap[Long, Int]() + private[spark] val mapIdToMapIndex = new HashMap[Long, Int]() /** * Register a map output. If there is already a registered location for the map output then it * will be replaced by the new location. 
*/ def addMapOutput(mapIndex: Int, status: MapStatus): Unit = withWriteLock { - if (mapStatuses(mapIndex) == null) { + val currentMapStatus = mapStatuses(mapIndex) + if (currentMapStatus == null) { _numAvailableMapOutputs += 1 invalidateSerializedMapOutputStatusCache() + } else { + mapIdToMapIndex.remove(currentMapStatus.mapId) } mapStatuses(mapIndex) = status mapIdToMapIndex(status.mapId) = mapIndex @@ -188,26 +192,30 @@ private class ShuffleStatus( val mapStatusOpt = mapIndex.map(mapStatuses(_)).flatMap(Option(_)) mapStatusOpt match { case Some(mapStatus) => - logInfo(s"Updating map output for ${mapId} to ${bmAddress}") + logInfo(log"Updating map output for ${MDC(MAP_ID, mapId)}" + + log" to ${MDC(BLOCK_MANAGER_ID, bmAddress)}") mapStatus.updateLocation(bmAddress) invalidateSerializedMapOutputStatusCache() case None => - if (mapIndex.map(mapStatusesDeleted).exists(_.mapId == mapId)) { - val index = mapIndex.get + val index = mapStatusesDeleted.indexWhere(x => x != null && x.mapId == mapId) + if (index >= 0 && mapStatuses(index) == null) { val mapStatus = mapStatusesDeleted(index) mapStatus.updateLocation(bmAddress) mapStatuses(index) = mapStatus _numAvailableMapOutputs += 1 invalidateSerializedMapOutputStatusCache() mapStatusesDeleted(index) = null - logInfo(s"Recover ${mapStatus.mapId} ${mapStatus.location}") + logInfo(log"Recover ${MDC(MAP_ID, mapStatus.mapId)}" + + log" ${MDC(BLOCK_MANAGER_ID, mapStatus.location)}") } else { - logWarning(s"Asked to update map output ${mapId} for untracked map status.") + logWarning(log"Asked to update map output ${MDC(MAP_ID, mapId)} " + + log"for untracked map status.") } } } catch { case e: java.lang.NullPointerException => - logWarning(s"Unable to update map output for ${mapId}, status removed in-flight") + logWarning(log"Unable to update map output for ${MDC(MAP_ID, mapId)}, " + + log"status removed in-flight") } } @@ -218,9 +226,11 @@ private class ShuffleStatus( */ def removeMapOutput(mapIndex: Int, bmAddress: BlockManagerId): Unit = withWriteLock { logDebug(s"Removing existing map output ${mapIndex} ${bmAddress}") - if (mapStatuses(mapIndex) != null && mapStatuses(mapIndex).location == bmAddress) { + val currentMapStatus = mapStatuses(mapIndex) + if (currentMapStatus != null && currentMapStatus.location == bmAddress) { _numAvailableMapOutputs -= 1 - mapStatusesDeleted(mapIndex) = mapStatuses(mapIndex) + mapIdToMapIndex.remove(currentMapStatus.mapId) + mapStatusesDeleted(mapIndex) = currentMapStatus mapStatuses(mapIndex) = null invalidateSerializedMapOutputStatusCache() } @@ -286,9 +296,11 @@ private class ShuffleStatus( */ def removeOutputsByFilter(f: BlockManagerId => Boolean): Unit = withWriteLock { for (mapIndex <- mapStatuses.indices) { - if (mapStatuses(mapIndex) != null && f(mapStatuses(mapIndex).location)) { + val currentMapStatus = mapStatuses(mapIndex) + if (currentMapStatus != null && f(currentMapStatus.location)) { _numAvailableMapOutputs -= 1 - mapStatusesDeleted(mapIndex) = mapStatuses(mapIndex) + mapIdToMapIndex.remove(currentMapStatus.mapId) + mapStatusesDeleted(mapIndex) = currentMapStatus mapStatuses(mapIndex) = null invalidateSerializedMapOutputStatusCache() } @@ -488,20 +500,24 @@ private[spark] class MapOutputTrackerMasterEndpoint( logDebug("init") // force eager creation of logger + private def logInfoMsg(msg: MessageWithContext, shuffleId: Int, context: RpcCallContext): Unit = { + val hostPort = context.senderAddress.hostPort + logInfo(log"Asked to send " + + msg + + log" locations for shuffle ${MDC(SHUFFLE_ID, shuffleId)} 
to ${MDC(HOST_PORT, hostPort)}") + } + override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case GetMapOutputStatuses(shuffleId: Int) => - val hostPort = context.senderAddress.hostPort - logInfo(s"Asked to send map output locations for shuffle $shuffleId to $hostPort") + logInfoMsg(log"map output", shuffleId, context) tracker.post(GetMapOutputMessage(shuffleId, context)) case GetMapAndMergeResultStatuses(shuffleId: Int) => - val hostPort = context.senderAddress.hostPort - logInfo(s"Asked to send map/merge result locations for shuffle $shuffleId to $hostPort") + logInfoMsg(log"map/merge result", shuffleId, context) tracker.post(GetMapAndMergeOutputMessage(shuffleId, context)) case GetShufflePushMergerLocations(shuffleId: Int) => - logInfo(s"Asked to send shuffle push merger locations for shuffle" + - s" $shuffleId to ${context.senderAddress.hostPort}") + logInfoMsg(log"shuffle push merger", shuffleId, context) tracker.post(GetShufflePushMergersMessage(shuffleId, context)) case StopMapOutputTracker => @@ -815,7 +831,8 @@ private[spark] class MapOutputTrackerMaster( case Some(shuffleStatus) => shuffleStatus.updateMapOutput(mapId, bmAddress) case None if shuffleMigrationEnabled => - logWarning(s"Asked to update map output for unknown shuffle ${shuffleId}") + logWarning(log"Asked to update map output for unknown shuffle " + + log"${MDC(SHUFFLE_ID, shuffleId)}") case None => logError(log"Asked to update map output for unknown shuffle ${MDC(SHUFFLE_ID, shuffleId)}") } @@ -1419,13 +1436,15 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr val mergeOutputStatuses = mergeStatuses.get(shuffleId).orNull if (mapOutputStatuses == null || mergeOutputStatuses == null) { - logInfo("Don't have map/merge outputs for shuffle " + shuffleId + ", fetching them") + logInfo(log"Don't have map/merge outputs for" + + log" shuffle ${MDC(SHUFFLE_ID, shuffleId)}, fetching them") val startTimeNs = System.nanoTime() fetchingLock.withLock(shuffleId) { var fetchedMapStatuses = mapStatuses.get(shuffleId).orNull var fetchedMergeStatuses = mergeStatuses.get(shuffleId).orNull if (fetchedMapStatuses == null || fetchedMergeStatuses == null) { - logInfo("Doing the fetch; tracker endpoint = " + trackerEndpoint) + logInfo(log"Doing the fetch; tracker endpoint = " + + log"${MDC(RPC_ENDPOINT_REF, trackerEndpoint)}") val fetchedBytes = askTracker[(Array[Byte], Array[Byte])](GetMapAndMergeResultStatuses(shuffleId)) try { @@ -1453,12 +1472,14 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr } else { val statuses = mapStatuses.get(shuffleId).orNull if (statuses == null) { - logInfo("Don't have map outputs for shuffle " + shuffleId + ", fetching them") + logInfo(log"Don't have map outputs for shuffle ${MDC(SHUFFLE_ID, shuffleId)}," + + log" fetching them") val startTimeNs = System.nanoTime() fetchingLock.withLock(shuffleId) { var fetchedStatuses = mapStatuses.get(shuffleId).orNull if (fetchedStatuses == null) { - logInfo("Doing the fetch; tracker endpoint = " + trackerEndpoint) + logInfo(log"Doing the fetch; tracker endpoint =" + + log" ${MDC(RPC_ENDPOINT_REF, trackerEndpoint)}") val fetchedBytes = askTracker[Array[Byte]](GetMapOutputStatuses(shuffleId)) try { fetchedStatuses = @@ -1497,7 +1518,7 @@ private[spark] class MapOutputTrackerWorker(conf: SparkConf) extends MapOutputTr def updateEpoch(newEpoch: Long): Unit = { epochLock.synchronized { if (newEpoch > epoch) { - logInfo("Updating epoch to " + newEpoch + " and clearing cache") + 
logInfo(log"Updating epoch to ${MDC(EPOCH, newEpoch)} and clearing cache") epoch = newEpoch mapStatuses.clear() mergeStatuses.clear() @@ -1558,7 +1579,9 @@ private[spark] object MapOutputTracker extends Logging { oos.close() } val outArr = out.toByteArray - logInfo("Broadcast outputstatuses size = " + outArr.length + ", actual size = " + arrSize) + logInfo(log"Broadcast outputstatuses size = " + + log"${MDC(BROADCAST_OUTPUT_STATUS_SIZE, outArr.length)}," + + log" actual size = ${MDC(BROADCAST_OUTPUT_STATUS_SIZE, arrSize)}") (outArr, bcast) } else { (chunkedByteBuf.toArray, null) @@ -1591,8 +1614,10 @@ private[spark] object MapOutputTracker extends Logging { try { // deserialize the Broadcast, pull .value array out of it, and then deserialize that val bcast = deserializeObject(in).asInstanceOf[Broadcast[Array[Array[Byte]]]] - logInfo("Broadcast outputstatuses size = " + bytes.length + - ", actual size = " + bcast.value.foldLeft(0L)(_ + _.length)) + val actualSize = bcast.value.foldLeft(0L)(_ + _.length) + logInfo(log"Broadcast outputstatuses size =" + + log" ${MDC(BROADCAST_OUTPUT_STATUS_SIZE, bytes.length)}" + + log", actual size = ${MDC(BROADCAST_OUTPUT_STATUS_SIZE, actualSize)}") val bcastIn = new ChunkedByteBuffer(bcast.value.map(ByteBuffer.wrap)).toInputStream() // Important - ignore the DIRECT tag ! Start from offset 1 bcastIn.skip(1) diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala index ae39e2e183e4a..357e71cdf4457 100644 --- a/core/src/main/scala/org/apache/spark/Partitioner.scala +++ b/core/src/main/scala/org/apache/spark/Partitioner.scala @@ -19,6 +19,7 @@ package org.apache.spark import java.io.{IOException, ObjectInputStream, ObjectOutputStream} +import scala.collection.immutable.ArraySeq import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.math.log10 @@ -149,7 +150,9 @@ private[spark] class KeyGroupedPartitioner( override val numPartitions: Int) extends Partitioner { override def getPartition(key: Any): Int = { val keys = key.asInstanceOf[Seq[Any]] - valueMap.getOrElseUpdate(keys, Utils.nonNegativeMod(keys.hashCode, numPartitions)) + val normalizedKeys = ArraySeq.from(keys) + valueMap.getOrElseUpdate(normalizedKeys, + Utils.nonNegativeMod(normalizedKeys.hashCode, numPartitions)) } } diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index f1ef36bbf19c2..cfb514913694b 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -25,7 +25,8 @@ import scala.jdk.CollectionConverters._ import org.apache.avro.{Schema, SchemaNormalization} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config._ import org.apache.spark.internal.config.History._ import org.apache.spark.internal.config.Kryo._ @@ -507,11 +508,11 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria // Used by Yarn in 1.1 and before sys.props.get("spark.driver.libraryPath").foreach { value => val warning = - s""" - |spark.driver.libraryPath was detected (set to '$value'). + log""" + |spark.driver.libraryPath was detected (set to '${MDC(LogKeys.CONFIG, value)}'). |This is deprecated in Spark 1.2+. 
| - |Please instead use: ${DRIVER_LIBRARY_PATH.key} + |Please instead use: ${MDC(LogKeys.CONFIG2, DRIVER_LIBRARY_PATH.key)} """.stripMargin logWarning(warning) } @@ -554,9 +555,13 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria val executorCores = get(EXECUTOR_CORES) val leftCores = totalCores % executorCores if (leftCores != 0) { - logWarning(s"Total executor cores: ${totalCores} is not " + - s"divisible by cores per executor: ${executorCores}, " + - s"the left cores: ${leftCores} will not be allocated") + logWarning(log"Total executor cores: " + + log"${MDC(LogKeys.NUM_EXECUTOR_CORES_TOTAL, totalCores)} " + + log"is not divisible by cores per executor: " + + log"${MDC(LogKeys.NUM_EXECUTOR_CORES, executorCores)}, " + + log"the left cores: " + + log"${MDC(LogKeys.NUM_EXECUTOR_CORES_REMAINING, leftCores)} " + + log"will not be allocated") } } @@ -640,7 +645,11 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.blacklist.killBlacklistedExecutors", "3.1.0", "Please use spark.excludeOnFailure.killExcludedExecutors"), DeprecatedConfig("spark.yarn.blacklist.executor.launch.blacklisting.enabled", "3.1.0", - "Please use spark.yarn.executor.launch.excludeOnFailure.enabled") + "Please use spark.yarn.executor.launch.excludeOnFailure.enabled"), + DeprecatedConfig("spark.network.remoteReadNioBufferConversion", "3.5.2", + "Please open a JIRA ticket to report it if you need to use this configuration."), + DeprecatedConfig("spark.shuffle.unsafe.file.output.buffer", "4.0.0", + "Please use spark.shuffle.localDisk.file.output.buffer") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) @@ -772,15 +781,20 @@ private[spark] object SparkConf extends Logging { def logDeprecationWarning(key: String): Unit = { deprecatedConfigs.get(key).foreach { cfg => logWarning( - s"The configuration key '$key' has been deprecated as of Spark ${cfg.version} and " + - s"may be removed in the future. ${cfg.deprecationMessage}") + log"The configuration key '${MDC(LogKeys.CONFIG, key)}' has been deprecated " + + log"as of Spark ${MDC(LogKeys.CONFIG_VERSION, cfg.version)} and " + + log"may be removed in the future. " + + log"${MDC(LogKeys.CONFIG_DEPRECATION_MESSAGE, cfg.deprecationMessage)}") return } allAlternatives.get(key).foreach { case (newKey, cfg) => logWarning( - s"The configuration key '$key' has been deprecated as of Spark ${cfg.version} and " + - s"may be removed in the future. Please use the new key '$newKey' instead.") + log"The configuration key '${MDC(LogKeys.CONFIG, key)}' " + + log"has been deprecated as of " + + log"Spark ${MDC(LogKeys.CONFIG_VERSION, cfg.version)} and " + + log"may be removed in the future. 
Please use the new key " + + log"'${MDC(LogKeys.CONFIG_KEY_UPDATED, newKey)}' instead.") return } } diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 9d908cd8713ce..76138640dd2ae 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -28,14 +28,13 @@ import scala.collection.concurrent.{Map => ScalaConcurrentMap} import scala.collection.immutable import scala.collection.mutable.HashMap import scala.jdk.CollectionConverters._ -import scala.language.implicitConversions import scala.reflect.{classTag, ClassTag} import scala.util.control.NonFatal import com.google.common.collect.MapMaker import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.hadoop.io.{ArrayWritable, BooleanWritable, BytesWritable, DoubleWritable, FloatWritable, IntWritable, LongWritable, NullWritable, Text, Writable} +import org.apache.hadoop.io.{BooleanWritable, BytesWritable, DoubleWritable, FloatWritable, IntWritable, LongWritable, NullWritable, Text, Writable} import org.apache.hadoop.mapred.{FileInputFormat, InputFormat, JobConf, SequenceFileInputFormat, TextInputFormat} import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat, Job => NewHadoopJob} import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat} @@ -47,7 +46,7 @@ import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.executor.{Executor, ExecutorMetrics, ExecutorMetricsSource} import org.apache.spark.input.{FixedLengthBinaryInputFormat, PortableDataStream, StreamInputFormat, WholeTextFileInputFormat} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Tests._ import org.apache.spark.internal.config.UI._ @@ -88,6 +87,8 @@ class SparkContext(config: SparkConf) extends Logging { // The call site where this SparkContext was constructed. private val creationSite: CallSite = Utils.getCallSite() + private var stopSite: Option[CallSite] = None + if (!config.get(EXECUTOR_ALLOW_SPARK_CONTEXT)) { // In order to prevent SparkContext from being created in executors. 
SparkContext.assertOnDriver() @@ -117,6 +118,10 @@ class SparkContext(config: SparkConf) extends Logging { | |${creationSite.longForm} | + |And it was stopped at: + | + |${stopSite.getOrElse(CallSite.empty).longForm} + | |The currently active SparkContext was created at: | |$activeCreationSite @@ -194,10 +199,11 @@ class SparkContext(config: SparkConf) extends Logging { this(master, appName, sparkHome, jars, Map()) // log out Spark Version in Spark driver log - logInfo(s"Running Spark version $SPARK_VERSION") - logInfo(s"OS info ${System.getProperty("os.name")}, ${System.getProperty("os.version")}, " + - s"${System.getProperty("os.arch")}") - logInfo(s"Java version ${System.getProperty("java.version")}") + logInfo(log"Running Spark version ${MDC(LogKeys.SPARK_VERSION, SPARK_VERSION)}") + logInfo(log"OS info ${MDC(LogKeys.OS_NAME, System.getProperty("os.name"))}," + + log" ${MDC(LogKeys.OS_VERSION, System.getProperty("os.version"))}, " + + log"${MDC(LogKeys.OS_ARCH, System.getProperty("os.arch"))}") + logInfo(log"Java version ${MDC(LogKeys.JAVA_VERSION, System.getProperty("java.version"))}") /* ------------------------------------------------------------------------------------- * | Private variables. These variables keep the internal state of the context, and are | @@ -281,12 +287,7 @@ class SparkContext(config: SparkConf) extends Logging { conf: SparkConf, isLocal: Boolean, listenerBus: LiveListenerBus): SparkEnv = { - SparkEnv.createDriverEnv( - conf, - isLocal, - listenerBus, - SparkContext.numDriverCores(master, conf), - this) + SparkEnv.createDriverEnv(conf, isLocal, listenerBus, SparkContext.numDriverCores(master, conf)) } private[spark] def env: SparkEnv = _env @@ -420,7 +421,7 @@ class SparkContext(config: SparkConf) extends Logging { } // HADOOP-19097 Set fs.s3a.connection.establish.timeout to 30s // We can remove this after Apache Hadoop 3.4.1 releases - conf.setIfMissing("spark.hadoop.fs.s3a.connection.establish.timeout", "30s") + conf.setIfMissing("spark.hadoop.fs.s3a.connection.establish.timeout", "30000") // This should be set as early as possible. SparkContext.fillMissingMagicCommitterConfsIfNeeded(_conf) @@ -434,7 +435,7 @@ class SparkContext(config: SparkConf) extends Logging { logResourceInfo(SPARK_DRIVER_PREFIX, _resources) // log out spark.app.name in the Spark driver logs - logInfo(s"Submitted application: $appName") + logInfo(log"Submitted application: ${MDC(LogKeys.APP_NAME, appName)}") // System property spark.yarn.app.id must be set if user code ran by AM on a YARN cluster if (master == "yarn" && deployMode == "cluster" && !_conf.contains("spark.yarn.app.id")) { @@ -443,7 +444,7 @@ class SparkContext(config: SparkConf) extends Logging { } if (_conf.getBoolean("spark.logConf", false)) { - logInfo("Spark configuration:\n" + _conf.toDebugString) + logInfo(log"Spark configuration:\n${MDC(LogKeys.CONFIG, _conf.toDebugString)}") } // Set Spark driver host and port system properties. 
This explicitly sets the configuration @@ -595,6 +596,8 @@ class SparkContext(config: SparkConf) extends Logging { .foreach(logLevel => _schedulerBackend.updateExecutorsLogLevel(logLevel)) } + _conf.get(CHECKPOINT_DIR).foreach(setCheckpointDir) + val _executorMetricsSource = if (_conf.get(METRICS_EXECUTORMETRICS_SOURCE_ENABLED)) { Some(new ExecutorMetricsSource) @@ -742,15 +745,15 @@ class SparkContext(config: SparkConf) extends Logging { case Some(endpointRef) => Some(endpointRef.askSync[Array[ThreadStackTrace]](TriggerThreadDump)) case None => - logWarning(s"Executor $executorId might already have stopped and " + - "can not request thread dump from it.") + logWarning(log"Executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} " + + log"might already have stopped and can not request thread dump from it.") None } } } catch { case e: Exception => logError( - log"Exception getting thread dump from executor ${MDC(LogKey.EXECUTOR_ID, executorId)}", + log"Exception getting thread dump from executor ${MDC(LogKeys.EXECUTOR_ID, executorId)}", e) None } @@ -774,8 +777,8 @@ class SparkContext(config: SparkConf) extends Logging { case Some(endpointRef) => Some(endpointRef.askSync[Array[String]](TriggerHeapHistogram)) case None => - logWarning(s"Executor $executorId might already have stopped and " + - "can not request heap histogram from it.") + logWarning(log"Executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} " + + log"might already have stopped and can not request heap histogram from it.") None } } @@ -783,7 +786,7 @@ class SparkContext(config: SparkConf) extends Logging { case e: Exception => logError( log"Exception getting heap histogram from " + - log"executor ${MDC(LogKey.EXECUTOR_ID, executorId)}", e) + log"executor ${MDC(LogKeys.EXECUTOR_ID, executorId)}", e) None } } @@ -1699,7 +1702,8 @@ class SparkContext(config: SparkConf) extends Logging { "Can not directly broadcast RDDs; instead, call collect() and broadcast the result.") val bc = env.broadcastManager.newBroadcast[T](value, isLocal, serializedOnly) val callSite = getCallSite() - logInfo("Created broadcast " + bc.id + " from " + callSite.shortForm) + logInfo(log"Created broadcast ${MDC(LogKeys.BROADCAST_ID, bc.id)}" + + log" from ${MDC(LogKeys.CALL_SITE_SHORT_FORM, callSite.shortForm)}") cleaner.foreach(_.registerBroadcastForCleanup(bc)) bc } @@ -1780,8 +1784,9 @@ class SparkContext(config: SparkConf) extends Logging { val schemeCorrectedURI = uri.getScheme match { case null => new File(path).getCanonicalFile.toURI case "local" => - logWarning(s"File with 'local' scheme $path is not supported to add to file server, " + - s"since it is already available on every node.") + logWarning(log"File with 'local' scheme ${MDC(LogKeys.PATH, path)} " + + log"is not supported to add to file server, " + + log"since it is already available on every node.") return case _ => uri } @@ -1827,7 +1832,8 @@ class SparkContext(config: SparkConf) extends Logging { addedFiles .getOrElseUpdate(jobArtifactUUID, new ConcurrentHashMap[String, Long]().asScala) .putIfAbsent(key, timestamp).isEmpty) { - logInfo(s"Added file $path at $key with timestamp $timestamp") + logInfo(log"Added file ${MDC(LogKeys.PATH, path)} at ${MDC(LogKeys.KEY, key)} with" + + log" timestamp ${MDC(LogKeys.TIMESTAMP, timestamp)}") // Fetch the file locally so that closures which are run on the driver can still use the // SparkFiles API to access files. 
Utils.fetchFile(uri.toString, root, conf, hadoopConfiguration, timestamp, useCache = false) @@ -1839,7 +1845,8 @@ class SparkContext(config: SparkConf) extends Logging { .putIfAbsent( Utils.getUriBuilder(new URI(key)).fragment(uri.getFragment).build().toString, timestamp).isEmpty) { - logInfo(s"Added archive $path at $key with timestamp $timestamp") + logInfo(log"Added archive ${MDC(LogKeys.PATH, path)} at ${MDC(LogKeys.KEY, key)}" + + log" with timestamp ${MDC(LogKeys.TIMESTAMP, timestamp)}") // If the scheme is file, use URI to simply copy instead of downloading. val uriToUse = if (!isLocal && scheme == "file") uri else new URI(key) val uriToDownload = Utils.getUriBuilder(uriToUse).fragment(null).build() @@ -1849,13 +1856,16 @@ class SparkContext(config: SparkConf) extends Logging { root, if (uri.getFragment != null) uri.getFragment else source.getName) logInfo( - s"Unpacking an archive $path from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + log"Unpacking an archive ${MDC(LogKeys.PATH, path)}" + + log" from ${MDC(LogKeys.SOURCE_PATH, source.getAbsolutePath)}" + + log" to ${MDC(LogKeys.DESTINATION_PATH, dest.getAbsolutePath)}") Utils.deleteRecursively(dest) Utils.unpack(source, dest) postEnvironmentUpdate() } else { - logWarning(s"The path $path has been added already. Overwriting of added paths " + - "is not supported in the current version.") + logWarning(log"The path ${MDC(LogKeys.PATH, path)} " + + log"has been added already. Overwriting of added paths " + + log"is not supported in the current version.") } } @@ -2145,7 +2155,7 @@ class SparkContext(config: SparkConf) extends Logging { Seq(env.rpcEnv.fileServer.addJar(file)) } catch { case NonFatal(e) => - logError(log"Failed to add ${MDC(LogKey.PATH, path)} to Spark environment", e) + logError(log"Failed to add ${MDC(LogKeys.PATH, path)} to Spark environment", e) Nil } } @@ -2166,7 +2176,7 @@ class SparkContext(config: SparkConf) extends Logging { Seq(path) } catch { case NonFatal(e) => - logError(log"Failed to add ${MDC(LogKey.PATH, path)} to Spark environment", e) + logError(log"Failed to add ${MDC(LogKeys.PATH, path)} to Spark environment", e) Nil } } else { @@ -2209,14 +2219,21 @@ class SparkContext(config: SparkConf) extends Logging { .getOrElseUpdate(jobArtifactUUID, new ConcurrentHashMap[String, Long]().asScala) .putIfAbsent(_, timestamp).isEmpty) if (added.nonEmpty) { - val jarMessage = if (scheme != "ivy") "JAR" else "dependency jars of Ivy URI" - logInfo(s"Added $jarMessage $path at ${added.mkString(",")} with timestamp $timestamp") + val jarMessage = if (scheme != "ivy") { + log"Added JAR" + } else { + log"Added dependency jars of Ivy URI" + } + logInfo(jarMessage + log" ${MDC(LogKeys.PATH, path)}" + + log" at ${MDC(LogKeys.ADDED_JARS, added.mkString(","))}" + + log" with timestamp ${MDC(LogKeys.TIMESTAMP, timestamp)}") postEnvironmentUpdate() } if (existed.nonEmpty) { val jarMessage = if (scheme != "ivy") "JAR" else "dependency jars of Ivy URI" - logWarning(s"The $jarMessage $path at ${existed.mkString(",")} has been added already." + - " Overwriting of added jar is not supported in the current version.") + logWarning(log"The ${MDC(LogKeys.JAR_MESSAGE, jarMessage)} ${MDC(LogKeys.PATH, path)} " + + log"at ${MDC(LogKeys.EXISTING_PATH, existed.mkString(","))} has been added already." 
+ + log" Overwriting of added jar is not supported in the current version.") } } } @@ -2265,7 +2282,9 @@ class SparkContext(config: SparkConf) extends Logging { * @param exitCode Specified exit code that will passed to scheduler backend in client mode. */ def stop(exitCode: Int): Unit = { - logInfo(s"SparkContext is stopping with exitCode $exitCode.") + stopSite = Some(getCallSite()) + logInfo(log"SparkContext is stopping with exitCode ${MDC(LogKeys.EXIT_CODE, exitCode)}" + + log" from ${MDC(LogKeys.STOP_SITE_SHORT_FORM, stopSite.get.shortForm)}.") if (LiveListenerBus.withinListenerThread.value) { throw new SparkException(s"Cannot stop SparkContext within listener bus thread.") } @@ -2302,6 +2321,11 @@ class SparkContext(config: SparkConf) extends Logging { } _dagScheduler = null } + // In case there are still events being posted during the shutdown of plugins, + // invoke the shutdown of each plugin before the listenerBus is stopped. + Utils.tryLogNonFatalError { + _plugins.foreach(_.shutdown()) + } if (_listenerBusStarted) { Utils.tryLogNonFatalError { listenerBus.stop() @@ -2313,9 +2337,6 @@ class SparkContext(config: SparkConf) extends Logging { env.metricsSystem.report() } } - Utils.tryLogNonFatalError { - _plugins.foreach(_.shutdown()) - } Utils.tryLogNonFatalError { FallbackStorage.cleanUp(_conf, _hadoopConfiguration) } @@ -2429,9 +2450,10 @@ class SparkContext(config: SparkConf) extends Logging { } val callSite = getCallSite() val cleanedFunc = clean(func) - logInfo("Starting job: " + callSite.shortForm) + logInfo(log"Starting job: ${MDC(LogKeys.CALL_SITE_SHORT_FORM, callSite.shortForm)}") if (conf.getBoolean("spark.logLineage", false)) { - logInfo("RDD's recursive dependencies:\n" + rdd.toDebugString) + logInfo(log"RDD's recursive dependencies:\n" + + log"${MDC(LogKeys.RDD_DEBUG_STRING, rdd.toDebugString)}") } dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, resultHandler, localProperties.get) progressBar.foreach(_.finishAll()) @@ -2550,13 +2572,14 @@ class SparkContext(config: SparkConf) extends Logging { timeout: Long): PartialResult[R] = { assertNotStopped() val callSite = getCallSite() - logInfo("Starting job: " + callSite.shortForm) - val start = System.nanoTime + logInfo(log"Starting job: ${MDC(LogKeys.CALL_SITE_SHORT_FORM, callSite.shortForm)}") + val start = System.currentTimeMillis() val cleanedFunc = clean(func) val result = dagScheduler.runApproximateJob(rdd, cleanedFunc, evaluator, callSite, timeout, localProperties.get) logInfo( - "Job finished: " + callSite.shortForm + ", took " + (System.nanoTime - start) / 1e9 + " s") + log"Job finished: ${MDC(LogKeys.CALL_SITE_SHORT_FORM, callSite.shortForm)}," + + log" took ${MDC(LogKeys.TOTAL_TIME, System.currentTimeMillis() - start)}ms") result } @@ -2736,9 +2759,9 @@ class SparkContext(config: SparkConf) extends Logging { // its own local file system, which is incorrect because the checkpoint files // are actually on the executor machines. if (!isLocal && Utils.nonLocalPaths(directory).isEmpty) { - logWarning("Spark is not running in local mode, therefore the checkpoint directory " + - s"must not be on the local filesystem. Directory '$directory' " + - "appears to be on the local filesystem.") + logWarning(log"Spark is not running in local mode, therefore the checkpoint directory " + + log"must not be on the local filesystem. 
Directory '${MDC(LogKeys.PATH, directory)}' " + + log"appears to be on the local filesystem.") } checkpointDir = Option(directory).map { dir => @@ -2784,7 +2807,8 @@ class SparkContext(config: SparkConf) extends Logging { val listeners = Utils.loadExtensions(classOf[SparkListenerInterface], classNames, conf) listeners.foreach { listener => listenerBus.addToSharedQueue(listener) - logInfo(s"Registered listener ${listener.getClass().getName()}") + logInfo(log"Registered listener" + + log"${MDC(LogKeys.CLASS_NAME, listener.getClass().getName())}") } } } catch { @@ -2899,10 +2923,11 @@ object SparkContext extends Logging { // its creationSite field being null: val otherContextCreationSite = Option(otherContext.creationSite).map(_.longForm).getOrElse("unknown location") - val warnMsg = "Another SparkContext is being constructed (or threw an exception in its" + - " constructor). This may indicate an error, since only one SparkContext should be" + - " running in this JVM (see SPARK-2243)." + - s" The other SparkContext was created at:\n$otherContextCreationSite" + val warnMsg = log"Another SparkContext is being constructed (or threw an exception in its" + + log" constructor). This may indicate an error, since only one SparkContext should be" + + log" running in this JVM (see SPARK-2243)." + + log" The other SparkContext was created at:\n" + + log"${MDC(LogKeys.CREATION_SITE, otherContextCreationSite)}" logWarning(warnMsg) } } @@ -3039,14 +3064,6 @@ object SparkContext extends Logging { } } - private implicit def arrayToArrayWritable[T <: Writable : ClassTag](arr: Iterable[T]) - : ArrayWritable = { - def anyToWritable[U <: Writable](u: U): Writable = u - - new ArrayWritable(classTag[T].runtimeClass.asInstanceOf[Class[Writable]], - arr.map(x => anyToWritable(x)).toArray) - } - /** * Find the JAR from which a given class was loaded, to make it easy for users to pass * their JARs to SparkContext. 
diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 50d0358004d40..de2d215562b9f 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -32,7 +32,8 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.python.{PythonWorker, PythonWorkerFactory} import org.apache.spark.broadcast.BroadcastManager import org.apache.spark.executor.ExecutorBackend -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config._ import org.apache.spark.memory.{MemoryManager, UnifiedMemoryManager} import org.apache.spark.metrics.{MetricsSystem, MetricsSystemInstances} @@ -130,7 +131,8 @@ class SparkEnv ( Utils.deleteRecursively(new File(path)) } catch { case e: Exception => - logWarning(s"Exception while deleting Spark temp dir: $path", e) + logWarning(log"Exception while deleting Spark temp dir: " + + log"${MDC(LogKeys.PATH, path)}", e) } case None => // We just need to delete tmp dir created by driver, so do nothing on executor } @@ -142,7 +144,7 @@ class SparkEnv ( workerModule: String, daemonModule: String, envVars: Map[String, String], - useDaemon: Boolean): (PythonWorker, Option[Long]) = { + useDaemon: Boolean): (PythonWorker, Option[Int]) = { synchronized { val key = PythonWorkersKey(pythonExec, workerModule, daemonModule, envVars) val workerFactory = pythonWorkers.getOrElseUpdate(key, new PythonWorkerFactory( @@ -161,7 +163,7 @@ class SparkEnv ( pythonExec: String, workerModule: String, envVars: Map[String, String], - useDaemon: Boolean): (PythonWorker, Option[Long]) = { + useDaemon: Boolean): (PythonWorker, Option[Int]) = { createPythonWorker( pythonExec, workerModule, PythonWorkerFactory.defaultDaemonModule, envVars, useDaemon) } @@ -170,7 +172,7 @@ class SparkEnv ( pythonExec: String, workerModule: String, daemonModule: String, - envVars: Map[String, String]): (PythonWorker, Option[Long]) = { + envVars: Map[String, String]): (PythonWorker, Option[Int]) = { val useDaemon = conf.get(Python.PYTHON_USE_DAEMON) createPythonWorker( pythonExec, workerModule, daemonModule, envVars, useDaemon) @@ -256,7 +258,6 @@ object SparkEnv extends Logging { isLocal: Boolean, listenerBus: LiveListenerBus, numCores: Int, - sparkContext: SparkContext, mockOutputCommitCoordinator: Option[OutputCommitCoordinator] = None): SparkEnv = { assert(conf.contains(DRIVER_HOST_ADDRESS), s"${DRIVER_HOST_ADDRESS.key} is not set on the driver!") @@ -279,7 +280,6 @@ object SparkEnv extends Logging { numCores, ioEncryptionKey, listenerBus = listenerBus, - Option(sparkContext), mockOutputCommitCoordinator = mockOutputCommitCoordinator ) } @@ -315,7 +315,6 @@ object SparkEnv extends Logging { /** * Helper method to create a SparkEnv for a driver or an executor. 
*/ - // scalastyle:off argcount private def create( conf: SparkConf, executorId: String, @@ -326,9 +325,7 @@ object SparkEnv extends Logging { numUsableCores: Int, ioEncryptionKey: Option[Array[Byte]], listenerBus: LiveListenerBus = null, - sc: Option[SparkContext] = None, mockOutputCommitCoordinator: Option[OutputCommitCoordinator] = None): SparkEnv = { - // scalastyle:on argcount val isDriver = executorId == SparkContext.DRIVER_IDENTIFIER @@ -471,12 +468,7 @@ object SparkEnv extends Logging { } val outputCommitCoordinator = mockOutputCommitCoordinator.getOrElse { - if (isDriver) { - new OutputCommitCoordinator(conf, isDriver, sc) - } else { - new OutputCommitCoordinator(conf, isDriver) - } - + new OutputCommitCoordinator(conf, isDriver) } val outputCommitCoordinatorRef = registerOrLookupEndpoint("OutputCommitCoordinator", new OutputCommitCoordinatorEndpoint(rpcEnv, outputCommitCoordinator)) diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala index e433cc10ae731..8167952d6b87f 100644 --- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala +++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala @@ -25,7 +25,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.LISTENER +import org.apache.spark.internal.LogKeys.LISTENER import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala b/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala index fb0584b458463..7737822f2af2b 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonGatewayServer.scala @@ -24,7 +24,7 @@ import java.nio.file.Files import org.apache.spark.SparkConf import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CLASS_NAME, PATH} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, PATH} /** * Process that starts a Py4J server on an ephemeral port. 
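Callers of the SparkEnv API above now receive the worker pid as Option[Int] rather than Option[Long]. A small sketch of consuming the changed signature (a sketch only: createPythonWorker is internal to Spark, and the argument values are placeholders):

// Assumes code living inside the org.apache.spark package with a live SparkEnv.
val env = SparkEnv.get
val (worker, pid: Option[Int]) = env.createPythonWorker(
  "python3", "pyspark.worker", Map.empty[String, String], useDaemon = true)
// The pid fits in an Int because the worker/daemon protocol exchanges it as a 32-bit
// value (see the readInt/writeInt calls in PythonWorkerFactory further down).
pid.foreach(p => require(p >= 0, s"unexpected negative worker pid: $p"))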
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala b/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
index 4f7c5bc0b0c05..5e2b5553f3dca 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonHadoopUtil.scala
@@ -26,7 +26,7 @@ import org.apache.hadoop.io._
 import org.apache.spark.SparkException
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.internal.{Logging, MDC}
-import org.apache.spark.internal.LogKey.CLASS_NAME
+import org.apache.spark.internal.LogKeys.CLASS_NAME
 import org.apache.spark.rdd.RDD
 import org.apache.spark.util.{SerializableConfiguration, Utils}
 
@@ -45,7 +45,7 @@ private[python] object Converter extends Logging {
     converterClass.map { cc =>
       Try {
         val c = Utils.classForName[Converter[T, U]](cc).getConstructor().newInstance()
-        logInfo(s"Loaded converter: $cc")
+        logInfo(log"Loaded converter: ${MDC(CLASS_NAME, cc)}")
         c
       } match {
         case Success(c) => c
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 5aa080b5fb291..d643983ef5dfe 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -37,7 +37,8 @@ import org.apache.spark.api.java.{JavaPairRDD, JavaRDD, JavaSparkContext}
 import org.apache.spark.api.python.PythonFunction.PythonAccumulator
 import org.apache.spark.broadcast.Broadcast
 import org.apache.spark.input.PortableDataStream
-import org.apache.spark.internal.Logging
+import org.apache.spark.internal.{Logging, MDC}
+import org.apache.spark.internal.LogKeys.{HOST, PORT}
 import org.apache.spark.internal.config.BUFFER_SIZE
 import org.apache.spark.network.util.JavaUtils
 import org.apache.spark.rdd.RDD
@@ -733,7 +734,8 @@ private[spark] class PythonAccumulatorV2(
   private def openSocket(): Socket = synchronized {
     if (socket == null || socket.isClosed) {
       socket = new Socket(serverHost, serverPort)
-      logInfo(s"Connected to AccumulatorServer at host: $serverHost port: $serverPort")
+      logInfo(log"Connected to AccumulatorServer at host: ${MDC(HOST, serverHost)}" +
+        log" port: ${MDC(PORT, serverPort)}")
       // send the secret just for the initial authentication when opening a new connection
       socket.getOutputStream.write(secretToken.getBytes(StandardCharsets.UTF_8))
     }
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
index 17cb0c5a55ddf..b2571ffddc577 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala
@@ -31,7 +31,8 @@ import scala.util.control.NonFatal
 
 import org.apache.spark._
 import org.apache.spark.api.python.PythonFunction.PythonAccumulator
-import org.apache.spark.internal.Logging
+import org.apache.spark.internal.{Logging, MDC}
+import org.apache.spark.internal.LogKeys.TASK_NAME
 import org.apache.spark.internal.config.{BUFFER_SIZE, EXECUTOR_CORES, Python}
 import org.apache.spark.internal.config.Python._
 import org.apache.spark.rdd.InputFileBlockHolder
@@ -88,7 +89,7 @@ private object BasePythonRunner {
 
   private lazy val faultHandlerLogDir = Utils.createTempDir(namePrefix = "faulthandler")
 
-  private def faultHandlerLogPath(pid: Long): Path = {
+  private def faultHandlerLogPath(pid: Int): Path = {
     new File(faultHandlerLogDir,
pid.toString).toPath } } @@ -204,7 +205,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( envVars.put("SPARK_JOB_ARTIFACT_UUID", jobArtifactUUID.getOrElse("default")) - val (worker: PythonWorker, pid: Option[Long]) = env.createPythonWorker( + val (worker: PythonWorker, pid: Option[Int]) = env.createPythonWorker( pythonExec, workerModule, daemonModule, envVars.asScala.toMap) // Whether is the worker released into idle pool or closed. When any codes try to release or // close a worker, they should use `releasedOrClosed.compareAndSet` to flip the state to make @@ -257,7 +258,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( startTime: Long, env: SparkEnv, worker: PythonWorker, - pid: Option[Long], + pid: Option[Int], releasedOrClosed: AtomicBoolean, context: TaskContext): Iterator[OUT] @@ -465,7 +466,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( startTime: Long, env: SparkEnv, worker: PythonWorker, - pid: Option[Long], + pid: Option[Int], releasedOrClosed: AtomicBoolean, context: TaskContext) extends Iterator[OUT] { @@ -592,7 +593,8 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( // Mimic the task name used in `Executor` to help the user find out the task to blame. val taskName = s"${context.partitionId()}.${context.attemptNumber()} " + s"in stage ${context.stageId()} (TID ${context.taskAttemptId()})" - logWarning(s"Incomplete task $taskName interrupted: Attempting to kill Python Worker") + logWarning(log"Incomplete task ${MDC(TASK_NAME, taskName)} " + + log"interrupted: Attempting to kill Python Worker") env.destroyPythonWorker( pythonExec, workerModule, daemonModule, envVars.asScala.toMap, worker) } catch { @@ -842,7 +844,7 @@ private[spark] class PythonRunner( startTime: Long, env: SparkEnv, worker: PythonWorker, - pid: Option[Long], + pid: Option[Int], releasedOrClosed: AtomicBoolean, context: TaskContext): Iterator[Array[Byte]] = { new ReaderIterator( diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 26c790a124470..045ed0e4c01cb 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -28,7 +28,8 @@ import scala.sys.process.Process import org.apache.spark.{SparkContext, SparkEnv} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{PATH, PYTHON_PACKAGES, PYTHON_VERSION} import org.apache.spark.util.ArrayImplicits.SparkArrayOps import org.apache.spark.util.Utils @@ -122,11 +123,11 @@ private[spark] object PythonUtils extends Logging { PythonUtils.sparkPythonPath, sys.env.getOrElse("PYTHONPATH", "")) val environment = Map("PYTHONPATH" -> pythonPath) - logInfo(s"Python path $pythonPath") + logInfo(log"Python path ${MDC(PATH, pythonPath)}") val processPythonVer = Process(pythonVersionCMD, None, environment.toSeq: _*) val output = runCommand(processPythonVer) - logInfo(s"Python version: ${output.getOrElse("Unable to determine")}") + logInfo(log"Python version: ${MDC(PYTHON_VERSION, output.getOrElse("Unable to determine"))}") val pythonCode = """ @@ -146,7 +147,8 @@ private[spark] object PythonUtils extends Logging { def formatOutput(output: String): String = { output.replaceAll("\\s+", ", ") } - listOfPackages.foreach(x => logInfo(s"List of Python packages :- ${formatOutput(x)}")) + 
listOfPackages.foreach(x => logInfo(log"List of Python packages :-" + + log" ${MDC(PYTHON_PACKAGES, formatOutput(x))}")) } } diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala index eb740b72987c8..3221a4900f6ad 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala @@ -30,7 +30,8 @@ import scala.jdk.CollectionConverters._ import org.apache.spark._ import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.security.SocketAuthHelper import org.apache.spark.util.{RedirectThread, Utils} @@ -92,7 +93,7 @@ private[spark] class PythonWorkerFactory( envVars.getOrElse("PYTHONPATH", ""), sys.env.getOrElse("PYTHONPATH", "")) - def create(): (PythonWorker, Option[Long]) = { + def create(): (PythonWorker, Option[Int]) = { if (useDaemon) { self.synchronized { // Pull from idle workers until we one that is alive, otherwise create a new one. @@ -102,12 +103,13 @@ private[spark] class PythonWorkerFactory( if (workerHandle.isAlive()) { try { worker.selectionKey.interestOps(SelectionKey.OP_READ | SelectionKey.OP_WRITE) - return (worker, Some(workerHandle.pid())) + return (worker, Some(workerHandle.pid().toInt)) } catch { case c: CancelledKeyException => /* pass */ } } - logWarning(s"Worker ${worker} process from idle queue is dead, discarding.") + logWarning(log"Worker ${MDC(WORKER, worker)} " + + log"process from idle queue is dead, discarding.") stopWorker(worker) } } @@ -122,9 +124,9 @@ private[spark] class PythonWorkerFactory( * processes itself to avoid the high cost of forking from Java. This currently only works * on UNIX-based systems. */ - private def createThroughDaemon(): (PythonWorker, Option[Long]) = { + private def createThroughDaemon(): (PythonWorker, Option[Int]) = { - def createWorker(): (PythonWorker, Option[Long]) = { + def createWorker(): (PythonWorker, Option[Int]) = { val socketChannel = SocketChannel.open(new InetSocketAddress(daemonHost, daemonPort)) // These calls are blocking. val pid = new DataInputStream(Channels.newInputStream(socketChannel)).readInt() @@ -165,7 +167,7 @@ private[spark] class PythonWorkerFactory( /** * Launch a worker by executing worker.py (by default) directly and telling it to connect to us. 
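The pid handshake referenced throughout these worker-factory changes is a plain 32-bit read off the daemon socket, which is what makes Option[Int] sufficient. A reduced sketch of that read (error handling trimmed; not the actual factory code):

import java.io.DataInputStream
import java.nio.channels.{Channels, SocketChannel}

// Reads the pid the Python daemon/worker writes back after connecting; a negative value
// signals a failed launch, matching the check in createSimpleWorker below.
def readWorkerPid(socketChannel: SocketChannel): Int = {
  val pid = new DataInputStream(Channels.newInputStream(socketChannel)).readInt()
  if (pid < 0) {
    throw new IllegalStateException("Python failed to launch worker with code " + pid)
  }
  pid
}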
*/ - private[spark] def createSimpleWorker(blockingMode: Boolean): (PythonWorker, Option[Long]) = { + private[spark] def createSimpleWorker(blockingMode: Boolean): (PythonWorker, Option[Int]) = { var serverSocketChannel: ServerSocketChannel = null try { serverSocketChannel = ServerSocketChannel.open() @@ -209,7 +211,8 @@ private[spark] class PythonWorkerFactory( "Timed out while waiting for the Python worker to connect back") } authHelper.authClient(socketChannel.socket()) - val pid = workerProcess.toHandle.pid() + // TODO: When we drop JDK 8, we can just use workerProcess.pid() + val pid = new DataInputStream(Channels.newInputStream(socketChannel)).readInt() if (pid < 0) { throw new IllegalStateException("Python failed to launch worker with code " + pid) } @@ -405,7 +408,7 @@ private[spark] class PythonWorkerFactory( daemonWorkers.get(worker).foreach { processHandle => // tell daemon to kill worker by pid val output = new DataOutputStream(daemon.getOutputStream) - output.writeLong(processHandle.pid()) + output.writeInt(processHandle.pid().toInt) output.flush() daemon.getOutputStream.flush() } diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index 6a46b611019fa..0fe57dd0bb0ae 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -28,7 +28,8 @@ import net.razorvine.pickle.{Pickler, Unpickler} import org.apache.spark.SparkException import org.apache.spark.api.java.JavaRDD -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rdd.RDD import org.apache.spark.util.ArrayImplicits._ @@ -143,22 +144,26 @@ private[spark] object SerDeUtil extends Logging { } (kt, vt) match { case (Failure(kf), Failure(vf)) => - logWarning(s""" - |Failed to pickle Java object as key: ${t._1.getClass.getSimpleName}, falling back - |to 'toString'. Error: ${kf.getMessage}""".stripMargin) - logWarning(s""" - |Failed to pickle Java object as value: ${t._2.getClass.getSimpleName}, falling back - |to 'toString'. Error: ${vf.getMessage}""".stripMargin) + logWarning(log""" + |Failed to pickle Java object as key: + |${MDC(CLASS_NAME, t._1.getClass.getSimpleName)}, falling back + |to 'toString'. Error: ${MDC(ERROR, kf.getMessage)}""".stripMargin) + logWarning(log""" + |Failed to pickle Java object as value: + |${MDC(CLASS_NAME, t._2.getClass.getSimpleName)}, falling back + |to 'toString'. Error: ${MDC(ERROR, vf.getMessage)}""".stripMargin) (true, true) case (Failure(kf), _) => - logWarning(s""" - |Failed to pickle Java object as key: ${t._1.getClass.getSimpleName}, falling back - |to 'toString'. Error: ${kf.getMessage}""".stripMargin) + logWarning(log""" + |Failed to pickle Java object as key: + |${MDC(CLASS_NAME, t._1.getClass.getSimpleName)}, falling back + |to 'toString'. Error: ${MDC(ERROR, kf.getMessage)}""".stripMargin) (true, false) case (_, Failure(vf)) => - logWarning(s""" - |Failed to pickle Java object as value: ${t._2.getClass.getSimpleName}, falling back - |to 'toString'. Error: ${vf.getMessage}""".stripMargin) + logWarning(log""" + |Failed to pickle Java object as value: + |${MDC(CLASS_NAME, t._2.getClass.getSimpleName)}, falling back + |to 'toString'. 
Error: ${MDC(ERROR, vf.getMessage)}""".stripMargin) (false, true) case _ => (false, false) diff --git a/core/src/main/scala/org/apache/spark/api/python/StreamingPythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/StreamingPythonRunner.scala index de01a706b3f6c..0ff2b79ab6623 100644 --- a/core/src/main/scala/org/apache/spark/api/python/StreamingPythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/StreamingPythonRunner.scala @@ -21,8 +21,9 @@ import java.io.{BufferedInputStream, BufferedOutputStream, DataInputStream, Data import scala.jdk.CollectionConverters._ -import org.apache.spark.SparkEnv -import org.apache.spark.internal.Logging +import org.apache.spark.{SparkEnv, SparkPythonException} +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{PYTHON_EXEC, PYTHON_WORKER_MODULE, PYTHON_WORKER_RESPONSE, SESSION_ID} import org.apache.spark.internal.config.BUFFER_SIZE import org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT @@ -58,7 +59,8 @@ private[spark] class StreamingPythonRunner( * to be used with the functions. */ def init(): (DataOutputStream, DataInputStream) = { - logInfo(s"Initializing Python runner (session: $sessionId, pythonExec: $pythonExec)") + logInfo(log"Initializing Python runner (session: ${MDC(SESSION_ID, sessionId)}," + + log" pythonExec: ${MDC(PYTHON_EXEC, pythonExec)})") val env = SparkEnv.get val localdir = env.blockManager.diskBlockManager.localDirs.map(f => f.getPath()).mkString(",") @@ -91,16 +93,34 @@ private[spark] class StreamingPythonRunner( new BufferedInputStream(pythonWorker.get.channel.socket().getInputStream, bufferSize)) val resFromPython = dataIn.readInt() - logInfo(s"Runner initialization succeeded (returned $resFromPython).") + if (resFromPython != 0) { + val errMessage = PythonWorkerUtils.readUTF(dataIn) + throw streamingPythonRunnerInitializationFailure(resFromPython, errMessage) + } + logInfo(log"Runner initialization succeeded (returned" + + log" ${MDC(PYTHON_WORKER_RESPONSE, resFromPython)}).") (dataOut, dataIn) } + def streamingPythonRunnerInitializationFailure(resFromPython: Int, errMessage: String): + StreamingPythonRunnerInitializationException = { + new StreamingPythonRunnerInitializationException(resFromPython, errMessage) + } + + class StreamingPythonRunnerInitializationException(resFromPython: Int, errMessage: String) + extends SparkPythonException( + errorClass = "STREAMING_PYTHON_RUNNER_INITIALIZATION_FAILURE", + messageParameters = Map( + "resFromPython" -> resFromPython.toString, + "msg" -> errMessage)) + /** * Stops the Python worker. 
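With the check above, init() now fails fast on a non-zero response instead of proceeding with broken streams. A hedged sketch of how a caller might handle the new failure (the wrapper class and chosen log key are illustrative, and StreamingPythonRunner is a Spark-internal API):

import org.apache.spark.SparkPythonException
import org.apache.spark.api.python.StreamingPythonRunner
import org.apache.spark.internal.{Logging, LogKeys, MDC}

// Hypothetical wrapper; only the error handling around init() mirrors the change above.
class RunnerBootstrap(runner: StreamingPythonRunner) extends Logging {
  def start(): Unit = {
    try {
      val (dataOut, dataIn) = runner.init()
      // ... drive the Python worker through dataOut/dataIn ...
    } catch {
      case e: SparkPythonException =>
        // Covers STREAMING_PYTHON_RUNNER_INITIALIZATION_FAILURE raised by init().
        logError(log"Python runner failed to initialize: " +
          log"${MDC(LogKeys.ERROR, e.getMessage)}", e)
        throw e
    }
  }
}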
*/ def stop(): Unit = { - logInfo(s"Stopping streaming runner for sessionId: $sessionId, module: $workerModule.") + logInfo(log"Stopping streaming runner for sessionId: ${MDC(SESSION_ID, sessionId)}," + + log" module: ${MDC(PYTHON_WORKER_MODULE, workerModule)}.") try { pythonWorkerFactory.foreach { factory => diff --git a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala index 1a05c8f35b7fb..c3d01ec47458e 100644 --- a/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala +++ b/core/src/main/scala/org/apache/spark/api/r/RBackendHandler.scala @@ -27,7 +27,7 @@ import io.netty.handler.timeout.ReadTimeoutException import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.api.r.SerDe._ import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{METHOD_NAME, OBJECT_ID} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.R._ import org.apache.spark.util.{ThreadUtils, Utils} import org.apache.spark.util.ArrayImplicits._ @@ -155,10 +155,11 @@ private[r] class RBackendHandler(server: RBackend) args) if (index.isEmpty) { - logWarning(s"cannot find matching method ${cls}.$methodName. " - + s"Candidates are:") + logWarning(log"cannot find matching method " + + log"${MDC(CLASS_NAME, cls)}.${MDC(METHOD_NAME, methodName)}. Candidates are:") selectedMethods.foreach { method => - logWarning(s"$methodName(${method.getParameterTypes.mkString(",")})") + logWarning(log"${MDC(METHOD_NAME, methodName)}(" + + log"${MDC(METHOD_PARAM_TYPES, method.getParameterTypes.mkString(","))})") } throw new Exception(s"No matched method found for $cls.$methodName") } @@ -176,10 +177,11 @@ private[r] class RBackendHandler(server: RBackend) args) if (index.isEmpty) { - logWarning(s"cannot find matching constructor for ${cls}. " - + s"Candidates are:") + logWarning(log"cannot find matching constructor for ${MDC(CLASS_NAME, cls)}. 
" + + log"Candidates are:") ctors.foreach { ctor => - logWarning(s"$cls(${ctor.getParameterTypes.mkString(",")})") + logWarning(log"${MDC(CLASS_NAME, cls)}(" + + log"${MDC(METHOD_PARAM_TYPES, ctor.getParameterTypes.mkString(","))})") } throw new Exception(s"No matched constructor found for $cls") } diff --git a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala index 445b7d4d7aa06..3adb540a7ad18 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala @@ -22,7 +22,7 @@ import java.io.Serializable import scala.reflect.ClassTag import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.util.Utils /** @@ -106,7 +106,8 @@ abstract class Broadcast[T: ClassTag](val id: Long) extends Serializable with Lo assertValid() _isValid = false _destroySite = Utils.getCallSite().shortForm - logInfo("Destroying %s (from %s)".format(toString, _destroySite)) + logInfo(log"Destroying ${MDC(LogKeys.BROADCAST, toString)} " + + log"(from ${MDC(LogKeys.CALL_SITE_SHORT_FORM, _destroySite)})") doDestroy(blocking) } diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index b6ba9bbf29f30..0c7ec5c1a98a7 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -28,7 +28,7 @@ import scala.util.Random import org.apache.spark._ import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.BROADCAST_ID +import org.apache.spark.internal.LogKeys._ import org.apache.spark.io.CompressionCodec import org.apache.spark.serializer.Serializer import org.apache.spark.storage._ @@ -278,11 +278,12 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long, serializedO } case None => val estimatedTotalSize = Utils.bytesToString(numBlocks.toLong * blockSize) - logInfo(s"Started reading broadcast variable $id with $numBlocks pieces " + - s"(estimated total size $estimatedTotalSize)") + logInfo(log"Started reading broadcast variable ${MDC(BROADCAST_ID, id)} with ${MDC(NUM_BROADCAST_BLOCK, numBlocks)} pieces " + + log"(estimated total size ${MDC(NUM_BYTES, estimatedTotalSize)})") val startTimeNs = System.nanoTime() val blocks = readBlocks() - logInfo(s"Reading broadcast variable $id took ${Utils.getUsedTimeNs(startTimeNs)}") + logInfo(log"Reading broadcast variable ${MDC(BROADCAST_ID, id)}" + + log" took ${MDC(TOTAL_TIME, Utils.getUsedTimeNs(startTimeNs))}") try { val obj = TorrentBroadcast.unBlockifyObject[T]( diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index d38f94fd1ac26..226a6dcd36a16 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -32,7 +32,7 @@ import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.{DriverState, Master} import org.apache.spark.deploy.master.DriverState.DriverState import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.{DRIVER_ID, ERROR, HOST_PORT} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.Network.RPC_ASK_TIMEOUT import 
org.apache.spark.resource.ResourceUtils import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} @@ -135,7 +135,8 @@ private class ClientEndpoint( masterEndpoint.ask[T](message).onComplete { case Success(v) => self.send(v) case Failure(e) => - logWarning(s"Error sending messages to master $masterEndpoint", e) + logWarning(log"Error sending messages to master " + + log"${MDC(MASTER_URL, masterEndpoint)}", e) }(forwardMessageExecutionContext) } } @@ -163,11 +164,12 @@ private class ClientEndpoint( // logs again when waitAppCompletion is set to true if (!driverStatusReported) { driverStatusReported = true - logInfo(s"State of $submittedDriverID is ${state.get}") + logInfo(log"State of ${MDC(DRIVER_ID, submittedDriverID)}" + + log" is ${MDC(DRIVER_STATE, state.get)}") // Worker node, if present (workerId, workerHostPort, state) match { case (Some(id), Some(hostPort), Some(DriverState.RUNNING)) => - logInfo(s"Driver running on $hostPort ($id)") + logInfo(log"Driver running on ${MDC(HOST, hostPort)} (${MDC(WORKER_ID, id)})") case _ => } } @@ -180,17 +182,18 @@ private class ClientEndpoint( state.get match { case DriverState.FINISHED | DriverState.FAILED | DriverState.ERROR | DriverState.KILLED => - logInfo(s"State of driver $submittedDriverID is ${state.get}, " + - s"exiting spark-submit JVM.") + logInfo(log"State of driver ${MDC(DRIVER_ID, submittedDriverID)}" + + log" is ${MDC(DRIVER_STATE, state.get)}, exiting spark-submit JVM.") System.exit(0) case _ => if (!waitAppCompletion) { - logInfo(s"spark-submit not configured to wait for completion, " + - s"exiting spark-submit JVM.") + logInfo("spark-submit not configured to wait for completion, " + + " exiting spark-submit JVM.") System.exit(0) } else { - logDebug(s"State of driver $submittedDriverID is ${state.get}, " + - s"continue monitoring driver status.") + logDebug(log"State of driver ${MDC(DRIVER_ID, submittedDriverID)}" + + log" is ${MDC(DRIVER_STATE, state.get)}, " + + log"continue monitoring driver status.") } } } diff --git a/core/src/main/scala/org/apache/spark/deploy/DriverTimeoutPlugin.scala b/core/src/main/scala/org/apache/spark/deploy/DriverTimeoutPlugin.scala index 9b141d6075721..736c23556ec15 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DriverTimeoutPlugin.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DriverTimeoutPlugin.scala @@ -23,7 +23,8 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.SparkContext import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, PluginContext, SparkPlugin} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.DRIVER_TIMEOUT import org.apache.spark.util.{SparkExitCode, ThreadUtils} @@ -48,8 +49,9 @@ class DriverTimeoutDriverPlugin extends DriverPlugin with Logging { logWarning("Disabled with the timeout value 0.") } else { val task: Runnable = () => { - logWarning(s"Terminate Driver JVM because it runs after $timeout minute" + - (if (timeout == 1) "" else "s")) + logWarning(log"Terminate Driver JVM because it runs after " + + log"${MDC(TIME_UNITS, timeout)} minute" + + (if (timeout == 1) log"" else log"s")) // We cannot use 'SparkContext.stop' because SparkContext might be in abnormal situation. 
System.exit(SparkExitCode.DRIVER_TIMEOUT) } diff --git a/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala b/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala index a56fbd5a644ae..851fb453fd092 100644 --- a/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala @@ -23,7 +23,8 @@ import java.util.concurrent.CountDownLatch import scala.jdk.CollectionConverters._ import org.apache.spark.{SecurityManager, SparkConf} -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys.{AUTH_ENABLED, PORT, SHUFFLE_DB_BACKEND_KEY, SHUFFLE_DB_BACKEND_NAME} import org.apache.spark.metrics.{MetricsSystem, MetricsSystemInstances} import org.apache.spark.network.TransportContext import org.apache.spark.network.crypto.AuthServerBootstrap @@ -70,8 +71,8 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana if (localDirs.length >= 1) { new File(localDirs.find(new File(_, dbName).exists()).getOrElse(localDirs(0)), dbName) } else { - logWarning(s"'spark.local.dir' should be set first when we use db in " + - s"ExternalShuffleService. Note that this only affects standalone mode.") + logWarning("'spark.local.dir' should be set first when we use db in " + + "ExternalShuffleService. Note that this only affects standalone mode.") null } } @@ -86,8 +87,8 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana if (sparkConf.get(config.SHUFFLE_SERVICE_DB_ENABLED) && enabled) { val shuffleDBName = sparkConf.get(config.SHUFFLE_SERVICE_DB_BACKEND) val dbBackend = DBBackend.byName(shuffleDBName) - logInfo(s"Use ${dbBackend.name()} as the implementation of " + - s"${config.SHUFFLE_SERVICE_DB_BACKEND.key}") + logInfo(log"Use ${MDC(SHUFFLE_DB_BACKEND_NAME, dbBackend.name())} as the implementation of " + + log"${MDC(SHUFFLE_DB_BACKEND_KEY, config.SHUFFLE_SERVICE_DB_BACKEND.key)}") new ExternalBlockHandler(conf, findRegisteredExecutorsDBFile(dbBackend.fileName(registeredExecutorsDB))) } else { @@ -106,7 +107,8 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana def start(): Unit = { require(server == null, "Shuffle server already started") val authEnabled = securityManager.isAuthenticationEnabled() - logInfo(s"Starting shuffle service on port $port (auth enabled = $authEnabled)") + logInfo(log"Starting shuffle service on port ${MDC(PORT, port)}" + + log" (auth enabled = ${MDC(AUTH_ENABLED, authEnabled)})") val bootstraps: Seq[TransportServerBootstrap] = if (authEnabled) { Seq(new AuthServerBootstrap(transportConf, securityManager)) diff --git a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala index 9c57269b28f47..263b1a233b808 100644 --- a/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala @@ -24,7 +24,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.SparkConf import org.apache.spark.deploy.master.Master import org.apache.spark.deploy.worker.Worker -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, LogKeys, MDC} import org.apache.spark.rpc.RpcEnv import org.apache.spark.util.Utils @@ -51,7 +51,8 @@ class LocalSparkCluster private ( private val workerDirs = 
ArrayBuffer[String]() def start(): Array[String] = { - logInfo("Starting a local Spark cluster with " + numWorkers + " workers.") + logInfo(log"Starting a local Spark cluster with " + + log"${MDC(LogKeys.NUM_WORKERS, numWorkers)} workers.") // Disable REST server on Master in this mode unless otherwise specified val _conf = conf.clone() diff --git a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala index 1a1a680c7faf5..5d996381a485e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/RPackageUtils.scala @@ -27,13 +27,15 @@ import scala.jdk.CollectionConverters._ import com.google.common.io.{ByteStreams, Files} import org.apache.spark.api.r.RUtils -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{LogEntry, Logging, MDC, MessageWithContext} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.{RedirectThread, Utils} private[deploy] object RPackageUtils extends Logging { /** The key in the MANIFEST.mf that we look for, in case a jar contains R code. */ private final val hasRPackage = "Spark-HasRPackage" + private final val hasRPackageMDC = MDC(CONFIG, hasRPackage) /** Base of the shell command used in order to install R packages. */ private final val baseInstallCmd = Seq("R", "CMD", "INSTALL", "-l") @@ -42,11 +44,11 @@ private[deploy] object RPackageUtils extends Logging { private final val RJarEntries = "R/pkg" /** Documentation on how the R source file layout should be in the jar. */ - private[deploy] final val RJarDoc = - s"""In order for Spark to build R packages that are parts of Spark Packages, there are a few + private[deploy] final val RJarDoc: MessageWithContext = + log"""In order for Spark to build R packages that are parts of Spark Packages, there are a few |requirements. The R source code must be shipped in a jar, with additional Java/Scala |classes. The jar must be in the following format: - | 1- The Manifest (META-INF/MANIFEST.mf) must contain the key-value: $hasRPackage: true + | 1- The Manifest (META-INF/MANIFEST.mf) must contain the key-value: $hasRPackageMDC: true | 2- The standard R package layout must be preserved under R/pkg/ inside the jar. More | information on the standard R package layout can be found in: | http://cran.r-project.org/doc/contrib/Leisch-CreatingPackages.pdf @@ -61,18 +63,17 @@ private[deploy] object RPackageUtils extends Logging { |R/pkg/R/myRcode.R |org/ |org/apache/ - |... - """.stripMargin.trim + |...""".stripMargin /** Internal method for logging. We log to a printStream in tests, for debugging purposes. 
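RJarDoc above becomes a reusable MessageWithContext built once with the log interpolator, with the manifest key carried as an MDC placeholder. A small sketch of that idiom outside this file (the object name, key choice, and text are illustrative):

import org.apache.spark.internal.{Logging, MDC, MessageWithContext}
import org.apache.spark.internal.LogKeys.CONFIG

object ManifestDoc extends Logging {
  // Built once; the MDC value is rendered into the text and kept as a structured field.
  private val requiredKey = MDC(CONFIG, "Spark-HasRPackage")
  val doc: MessageWithContext =
    log"The jar manifest must contain the key-value: $requiredKey: true"

  def remind(): Unit = logWarning(doc)
}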
*/ private def print( - msg: String, + msg: LogEntry, printStream: PrintStream, level: Level = Level.FINE, e: Throwable = null): Unit = { if (printStream != null) { // scalastyle:off println - printStream.println(msg) + printStream.println(msg.message) // scalastyle:on println if (e != null) { e.printStackTrace(printStream) @@ -112,7 +113,7 @@ private[deploy] object RPackageUtils extends Logging { val pathToPkg = Seq(dir, "R", "pkg").mkString(File.separator) val installCmd = baseInstallCmd ++ Seq(libDir, pathToPkg) if (verbose) { - print(s"Building R package with the command: $installCmd", printStream) + print(log"Building R package with the command: ${MDC(COMMAND, installCmd)}", printStream) } try { val builder = new ProcessBuilder(installCmd.asJava) @@ -131,7 +132,7 @@ private[deploy] object RPackageUtils extends Logging { process.waitFor() == 0 } catch { case e: Throwable => - print("Failed to build R package.", printStream, Level.SEVERE, e) + print(log"Failed to build R package.", printStream, Level.SEVERE, e) false } } @@ -150,7 +151,7 @@ private[deploy] object RPackageUtils extends Logging { if (entry.isDirectory) { val dir = new File(tempDir, entryPath) if (verbose) { - print(s"Creating directory: $dir", printStream) + print(log"Creating directory: ${MDC(PATH, dir)}", printStream) } dir.mkdirs } else { @@ -159,7 +160,7 @@ private[deploy] object RPackageUtils extends Logging { Files.createParentDirs(outPath) val outStream = new FileOutputStream(outPath) if (verbose) { - print(s"Extracting $entry to $outPath", printStream) + print(log"Extracting ${MDC(JAR_ENTRY, entry)} to ${MDC(PATH, outPath)}", printStream) } Utils.copyStream(inStream, outStream, closeStreams = true) } @@ -181,32 +182,34 @@ private[deploy] object RPackageUtils extends Logging { val jar = new JarFile(file) Utils.tryWithSafeFinally { if (checkManifestForR(jar)) { - print(s"$file contains R source code. Now installing package.", printStream, Level.INFO) + print(log"${MDC(PATH, file)} contains R source code. Now installing package.", + printStream, Level.INFO) val rSource = extractRFolder(jar, printStream, verbose) if (RUtils.rPackages.isEmpty) { RUtils.rPackages = Some(Utils.createTempDir().getAbsolutePath) } try { if (!rPackageBuilder(rSource, printStream, verbose, RUtils.rPackages.get)) { - print(s"ERROR: Failed to build R package in $file.", printStream) + print(log"ERROR: Failed to build R package in ${MDC(PATH, file)}.", printStream) print(RJarDoc, printStream) } } finally { // clean up if (!rSource.delete()) { - logWarning(s"Error deleting ${rSource.getPath()}") + logWarning(log"Error deleting ${MDC(PATH, rSource.getPath())}") } } } else { if (verbose) { - print(s"$file doesn't contain R source code, skipping...", printStream) + print(log"${MDC(PATH, file)} doesn't contain R source code, skipping...", printStream) } } } { jar.close() } } else { - print(s"WARN: $file resolved as dependency, but not found.", printStream, Level.WARNING) + print(log"WARN: ${MDC(PATH, file)} resolved as dependency, but not found.", + printStream, Level.WARNING) } } } @@ -234,7 +237,7 @@ private[deploy] object RPackageUtils extends Logging { // create a zip file from scratch, do not append to existing file. 
val zipFile = new File(dir, name) if (!zipFile.delete()) { - logWarning(s"Error deleting ${zipFile.getPath()}") + logWarning(log"Error deleting ${MDC(PATH, zipFile.getPath())}") } val zipOutputStream = new ZipOutputStream(new FileOutputStream(zipFile, false)) try { diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 2edd80db2637f..ca932ef5dc05c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -37,7 +37,7 @@ import org.apache.hadoop.security.token.{Token, TokenIdentifier} import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.BUFFER_SIZE import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils @@ -142,8 +142,9 @@ private[spark] class SparkHadoopUtil extends Logging { if (!new File(keytabFilename).exists()) { throw new SparkException(s"Keytab file: ${keytabFilename} does not exist") } else { - logInfo("Attempting to login to Kerberos " + - s"using principal: ${principalName} and keytab: ${keytabFilename}") + logInfo(log"Attempting to login to Kerberos using principal: " + + log"${MDC(LogKeys.PRINCIPAL, principalName)} and keytab: " + + log"${MDC(LogKeys.KEYTAB, keytabFilename)}") UserGroupInformation.loginUserFromKeytab(principalName, keytabFilename) } } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index c0df74f8d0cc6..7bb945ab9f147 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -39,8 +39,7 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.spark._ import org.apache.spark.api.r.RUtils import org.apache.spark.deploy.rest._ -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CLASS_NAME +import org.apache.spark.internal.{LogEntry, Logging, LogKeys, MDC} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.launcher.SparkLauncher @@ -64,22 +63,39 @@ private[deploy] object SparkSubmitAction extends Enumeration { */ private[spark] class SparkSubmit extends Logging { + override protected def logName: String = classOf[SparkSubmit].getName + import DependencyUtils._ import SparkSubmit._ def doSubmit(args: Array[String]): Unit = { + val appArgs = parseArguments(args) + val sparkConf = appArgs.toSparkConf() + + // For interpreters, structured logging is disabled by default to avoid generating mixed + // plain text and structured logs on the same console. + if (isShell(appArgs.primaryResource) || isSqlShell(appArgs.mainClass)) { + Logging.disableStructuredLogging() + } else { + // For non-shell applications, enable structured logging if it's not explicitly disabled + // via the configuration `spark.log.structuredLogging.enabled`. + if (sparkConf.getBoolean(STRUCTURED_LOGGING_ENABLED.key, defaultValue = true)) { + Logging.enableStructuredLogging() + } else { + Logging.disableStructuredLogging() + } + } // Initialize logging if it hasn't been done yet. Keep track of whether logging needs to // be reset before the application starts. 
val uninitLog = initializeLogIfNecessary(true, silent = true) - val appArgs = parseArguments(args) if (appArgs.verbose) { logInfo(appArgs.toString) } appArgs.action match { - case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog) - case SparkSubmitAction.KILL => kill(appArgs) - case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs) + case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog, sparkConf) + case SparkSubmitAction.KILL => kill(appArgs, sparkConf) + case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs, sparkConf) case SparkSubmitAction.PRINT_VERSION => printVersion() } } @@ -91,12 +107,11 @@ private[spark] class SparkSubmit extends Logging { /** * Kill an existing submission. */ - private def kill(args: SparkSubmitArguments): Unit = { + private def kill(args: SparkSubmitArguments, sparkConf: SparkConf): Unit = { if (RestSubmissionClient.supportsRestClient(args.master)) { new RestSubmissionClient(args.master) .killSubmission(args.submissionToKill) } else { - val sparkConf = args.toSparkConf() sparkConf.set("spark.master", args.master) SparkSubmitUtils .getSubmitOperations(args.master) @@ -107,12 +122,11 @@ private[spark] class SparkSubmit extends Logging { /** * Request the status of an existing submission. */ - private def requestStatus(args: SparkSubmitArguments): Unit = { + private def requestStatus(args: SparkSubmitArguments, sparkConf: SparkConf): Unit = { if (RestSubmissionClient.supportsRestClient(args.master)) { new RestSubmissionClient(args.master) .requestSubmissionStatus(args.submissionToRequestStatusFor) } else { - val sparkConf = args.toSparkConf() sparkConf.set("spark.master", args.master) SparkSubmitUtils .getSubmitOperations(args.master) @@ -129,12 +143,14 @@ private[spark] class SparkSubmit extends Logging { /___/ .__/\_,_/_/ /_/\_\ version %s /_/ """.format(SPARK_VERSION)) - logInfo("Using Scala %s, %s, %s".format( - Properties.versionString, Properties.javaVmName, Properties.javaVersion)) - logInfo(s"Branch $SPARK_BRANCH") - logInfo(s"Compiled by user $SPARK_BUILD_USER on $SPARK_BUILD_DATE") - logInfo(s"Revision $SPARK_REVISION") - logInfo(s"Url $SPARK_REPO_URL") + logInfo(log"Using Scala ${MDC(LogKeys.SCALA_VERSION, Properties.versionString)}," + + log" ${MDC(LogKeys.JAVA_VM_NAME, Properties.javaVmName)}," + + log" ${MDC(LogKeys.JAVA_VERSION, Properties.javaVersion)}") + logInfo(log"Branch ${MDC(LogKeys.SPARK_BRANCH, SPARK_BRANCH)}") + logInfo(log"Compiled by user ${MDC(LogKeys.SPARK_BUILD_USER, SPARK_BUILD_USER)} on" + + log" ${MDC(LogKeys.SPARK_BUILD_DATE, SPARK_BUILD_DATE)}") + logInfo(log"Revision ${MDC(LogKeys.SPARK_REVISION, SPARK_REVISION)}") + logInfo(log"Url ${MDC(LogKeys.SPARK_REPO_URL, SPARK_REPO_URL)}") logInfo("Type --help for more information.") } @@ -143,7 +159,7 @@ private[spark] class SparkSubmit extends Logging { * in a doAs when --proxy-user is specified. */ @tailrec - private def submit(args: SparkSubmitArguments, uninitLog: Boolean): Unit = { + private def submit(args: SparkSubmitArguments, uninitLog: Boolean, sparkConf: SparkConf): Unit = { def doRunMain(): Unit = { if (args.proxyUser != null) { @@ -152,7 +168,7 @@ private[spark] class SparkSubmit extends Logging { // is done in client mode. val isKubernetesClusterModeDriver = args.master.startsWith("k8s") && "client".equals(args.deployMode) && - args.toSparkConf().getBoolean("spark.kubernetes.submitInDriver", false) + sparkConf.getBoolean("spark.kubernetes.submitInDriver", false) if (isKubernetesClusterModeDriver) { logInfo("Running driver with proxy user. 
Cluster manager: Kubernetes") SparkHadoopUtil.get.runAsSparkUser(() => runMain(args, uninitLog)) @@ -196,10 +212,10 @@ private[spark] class SparkSubmit extends Logging { } catch { // Fail over to use the legacy submission gateway case e: SubmitRestConnectionException => - logWarning(s"Master endpoint ${args.master} was not a REST server. " + - "Falling back to legacy submission gateway instead.") + logWarning(log"Master endpoint ${MDC(LogKeys.MASTER_URL, args.master)} " + + log"was not a REST server. Falling back to legacy submission gateway instead.") args.useRest = false - submit(args, false) + submit(args, false, sparkConf) } // In all other modes, just run the main class as prepared } else { @@ -229,11 +245,6 @@ private[spark] class SparkSubmit extends Logging { val childClasspath = new ArrayBuffer[String]() val sparkConf = args.toSparkConf() if (sparkConf.contains("spark.local.connect")) sparkConf.remove("spark.remote") - if (sparkConf.getBoolean(STRUCTURED_LOGGING_ENABLED.key, defaultValue = true)) { - Logging.enableStructuredLogging() - } else { - Logging.disableStructuredLogging() - } var childMainClass = "" // Set the cluster manager @@ -430,7 +441,9 @@ private[spark] class SparkSubmit extends Logging { workingDirectory, if (resolvedUri.getFragment != null) resolvedUri.getFragment else source.getName) .getCanonicalFile - logInfo(s"Files $resolvedUri from $source to $dest") + logInfo(log"Files ${MDC(LogKeys.URI, resolvedUri)}" + + log" from ${MDC(LogKeys.SOURCE_PATH, source)}" + + log" to ${MDC(LogKeys.DESTINATION_PATH, dest)}") Utils.deleteRecursively(dest) if (isArchive) { Utils.unpack(source, dest) @@ -897,9 +910,11 @@ private[spark] class SparkSubmit extends Logging { if (childClasspath.nonEmpty && isCustomClasspathInClusterModeDisallowed) { childClasspath.clear() - logWarning(s"Ignore classpath ${childClasspath.mkString(", ")} with proxy user specified " + - s"in Cluster mode when ${ALLOW_CUSTOM_CLASSPATH_BY_PROXY_USER_IN_CLUSTER_MODE.key} is " + - s"disabled") + logWarning(log"Ignore classpath " + + log"${MDC(LogKeys.CLASS_PATH, childClasspath.mkString(", "))} " + + log"with proxy user specified in Cluster mode when " + + log"${MDC(LogKeys.CONFIG, ALLOW_CUSTOM_CLASSPATH_BY_PROXY_USER_IN_CLUSTER_MODE.key)} is " + + log"disabled") } (childArgs.toSeq, childClasspath.toSeq, sparkConf, childMainClass) @@ -911,7 +926,7 @@ private[spark] class SparkSubmit extends Logging { private def setRMPrincipal(sparkConf: SparkConf): Unit = { val shortUserName = UserGroupInformation.getCurrentUser.getShortUserName val key = s"spark.hadoop.${YarnConfiguration.RM_PRINCIPAL}" - logInfo(s"Setting ${key} to ${shortUserName}") + logInfo(log"Setting ${MDC(LogKeys.KEY, key)} to ${MDC(LogKeys.SHORT_USER_NAME, shortUserName)}") sparkConf.set(key, shortUserName) } @@ -948,11 +963,12 @@ private[spark] class SparkSubmit extends Logging { } if (args.verbose) { - logInfo(s"Main class:\n$childMainClass") - logInfo(s"Arguments:\n${childArgs.mkString("\n")}") + logInfo(log"Main class:\n${MDC(LogKeys.CLASS_NAME, childMainClass)}") + logInfo(log"Arguments:\n${MDC(LogKeys.ARGS, childArgs.mkString("\n"))}") // sysProps may contain sensitive information, so redact before printing - logInfo(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).sorted.mkString("\n")}") - logInfo(s"Classpath elements:\n${childClasspath.mkString("\n")}") + logInfo(log"Spark config:\n" + + log"${MDC(LogKeys.CONFIG, Utils.redact(sparkConf.getAll.toMap).sorted.mkString("\n"))}") + logInfo(log"Classpath 
elements:\n${MDC(LogKeys.CLASS_PATHS, childClasspath.mkString("\n"))}") logInfo("\n") } assert(!(args.deployMode == "cluster" && args.proxyUser != null && childClasspath.nonEmpty) || @@ -970,20 +986,20 @@ private[spark] class SparkSubmit extends Logging { mainClass = Utils.classForName(childMainClass) } catch { case e: ClassNotFoundException => - logError(log"Failed to load class ${MDC(CLASS_NAME, childMainClass)}.") + logError(log"Failed to load class ${MDC(LogKeys.CLASS_NAME, childMainClass)}.") if (childMainClass.contains("thriftserver")) { - logInfo(s"Failed to load main class $childMainClass.") + logInfo(log"Failed to load main class ${MDC(LogKeys.CLASS_NAME, childMainClass)}.") logInfo("You need to build Spark with -Phive and -Phive-thriftserver.") } else if (childMainClass.contains("org.apache.spark.sql.connect")) { - logInfo(s"Failed to load main class $childMainClass.") + logInfo(log"Failed to load main class ${MDC(LogKeys.CLASS_NAME, childMainClass)}.") // TODO(SPARK-42375): Should point out the user-facing page here instead. logInfo("You need to specify Spark Connect jars with --jars or --packages.") } throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS) case e: NoClassDefFoundError => - logError(log"Failed to load ${MDC(CLASS_NAME, childMainClass)}", e) + logError(log"Failed to load ${MDC(LogKeys.CLASS_NAME, childMainClass)}", e) if (e.getMessage.contains("org/apache/hadoop/hive")) { - logInfo(s"Failed to load hive class.") + logInfo("Failed to load hive class.") logInfo("You need to build Spark with -Phive and -Phive-thriftserver.") } throw new SparkUserAppException(CLASS_NOT_FOUND_EXIT_STATUS) @@ -1082,18 +1098,32 @@ object SparkSubmit extends CommandLineUtils with Logging { new SparkSubmitArguments(args.toImmutableArraySeq) { override protected def logInfo(msg: => String): Unit = self.logInfo(msg) + override protected def logInfo(entry: LogEntry): Unit = self.logInfo(entry) + override protected def logWarning(msg: => String): Unit = self.logWarning(msg) + override protected def logWarning(entry: LogEntry): Unit = self.logWarning(entry) + override protected def logError(msg: => String): Unit = self.logError(msg) + + override protected def logError(entry: LogEntry): Unit = self.logError(entry) } } override protected def logInfo(msg: => String): Unit = printMessage(msg) + override protected def logInfo(entry: LogEntry): Unit = printMessage(entry.message) + override protected def logWarning(msg: => String): Unit = printMessage(s"Warning: $msg") + override protected def logWarning(entry: LogEntry): Unit = + printMessage(s"Warning: ${entry.message}") + override protected def logError(msg: => String): Unit = printMessage(s"Error: $msg") + override protected def logError(entry: LogEntry): Unit = + printMessage(s"Error: ${entry.message}") + override def doSubmit(args: Array[String]): Unit = { try { super.doSubmit(args) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 534c14000614d..32dd2f81bbc82 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -28,7 +28,8 @@ import scala.util.Try import org.apache.spark.{SparkConf, SparkException, SparkUserAppException} import org.apache.spark.deploy.SparkSubmitAction._ -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import 
org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.DYN_ALLOCATION_ENABLED import org.apache.spark.launcher.SparkSubmitArgumentsParser import org.apache.spark.network.util.JavaUtils @@ -49,6 +50,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var executorCores: String = null var totalExecutorCores: String = null var propertiesFile: String = null + private var loadSparkDefaults: Boolean = false var driverMemory: String = null var driverExtraClassPath: String = null var driverExtraLibraryPath: String = null @@ -77,7 +79,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var principal: String = null var keytab: String = null private var dynamicAllocationEnabled: Boolean = false - // Standalone cluster mode only var supervise: Boolean = false var driverCores: String = null @@ -85,26 +86,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var submissionToRequestStatusFor: String = null var useRest: Boolean = false // used internally - /** Default properties present in the currently defined defaults file. */ - lazy val defaultSparkProperties: HashMap[String, String] = { - val defaultProperties = new HashMap[String, String]() - if (verbose) { - logInfo(s"Using properties file: $propertiesFile") - } - Option(propertiesFile).foreach { filename => - val properties = Utils.getPropertiesFromFile(filename) - properties.foreach { case (k, v) => - defaultProperties(k) = v - } - // Property files may contain sensitive information, so redact before printing - if (verbose) { - Utils.redact(properties).foreach { case (k, v) => - logInfo(s"Adding default property: $k=$v") - } - } - } - defaultProperties - } + override protected def logName: String = classOf[SparkSubmitArguments].getName // Set parameters from command line arguments parse(args.asJava) @@ -120,18 +102,44 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S validateArguments() + /** + * Load properties from the file with the given path into `sparkProperties`. + * No-op if the file path is null + */ + private def loadPropertiesFromFile(filePath: String): Unit = { + if (filePath != null) { + if (verbose) { + logInfo(log"Using properties file: ${MDC(PATH, filePath)}") + } + val properties = Utils.getPropertiesFromFile(filePath) + properties.foreach { case (k, v) => + if (!sparkProperties.contains(k)) { + sparkProperties(k) = v + } + } + // Property files may contain sensitive information, so redact before printing + if (verbose) { + Utils.redact(properties).foreach { case (k, v) => + logInfo(log"Adding default property: ${MDC(KEY, k)}=${MDC(VALUE, v)}") + } + } + } + } + /** * Merge values from the default properties file with those specified through --conf. * When this is called, `sparkProperties` is already filled with configs from the latter. 
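The helper above deliberately never overwrites keys that are already present, which is what gives --conf precedence over any file it later loads. A toy sketch of that rule on its own (names are illustrative):

import scala.collection.mutable

// Only fills gaps: keys already set (e.g. from --conf) keep their values.
def fillMissing(target: mutable.HashMap[String, String], fromFile: Map[String, String]): Unit = {
  fromFile.foreach { case (k, v) =>
    if (!target.contains(k)) {
      target(k) = v
    }
  }
}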
*/ private def mergeDefaultSparkProperties(): Unit = { - // Use common defaults file, if not specified by user - propertiesFile = Option(propertiesFile).getOrElse(Utils.getDefaultPropertiesFile(env)) - // Honor --conf before the defaults file - defaultSparkProperties.foreach { case (k, v) => - if (!sparkProperties.contains(k)) { - sparkProperties(k) = v - } + // Honor --conf before the specified properties file and defaults file + loadPropertiesFromFile(propertiesFile) + + // Also load properties from `spark-defaults.conf` if they do not exist in the properties file + // and --conf list when: + // - no input properties file is specified + // - input properties file is specified, but `--load-spark-defaults` flag is set + if (propertiesFile == null || loadSparkDefaults) { + loadPropertiesFromFile(Utils.getDefaultPropertiesFile(env)) } } @@ -142,7 +150,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S sparkProperties.keys.foreach { k => if (!k.startsWith("spark.")) { sparkProperties -= k - logWarning(s"Ignoring non-Spark config property: $k") + logWarning(log"Ignoring non-Spark config property: ${MDC(CONFIG, k)}") } } } @@ -389,6 +397,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case PROPERTIES_FILE => propertiesFile = value + case LOAD_SPARK_DEFAULTS => + loadSparkDefaults = true + case KILL_SUBMISSION => submissionToKill = value if (action != null) { @@ -489,7 +500,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S private def printUsageAndExit(exitCode: Int, unknownParam: Any = null): Unit = { if (unknownParam != null) { - logInfo("Unknown/unsupported param " + unknownParam) + logInfo(log"Unknown/unsupported param ${MDC(UNKNOWN_PARAM, unknownParam)}") } val command = sys.env.getOrElse("_SPARK_CMD_USAGE", """Usage: spark-submit [options] [app arguments] @@ -532,6 +543,10 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S | --conf, -c PROP=VALUE Arbitrary Spark configuration property. | --properties-file FILE Path to a file from which to load extra properties. If not | specified, this will look for conf/spark-defaults.conf. + | --load-spark-defaults Whether to load properties from conf/spark-defaults.conf, + | even if --properties-file is specified. Configurations + | specified in --properties-file will take precedence over + | those in conf/spark-defaults.conf. | | --driver-memory MEM Memory for driver (e.g. 1000M, 2G) (Default: ${mem_mb}M). | --driver-java-options Extra Java options to pass to the driver. 
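Summarizing the resolution order implemented by mergeDefaultSparkProperties and documented in the usage text above (a condensed restatement of the code in this hunk, not additional behavior):

// 1. --conf values are already in sparkProperties and always win.
// 2. --properties-file FILE fills in any keys still missing.
// 3. conf/spark-defaults.conf is consulted only when no properties file was given,
//    or when --load-spark-defaults is passed.
loadPropertiesFromFile(propertiesFile)
if (propertiesFile == null || loadSparkDefaults) {
  loadPropertiesFromFile(Utils.getDefaultPropertiesFile(env))
}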
@@ -592,7 +607,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S logInfo(getSqlShellOptions()) } - throw new SparkUserAppException(exitCode) + throw SparkUserAppException(exitCode) } /** diff --git a/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala b/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala index d317d6449f293..7b98461b01acf 100644 --- a/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/StandaloneResourceUtils.scala @@ -28,7 +28,7 @@ import org.json4s.jackson.JsonMethods.{compact, render} import org.apache.spark.SparkException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.COMPONENT +import org.apache.spark.internal.LogKeys.COMPONENT import org.apache.spark.resource.{ResourceAllocation, ResourceID, ResourceInformation, ResourceRequirement} import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils diff --git a/core/src/main/scala/org/apache/spark/deploy/Utils.scala b/core/src/main/scala/org/apache/spark/deploy/Utils.scala index 4d2546cb808c0..b3d871d75e6c7 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Utils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Utils.scala @@ -23,7 +23,7 @@ import jakarta.servlet.http.HttpServletRequest import org.apache.spark.SparkConf import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{LOG_TYPE, PATH} +import org.apache.spark.internal.LogKeys.{LOG_TYPE, PATH} import org.apache.spark.ui.JettyUtils.createServletHandler import org.apache.spark.ui.WebUI import org.apache.spark.util.Utils.{getFileLength, offsetBytes} diff --git a/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala index b0ee6018970ab..b34e5c408c3be 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/StandaloneAppClient.scala @@ -29,7 +29,8 @@ import org.apache.spark.SparkConf import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages._ import org.apache.spark.deploy.master.Master -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc._ import org.apache.spark.scheduler.ExecutorDecommissionInfo @@ -104,12 +105,14 @@ private[spark] class StandaloneAppClient( if (registered.get) { return } - logInfo("Connecting to master " + masterAddress.toSparkURL + "...") + logInfo( + log"Connecting to master ${MDC(LogKeys.MASTER_URL, masterAddress.toSparkURL)}...") val masterRef = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME) masterRef.send(RegisterApplication(appDescription, self)) } catch { case ie: InterruptedException => // Cancelled - case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e) + case NonFatal(e) => logWarning(log"Failed to connect to master " + + log"${MDC(MASTER_URL, masterAddress)}", e) } }) } @@ -146,7 +149,8 @@ private[spark] class StandaloneAppClient( private def sendToMaster(message: Any): Unit = { master match { case Some(masterRef) => masterRef.send(message) - case None => logWarning(s"Drop $message because has not yet connected to master") + case None 
=> logWarning( + log"Drop ${MDC(MESSAGE, message)} because has not yet connected to master") } } @@ -172,14 +176,16 @@ private[spark] class StandaloneAppClient( case ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) => val fullId = s"$appId/$id" - logInfo("Executor added: %s on %s (%s) with %d core(s)".format(fullId, workerId, hostPort, - cores)) + logInfo(log"Executor added: ${MDC(LogKeys.EXECUTOR_ID, fullId)} on " + + log"${MDC(LogKeys.WORKER_ID, workerId)} (${MDC(LogKeys.HOST_PORT, hostPort)}) " + + log"with ${MDC(LogKeys.NUM_CORES, cores)} core(s)") listener.executorAdded(fullId, workerId, hostPort, cores, memory) case ExecutorUpdated(id, state, message, exitStatus, workerHost) => val fullId = s"$appId/$id" val messageText = message.map(s => " (" + s + ")").getOrElse("") - logInfo("Executor updated: %s is now %s%s".format(fullId, state, messageText)) + logInfo(log"Executor updated: ${MDC(LogKeys.EXECUTOR_ID, fullId)} is now " + + log"${MDC(LogKeys.EXECUTOR_STATE, state)}${MDC(LogKeys.MESSAGE, messageText)}") if (ExecutorState.isFinished(state)) { listener.executorRemoved(fullId, message.getOrElse(""), exitStatus, workerHost) } else if (state == ExecutorState.DECOMMISSIONED) { @@ -188,11 +194,13 @@ private[spark] class StandaloneAppClient( } case WorkerRemoved(id, host, message) => - logInfo("Master removed worker %s: %s".format(id, message)) + logInfo(log"Master removed worker ${MDC(LogKeys.WORKER_ID, id)}: " + + log"${MDC(LogKeys.MESSAGE, message)}") listener.workerRemoved(id, host, message) case MasterChanged(masterRef, masterWebUiUrl) => - logInfo("Master has changed, new master is at " + masterRef.address.toSparkURL) + logInfo(log"Master has changed, new master is at " + + log"${MDC(LogKeys.MASTER_URL, masterRef.address.toSparkURL)}") master = Some(masterRef) alreadyDisconnected = false masterRef.send(MasterChangeAcknowledged(appId.get)) @@ -237,14 +245,16 @@ private[spark] class StandaloneAppClient( override def onDisconnected(address: RpcAddress): Unit = { if (master.exists(_.address == address)) { - logWarning(s"Connection to $address failed; waiting for master to reconnect...") + logWarning( + log"Connection to ${MDC(MASTER_URL, address)} failed; waiting for master to reconnect...") markDisconnected() } } override def onNetworkError(cause: Throwable, address: RpcAddress): Unit = { if (isPossibleMaster(address)) { - logWarning(s"Could not connect to $address: $cause") + logWarning(log"Could not connect to ${MDC(MASTER_URL, address)}: " + + log"${MDC(ERROR, cause)}") } } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala index 662746cf0c782..6e0fe69f3bfb6 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala @@ -28,7 +28,8 @@ import jakarta.servlet.{DispatcherType, Filter, FilterChain, FilterConfig, Servl import jakarta.servlet.http.{HttpServletRequest, HttpServletResponse} import org.eclipse.jetty.servlet.FilterHolder -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.metrics.source.Source import org.apache.spark.ui.SparkUI import org.apache.spark.util.Clock @@ -170,19 +171,19 @@ private[history] class ApplicationCache( */ @throws[NoSuchElementException] private def loadApplicationEntry(appId: String, attemptId: 
Option[String]): CacheEntry = { - lazy val application = s"$appId/${attemptId.mkString}" - logDebug(s"Loading application Entry $application") + lazy val application = log"${MDC(APP_ID, appId)}/${MDC(APP_ATTEMPT_ID, attemptId.mkString)}" + logDebug(log"Loading application Entry " + application) metrics.loadCount.inc() val loadedUI = time(metrics.loadTimer) { metrics.lookupCount.inc() operations.getAppUI(appId, attemptId) match { case Some(loadedUI) => - logDebug(s"Loaded application $application") + logDebug(log"Loaded application " + application) loadedUI case None => metrics.lookupFailureCount.inc() // guava's cache logs via java.util log, so is of limited use. Hence: our own message - logInfo(s"Failed to load application attempt $application") + logInfo(log"Failed to load application attempt " + application) throw new NoSuchElementException(s"no application with application Id '$appId'" + attemptId.map { id => s" attemptId '$id'" }.getOrElse(" and no attempt Id")) } @@ -197,7 +198,7 @@ private[history] class ApplicationCache( new CacheEntry(loadedUI, completed) } catch { case e: Exception => - logWarning(s"Failed to initialize application UI for $application", e) + logWarning(log"Failed to initialize application UI for ${MDC(APP_ID, application)}", e) operations.detachSparkUI(appId, attemptId, loadedUI.ui) throw e } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventFilter.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventFilter.scala index 59e52e6494987..05f109831499b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventFilter.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventFilter.scala @@ -24,7 +24,7 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.deploy.history.EventFilter.FilterStatistics import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{LINE, LINE_NUM, PATH} +import org.apache.spark.internal.LogKeys.{LINE, LINE_NUM, PATH} import org.apache.spark.scheduler._ import org.apache.spark.util.{JsonProtocol, Utils} diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileCompactor.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileCompactor.scala index 07a873ac704dc..f7889e8b54edf 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileCompactor.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileCompactor.scala @@ -28,7 +28,8 @@ import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.spark.SparkConf import org.apache.spark.deploy.history.EventFilter.FilterStatistics -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys import org.apache.spark.scheduler.ReplayListenerBus import org.apache.spark.util.Utils @@ -160,7 +161,8 @@ class EventLogFileCompactor( } logWriter.stop() val duration = System.currentTimeMillis() - startTime - logInfo(s"Finished rewriting eventLog files to ${logWriter.logPath} took $duration ms.") + logInfo(log"Finished rewriting eventLog files to ${MDC(LogKeys.PATH, logWriter.logPath)}" + + log" took ${MDC(LogKeys.TOTAL_TIME, duration)} ms.") logWriter.logPath } @@ -174,7 +176,7 @@ class EventLogFileCompactor( case _: IOException => } if (!deleted) { - logWarning(s"Failed to remove ${file.getPath} / skip removing.") + logWarning(log"Failed to remove ${MDC(LogKeys.PATH, file.getPath)} / skip removing.") } } } diff --git 
a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala index e7eb05c853671..f3bb6d5af3358 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala @@ -21,14 +21,16 @@ import java.io._ import java.net.URI import java.nio.charset.StandardCharsets -import org.apache.commons.compress.utils.CountingOutputStream +import org.apache.commons.io.output.CountingOutputStream import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, FileSystem, FSDataOutputStream, Path} import org.apache.hadoop.fs.permission.FsPermission import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.io.CompressionCodec import org.apache.spark.util.Utils @@ -82,7 +84,7 @@ abstract class EventLogFileWriter( protected def initLogFile(path: Path)(fnSetupWriter: OutputStream => PrintWriter): Unit = { if (shouldOverwrite && fileSystem.delete(path, true)) { - logWarning(s"Event log $path already exists. Overwriting...") + logWarning(log"Event log ${MDC(LogKeys.PATH, path)} already exists. Overwriting...") } val defaultFs = FileSystem.getDefaultUri(hadoopConf).getScheme @@ -105,7 +107,7 @@ abstract class EventLogFileWriter( .getOrElse(dstream) val bstream = new BufferedOutputStream(cstream, outputBufferSize) fileSystem.setPermission(path, EventLogFileWriter.LOG_FILE_PERMISSIONS) - logInfo(s"Logging events to $path") + logInfo(log"Logging events to ${MDC(PATH, path)}") writer = Some(fnSetupWriter(bstream)) } catch { case e: Exception => @@ -131,9 +133,10 @@ abstract class EventLogFileWriter( protected def renameFile(src: Path, dest: Path, overwrite: Boolean): Unit = { if (fileSystem.exists(dest)) { if (overwrite) { - logWarning(s"Event log $dest already exists. Overwriting...") + logWarning(log"Event log ${MDC(EVENT_LOG_DESTINATION, dest)} already exists. 
" + + log"Overwriting...") if (!fileSystem.delete(dest, true)) { - logWarning(s"Error deleting $dest") + logWarning(log"Error deleting ${MDC(EVENT_LOG_DESTINATION, dest)}") } } else { throw new IOException(s"Target log file already exists ($dest)") @@ -327,7 +330,7 @@ class RollingEventLogFilesWriter( override def writeEvent(eventJson: String, flushLogger: Boolean = false): Unit = { writer.foreach { w => - val currentLen = countingOutputStream.get.getBytesWritten + val currentLen = countingOutputStream.get.getByteCount if (currentLen + eventJson.length > eventFileMaxLength) { rollEventLogFile() } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 98cbd7b3eba82..95b23c0f894f8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -38,11 +38,13 @@ import org.apache.hadoop.security.AccessControlException import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.History._ import org.apache.spark.internal.config.Status._ import org.apache.spark.internal.config.Tests.IS_TESTING +import org.apache.spark.internal.config.UI import org.apache.spark.internal.config.UI._ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.ReplayListenerBus._ @@ -106,9 +108,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) private val historyUiAclsEnable = conf.get(History.HISTORY_SERVER_UI_ACLS_ENABLE) private val historyUiAdminAcls = conf.get(History.HISTORY_SERVER_UI_ADMIN_ACLS) private val historyUiAdminAclsGroups = conf.get(History.HISTORY_SERVER_UI_ADMIN_ACLS_GROUPS) - logInfo(s"History server ui acls " + (if (historyUiAclsEnable) "enabled" else "disabled") + - "; users with admin permissions: " + historyUiAdminAcls.mkString(",") + - "; groups with admin permissions: " + historyUiAdminAclsGroups.mkString(",")) + logInfo(log"History server ui acls" + + log" ${MDC(ACL_ENABLED, if (historyUiAclsEnable) "enabled" else "disabled")}" + + log"; users with admin permissions:" + + log" ${MDC(LogKeys.ADMIN_ACLS, historyUiAdminAcls.mkString(","))}" + + log"; groups with admin permissions:" + + log" ${MDC(ADMIN_ACL_GROUPS, historyUiAdminAclsGroups.mkString(","))}") private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) // Visible for testing @@ -481,8 +486,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) true } catch { case e: IllegalArgumentException => - logInfo("Exception in getting modificationTime of " - + reader.rootPath.getName + ". " + e.toString) + logInfo(log"Exception in getting modificationTime of" + + log" ${MDC(PATH, reader.rootPath.getName)}. 
${MDC(EXCEPTION, e.toString)}") false } } @@ -549,7 +554,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) try { if (conf.get(CLEANER_ENABLED) && reader.modificationTime < clock.getTimeMillis() - conf.get(MAX_LOG_AGE_S) * 1000) { - logInfo(s"Deleting expired event log ${reader.rootPath.toString}") + logInfo(log"Deleting expired event log ${MDC(PATH, reader.rootPath.toString)}") deleteLog(fs, reader.rootPath) // If the LogInfo read had succeeded, but the ApplicationInafoWrapper // read failure and throw the exception, we should also cleanup the log @@ -569,12 +574,13 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case _: FileNotFoundException => false case _: NoSuchElementException => false case NonFatal(e) => - logWarning(s"Error while reading new log ${reader.rootPath}", e) + logWarning(log"Error while reading new log " + + log"${MDC(PATH, reader.rootPath)}", e) false } case NonFatal(e) => - logWarning(s"Error while filtering log ${reader.rootPath}", e) + logWarning(log"Error while filtering log ${MDC(PATH, reader.rootPath)}", e) false } } @@ -729,7 +735,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) throw e case e: AccessControlException => // We don't have read permissions on the log file - logWarning(s"Unable to read log $rootPath", e) + logWarning(log"Unable to read log ${MDC(PATH, rootPath)}", e) markInaccessible(rootPath) // SPARK-28157 We should remove this inaccessible entry from the KVStore // to handle permission-only changes with the same file sizes later. @@ -744,8 +750,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // Do nothing, the application completed during processing, the final event log file // will be processed by next around. 
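Throughout these files the s"..." interpolations are rewritten to the log"..." form, with every dynamic value wrapped in MDC(<LogKey>, value) so the rendered message also carries a structured key/value pair. The toy interpolator below sketches the idea; ToyMDC, ToyLogEntry and toyLog are made-up names for illustration, not Spark's org.apache.spark.internal API.

  object ToyStructuredLogging {
    final case class ToyMDC(key: String, value: Any)
    final case class ToyLogEntry(message: String, context: Map[String, String]) {
      // Mirrors how log"..." + log"..." concatenates both text and context.
      def +(other: ToyLogEntry): ToyLogEntry =
        ToyLogEntry(message + other.message, context ++ other.context)
    }

    implicit class ToyLogInterpolator(sc: StringContext) {
      def toyLog(args: Any*): ToyLogEntry = {
        val rendered = sc.s(args.map {
          case ToyMDC(_, v) => v
          case other => other
        }: _*)
        val ctx = args.collect { case ToyMDC(k, v) => k -> String.valueOf(v) }.toMap
        ToyLogEntry(rendered, ctx)
      }
    }

    def main(args: Array[String]): Unit = {
      val path = "/tmp/spark-events/app-123"
      val entry = toyLog"Deleting expired event log ${ToyMDC("PATH", path)}"
      println(entry.message)  // plain text, as a console appender would print it
      println(entry.context)  // Map(PATH -> /tmp/spark-events/app-123) for structured output
    }
  }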
} else { - logWarning(s"In-progress event log file does not exist: ${reader.rootPath}, " + - s"neither does the final event log file: $finalFilePath.") + logWarning(log"In-progress event log file does not exist: " + + log"${MDC(PATH, reader.rootPath)}, " + + log"neither does the final event log file: ${MDC(FINAL_PATH, finalFilePath)}.") } case e: Exception => logError("Exception while merging application listings", e) @@ -798,7 +805,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val listener = new AppListingListener(reader, clock, shouldHalt) bus.addListener(listener) - logInfo(s"Parsing $logPath for listing data...") + logInfo(log"Parsing ${MDC(PATH, logPath)} for listing data...") val logFiles = reader.listEventLogFiles parseAppEventLogs(logFiles, bus, !appCompleted, eventsFilter) @@ -826,7 +833,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) Utils.tryWithResource(EventLogFileReader.openEventLog(lastFile.getPath, fs)) { in => val target = lastFile.getLen - reparseChunkSize if (target > 0) { - logInfo(s"Looking for end event; skipping $target bytes from $logPath...") + logInfo(log"Looking for end event; skipping ${MDC(NUM_BYTES, target)} bytes" + + log" from ${MDC(PATH, logPath)}...") var skipped = 0L while (skipped < target) { skipped += in.skip(target - skipped) @@ -845,7 +853,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } - logInfo(s"Finished parsing $logPath") + logInfo(log"Finished parsing ${MDC(PATH, logPath)}") listener.applicationInfo match { case Some(app) if !lookForEndEvent || app.attempts.head.info.completed => @@ -880,7 +888,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // In this case, the attempt is still not marked as finished but was expected to. This can // mean the end event is before the configured threshold, so call the method again to // re-parse the whole log. 
- logInfo(s"Reparsing $logPath since end event was not found.") + logInfo(log"Reparsing ${MDC(PATH, logPath)} since end event was not found.") doMergeApplicationListingInternal(reader, scanTime, enableOptimizations = false, lastEvaluatedForCompaction) @@ -919,7 +927,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case e: InterruptedException => throw e case e: AccessControlException => - logWarning(s"Insufficient permission while compacting log for $rootPath", e) + logWarning(log"Insufficient permission while compacting log for ${MDC(PATH, rootPath)}", e) case e: Exception => logError(log"Exception while compacting log for ${MDC(PATH, rootPath)}", e) } finally { @@ -949,7 +957,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val log = listing.read(classOf[LogInfo], logPath) if (log.lastProcessed <= maxTime && log.appId.isEmpty) { - logInfo(s"Deleting invalid / corrupt event log ${log.logPath}") + logInfo(log"Deleting invalid / corrupt event log ${MDC(PATH, log.logPath)}") deleteLog(fs, new Path(log.logPath)) listing.delete(classOf[LogInfo], log.logPath) } @@ -991,7 +999,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .first(maxTime), Int.MaxValue) { l => l.logType == null || l.logType == LogType.EventLogs } stale.filterNot(isProcessing).foreach { log => if (log.appId.isEmpty) { - logInfo(s"Deleting invalid / corrupt event log ${log.logPath}") + logInfo(log"Deleting invalid / corrupt event log ${MDC(PATH, log.logPath)}") deleteLog(fs, new Path(log.logPath)) listing.delete(classOf[LogInfo], log.logPath) } @@ -1002,7 +1010,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val num = KVUtils.size(listing.view(classOf[LogInfo]).index("lastProcessed")) var count = num - maxNum if (count > 0) { - logInfo(s"Try to delete $count old event logs to keep $maxNum logs in total.") + logInfo(log"Try to delete ${MDC(NUM_FILES, count)} old event logs" + + log" to keep ${MDC(MAX_NUM_FILES, maxNum)} logs in total.") KVUtils.foreach(listing.view(classOf[ApplicationInfoWrapper]).index("oldestAttempt")) { app => if (count > 0) { // Applications may have multiple attempts, some of which may not be completed yet. 
@@ -1011,7 +1020,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } } if (count > 0) { - logWarning(s"Fail to clean up according to MAX_LOG_NUM policy ($maxNum).") + logWarning(log"Fail to clean up according to MAX_LOG_NUM policy " + + log"(${MDC(MAX_NUM_LOG_POLICY, maxNum)}).") } } @@ -1030,7 +1040,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) var countDeleted = 0 toDelete.foreach { attempt => - logInfo(s"Deleting expired event log for ${attempt.logPath}") + logInfo(log"Deleting expired event log for ${MDC(PATH, attempt.logPath)}") val logPath = new Path(logDir, attempt.logPath) listing.delete(classOf[LogInfo], logPath.toString()) cleanAppData(app.id, attempt.info.attemptId, logPath.toString()) @@ -1078,7 +1088,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) false } if (deleteFile) { - logInfo(s"Deleting expired driver log for: $logFileStr") + logInfo(log"Deleting expired driver log for: ${MDC(PATH, logFileStr)}") listing.delete(classOf[LogInfo], logFileStr) deleteLog(driverLogFs, f.getPath()) } @@ -1091,7 +1101,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) .reverse() .first(maxTime), Int.MaxValue) { l => l.logType != null && l.logType == LogType.DriverLogs } stale.filterNot(isProcessing).foreach { log => - logInfo(s"Deleting invalid driver log ${log.logPath}") + logInfo(log"Deleting invalid driver log ${MDC(PATH, log.logPath)}") listing.delete(classOf[LogInfo], log.logPath) deleteLog(driverLogFs, new Path(log.logPath)) } @@ -1120,10 +1130,10 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) try { val eventLogFiles = reader.listEventLogFiles - logInfo(s"Parsing ${reader.rootPath} to re-build UI...") + logInfo(log"Parsing ${MDC(PATH, reader.rootPath)} to re-build UI...") parseAppEventLogs(eventLogFiles, replayBus, !reader.completed) trackingStore.close(false) - logInfo(s"Finished parsing ${reader.rootPath}") + logInfo(log"Finished parsing ${MDC(PATH, reader.rootPath)}") } catch { case e: Exception => Utils.tryLogNonFatalError { @@ -1224,7 +1234,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) return KVUtils.open(path, metadata, conf, live = false) } catch { case e: Exception => - logInfo(s"Failed to open existing store for $appId/${attempt.info.attemptId}.", e) + logInfo(log"Failed to open existing store for" + + log" ${MDC(APP_ID, appId)}/${MDC(LogKeys.APP_ATTEMPT_ID, attempt.info.attemptId)}.", e) dm.release(appId, attempt.info.attemptId, delete = true) } } @@ -1240,11 +1251,14 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case e: RuntimeException if e.getMessage != null && e.getMessage.contains("Not enough memory to create hybrid") => // Handle exception from `HistoryServerMemoryManager.lease`. - logInfo(s"Failed to create HybridStore for $appId/${attempt.info.attemptId}." + - s" Using $hybridStoreDiskBackend. " + e.getMessage) + logInfo(log"Failed to create HybridStore for" + + log" ${MDC(APP_ID, appId)}/${MDC(LogKeys.APP_ATTEMPT_ID, attempt.info.attemptId)}." + + log" Using ${MDC(LogKeys.HYBRID_STORE_DISK_BACKEND, hybridStoreDiskBackend)}." + + log" ${MDC(EXCEPTION, e.getMessage)}") case e: Exception => - logInfo(s"Failed to create HybridStore for $appId/${attempt.info.attemptId}." + - s" Using $hybridStoreDiskBackend.", e) + logInfo(log"Failed to create HybridStore for" + + log" ${MDC(APP_ID, appId)}/${MDC(LogKeys.APP_ATTEMPT_ID, attempt.info.attemptId)}." 
+ + log" Using ${MDC(LogKeys.HYBRID_STORE_DISK_BACKEND, hybridStoreDiskBackend)}.", e) } } @@ -1275,8 +1289,9 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case _: IOException if !retried => // compaction may touch the file(s) which app rebuild wants to read // compaction wouldn't run in short interval, so try again... - logWarning(s"Exception occurred while rebuilding log path ${attempt.logPath} - " + - "trying again...") + logWarning(log"Exception occurred while rebuilding log path " + + log"${MDC(PATH, attempt.logPath)} - " + + log"trying again...") store.close() memoryManager.release(appId, attempt.info.attemptId) retried = true @@ -1290,20 +1305,23 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // Create a disk-base KVStore and start a background thread to dump data to it var lease: dm.Lease = null try { - logInfo(s"Leasing disk manager space for app $appId / ${attempt.info.attemptId}...") + logInfo(log"Leasing disk manager space for app" + + log" ${MDC(APP_ID, appId)} / ${MDC(LogKeys.APP_ATTEMPT_ID, attempt.info.attemptId)}...") lease = dm.lease(reader.totalSize, reader.compressionCodec.isDefined) val diskStore = KVUtils.open(lease.tmpPath, metadata, conf, live = false) hybridStore.setDiskStore(diskStore) hybridStore.switchToDiskStore(new HybridStore.SwitchToDiskStoreListener { override def onSwitchToDiskStoreSuccess(): Unit = { - logInfo(s"Completely switched to diskStore for app $appId / ${attempt.info.attemptId}.") + logInfo(log"Completely switched to diskStore for app" + + log" ${MDC(APP_ID, appId)} / ${MDC(LogKeys.APP_ATTEMPT_ID, attempt.info.attemptId)}.") diskStore.close() val newStorePath = lease.commit(appId, attempt.info.attemptId) hybridStore.setDiskStore(KVUtils.open(newStorePath, metadata, conf, live = false)) memoryManager.release(appId, attempt.info.attemptId) } override def onSwitchToDiskStoreFail(e: Exception): Unit = { - logWarning(s"Failed to switch to diskStore for app $appId / ${attempt.info.attemptId}", e) + logWarning(log"Failed to switch to diskStore for app ${MDC(APP_ID, appId)} / " + + log"${MDC(LogKeys.APP_ATTEMPT_ID, attempt.info.attemptId)}", e) diskStore.close() lease.rollback() } @@ -1332,7 +1350,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) val reader = EventLogFileReader(fs, new Path(logDir, attempt.logPath), attempt.lastIndex) val isCompressed = reader.compressionCodec.isDefined - logInfo(s"Leasing disk manager space for app $appId / ${attempt.info.attemptId}...") + logInfo(log"Leasing disk manager space for app" + + log" ${MDC(APP_ID, appId)} / ${MDC(LogKeys.APP_ATTEMPT_ID, attempt.info.attemptId)}...") val lease = dm.lease(reader.totalSize, isCompressed) try { Utils.tryWithResource(KVUtils.open(lease.tmpPath, metadata, conf, live = false)) { store => @@ -1343,7 +1362,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case _: IOException if !retried => // compaction may touch the file(s) which app rebuild wants to read // compaction wouldn't run in short interval, so try again... 
- logWarning(s"Exception occurred while rebuilding app $appId - trying again...") + logWarning(log"Exception occurred while rebuilding app ${MDC(APP_ID, appId)} - " + + log"trying again...") lease.rollback() retried = true @@ -1370,8 +1390,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case _: IOException if !retried => // compaction may touch the file(s) which app rebuild wants to read // compaction wouldn't run in short interval, so try again... - logWarning(s"Exception occurred while rebuilding log path ${attempt.logPath} - " + - "trying again...") + logWarning(log"Exception occurred while rebuilding log path " + + log"${MDC(LogKeys.PATH, attempt.logPath)} - trying again...") retried = true case e: Exception => @@ -1401,7 +1421,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) deleted = fs.delete(log, true) } catch { case _: AccessControlException => - logInfo(s"No permission to delete $log, ignoring.") + logInfo(log"No permission to delete ${MDC(PATH, log)}, ignoring.") case ioe: IOException => logError(log"IOException in cleaning ${MDC(PATH, log)}", ioe) } @@ -1553,7 +1573,7 @@ private[history] class AppListingListener( val allProperties = event.environmentDetails("Spark Properties").toMap attempt.viewAcls = emptyStringToNone(allProperties.get(UI_VIEW_ACLS.key)) - attempt.adminAcls = emptyStringToNone(allProperties.get(ADMIN_ACLS.key)) + attempt.adminAcls = emptyStringToNone(allProperties.get(UI.ADMIN_ACLS.key)) attempt.viewAclsGroups = emptyStringToNone(allProperties.get(UI_VIEW_ACLS_GROUPS.key)) attempt.adminAclsGroups = emptyStringToNone(allProperties.get(ADMIN_ACLS_GROUPS.key)) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 7362634d5b09e..6e559dc4492ea 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -28,7 +28,8 @@ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.Utils.addRenderLogHandler -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.History import org.apache.spark.internal.config.UI._ @@ -115,9 +116,9 @@ class HistoryServer( // requested, and the proper data should be served at that point. // Also, make sure that the redirect url contains the query string present in the request. val redirect = if (shouldAppendAttemptId) { - req.getRequestURI.stripSuffix("/") + "/" + attemptId.get + req.getRequestURI.stripSuffix("/") + "/" + attemptId.get + "/" } else { - req.getRequestURI + req.getRequestURI.stripSuffix("/") + "/" } val query = Option(req.getQueryString).map("?" 
+ _).getOrElse("") res.sendRedirect(res.encodeRedirectURL(redirect + query)) @@ -301,7 +302,6 @@ object HistoryServer extends Logging { val securityManager = createSecurityManager(conf) val providerName = conf.get(History.PROVIDER) - .getOrElse(classOf[FsHistoryProvider].getName()) val provider = Utils.classForName[ApplicationHistoryProvider](providerName) .getConstructor(classOf[SparkConf]) .newInstance(conf) @@ -332,8 +332,8 @@ object HistoryServer extends Logging { } if (config.get(ACLS_ENABLE)) { - logInfo(s"${ACLS_ENABLE.key} is configured, " + - s"clearing it and only using ${History.HISTORY_SERVER_UI_ACLS_ENABLE.key}") + logInfo(log"${MDC(KEY, ACLS_ENABLE.key)} is configured, " + + log"clearing it and only using ${MDC(KEY2, History.HISTORY_SERVER_UI_ACLS_ENABLE.key)}") config.set(ACLS_ENABLE, false) } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala index 01cc59e1d2e6e..2fdf7a473a298 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala @@ -21,6 +21,7 @@ import scala.annotation.tailrec import org.apache.spark.SparkConf import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{ConfigEntry, History} import org.apache.spark.util.Utils /** @@ -44,47 +45,62 @@ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[Strin case Nil => - case _ => - printUsageAndExit(1) + case other => + val errorMsg = s"Unrecognized options: ${other.mkString(" ")}\n" + printUsageAndExit(1, errorMsg) } } - // This mutates the SparkConf, so all accesses to it must be made after this line - Utils.loadDefaultSparkProperties(conf, propertiesFile) + // This mutates the SparkConf, so all accesses to it must be made after this line + Utils.loadDefaultSparkProperties(conf, propertiesFile) - private def printUsageAndExit(exitCode: Int): Unit = { - // scalastyle:off println - System.err.println( - """ - |Usage: HistoryServer [options] - | - |Options: - | --properties-file FILE Path to a custom Spark properties file. - | Default is conf/spark-defaults.conf. - | - |Configuration options can be set by setting the corresponding JVM system property. - |History Server options are always available; additional options depend on the provider. 
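The redirect change in HistoryServer above makes both branches end with a trailing slash, so relative links on the application page resolve under the attempt rather than its parent. A simplified sketch of the normalization (hypothetical helper name, using an Option where the real code checks shouldAppendAttemptId):

  object RedirectSketch {
    // Both branches of the patched code reduce to: strip any trailing slash, then add exactly one.
    def normalize(requestUri: String, attemptId: Option[String]): String =
      attemptId match {
        case Some(id) => requestUri.stripSuffix("/") + "/" + id + "/"
        case None     => requestUri.stripSuffix("/") + "/"
      }

    def main(args: Array[String]): Unit = {
      println(normalize("/history/app-123", Some("1")))   // /history/app-123/1/
      println(normalize("/history/app-123/", None))       // /history/app-123/
      // With the trailing slash, a relative href such as "jobs/" resolves to
      // /history/app-123/1/jobs/ instead of /history/app-123/jobs/.
    }
  }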
- | - |History Server options: - | - | spark.history.ui.port Port where server will listen for connections - | (default 18080) - | spark.history.acls.enable Whether to enable view acls for all applications - | (default false) - | spark.history.provider Name of history provider class (defaults to - | file system-based provider) - | spark.history.retainedApplications Max number of application UIs to keep loaded in memory - | (default 50) - |FsHistoryProvider options: - | - | spark.history.fs.logDirectory Directory where app logs are stored - | (default: file:/tmp/spark-events) - | spark.history.fs.update.interval How often to reload log data from storage - | (in seconds, default: 10) - |""".stripMargin) - // scalastyle:on println + // scalastyle:off line.size.limit println + private def printUsageAndExit(exitCode: Int, error: String = ""): Unit = { + val configs = History.getClass.getDeclaredFields + .filter(f => classOf[ConfigEntry[_]].isAssignableFrom(f.getType)) + .map { f => + f.setAccessible(true) + f.get(History).asInstanceOf[ConfigEntry[_]] + } + val maxConfigLength = configs.map(_.key.length).max + val sb = new StringBuilder( + s""" + |${error}Usage: HistoryServer [options] + | + |Options: + | ${"--properties-file FILE".padTo(maxConfigLength, ' ')} Path to a custom Spark properties file. + | ${"".padTo(maxConfigLength, ' ')} Default is conf/spark-defaults.conf. + | + |Configuration options can be set by setting the corresponding JVM system property. + |History Server options are always available; additional options depend on the provider. + | + |""".stripMargin) + + def printConfigs(configs: Array[ConfigEntry[_]]): Unit = { + configs.sortBy(_.key).foreach { conf => + sb.append(" ").append(conf.key.padTo(maxConfigLength, ' ')) + var currentDocLen = 0 + val intention = "\n" + " " * (maxConfigLength + 2) + conf.doc.split("\\s+").foreach { word => + if (currentDocLen + word.length > 60) { + sb.append(intention).append(" ").append(word) + currentDocLen = word.length + 1 + } else { + sb.append(" ").append(word) + currentDocLen += word.length + 1 + } + } + sb.append(intention).append(" (Default: ").append(conf.defaultValueString).append(")\n") + } + } + val (common, fs) = configs.partition(!_.key.startsWith("spark.history.fs.")) + sb.append("History Server options:\n") + printConfigs(common) + sb.append("FsHistoryProvider options:\n") + printConfigs(fs) + System.err.println(sb.toString()) + // scalastyle:on line.size.limit println System.exit(exitCode) } - } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala index a84e1b1819542..122ed299242f5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerDiskManager.scala @@ -25,7 +25,9 @@ import scala.collection.mutable.{HashMap, ListBuffer} import org.apache.commons.io.FileUtils import org.apache.spark.SparkConf -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ +import org.apache.spark.internal.config.History import org.apache.spark.internal.config.History._ import org.apache.spark.internal.config.History.HybridStoreDiskBackend.ROCKSDB import org.apache.spark.status.KVUtils @@ -57,7 +59,7 @@ private class HistoryServerDiskManager( throw new IllegalArgumentException(s"Failed to create app directory ($appStoreDir).") } private 
val extension = - if (conf.get(HYBRID_STORE_DISK_BACKEND) == ROCKSDB.toString) ".rdb" else ".ldb" + if (conf.get(History.HYBRID_STORE_DISK_BACKEND) == ROCKSDB.toString) ".rdb" else ".ldb" private val tmpStoreDir = new File(path, "temp") if (!tmpStoreDir.isDirectory() && !tmpStoreDir.mkdir()) { @@ -99,9 +101,9 @@ private class HistoryServerDiskManager( } } - logInfo("Initialized disk manager: " + - s"current usage = ${Utils.bytesToString(currentUsage.get())}, " + - s"max usage = ${Utils.bytesToString(maxUsage)}") + logInfo(log"Initialized disk manager:" + + log" current usage = ${MDC(NUM_BYTES_CURRENT, Utils.bytesToString(currentUsage.get()))}," + + log" max usage = ${MDC(NUM_BYTES_MAX, Utils.bytesToString(maxUsage))}") } /** @@ -126,8 +128,9 @@ private class HistoryServerDiskManager( updateUsage(needed) val current = currentUsage.get() if (current > maxUsage) { - logInfo(s"Lease of ${Utils.bytesToString(needed)} may cause usage to exceed max " + - s"(${Utils.bytesToString(current)} > ${Utils.bytesToString(maxUsage)})") + logInfo(log"Lease of ${MDC(NUM_BYTES, Utils.bytesToString(needed))} may cause" + + log" usage to exceed max (${MDC(NUM_BYTES_CURRENT, Utils.bytesToString(current))}" + + log" > ${MDC(NUM_BYTES_MAX, Utils.bytesToString(maxUsage))})") } new Lease(tmp, needed) @@ -237,16 +240,19 @@ private class HistoryServerDiskManager( if (evicted.nonEmpty) { val freed = evicted.map { info => - logInfo(s"Deleting store for ${info.appId}/${info.attemptId}.") + logInfo(log"Deleting store for" + + log" ${MDC(APP_ID, info.appId)}/${MDC(APP_ATTEMPT_ID, info.attemptId)}.") deleteStore(new File(info.path)) updateUsage(-info.size, committed = true) info.size }.sum - logInfo(s"Deleted ${evicted.size} store(s) to free ${Utils.bytesToString(freed)} " + - s"(target = ${Utils.bytesToString(size)}).") + logInfo(log"Deleted ${MDC(NUM_BYTES_EVICTED, evicted.size)} store(s)" + + log" to free ${MDC(NUM_BYTES_TO_FREE, Utils.bytesToString(freed))}" + + log" (target = ${MDC(NUM_BYTES, Utils.bytesToString(size))}).") } else { - logWarning(s"Unable to free any space to make room for ${Utils.bytesToString(size)}.") + logWarning(log"Unable to free any space to make room for " + + log"${MDC(NUM_BYTES, Utils.bytesToString(size))}.") } } } @@ -312,8 +318,9 @@ private class HistoryServerDiskManager( if (committedUsage.get() > maxUsage) { val current = Utils.bytesToString(committedUsage.get()) val max = Utils.bytesToString(maxUsage) - logWarning(s"Commit of application $appId / $attemptId causes maximum disk usage to be " + - s"exceeded ($current > $max)") + logWarning(log"Commit of application ${MDC(APP_ID, appId)} / " + + log"${MDC(APP_ATTEMPT_ID, attemptId)} causes maximum disk usage to be " + + log"exceeded (${MDC(NUM_BYTES, current)} > ${MDC(NUM_BYTES_MAX, max)})") } updateApplicationStoreInfo(appId, attemptId, newSize) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerMemoryManager.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerMemoryManager.scala index b95f1ed24f376..6e3dbb1170998 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerMemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServerMemoryManager.scala @@ -22,7 +22,8 @@ import java.util.concurrent.atomic.AtomicLong import scala.collection.mutable.HashMap import org.apache.spark.SparkConf -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import 
org.apache.spark.internal.config.History._ import org.apache.spark.io.CompressionCodec import org.apache.spark.util.Utils @@ -39,9 +40,9 @@ private class HistoryServerMemoryManager( private[history] val active = new HashMap[(String, Option[String]), Long]() def initialize(): Unit = { - logInfo("Initialized memory manager: " + - s"current usage = ${Utils.bytesToString(currentUsage.get())}, " + - s"max usage = ${Utils.bytesToString(maxUsage)}") + logInfo(log"Initialized memory manager: " + + log"current usage = ${MDC(NUM_BYTES_CURRENT, Utils.bytesToString(currentUsage.get()))}, " + + log"max usage = ${MDC(NUM_BYTES_MAX, Utils.bytesToString(maxUsage))}") } def lease( @@ -58,8 +59,8 @@ private class HistoryServerMemoryManager( active(appId -> attemptId) = memoryUsage } currentUsage.addAndGet(memoryUsage) - logInfo(s"Leasing ${Utils.bytesToString(memoryUsage)} memory usage for " + - s"app $appId / $attemptId") + logInfo(log"Leasing ${MDC(NUM_BYTES, Utils.bytesToString(memoryUsage))} memory usage for " + + log"app ${MDC(APP_ID, appId)} / ${MDC(APP_ATTEMPT_ID, attemptId)}") } def release(appId: String, attemptId: Option[String]): Unit = { @@ -68,8 +69,8 @@ private class HistoryServerMemoryManager( memoryUsage match { case Some(m) => currentUsage.addAndGet(-m) - logInfo(s"Released ${Utils.bytesToString(m)} memory usage for " + - s"app $appId / $attemptId") + logInfo(log"Released ${MDC(NUM_BYTES, Utils.bytesToString(m))} memory usage for " + + log"app ${MDC(APP_ID, appId)} / ${MDC(APP_ATTEMPT_ID, attemptId)}") case None => } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala index fb067f10c5a4f..4332544e4491c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/FileSystemPersistenceEngine.scala @@ -22,7 +22,8 @@ import java.nio.file.{FileAlreadyExistsException, Files, Paths} import scala.reflect.ClassTag -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.io.CompressionCodec import org.apache.spark.serializer.{DeserializationStream, SerializationStream, Serializer} import org.apache.spark.util.ArrayImplicits._ @@ -56,7 +57,7 @@ private[master] class FileSystemPersistenceEngine( override def unpersist(name: String): Unit = { val f = new File(dir + File.separator + name) if (!f.delete()) { - logWarning(s"Error deleting ${f.getPath()}") + logWarning(log"Error deleting ${MDC(PATH, f.getPath())}") } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 2834434256358..84e67cba33a9f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -33,7 +33,7 @@ import org.apache.spark.deploy.master.MasterMessages._ import org.apache.spark.deploy.master.ui.MasterWebUI import org.apache.spark.deploy.rest.StandaloneRestServer import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{APP_DESC, APP_ID, EXECUTOR_ID, RETRY_COUNT} +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Deploy._ import org.apache.spark.internal.config.Deploy.WorkerSelectionPolicy._ @@ -42,7 +42,7 @@ import 
org.apache.spark.internal.config.Worker._ import org.apache.spark.metrics.{MetricsSystem, MetricsSystemInstances} import org.apache.spark.resource.{ResourceInformation, ResourceProfile, ResourceRequirement, ResourceUtils} import org.apache.spark.rpc._ -import org.apache.spark.serializer.{JavaSerializer, KryoSerializer, Serializer} +import org.apache.spark.serializer.{JavaSerializer, Serializer} import org.apache.spark.util.{SparkUncaughtExceptionHandler, ThreadUtils, Utils} import org.apache.spark.util.ArrayImplicits._ @@ -143,8 +143,9 @@ private[deploy] class Master( } override def onStart(): Unit = { - logInfo("Starting Spark master at " + masterUrl) - logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}") + logInfo(log"Starting Spark master at ${MDC(LogKeys.MASTER_URL, masterUrl)}") + logInfo(log"Running Spark version" + + log" ${MDC(LogKeys.SPARK_VERSION, org.apache.spark.SPARK_VERSION)}") webUi = new MasterWebUI(this, webUiPort) webUi.bind() masterWebUiUrl = webUi.webUrl @@ -157,8 +158,8 @@ private[deploy] class Master( masterWebUiUrl = uiReverseProxyUrl.get + "/" } webUi.addProxy() - logInfo(s"Spark Master is acting as a reverse proxy. Master, Workers and " + - s"Applications UIs are available at $masterWebUiUrl") + logInfo(log"Spark Master is acting as a reverse proxy. Master, Workers and " + + log"Applications UIs are available at ${MDC(LogKeys.WEB_URL, masterWebUiUrl)}") } checkForWorkerTimeOutTask = forwardMessageThread.scheduleAtFixedRate( () => Utils.tryLogNonFatalError { self.send(CheckForWorkerTimeOut) }, @@ -179,10 +180,7 @@ private[deploy] class Master( masterMetricsSystem.getServletHandlers.foreach(webUi.attachHandler) applicationMetricsSystem.getServletHandlers.foreach(webUi.attachHandler) - val serializer = RecoverySerializer.withName(conf.get(RECOVERY_SERIALIZER)) match { - case RecoverySerializer.JAVA => new JavaSerializer(conf) - case RecoverySerializer.KRYO => new KryoSerializer(conf) - } + val serializer = new JavaSerializer(conf) val (persistenceEngine_, leaderElectionAgent_) = recoveryMode match { case "ZOOKEEPER" => logInfo("Persisting recovery state to ZooKeeper") @@ -245,7 +243,7 @@ private[deploy] class Master( } else { RecoveryState.RECOVERING } - logInfo("I have been elected leader! New state: " + state) + logInfo(log"I have been elected leader! New state: ${MDC(LogKeys.RECOVERY_STATE, state)}") if (state == RecoveryState.RECOVERING) { if (beginRecovery(storedApps, storedDrivers, storedWorkers)) { recoveryCompletionTask = forwardMessageThread.schedule(new Runnable { @@ -294,10 +292,11 @@ private[deploy] class Master( if (state == RecoveryState.STANDBY) { // ignore, don't send response } else { - logInfo("Registering app " + description.name) + logInfo(log"Registering app ${MDC(LogKeys.APP_NAME, description.name)}") val app = createApplication(description, driver) registerApplication(app) - logInfo("Registered app " + description.name + " with ID " + app.id) + logInfo(log"Registered app ${MDC(LogKeys.APP_NAME, description.name)} with" + + log" ID ${MDC(LogKeys.APP_ID, app.id)}") persistenceEngine.addApplication(app) driver.send(RegisteredApplication(app.id, self)) schedule() @@ -317,22 +316,23 @@ private[deploy] class Master( workerInfo.lastHeartbeat = System.currentTimeMillis() case None => if (workers.map(_.id).contains(workerId)) { - logWarning(s"Got heartbeat from unregistered worker $workerId." + - " Asking it to re-register.") + logWarning(log"Got heartbeat from unregistered worker " + + log"${MDC(LogKeys.WORKER_ID, workerId)}. 
Asking it to re-register.") worker.send(ReconnectWorker(masterUrl)) } else { - logWarning(s"Got heartbeat from unregistered worker $workerId." + - " This worker was never registered, so ignoring the heartbeat.") + logWarning(log"Got heartbeat from unregistered worker " + + log"${MDC(LogKeys.WORKER_ID, workerId)}. " + + log"This worker was never registered, so ignoring the heartbeat.") } } case MasterChangeAcknowledged(appId) => idToApp.get(appId) match { case Some(app) => - logInfo("Application has been re-registered: " + appId) + logInfo(log"Application has been re-registered: ${MDC(LogKeys.APP_ID, appId)}") app.state = ApplicationState.WAITING case None => - logWarning("Master change ack from unknown app: " + appId) + logWarning(log"Master change ack from unknown app: ${MDC(LogKeys.APP_ID, appId)}") } if (canCompleteRecovery) { completeRecovery() } @@ -340,7 +340,7 @@ private[deploy] class Master( case WorkerSchedulerStateResponse(workerId, execResponses, driverResponses) => idToWorker.get(workerId) match { case Some(worker) => - logInfo("Worker has been re-registered: " + workerId) + logInfo(log"Worker has been re-registered: ${MDC(LogKeys.WORKER_ID, workerId)}") worker.state = WorkerState.ALIVE val validExecutors = execResponses.filter( @@ -366,7 +366,7 @@ private[deploy] class Master( } } case None => - logWarning("Scheduler state from unknown worker: " + workerId) + logWarning(log"Scheduler state from unknown worker: ${MDC(LogKeys.WORKER_ID, workerId)}") } if (canCompleteRecovery) { completeRecovery() } @@ -392,11 +392,12 @@ private[deploy] class Master( } } case None => - logWarning("Worker state from unknown worker: " + workerId) + logWarning(log"Worker state from unknown worker: ${MDC(LogKeys.WORKER_ID, workerId)}") } case UnregisterApplication(applicationId) => - logInfo(s"Received unregister request from application $applicationId") + logInfo(log"Received unregister request from application" + + log" ${MDC(LogKeys.APP_ID, applicationId)}") idToApp.get(applicationId).foreach(finishApplication) case CheckForWorkerTimeOut => @@ -411,7 +412,7 @@ private[deploy] class Master( "Can only accept driver submissions in ALIVE state." context.reply(SubmitDriverResponse(self, false, None, msg)) } else { - logInfo("Driver submitted " + description.command.mainClass) + logInfo(log"Driver submitted ${MDC(LogKeys.CLASS_NAME, description.command.mainClass)}") val driver = createDriver(description) persistenceEngine.addDriver(driver) waitingDrivers += driver @@ -431,7 +432,7 @@ private[deploy] class Master( s"Can only kill drivers in ALIVE state." 
context.reply(KillDriverResponse(self, driverId, success = false, msg)) } else { - logInfo("Asked to kill driver " + driverId) + logInfo(log"Asked to kill driver ${MDC(LogKeys.DRIVER_ID, driverId)}") val driver = drivers.find(_.id == driverId) driver match { case Some(d) => @@ -447,12 +448,13 @@ private[deploy] class Master( } } // TODO: It would be nice for this to be a synchronous response - val msg = s"Kill request for $driverId submitted" + val msg = log"Kill request for ${MDC(LogKeys.DRIVER_ID, driverId)} submitted" logInfo(msg) - context.reply(KillDriverResponse(self, driverId, success = true, msg)) + context.reply(KillDriverResponse(self, driverId, success = true, msg.message)) case None => val msg = s"Driver $driverId has already finished or does not exist" - logWarning(msg) + logWarning(log"Driver ${MDC(LogKeys.DRIVER_ID, driverId)} " + + log"has already finished or does not exist") context.reply(KillDriverResponse(self, driverId, success = false, msg)) } } @@ -477,7 +479,7 @@ private[deploy] class Master( w.endpoint.send(KillDriver(driverId)) } } - logInfo(s"Kill request for $driverId submitted") + logInfo(log"Kill request for ${MDC(LogKeys.DRIVER_ID, driverId)} submitted") } context.reply(KillAllDriversResponse(self, true, "Kill request for all drivers submitted")) } @@ -485,7 +487,8 @@ private[deploy] class Master( case RequestClearCompletedDriversAndApps => val numDrivers = completedDrivers.length val numApps = completedApps.length - logInfo(s"Asked to clear $numDrivers completed drivers and $numApps completed apps.") + logInfo(log"Asked to clear ${MDC(LogKeys.NUM_DRIVERS, numDrivers)} completed drivers and" + + log" ${MDC(LogKeys.NUM_APPS, numApps)} completed apps.") completedDrivers.clear() completedApps.clear() context.reply(true) @@ -550,7 +553,8 @@ private[deploy] class Master( if (ExecutorState.isFinished(state)) { // Remove this executor from the worker and app - logInfo(s"Removing executor ${exec.fullId} because it is $state") + logInfo(log"Removing executor ${MDC(LogKeys.EXECUTOR_ID, exec.fullId)}" + + log" because it is ${MDC(LogKeys.EXECUTOR_STATE, state)}") // If an application has already finished, preserve its // state to display its information properly on the UI if (!appInfo.isFinished) { @@ -569,23 +573,24 @@ private[deploy] class Master( && maxExecutorRetries >= 0) { // < 0 disables this application-killing path val execs = appInfo.executors.values if (!execs.exists(_.state == ExecutorState.RUNNING)) { - logError(log"Application ${MDC(APP_DESC, appInfo.desc.name)} " + - log"with ID ${MDC(APP_ID, appInfo.id)} " + - log"failed ${MDC(RETRY_COUNT, appInfo.retryCount)} times; removing it") + logError(log"Application ${MDC(LogKeys.APP_DESC, appInfo.desc.name)} " + + log"with ID ${MDC(LogKeys.APP_ID, appInfo.id)} " + + log"failed ${MDC(LogKeys.NUM_RETRY, appInfo.retryCount)} times; removing it") removeApplication(appInfo, ApplicationState.FAILED) } } } schedule() case None => - logWarning(s"Got status update for unknown executor $appId/$execId") + logWarning(log"Got status update for unknown executor ${MDC(LogKeys.APP_ID, appId)}" + + log"/${MDC(LogKeys.EXECUTOR_ID, execId)}") } context.reply(true) } override def onDisconnected(address: RpcAddress): Unit = { // The disconnected client could've been either a worker or an app; remove whichever it was - logInfo(s"$address got disassociated, removing it.") + logInfo(log"${MDC(LogKeys.RPC_ADDRESS, address)} got disassociated, removing it.") addressToWorker.get(address).foreach(removeWorker(_, s"${address} got 
disassociated")) addressToApp.get(address).foreach(finishApplication) if (state == RecoveryState.RECOVERING && canCompleteRecovery) { completeRecovery() } @@ -595,19 +600,20 @@ private[deploy] class Master( workers.count(_.state == WorkerState.UNKNOWN) == 0 && apps.count(_.state == ApplicationState.UNKNOWN) == 0 - private var recoveryStartTimeNs = 0L + private var recoveryStartTimeMs = 0L private def beginRecovery(storedApps: Seq[ApplicationInfo], storedDrivers: Seq[DriverInfo], storedWorkers: Seq[WorkerInfo]): Boolean = { - recoveryStartTimeNs = System.nanoTime() + recoveryStartTimeMs = System.currentTimeMillis() for (app <- storedApps) { - logInfo("Trying to recover app: " + app.id) + logInfo(log"Trying to recover app: ${MDC(LogKeys.APP_ID, app.id)}") try { registerApplication(app) app.state = ApplicationState.UNKNOWN app.driver.send(MasterChanged(self, masterWebUiUrl)) } catch { - case e: Exception => logInfo("App " + app.id + " had exception on reconnect") + case e: Exception => logInfo(log"App ${MDC(LogKeys.APP_ID, app.id)}" + + log" had exception on reconnect") } } @@ -618,13 +624,14 @@ private[deploy] class Master( } for (worker <- storedWorkers) { - logInfo("Trying to recover worker: " + worker.id) + logInfo(log"Trying to recover worker: ${MDC(LogKeys.WORKER_ID, worker.id)}") try { registerWorker(worker) worker.state = WorkerState.UNKNOWN worker.endpoint.send(MasterChanged(self, masterWebUiUrl)) } catch { - case e: Exception => logInfo("Worker " + worker.id + " had exception on reconnect") + case e: Exception => logInfo(log"Worker ${MDC(LogKeys.WORKER_ID, worker.id)}" + + log" had exception on reconnect") } } @@ -652,20 +659,23 @@ private[deploy] class Master( // Reschedule drivers which were not claimed by any workers drivers.filter(_.worker.isEmpty).foreach { d => - logWarning(s"Driver ${d.id} was not found after master recovery") + logWarning(log"Driver ${MDC(LogKeys.DRIVER_ID, d.id)} " + + log"was not found after master recovery") if (d.desc.supervise) { - logWarning(s"Re-launching ${d.id}") + logWarning(log"Re-launching ${MDC(LogKeys.DRIVER_ID, d.id)}") relaunchDriver(d) } else { removeDriver(d.id, DriverState.ERROR, None) - logWarning(s"Did not re-launch ${d.id} because it was not supervised") + logWarning(log"Did not re-launch " + + log"${MDC(LogKeys.DRIVER_ID, d.id)} because it was not supervised") } } state = RecoveryState.ALIVE schedule() - val timeTakenNs = System.nanoTime() - recoveryStartTimeNs - logInfo(f"Recovery complete in ${timeTakenNs / 1000000000d}%.3fs - resuming operations!") + val timeTakenMs = System.currentTimeMillis() - recoveryStartTimeMs + logInfo(log"Recovery complete in ${MDC(LogKeys.TOTAL_TIME, timeTakenMs)} ms" + + log" - resuming operations!") } private[master] def handleRegisterWorker( @@ -678,13 +688,15 @@ private[deploy] class Master( workerWebUiUrl: String, masterAddress: RpcAddress, resources: Map[String, ResourceInformation]): Unit = { - logInfo("Registering worker %s:%d with %d cores, %s RAM".format( - workerHost, workerPort, cores, Utils.megabytesToString(memory))) + logInfo(log"Registering worker" + + log" ${MDC(LogKeys.WORKER_HOST, workerHost)}:${MDC(LogKeys.WORKER_PORT, workerPort)}" + + log" with ${MDC(LogKeys.NUM_CORES, cores)} cores," + + log" ${MDC(LogKeys.MEMORY_SIZE, Utils.megabytesToString(memory))} RAM") if (state == RecoveryState.STANDBY) { workerRef.send(MasterInStandby) } else if (idToWorker.contains(id)) { if (idToWorker(id).state == WorkerState.UNKNOWN) { - logInfo("Worker has been re-registered: " + id) + logInfo(log"Worker 
has been re-registered: ${MDC(LogKeys.WORKER_ID, id)}") idToWorker(id).state = WorkerState.ALIVE } workerRef.send(RegisteredWorker(self, masterWebUiUrl, masterAddress, true)) @@ -699,8 +711,8 @@ private[deploy] class Master( schedule() } else { val workerAddress = worker.endpoint.address - logWarning("Worker registration failed. Attempted to re-register worker at same " + - "address: " + workerAddress) + logWarning(log"Worker registration failed. Attempted to re-register worker at same " + + log"address: ${MDC(LogKeys.WORKER_URL, workerAddress)}") workerRef.send(RegisterWorkerFailed("Attempted to re-register worker at same address: " + workerAddress)) } @@ -822,7 +834,8 @@ private[deploy] class Master( // first. for (app <- waitingApps) { for (rpId <- app.getRequestedRPIds()) { - logInfo(s"Start scheduling for app ${app.id} with rpId: $rpId") + logInfo(log"Start scheduling for app ${MDC(LogKeys.APP_ID, app.id)} with" + + log" rpId: ${MDC(LogKeys.RESOURCE_PROFILE_ID, rpId)}") val resourceDesc = app.getResourceDescriptionForRpId(rpId) val coresPerExecutor = resourceDesc.coresPerExecutor.getOrElse(1) @@ -836,12 +849,13 @@ private[deploy] class Master( case CORES_FREE_DESC => aliveWorkers.sortBy(w => (w.coresFree, w.id)).reverse case MEMORY_FREE_ASC => aliveWorkers.sortBy(w => (w.memoryFree, w.id)) case MEMORY_FREE_DESC => aliveWorkers.sortBy(w => (w.memoryFree, w.id)).reverse - case WORKER_ID => aliveWorkers.sortBy(_.id) + case WorkerSelectionPolicy.WORKER_ID => aliveWorkers.sortBy(_.id) } val appMayHang = waitingApps.length == 1 && waitingApps.head.executors.isEmpty && usableWorkers.isEmpty if (appMayHang) { - logWarning(s"App ${app.id} requires more resource than any of Workers could have.") + logWarning(log"App ${MDC(LogKeys.APP_ID, app.id)} requires more resource " + + log"than any of Workers could have.") } val assignedCores = scheduleExecutorsOnWorkers(app, rpId, resourceDesc, usableWorkers, spreadOutApps) @@ -956,7 +970,8 @@ private[deploy] class Master( curPos = (curPos + 1) % numWorkersAlive } if (!launched && isClusterIdle) { - logWarning(s"Driver ${driver.id} requires more resource than any of Workers could have.") + logWarning(log"Driver ${MDC(LogKeys.DRIVER_ID, driver.id)} " + + log"requires more resource than any of Workers could have.") } } } else { @@ -970,8 +985,8 @@ private[deploy] class Master( launchDriver(worker, driver) waitingDrivers -= driver case _ => - logWarning( - s"Driver ${driver.id} requires more resource than any of Workers could have.") + logWarning(log"Driver ${MDC(LogKeys.DRIVER_ID, driver.id)} " + + log"requires more resource than any of Workers could have.") } } } @@ -980,7 +995,8 @@ private[deploy] class Master( } private def launchExecutor(worker: WorkerInfo, exec: ExecutorDesc): Unit = { - logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) + logInfo(log"Launching executor ${MDC(LogKeys.EXECUTOR_ID, exec.fullId)}" + + log" on worker ${MDC(LogKeys.WORKER_ID, worker.id)}") worker.addExecutor(exec) worker.endpoint.send(LaunchExecutor(masterUrl, exec.application.id, exec.id, exec.rpId, exec.application.desc, exec.cores, exec.memory, exec.resources)) @@ -1005,7 +1021,8 @@ private[deploy] class Master( // The old worker must thus be dead, so we will remove it and accept the new worker. 
removeWorker(oldWorker, "Worker replaced by a new worker with same address") } else { - logInfo("Attempted to re-register worker at same address: " + workerAddress) + logInfo(log"Attempted to re-register worker at same address:" + + log" ${MDC(LogKeys.RPC_ADDRESS, workerAddress)}") return false } } @@ -1032,7 +1049,8 @@ private[deploy] class Master( .values val workersToRemoveHostPorts = workersToRemove.map(_.hostPort) - logInfo(s"Decommissioning the workers with host:ports ${workersToRemoveHostPorts}") + logInfo(log"Decommissioning the workers with host:ports" + + log" ${MDC(LogKeys.HOST_PORT, workersToRemoveHostPorts)}") // The workers are removed async to avoid blocking the receive loop for the entire batch self.send(DecommissionWorkers(workersToRemove.map(_.id).toSeq)) @@ -1043,7 +1061,8 @@ private[deploy] class Master( private def decommissionWorker(worker: WorkerInfo): Unit = { if (worker.state != WorkerState.DECOMMISSIONED) { - logInfo("Decommissioning worker %s on %s:%d".format(worker.id, worker.host, worker.port)) + logInfo(log"Decommissioning worker ${MDC(LogKeys.WORKER_ID, worker.id)}" + + log" on ${MDC(LogKeys.WORKER_HOST, worker.host)}:${MDC(LogKeys.WORKER_PORT, worker.port)}") worker.setState(WorkerState.DECOMMISSIONED) for (exec <- worker.executors.values) { logInfo("Telling app of decommission executors") @@ -1060,19 +1079,21 @@ private[deploy] class Master( // On recovery do not add a decommissioned executor persistenceEngine.removeWorker(worker) } else { - logWarning("Skipping decommissioning worker %s on %s:%d as worker is already decommissioned". - format(worker.id, worker.host, worker.port)) + logWarning(log"Skipping decommissioning worker ${MDC(LogKeys.WORKER_ID, worker.id)} " + + log"on ${MDC(LogKeys.WORKER_HOST, worker.host)}:" + + log"${MDC(LogKeys.WORKER_PORT, worker.port)} as worker is already decommissioned") } } private def removeWorker(worker: WorkerInfo, msg: String): Unit = { - logInfo("Removing worker " + worker.id + " on " + worker.host + ":" + worker.port) + logInfo(log"Removing worker ${MDC(LogKeys.WORKER_ID, worker.id)} on" + + log" ${MDC(LogKeys.WORKER_HOST, worker.host)}:${MDC(LogKeys.WORKER_PORT, worker.port)}") worker.setState(WorkerState.DEAD) idToWorker -= worker.id addressToWorker -= worker.endpoint.address for (exec <- worker.executors.values) { - logInfo("Telling app of lost executor: " + exec.id) + logInfo(log"Telling app of lost executor: ${MDC(LogKeys.EXECUTOR_ID, exec.id)}") exec.application.driver.send(ExecutorUpdated( exec.id, ExecutorState.LOST, Some(s"worker lost: $msg"), None, Some(worker.host))) exec.state = ExecutorState.LOST @@ -1080,14 +1101,15 @@ private[deploy] class Master( } for (driver <- worker.drivers.values) { if (driver.desc.supervise) { - logInfo(s"Re-launching ${driver.id}") + logInfo(log"Re-launching ${MDC(LogKeys.DRIVER_ID, driver.id)}") relaunchDriver(driver) } else { - logInfo(s"Not re-launching ${driver.id} because it was not supervised") + logInfo(log"Not re-launching ${MDC(LogKeys.DRIVER_ID, driver.id)}" + + log" because it was not supervised") removeDriver(driver.id, DriverState.ERROR, None) } } - logInfo(s"Telling app of lost worker: " + worker.id) + logInfo(log"Telling app of lost worker: ${MDC(LogKeys.WORKER_ID, worker.id)}") apps.filterNot(completedApps.contains(_)).foreach { app => app.driver.send(WorkerRemoved(worker.id, worker.host, msg)) } @@ -1127,7 +1149,8 @@ private[deploy] class Master( private[master] def registerApplication(app: ApplicationInfo): Unit = { val appAddress = app.driver.address if 
(addressToApp.contains(appAddress)) { - logInfo("Attempted to re-register application at same address: " + appAddress) + logInfo(log"Attempted to re-register application at same" + + log" address: ${MDC(LogKeys.RPC_ADDRESS, appAddress)}") return } @@ -1145,7 +1168,7 @@ private[deploy] class Master( def removeApplication(app: ApplicationInfo, state: ApplicationState.Value): Unit = { if (apps.contains(app)) { - logInfo("Removing app " + app.id) + logInfo(log"Removing app ${MDC(LogKeys.APP_ID, app.id)}") apps -= app idToApp -= app.id endpointToApp -= app.driver @@ -1192,13 +1215,15 @@ private[deploy] class Master( resourceProfileToTotalExecs: Map[ResourceProfile, Int]): Boolean = { idToApp.get(appId) match { case Some(appInfo) => - logInfo(s"Application $appId requested executors: ${resourceProfileToTotalExecs}.") + logInfo(log"Application ${MDC(LogKeys.APP_ID, appId)} requested executors:" + + log" ${MDC(LogKeys.RESOURCE_PROFILE_TO_TOTAL_EXECS, resourceProfileToTotalExecs)}.") appInfo.requestExecutors(resourceProfileToTotalExecs) schedule() true case None => - logWarning(s"Unknown application $appId requested executors:" + - s" ${resourceProfileToTotalExecs}.") + logWarning(log"Unknown application " + + log"${MDC(LogKeys.APP_ID, appId)} requested executors:" + + log" ${MDC(LogKeys.RESOURCE_PROFILE_TO_TOTAL_EXECS, resourceProfileToTotalExecs)}.") false } } @@ -1215,7 +1240,8 @@ private[deploy] class Master( private def handleKillExecutors(appId: String, executorIds: Seq[Int]): Boolean = { idToApp.get(appId) match { case Some(appInfo) => - logInfo(s"Application $appId requests to kill executors: " + executorIds.mkString(", ")) + logInfo(log"Application ${MDC(LogKeys.APP_ID, appId)} requests to kill" + + log" executors: ${MDC(LogKeys.EXECUTOR_IDS, executorIds.mkString(", "))}") val (known, unknown) = executorIds.partition(appInfo.executors.contains) known.foreach { executorId => val desc = appInfo.executors(executorId) @@ -1223,13 +1249,15 @@ private[deploy] class Master( killExecutor(desc) } if (unknown.nonEmpty) { - logWarning(s"Application $appId attempted to kill non-existent executors: " - + unknown.mkString(", ")) + logWarning(log"Application ${MDC(LogKeys.APP_ID, appId)} attempted to kill " + + log"non-existent executors: " + + log"${MDC(LogKeys.EXECUTOR_IDS, unknown.mkString(", "))}") } schedule() true case None => - logWarning(s"Unregistered application $appId requested us to kill executors!") + logWarning(log"Unregistered application ${MDC(LogKeys.APP_ID, appId)} " + + log"requested us to kill executors!") false } } @@ -1248,7 +1276,8 @@ private[deploy] class Master( } catch { case e: NumberFormatException => // scalastyle:off line.size.limit - logError(log"Encountered executor with a non-integer ID: ${MDC(EXECUTOR_ID, executorId)}. Ignoring") + logError(log"Encountered executor with a non-integer ID: " + + log"${MDC(LogKeys.EXECUTOR_ID, executorId)}. 
Ignoring") // scalastyle:on None } @@ -1283,8 +1312,8 @@ private[deploy] class Master( for (worker <- toRemove) { if (worker.state != WorkerState.DEAD) { val workerTimeoutSecs = TimeUnit.MILLISECONDS.toSeconds(workerTimeoutMs) - logWarning("Removing %s because we got no heartbeat in %d seconds".format( - worker.id, workerTimeoutSecs)) + logWarning(log"Removing ${MDC(LogKeys.WORKER_ID, worker.id)} because we got no heartbeat " + + log"in ${MDC(LogKeys.TIME_UNITS, workerTimeoutMs)} ms") removeWorker(worker, s"Not receiving heartbeat for $workerTimeoutSecs seconds") } else { if (worker.lastHeartbeat < currentTime - ((reaperIterations + 1) * workerTimeoutMs)) { @@ -1308,7 +1337,7 @@ private[deploy] class Master( } private def launchDriver(worker: WorkerInfo, driver: DriverInfo): Unit = { - logInfo("Launching driver " + driver.id + " on worker " + worker.id) + logInfo(log"Launching driver ${MDC(LogKeys.DRIVER_ID, driver.id)} on worker ${MDC(LogKeys.WORKER_ID, worker.id)}") worker.addDriver(driver) driver.worker = Some(worker) worker.endpoint.send(LaunchDriver(driver.id, driver.desc, driver.resources)) @@ -1321,7 +1350,8 @@ private[deploy] class Master( exception: Option[Exception]): Unit = { drivers.find(d => d.id == driverId) match { case Some(driver) => - logInfo(s"Removing driver: $driverId ($finalState)") + logInfo(log"Removing driver: ${MDC(LogKeys.DRIVER_ID, driverId)}" + + log" (${MDC(LogKeys.DRIVER_STATE, finalState)})") drivers -= driver if (completedDrivers.size >= retainedDrivers) { val toRemove = math.max(retainedDrivers / 10, 1) @@ -1334,7 +1364,7 @@ private[deploy] class Master( driver.worker.foreach(w => w.removeDriver(driver)) schedule() case None => - logWarning(s"Asked to remove unknown driver: $driverId") + logWarning(log"Asked to remove unknown driver: ${MDC(LogKeys.DRIVER_ID, driverId)}") } } } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala index 106acc9a79446..964b115865aef 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/RecoveryModeFactory.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.master import org.apache.spark.SparkConf import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.Deploy.{RECOVERY_COMPRESSION_CODEC, RECOVERY_DIRECTORY} import org.apache.spark.io.CompressionCodec import org.apache.spark.serializer.Serializer @@ -57,7 +57,7 @@ private[master] class FileSystemRecoveryModeFactory(conf: SparkConf, serializer: val recoveryDir = conf.get(RECOVERY_DIRECTORY) def createPersistenceEngine(): PersistenceEngine = { - logInfo("Persisting recovery state to directory: " + recoveryDir) + logInfo(log"Persisting recovery state to directory: ${MDC(LogKeys.PATH, recoveryDir)}") val codec = conf.get(RECOVERY_COMPRESSION_CODEC).map(c => CompressionCodec.createCodec(conf, c)) new FileSystemPersistenceEngine(recoveryDir, serializer, codec) } @@ -76,7 +76,8 @@ private[master] class RocksDBRecoveryModeFactory(conf: SparkConf, serializer: Se def createPersistenceEngine(): PersistenceEngine = { val recoveryDir = conf.get(RECOVERY_DIRECTORY) - logInfo("Persisting recovery state to directory: " + recoveryDir) + logInfo(log"Persisting recovery state to directory: " + + log"${MDC(LogKeys.PATH, recoveryDir)}") new 
RocksDBPersistenceEngine(recoveryDir, serializer) } diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/EnvironmentPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/EnvironmentPage.scala new file mode 100644 index 0000000000000..190e821524ba0 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/EnvironmentPage.scala @@ -0,0 +1,141 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.master.ui + +import scala.xml.Node + +import jakarta.servlet.http.HttpServletRequest + +import org.apache.spark.{SparkConf, SparkEnv} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.ui._ +import org.apache.spark.util.Utils + +private[ui] class EnvironmentPage( + parent: MasterWebUI, + conf: SparkConf) extends WebUIPage("Environment") { + + def render(request: HttpServletRequest): Seq[Node] = { + val details = SparkEnv.environmentDetails(conf, SparkHadoopUtil.get.newConfiguration(conf), + "", Seq.empty, Seq.empty, Seq.empty, Map.empty) + val jvmInformation = details("JVM Information").sorted + val sparkProperties = Utils.redact(conf, details("Spark Properties")).sorted + val hadoopProperties = Utils.redact(conf, details("Hadoop Properties")).sorted + val systemProperties = Utils.redact(conf, details("System Properties")).sorted + val metricsProperties = Utils.redact(conf, details("Metrics Properties")).sorted + val classpathEntries = details("Classpath Entries").sorted + + val runtimeInformationTable = UIUtils.listingTable(propertyHeader, propertyRow, + jvmInformation, fixedWidth = true, headerClasses = headerClasses) + val sparkPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow, + sparkProperties, fixedWidth = true, headerClasses = headerClasses) + val hadoopPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow, + hadoopProperties, fixedWidth = true, headerClasses = headerClasses) + val systemPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow, + systemProperties, fixedWidth = true, headerClasses = headerClasses) + val metricsPropertiesTable = UIUtils.listingTable(propertyHeader, propertyRow, + metricsProperties, fixedWidth = true, headerClasses = headerClasses) + val classpathEntriesTable = UIUtils.listingTable(classPathHeader, classPathRow, + classpathEntries, fixedWidth = true, headerClasses = headerClasses) + + val content = + + + +

+      <span>
+        <h4>Runtime Information</h4>
+        {runtimeInformationTable}
+
+        <h4>Spark Properties</h4>
+        {sparkPropertiesTable}
+
+        <h4>Hadoop Properties</h4>
+        {hadoopPropertiesTable}
+
+        <h4>System Properties</h4>
+        {systemPropertiesTable}
+
+        <h4>Metrics Properties</h4>
+        {metricsPropertiesTable}
+
+        <h4>Classpath Entries</h4>
+        {classpathEntriesTable}
+      </span>
+    UIUtils.basicSparkPage(request, content, "Environment")
+  }
+
+  private def propertyHeader = Seq("Name", "Value")
+  private def classPathHeader = Seq("Resource", "Source")
+  private def headerClasses = Seq("sorttable_alpha", "sorttable_alpha")
+  private def headerClassesNoSortValues = Seq("sorttable_numeric", "sorttable_nosort")
+
+  private def jvmRowDataPre(kv: (String, String)) =
+    <tr><td>{kv._1}</td><td><pre>{kv._2}</pre></td></tr>
+  private def propertyRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr>
+  private def classPathRow(data: (String, String)) = <tr><td>{data._1}</td><td>{data._2}</td></tr>
+}
+
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
index d07f299d52ba2..1248b1c368e71 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
@@ -169,8 +169,9 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {
          {state.completedDrivers.count(_.state == DriverState.ERROR)} Error,
          {state.completedDrivers.count(_.state == DriverState.RELAUNCHING)} Relaunching)
-          <li><strong>Status:</strong>
-            {state.status}</li>
+          <li><strong>Status:</strong> {state.status}
+            (<a href="environment/">Environment</a>,
+            <a href="logPage/?logType=out">Log</a>)</li>
  • diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index da3c91956689e..6c7a8f582d915 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -24,7 +24,8 @@ import jakarta.servlet.http.{HttpServlet, HttpServletRequest, HttpServletRespons import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.Utils.addRenderLogHandler import org.apache.spark.deploy.master.Master -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{HOSTS, NUM_REMOVED_WORKERS} import org.apache.spark.internal.config.DECOMMISSION_ENABLED import org.apache.spark.internal.config.UI.MASTER_UI_DECOMMISSION_ALLOW_MODE import org.apache.spark.internal.config.UI.UI_KILL_ENABLED @@ -54,6 +55,11 @@ class MasterWebUI( val masterPage = new MasterPage(this) attachPage(new ApplicationPage(this)) attachPage(new LogPage(this)) + val envPage = new EnvironmentPage(this, master.conf) + attachPage(envPage) + this.attachHandler(createServletHandler("/environment", + (request: HttpServletRequest) => envPage.render(request), + master.conf)) attachPage(masterPage) addStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR) addRenderLogHandler(this, master.conf) @@ -73,7 +79,8 @@ class MasterWebUI( } else { val removedWorkers = masterEndpointRef.askSync[Integer]( DecommissionWorkersOnHosts(hostnames)) - logInfo(s"Decommissioning of hosts $hostnames decommissioned $removedWorkers workers") + logInfo(log"Decommissioning of hosts ${MDC(HOSTS, hostnames)}" + + log" decommissioned ${MDC(NUM_REMOVED_WORKERS, removedWorkers)} workers") if (removedWorkers > 0) { resp.setStatus(HttpServletResponse.SC_OK) } else if (removedWorkers == 0) { diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala index 9107b2f5528c1..4fb95033cecef 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionClient.scala @@ -34,7 +34,7 @@ import jakarta.servlet.http.HttpServletResponse import org.apache.spark.{SPARK_VERSION => sparkVersion, SparkConf, SparkException} import org.apache.spark.deploy.SparkApplication import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CLASS_NAME, ERROR, SUBMISSION_ID} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.Utils /** @@ -79,7 +79,7 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { * it to the user. Otherwise, report the error message provided by the server. */ def createSubmission(request: CreateSubmissionRequest): SubmitRestProtocolResponse = { - logInfo(s"Submitting a request to launch an application in $master.") + logInfo(log"Submitting a request to launch an application in ${MDC(MASTER_URL, master)}.") var handled: Boolean = false var response: SubmitRestProtocolResponse = null for (m <- masters if !handled) { @@ -109,7 +109,9 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { /** Request that the server kill the specified submission. 
*/ def killSubmission(submissionId: String): SubmitRestProtocolResponse = { - logInfo(s"Submitting a request to kill submission $submissionId in $master.") + logInfo(log"Submitting a request to kill submission " + + log"${MDC(SUBMISSION_ID, submissionId)} in " + + log"${MDC(MASTER_URL, master)}.") var handled: Boolean = false var response: SubmitRestProtocolResponse = null for (m <- masters if !handled) { @@ -138,7 +140,7 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { /** Request that the server kill all submissions. */ def killAllSubmissions(): SubmitRestProtocolResponse = { - logInfo(s"Submitting a request to kill all submissions in $master.") + logInfo(log"Submitting a request to kill all submissions in ${MDC(MASTER_URL, master)}.") var handled: Boolean = false var response: SubmitRestProtocolResponse = null for (m <- masters if !handled) { @@ -167,7 +169,7 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { /** Request that the server clears all submissions and applications. */ def clear(): SubmitRestProtocolResponse = { - logInfo(s"Submitting a request to clear $master.") + logInfo(log"Submitting a request to clear ${MDC(MASTER_URL, master)}.") var handled: Boolean = false var response: SubmitRestProtocolResponse = null for (m <- masters if !handled) { @@ -196,7 +198,7 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { /** Check the readiness of Master. */ def readyz(): SubmitRestProtocolResponse = { - logInfo(s"Submitting a request to check the status of $master.") + logInfo(log"Submitting a request to check the status of ${MDC(MASTER_URL, master)}.") var handled: Boolean = false var response: SubmitRestProtocolResponse = new ErrorResponse for (m <- masters if !handled) { @@ -227,7 +229,9 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { def requestSubmissionStatus( submissionId: String, quiet: Boolean = false): SubmitRestProtocolResponse = { - logInfo(s"Submitting a request for the status of submission $submissionId in $master.") + logInfo(log"Submitting a request for the status of submission " + + log"${MDC(SUBMISSION_ID, submissionId)} in " + + log"${MDC(MASTER_URL, master)}.") var handled: Boolean = false var response: SubmitRestProtocolResponse = null @@ -440,7 +444,8 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { if (submitResponse.success) { val submissionId = submitResponse.submissionId if (submissionId != null) { - logInfo(s"Submission successfully created as $submissionId. Polling submission state...") + logInfo(log"Submission successfully created as ${MDC(SUBMISSION_ID, submissionId)}. 
" + + log"Polling submission state...") pollSubmissionStatus(submissionId) } else { // should never happen @@ -470,13 +475,17 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { val exception = Option(statusResponse.message) // Log driver state, if present driverState match { - case Some(state) => logInfo(s"State of driver $submissionId is now $state.") + case Some(state) => + logInfo(log"State of driver ${MDC(SUBMISSION_ID, submissionId)} is now " + + log"${MDC(DRIVER_STATE, state)}.") case _ => logError(log"State of driver ${MDC(SUBMISSION_ID, submissionId)} was not found!") } // Log worker node, if present (workerId, workerHostPort) match { - case (Some(id), Some(hp)) => logInfo(s"Driver is running on worker $id at $hp.") + case (Some(id), Some(hp)) => + logInfo( + log"Driver is running on worker ${MDC(WORKER_ID, id)} at ${MDC(HOST_PORT, hp)}.") case _ => } // Log exception stack trace, if present @@ -490,7 +499,8 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { /** Log the response sent by the server in the REST application submission protocol. */ private def handleRestResponse(response: SubmitRestProtocolResponse): Unit = { - logInfo(s"Server responded with ${response.messageType}:\n${response.toJson}") + logInfo(log"Server responded with ${MDC(CLASS_NAME, response.messageType)}:\n" + + log"${MDC(RESULT, response.toJson)}") } /** Log an appropriate error if the response sent by the server is not of the expected type. */ @@ -509,7 +519,7 @@ private[spark] class RestSubmissionClient(master: String) extends Logging { */ private def handleConnectionException(masterUrl: String): Boolean = { if (!lostMasters.contains(masterUrl)) { - logWarning(s"Unable to connect to server ${masterUrl}.") + logWarning(log"Unable to connect to server ${MDC(MASTER_URL, masterUrl)}.") lostMasters += masterUrl } lostMasters.size >= masters.length diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala index bb91c7e7f4a22..8e534828e7778 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/RestSubmissionServer.scala @@ -28,7 +28,8 @@ import org.json4s._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{SPARK_VERSION => sparkVersion, SparkConf} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.Utils /** @@ -76,7 +77,8 @@ private[spark] abstract class RestSubmissionServer( def start(): Int = { val (server, boundPort) = Utils.startServiceOnPort[Server](requestedPort, doStart, masterConf) _server = Some(server) - logInfo(s"Started REST server for submitting applications on $host with port $boundPort") + logInfo(log"Started REST server for submitting applications on ${MDC(HOST, host)}" + + log" with port ${MDC(PORT, boundPort)}") boundPort } diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala index 1b2e41bc0a2e2..3a262a0d19fb5 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HBaseDelegationTokenProvider.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.security.token.{Token, TokenIdentifier} 
import org.apache.spark.SparkConf import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.SERVICE_NAME +import org.apache.spark.internal.LogKeys._ import org.apache.spark.security.HadoopDelegationTokenProvider import org.apache.spark.util.Utils @@ -50,7 +50,7 @@ private[security] class HBaseDelegationTokenProvider logDebug("Attempting to fetch HBase security token.") val token = obtainToken.invoke(null, hbaseConf(hadoopConf)) .asInstanceOf[Token[_ <: TokenIdentifier]] - logInfo(s"Get token from HBase: ${token.toString}") + logInfo(log"Get token from HBase: ${MDC(TOKEN, token.toString)}") creds.addToken(token.getService, token) } catch { case NonFatal(e) => @@ -94,7 +94,7 @@ private[security] class HBaseDelegationTokenProvider logDebug("Attempting to fetch HBase security token.") val token = obtainTokenMethod.invoke(null, hbaseConnection) .asInstanceOf[Token[_ <: TokenIdentifier]] - logInfo(s"Get token from HBase: ${token.toString}") + logInfo(log"Get token from HBase: ${MDC(TOKEN, token.toString)}") creds.addToken(token.getService, token) } catch { case NonFatal(e) => diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala index 54a24927ded4e..de517acbf8c5b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopDelegationTokenManager.scala @@ -31,7 +31,8 @@ import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config._ import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.UpdateDelegationTokens @@ -182,7 +183,7 @@ private[spark] class HadoopDelegationTokenManager( private def scheduleRenewal(delay: Long): Unit = { val _delay = math.max(0, delay) - logInfo(s"Scheduling renewal in ${UIUtils.formatDuration(_delay)}.") + logInfo(log"Scheduling renewal in ${MDC(LogKeys.TIME_UNITS, UIUtils.formatDuration(_delay))}.") val renewalTask = new Runnable() { override def run(): Unit = { @@ -211,8 +212,9 @@ private[spark] class HadoopDelegationTokenManager( null case e: Exception => val delay = TimeUnit.SECONDS.toMillis(sparkConf.get(CREDENTIALS_RENEWAL_RETRY_WAIT)) - logWarning(s"Failed to update tokens, will try again in ${UIUtils.formatDuration(delay)}!" + - " If this happens too often tasks will fail.", e) + logWarning(log"Failed to update tokens, will try again in " + + log"${MDC(LogKeys.TIME_UNITS, UIUtils.formatDuration(delay))}!" 
+ + log" If this happens too often tasks will fail.", e) scheduleRenewal(delay) null } @@ -234,8 +236,10 @@ private[spark] class HadoopDelegationTokenManager( val now = System.currentTimeMillis val ratio = sparkConf.get(CREDENTIALS_RENEWAL_INTERVAL_RATIO) val delay = (ratio * (nextRenewal - now)).toLong - logInfo(s"Calculated delay on renewal is $delay, based on next renewal $nextRenewal " + - s"and the ratio $ratio, and current time $now") + logInfo(log"Calculated delay on renewal is ${MDC(LogKeys.DELAY, delay)}," + + log" based on next renewal ${MDC(LogKeys.NEXT_RENEWAL_TIME, nextRenewal)}" + + log" and the ratio ${MDC(LogKeys.CREDENTIALS_RENEWAL_INTERVAL_RATIO, ratio)}," + + log" and current time ${MDC(LogKeys.CURRENT_TIME, now)}") scheduleRenewal(delay) creds } @@ -244,13 +248,13 @@ private[spark] class HadoopDelegationTokenManager( private def doLogin(): UserGroupInformation = { if (principal != null) { - logInfo(s"Attempting to login to KDC using principal: $principal") + logInfo(log"Attempting to login to KDC using principal: ${MDC(LogKeys.PRINCIPAL, principal)}") require(new File(keytab).isFile(), s"Cannot find keytab at $keytab.") val ugi = UserGroupInformation.loginUserFromKeytabAndReturnUGI(principal, keytab) logInfo("Successfully logged into KDC.") ugi } else if (!SparkHadoopUtil.get.isProxyUser(UserGroupInformation.getCurrentUser())) { - logInfo(s"Attempting to load user's ticket cache.") + logInfo("Attempting to load user's ticket cache.") val ccache = sparkConf.getenv("KRB5CCNAME") val user = Option(sparkConf.getenv("KRB5PRINCIPAL")).getOrElse( UserGroupInformation.getCurrentUser().getUserName()) @@ -296,7 +300,8 @@ private[spark] object HadoopDelegationTokenManager extends Logging { deprecatedProviderEnabledConfigs.foreach { pattern => val deprecatedKey = pattern.format(serviceName) if (sparkConf.contains(deprecatedKey)) { - logWarning(s"${deprecatedKey} is deprecated. Please use ${key} instead.") + logWarning(log"${MDC(LogKeys.DEPRECATED_KEY, deprecatedKey)} is deprecated. 
" + + log"Please use ${MDC(LogKeys.CONFIG, key)} instead.") } } diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala index 8eb45238b4772..b47f9e5a43afc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala @@ -28,7 +28,8 @@ import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.security.HadoopDelegationTokenProvider import org.apache.spark.util.Utils @@ -78,7 +79,7 @@ private[deploy] class HadoopFSDelegationTokenProvider nextRenewalDate } catch { case NonFatal(e) => - logWarning(s"Failed to get token from service $serviceName", e) + logWarning(log"Failed to get token from service ${MDC(SERVICE_NAME, serviceName)}", e) None } } @@ -116,10 +117,11 @@ private[deploy] class HadoopFSDelegationTokenProvider filesystems.foreach { fs => if (fsToExclude.contains(fs.getUri.getHost)) { // YARN RM skips renewing token with empty renewer - logInfo(s"getting token for: $fs with empty renewer to skip renewal") + logInfo(log"getting token for: ${MDC(FILE_SYSTEM, fs)} with empty renewer to skip renewal") Utils.tryLogNonFatalError { fs.addDelegationTokens("", creds) } } else { - logInfo(s"getting token for: $fs with renewer $renewer") + logInfo(log"getting token for: ${MDC(FILE_SYSTEM, fs)} with" + + log" renewer ${MDC(TOKEN_RENEWER, renewer)}") Utils.tryLogNonFatalError { fs.addDelegationTokens(renewer, creds) } } } @@ -146,7 +148,8 @@ private[deploy] class HadoopFSDelegationTokenProvider val identifier = token.decodeIdentifier().asInstanceOf[AbstractDelegationTokenIdentifier] val tokenKind = token.getKind.toString val interval = newExpiration - getIssueDate(tokenKind, identifier) - logInfo(s"Renewal interval is $interval for token $tokenKind") + logInfo(log"Renewal interval is ${MDC(TOTAL_TIME, interval)} for" + + log" token ${MDC(TOKEN_KIND, tokenKind)}") interval }.toOption } @@ -157,17 +160,20 @@ private[deploy] class HadoopFSDelegationTokenProvider val now = System.currentTimeMillis() val issueDate = identifier.getIssueDate if (issueDate > now) { - logWarning(s"Token $kind has set up issue date later than current time. (provided: " + - s"$issueDate / current timestamp: $now) Please make sure clocks are in sync between " + - "machines. If the issue is not a clock mismatch, consult token implementor to check " + - "whether issue date is valid.") + logWarning(log"Token ${MDC(TOKEN_KIND, kind)} has set up issue date later than " + + log"current time (provided: " + + log"${MDC(ISSUE_DATE, issueDate)} / current timestamp: ${MDC(CURRENT_TIME, now)}). " + + log"Please make sure clocks are in sync between " + + log"machines. If the issue is not a clock mismatch, consult token implementor to check " + + log"whether issue date is valid.") issueDate } else if (issueDate > 0L) { issueDate } else { - logWarning(s"Token $kind has not set up issue date properly. (provided: $issueDate) " + - s"Using current timestamp ($now) as issue date instead. 
Consult token implementor to fix " + - "the behavior.") + logWarning(log"Token ${MDC(TOKEN_KIND, kind)} has not set up issue date properly " + + log"(provided: ${MDC(ISSUE_DATE, issueDate)}). " + + log"Using current timestamp (${MDC(CURRENT_TIME, now)} as issue date instead. " + + log"Consult token implementor to fix the behavior.") now } } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala index d1190ca46c2a8..a3e7276fc83e1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/CommandUtils.scala @@ -24,7 +24,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.{SecurityManager, SSLOptions} import org.apache.spark.deploy.Command -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils @@ -120,7 +120,8 @@ object CommandUtils extends Logging { Utils.copyStream(in, out, true) } catch { case e: IOException => - logInfo("Redirection to " + file + " closed: " + e.getMessage) + logInfo(log"Redirection to ${MDC(LogKeys.FILE_NAME, file)} closed: " + + log"${MDC(LogKeys.ERROR, e.getMessage)}") } } }.start() diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala index e7fca402a8870..bb96ecb38a640 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala @@ -31,7 +31,8 @@ import org.apache.spark.deploy.DeployMessages.DriverStateChanged import org.apache.spark.deploy.StandaloneResourceUtils.prepareResourcesFile import org.apache.spark.deploy.master.DriverState import org.apache.spark.deploy.master.DriverState.DriverState -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.{DRIVER_RESOURCES_FILE, SPARK_DRIVER_PREFIX} import org.apache.spark.internal.config.UI.UI_REVERSE_PROXY import org.apache.spark.internal.config.Worker.WORKER_DRIVER_TERMINATE_TIMEOUT @@ -91,7 +92,7 @@ private[deploy] class DriverRunner( var shutdownHook: AnyRef = null try { shutdownHook = ShutdownHookManager.addShutdownHook { () => - logInfo(s"Worker shutting down, killing driver $driverId") + logInfo(log"Worker shutting down, killing driver ${MDC(DRIVER_ID, driverId)}") kill() } @@ -131,8 +132,8 @@ private[deploy] class DriverRunner( process.foreach { p => val exitCode = Utils.terminateProcess(p, driverTerminateTimeoutMs) if (exitCode.isEmpty) { - logWarning("Failed to terminate driver process: " + p + - ". This process will likely be orphaned.") + logWarning(log"Failed to terminate driver process: ${MDC(PROCESS, p)} " + + log". 
This process will likely be orphaned.") } } } @@ -158,7 +159,8 @@ private[deploy] class DriverRunner( val jarFileName = new URI(driverDesc.jarUrl).getPath.split("/").last val localJarFile = new File(driverDir, jarFileName) if (!localJarFile.exists()) { // May already exist if running multiple workers on one node - logInfo(s"Copying user jar ${driverDesc.jarUrl} to $localJarFile") + logInfo(log"Copying user jar ${MDC(JAR_URL, driverDesc.jarUrl)}" + + log" to ${MDC(FILE_NAME, localJarFile)}") Utils.fetchFile( driverDesc.jarUrl, driverDir, @@ -232,7 +234,7 @@ private[deploy] class DriverRunner( val redactedCommand = Utils.redactCommandLineArgs(conf, command.command) .mkString("\"", "\" \"", "\"") while (keepTrying) { - logInfo("Launch Command: " + redactedCommand) + logInfo(log"Launch Command: ${MDC(COMMAND, redactedCommand)}") synchronized { if (killed) { return exitCode } @@ -249,7 +251,8 @@ private[deploy] class DriverRunner( if (clock.getTimeMillis() - processStart > successfulRunDuration * 1000L) { waitSeconds = 1 } - logInfo(s"Command exited with status $exitCode, re-launching after $waitSeconds s.") + logInfo(log"Command exited with status ${MDC(EXIT_CODE, exitCode)}," + + log" re-launching after ${MDC(TIME_UNITS, waitSeconds)} s.") sleeper.sleep(waitSeconds) waitSeconds = waitSeconds * 2 // exponential back-off } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala index 9176897163846..4f42088903464 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverWrapper.scala @@ -21,7 +21,8 @@ import java.io.File import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys.RPC_ADDRESS import org.apache.spark.rpc.RpcEnv import org.apache.spark.util._ @@ -43,7 +44,7 @@ object DriverWrapper extends Logging { val host: String = Utils.localHostName() val port: Int = sys.props.getOrElse(config.DRIVER_PORT.key, "0").toInt val rpcEnv = RpcEnv.create("Driver", host, port, conf, new SecurityManager(conf)) - logInfo(s"Driver address: ${rpcEnv.address}") + logInfo(log"Driver address: ${MDC(RPC_ADDRESS, rpcEnv.address)}") rpcEnv.setupEndpoint("workerWatcher", new WorkerWatcher(rpcEnv, workerUrl)) val currentLoader = Thread.currentThread.getContextClassLoader diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 5547593a28f5e..8d0fb7a54f72a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -28,7 +28,8 @@ import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged import org.apache.spark.deploy.StandaloneResourceUtils.prepareResourcesFile -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.SPARK_EXECUTOR_PREFIX import org.apache.spark.internal.config.UI._ import org.apache.spark.resource.ResourceInformation @@ -87,7 +88,7 @@ private[deploy] class 
ExecutorRunner( if (state == ExecutorState.LAUNCHING || state == ExecutorState.RUNNING) { state = ExecutorState.FAILED } - killProcess(Some("Worker shutting down")) } + killProcess("Worker shutting down") } } /** @@ -95,7 +96,7 @@ private[deploy] class ExecutorRunner( * * @param message the exception message which caused the executor's death */ - private def killProcess(message: Option[String]): Unit = { + private def killProcess(message: String): Unit = { var exitCode: Option[Int] = None if (process != null) { logInfo("Killing process!") @@ -107,14 +108,14 @@ private[deploy] class ExecutorRunner( } exitCode = Utils.terminateProcess(process, EXECUTOR_TERMINATE_TIMEOUT_MS) if (exitCode.isEmpty) { - logWarning("Failed to terminate process: " + process + - ". This process will likely be orphaned.") + logWarning(log"Failed to terminate process: ${MDC(PROCESS, process)}" + + log". This process will likely be orphaned.") } } try { - worker.send(ExecutorStateChanged(appId, execId, state, message, exitCode)) + worker.send(ExecutorStateChanged(appId, execId, state, Some(message), exitCode)) } catch { - case e: IllegalStateException => logWarning(e.getMessage(), e) + case e: IllegalStateException => logWarning(log"${MDC(ERROR, e.getMessage())}", e) } } @@ -162,7 +163,7 @@ private[deploy] class ExecutorRunner( val command = builder.command() val redactedCommand = Utils.redactCommandLineArgs(conf, command.asScala.toSeq) .mkString("\"", "\" \"", "\"") - logInfo(s"Launch command: $redactedCommand") + logInfo(log"Launch command: ${MDC(COMMAND, redactedCommand)}") builder.directory(executorDir) builder.environment.put("SPARK_EXECUTOR_DIRS", appLocalDirs.mkString(File.pathSeparator)) @@ -203,13 +204,13 @@ private[deploy] class ExecutorRunner( worker.send(ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode))) } catch { case interrupted: InterruptedException => - logInfo("Runner thread for executor " + fullId + " interrupted") + logInfo(log"Runner thread for executor ${MDC(EXECUTOR_ID, fullId)} interrupted") state = ExecutorState.KILLED - killProcess(None) + killProcess(s"Runner thread for executor $fullId interrupted") case e: Exception => logError("Error running executor", e) state = ExecutorState.FAILED - killProcess(Some(e.toString)) + killProcess(s"Error running executor: $e") } } } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 0659c26fd15b6..7ff7974ab59f6 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -38,7 +38,7 @@ import org.apache.spark.deploy.StandaloneResourceUtils._ import org.apache.spark.deploy.master.{DriverState, Master} import org.apache.spark.deploy.worker.ui.WorkerWebUI import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.internal.config.UI._ import org.apache.spark.internal.config.Worker._ @@ -73,9 +73,9 @@ private[deploy] class Worker( // If worker decommissioning is enabled register a handler on the configured signal to shutdown. 
if (conf.get(config.DECOMMISSION_ENABLED)) { val signal = conf.get(config.Worker.WORKER_DECOMMISSION_SIGNAL) - logInfo(s"Registering SIG$signal handler to trigger decommissioning.") - SignalUtils.register(signal, s"Failed to register SIG$signal handler - " + - "disabling worker decommission feature.") { + logInfo(log"Registering SIG${MDC(SIGNAL, signal)} handler to trigger decommissioning.") + SignalUtils.register(signal, log"Failed to register SIG${MDC(SIGNAL, signal)} " + + log"handler - disabling worker decommission feature.") { self.send(WorkerDecommissionSigReceived) true } @@ -105,8 +105,12 @@ private[deploy] class Worker( private val INITIAL_REGISTRATION_RETRIES = conf.get(WORKER_INITIAL_REGISTRATION_RETRIES) private val TOTAL_REGISTRATION_RETRIES = conf.get(WORKER_MAX_REGISTRATION_RETRIES) if (INITIAL_REGISTRATION_RETRIES > TOTAL_REGISTRATION_RETRIES) { - logInfo(s"${WORKER_INITIAL_REGISTRATION_RETRIES.key} ($INITIAL_REGISTRATION_RETRIES) is " + - s"capped by ${WORKER_MAX_REGISTRATION_RETRIES.key} ($TOTAL_REGISTRATION_RETRIES)") + logInfo( + log"${MDC(CONFIG, WORKER_INITIAL_REGISTRATION_RETRIES.key)} " + + log"(${MDC(VALUE, INITIAL_REGISTRATION_RETRIES)}) is capped by " + + log"${MDC(CONFIG2, WORKER_MAX_REGISTRATION_RETRIES.key)} " + + log"(${MDC(MAX_ATTEMPTS, TOTAL_REGISTRATION_RETRIES)})" + ) } private val FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND = 0.500 private val REGISTRATION_RETRY_FUZZ_MULTIPLIER = { @@ -235,10 +239,11 @@ private[deploy] class Worker( override def onStart(): Unit = { assert(!registered) - logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format( - host, port, cores, Utils.megabytesToString(memory))) - logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}") - logInfo("Spark home: " + sparkHome) + logInfo(log"Starting Spark worker ${MDC(HOST, host)}:${MDC(PORT, port)} " + + log"with ${MDC(NUM_CORES, cores)} cores, " + + log"${MDC(MEMORY_SIZE, Utils.megabytesToString(memory))} RAM") + logInfo(log"Running Spark version ${MDC(SPARK_VERSION, org.apache.spark.SPARK_VERSION)}") + logInfo(log"Spark home: ${MDC(PATH, sparkHome)}") createWorkDir() startExternalShuffleService() setupWorkerResources() @@ -299,8 +304,9 @@ private[deploy] class Worker( master = Some(masterRef) connected = true if (reverseProxy) { - logInfo("WorkerWebUI is available at %s/proxy/%s".format( - activeMasterWebUiUrl.stripSuffix("/"), workerId)) + logInfo( + log"WorkerWebUI is available at ${MDC(WEB_URL, activeMasterWebUiUrl.stripSuffix("/"))}" + + log"/proxy/${MDC(WORKER_ID, workerId)}") // if reverseProxyUrl is not set, then we continue to generate relative URLs // starting with "/" throughout the UI and do not use activeMasterWebUiUrl val proxyUrl = conf.get(UI_REVERSE_PROXY_URL.key, "").stripSuffix("/") @@ -317,12 +323,13 @@ private[deploy] class Worker( registerMasterThreadPool.submit(new Runnable { override def run(): Unit = { try { - logInfo("Connecting to master " + masterAddress + "...") + logInfo(log"Connecting to master ${MDC(MASTER_URL, masterAddress)}...") val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME) sendRegisterMessageToMaster(masterEndpoint) } catch { case ie: InterruptedException => // Cancelled - case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e) + case NonFatal(e) => logWarning( + log"Failed to connect to master ${MDC(MASTER_URL, masterAddress)}", e) } } }) @@ -340,7 +347,8 @@ private[deploy] class Worker( if (registered) { cancelLastRegistrationRetry() } else if (connectionAttemptCount <= 
TOTAL_REGISTRATION_RETRIES) { - logInfo(s"Retrying connection to master (attempt # $connectionAttemptCount)") + logInfo(log"Retrying connection to master (attempt # " + + log"${MDC(NUM_ATTEMPT, connectionAttemptCount)})") /** * Re-register with the active master this worker has been communicating with. If there * is none, then it means this worker is still bootstrapping and hasn't established a @@ -374,12 +382,14 @@ private[deploy] class Worker( registerMasterFutures = Array(registerMasterThreadPool.submit(new Runnable { override def run(): Unit = { try { - logInfo("Connecting to master " + masterAddress + "...") + logInfo(log"Connecting to master ${MDC(MASTER_URL, masterAddress)}...") val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME) sendRegisterMessageToMaster(masterEndpoint) } catch { case ie: InterruptedException => // Cancelled - case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e) + case NonFatal(e) => + logWarning(log"Failed to connect to master " + + log"${MDC(MASTER_URL, masterAddress)}", e) } } })) @@ -475,10 +485,11 @@ private[deploy] class Worker( // e.g. Master disconnect(maybe due to network drop) and recover immediately, see // SPARK-23191 for more details. if (duplicate) { - logWarning(s"Duplicate registration at master $preferredMasterAddress") + logWarning(log"Duplicate registration at master " + + log"${MDC(MASTER_URL, preferredMasterAddress)}") } - logInfo(s"Successfully registered with master $preferredMasterAddress") + logInfo(log"Successfully registered with master ${MDC(MASTER_URL, preferredMasterAddress)}") registered = true changeMaster(masterRef, masterWebUiUrl, masterAddress) forwardMessageScheduler.scheduleAtFixedRate( @@ -486,7 +497,8 @@ private[deploy] class Worker( 0, HEARTBEAT_MILLIS, TimeUnit.MILLISECONDS) if (CLEANUP_ENABLED) { logInfo( - s"Worker cleanup enabled; old application directories will be deleted in: $workDir") + log"Worker cleanup enabled; old application directories will be deleted in: " + + log"${MDC(PATH, workDir)}") forwardMessageScheduler.scheduleAtFixedRate( () => Utils.tryLogNonFatalError { self.send(WorkDirCleanup) }, CLEANUP_INTERVAL_MILLIS, CLEANUP_INTERVAL_MILLIS, TimeUnit.MILLISECONDS) @@ -534,7 +546,7 @@ private[deploy] class Worker( dir.isDirectory && !isAppStillRunning && !Utils.doesDirectoryContainAnyNewFiles(dir, APP_DATA_RETENTION_SECONDS) }.foreach { dir => - logInfo(s"Removing directory: ${dir.getPath}") + logInfo(log"Removing directory: ${MDC(PATH, dir.getPath)}") Utils.deleteRecursively(dir) // Remove some registeredExecutors information of DB in external shuffle service when @@ -557,7 +569,8 @@ private[deploy] class Worker( } case MasterChanged(masterRef, masterWebUiUrl) => - logInfo("Master has changed, new master is at " + masterRef.address.toSparkURL) + logInfo(log"Master has changed, new master is at " + + log"${MDC(MASTER_URL, masterRef.address.toSparkURL)}") changeMaster(masterRef, masterWebUiUrl, masterRef.address) val executorResponses = executors.values.map { e => @@ -570,17 +583,20 @@ private[deploy] class Worker( workerId, executorResponses.toList, driverResponses.toSeq)) case ReconnectWorker(masterUrl) => - logInfo(s"Master with url $masterUrl requested this worker to reconnect.") + logInfo( + log"Master with url ${MDC(MASTER_URL, masterUrl)} requested this worker to reconnect.") registerWithMaster() case LaunchExecutor(masterUrl, appId, execId, rpId, appDesc, cores_, memory_, resources_) => if (masterUrl != activeMasterUrl) { - logWarning("Invalid 
Master (" + masterUrl + ") attempted to launch executor.") + logWarning(log"Invalid Master (${MDC(MASTER_URL, masterUrl)}) " + + log"attempted to launch executor.") } else if (decommissioned) { logWarning("Asked to launch an executor while decommissioned. Not launching executor.") } else { try { - logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name)) + logInfo(log"Asked to launch executor ${MDC(APP_ID, appId)}/${MDC(EXECUTOR_ID, execId)}" + + log" for ${MDC(APP_DESC, appDesc.name)}") // Create the executor's working directory val executorDir = new File(workDir, appId + "/" + execId) @@ -600,7 +616,7 @@ private[deploy] class Worker( Some(appDir.getAbsolutePath()) } catch { case e: IOException => - logWarning(s"${e.getMessage}. Ignoring this directory.") + logWarning(log"${MDC(ERROR, e.getMessage)}. Ignoring this directory.") None } }.toImmutableArraySeq @@ -639,8 +655,8 @@ private[deploy] class Worker( } catch { case e: Exception => logError( - log"Failed to launch executor ${MDC(APP_ID, appId)}/${MDC(EXECUTOR_ID, execId)} " + - log"for ${MDC(APP_DESC, appDesc.name)}.", e) + log"Failed to launch executor ${MDC(APP_ID, appId)}/" + + log"${MDC(EXECUTOR_ID, execId)} for ${MDC(APP_DESC, appDesc.name)}.", e) if (executors.contains(appId + "/" + execId)) { executors(appId + "/" + execId).kill() executors -= appId + "/" + execId @@ -655,20 +671,21 @@ private[deploy] class Worker( case KillExecutor(masterUrl, appId, execId) => if (masterUrl != activeMasterUrl) { - logWarning("Invalid Master (" + masterUrl + ") attempted to kill executor " + execId) + logWarning(log"Invalid Master (${MDC(MASTER_URL, masterUrl)}) " + + log"attempted to kill executor ${MDC(EXECUTOR_ID, execId)}") } else { val fullId = appId + "/" + execId executors.get(fullId) match { case Some(executor) => - logInfo("Asked to kill executor " + fullId) + logInfo(log"Asked to kill executor ${MDC(EXECUTOR_ID, fullId)}") executor.kill() case None => - logInfo("Asked to kill unknown executor " + fullId) + logInfo(log"Asked to kill unknown executor ${MDC(EXECUTOR_ID, fullId)}") } } case LaunchDriver(driverId, driverDesc, resources_) => - logInfo(s"Asked to launch driver $driverId") + logInfo(log"Asked to launch driver ${MDC(DRIVER_ID, driverId)}") val driver = new DriverRunner( conf, driverId, @@ -688,7 +705,7 @@ private[deploy] class Worker( addResourcesUsed(resources_) case KillDriver(driverId) => - logInfo(s"Asked to kill driver $driverId") + logInfo(log"Asked to kill driver ${MDC(DRIVER_ID, driverId)}") drivers.get(driverId) match { case Some(runner) => runner.kill() @@ -728,7 +745,7 @@ private[deploy] class Worker( override def onDisconnected(remoteAddress: RpcAddress): Unit = { if (master.exists(_.address == remoteAddress) || masterAddressToConnect.contains(remoteAddress)) { - logInfo(s"$remoteAddress Disassociated !") + logInfo(log"${MDC(REMOTE_ADDRESS, remoteAddress)} Disassociated !") masterDisconnected() } } @@ -746,7 +763,7 @@ private[deploy] class Worker( try { appDirectories.remove(id).foreach { dirList => concurrent.Future { - logInfo(s"Cleaning up local directories for application $id") + logInfo(log"Cleaning up local directories for application ${MDC(APP_ID, id)}") dirList.foreach { dir => Utils.deleteRecursively(new File(dir)) } @@ -771,7 +788,8 @@ private[deploy] class Worker( case Some(masterRef) => masterRef.send(message) case None => logWarning( - s"Dropping $message because the connection to master has not yet been established") + log"Dropping ${MDC(MESSAGE, message)} " + + log"because 
the connection to master has not yet been established") } } @@ -821,7 +839,8 @@ private[deploy] class Worker( case None => logWarning( - s"Dropping $newState because the connection to master has not yet been established") + log"Dropping ${MDC(NEW_STATE, newState)} " + + log"because the connection to master has not yet been established") } } @@ -865,11 +884,11 @@ private[deploy] class Worker( private[deploy] def decommissionSelf(): Unit = { if (conf.get(config.DECOMMISSION_ENABLED) && !decommissioned) { decommissioned = true - logInfo(s"Decommission worker $workerId.") + logInfo(log"Decommission worker ${MDC(WORKER_ID, workerId)}.") } else if (decommissioned) { - logWarning(s"Worker $workerId already started decommissioning.") + logWarning(log"Worker ${MDC(WORKER_ID, workerId)} already started decommissioning.") } else { - logWarning(s"Receive decommission request, but decommission feature is disabled.") + logWarning("Receive decommission request, but decommission feature is disabled.") } } @@ -879,18 +898,20 @@ private[deploy] class Worker( val state = driverStateChanged.state state match { case DriverState.ERROR => - logWarning(s"Driver $driverId failed with unrecoverable exception: ${exception.get}") + logWarning(log"Driver ${MDC(DRIVER_ID, driverId)} " + + log"failed with unrecoverable exception: ${MDC(ERROR, exception.get)}") case DriverState.FAILED => - logWarning(s"Driver $driverId exited with failure") + logWarning(log"Driver ${MDC(DRIVER_ID, driverId)} exited with failure") case DriverState.FINISHED => registrationRetryTimer match { case Some(_) => - logWarning(s"Driver $driverId exited successfully while master is disconnected.") + logWarning(log"Driver ${MDC(DRIVER_ID, driverId)} " + + log"exited successfully while master is disconnected.") case _ => - logInfo(s"Driver $driverId exited successfully") + logInfo(log"Driver ${MDC(DRIVER_ID, driverId)} exited successfully") } case DriverState.KILLED => - logInfo(s"Driver $driverId was killed by user") + logInfo(log"Driver ${MDC(DRIVER_ID, driverId)} was killed by user") case _ => logDebug(s"Driver $driverId changed state to $state") } @@ -910,13 +931,22 @@ private[deploy] class Worker( if (ExecutorState.isFinished(state)) { val appId = executorStateChanged.appId val fullId = appId + "/" + executorStateChanged.execId - val message = executorStateChanged.message - val exitStatus = executorStateChanged.exitStatus + val message = executorStateChanged.message match { + case Some(msg) => + log" message ${MDC(MESSAGE, msg)}" + case None => + log"" + } + val exitStatus = executorStateChanged.exitStatus match { + case Some(status) => + log" exitStatus ${MDC(EXIT_CODE, status)}" + case None => + log"" + } executors.get(fullId) match { case Some(executor) => - logInfo("Executor " + fullId + " finished with state " + state + - message.map(" message " + _).getOrElse("") + - exitStatus.map(" exitStatus " + _).getOrElse("")) + logInfo(log"Executor ${MDC(EXECUTOR_ID, fullId)} finished with state " + + log"${MDC(EXECUTOR_STATE, state)}" + message + exitStatus) executors -= fullId finishedExecutors(fullId) = executor trimFinishedExecutorsIfNecessary() @@ -928,9 +958,8 @@ private[deploy] class Worker( shuffleService.executorRemoved(executorStateChanged.execId.toString, appId) } case None => - logInfo("Unknown Executor " + fullId + " finished with state " + state + - message.map(" message " + _).getOrElse("") + - exitStatus.map(" exitStatus " + _).getOrElse("")) + logInfo(log"Unknown Executor ${MDC(EXECUTOR_ID, fullId)} finished with state " + + 
log"${MDC(EXECUTOR_STATE, state)}" + message + exitStatus) } maybeCleanupApplication(appId) } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala index 2488994112742..bd07a0ade523d 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerWatcher.scala @@ -19,8 +19,8 @@ package org.apache.spark.deploy.worker import java.util.concurrent.atomic.AtomicBoolean -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.WORKER_URL +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.WORKER_URL import org.apache.spark.rpc._ /** @@ -35,7 +35,7 @@ private[spark] class WorkerWatcher( isChildProcessStopping: AtomicBoolean = new AtomicBoolean(false)) extends RpcEndpoint with Logging { - logInfo(s"Connecting to worker $workerUrl") + logInfo(log"Connecting to worker ${MDC(WORKER_URL, workerUrl)}") if (!isTesting) { rpcEnv.asyncSetupEndpointRefByURI(workerUrl) } @@ -64,12 +64,12 @@ private[spark] class WorkerWatcher( } override def receive: PartialFunction[Any, Unit] = { - case e => logWarning(s"Received unexpected message: $e") + case e => logWarning(log"Received unexpected message: ${MDC(LogKeys.ERROR, e)}") } override def onConnected(remoteAddress: RpcAddress): Unit = { if (isWorker(remoteAddress)) { - logInfo(s"Successfully connected to $workerUrl") + logInfo(log"Successfully connected to ${MDC(WORKER_URL, workerUrl)}") } } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala index 006a388e98b5b..defce5acc6168 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/LogPage.scala @@ -24,7 +24,7 @@ import scala.xml.{Node, Unparsed} import jakarta.servlet.http.HttpServletRequest import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{LOG_TYPE, PATH} +import org.apache.spark.internal.LogKeys.{LOG_TYPE, PATH} import org.apache.spark.ui.{UIUtils, WebUIPage} import org.apache.spark.util.Utils import org.apache.spark.util.logging.RollingFileAppender diff --git a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala index a131f8233b0df..8a790291b4e72 100644 --- a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala +++ b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala @@ -501,6 +501,15 @@ private[spark] object SparkCoreErrors { "configVal" -> toConfVal(FALLBACK_COMPRESSION_CODEC))) } + def tooManyArrayElementsError(numElements: Long, maxRoundedArrayLength: Int): Throwable = { + new SparkIllegalArgumentException( + errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.INITIALIZE", + messageParameters = Map( + "numberOfElements" -> numElements.toString, + "maxRoundedArrayLength" -> maxRoundedArrayLength.toString) + ) + } + private def quoteByDefault(elem: String): String = { "\"" + elem + "\"" } diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 8488333ec3ceb..1b1053a7013e0 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ 
b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -32,7 +32,7 @@ import org.apache.spark.TaskState.TaskState import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.worker.WorkerWatcher import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CONFIG, CONFIG2, REASON} +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config._ import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.util.NettyUtils @@ -74,12 +74,14 @@ private[spark] class CoarseGrainedExecutorBackend( override def onStart(): Unit = { if (env.conf.get(DECOMMISSION_ENABLED)) { val signal = env.conf.get(EXECUTOR_DECOMMISSION_SIGNAL) - logInfo(s"Registering SIG$signal handler to trigger decommissioning.") - SignalUtils.register(signal, s"Failed to register SIG$signal handler - disabling" + - s" executor decommission feature.") (self.askSync[Boolean](ExecutorDecommissionSigReceived)) + logInfo(log"Registering SIG${MDC(LogKeys.SIGNAL, signal)}" + + log" handler to trigger decommissioning.") + SignalUtils.register(signal, log"Failed to register SIG${MDC(LogKeys.SIGNAL, signal)} " + + log"handler - disabling executor decommission feature.")( + self.askSync[Boolean](ExecutorDecommissionSigReceived)) } - logInfo("Connecting to driver: " + driverUrl) + logInfo(log"Connecting to driver: ${MDC(LogKeys.URL, driverUrl)}" ) try { val securityManager = new SecurityManager(env.conf) val shuffleClientTransportConf = SparkTransportConf.fromSparkConf( @@ -181,7 +183,7 @@ private[spark] class CoarseGrainedExecutorBackend( exitExecutor(1, "Received LaunchTask command but executor was null") } else { val taskDesc = TaskDescription.decode(data.value) - logInfo("Got assigned task " + taskDesc.taskId) + logInfo(log"Got assigned task ${MDC(LogKeys.TASK_ID, taskDesc.taskId)}") executor.launchTask(this, taskDesc) } @@ -218,7 +220,7 @@ private[spark] class CoarseGrainedExecutorBackend( }.start() case UpdateDelegationTokens(tokenBytes) => - logInfo(s"Received tokens of ${tokenBytes.length} bytes") + logInfo(log"Received tokens of ${MDC(LogKeys.NUM_BYTES, tokenBytes.length)} bytes") SparkHadoopUtil.get.addDelegationTokens(tokenBytes, env.conf) case DecommissionExecutor => @@ -251,12 +253,14 @@ private[spark] class CoarseGrainedExecutorBackend( override def onDisconnected(remoteAddress: RpcAddress): Unit = { if (stopping.get()) { - logInfo(s"Driver from $remoteAddress disconnected during shutdown") + logInfo(log"Driver from ${MDC(LogKeys.RPC_ADDRESS, remoteAddress)}" + + log" disconnected during shutdown") } else if (driver.exists(_.address == remoteAddress)) { exitExecutor(1, s"Driver $remoteAddress disassociated! 
Shutting down.", null, notifyDriver = false) } else { - logWarning(s"An unknown ($remoteAddress) driver disconnected.") + logWarning(log"An unknown (${MDC(LogKeys.REMOTE_ADDRESS, remoteAddress)} " + + log"driver disconnected.") } } @@ -269,7 +273,8 @@ private[spark] class CoarseGrainedExecutorBackend( } driver match { case Some(driverRef) => driverRef.send(msg) - case None => logWarning(s"Drop $msg because has not yet connected to driver") + case None => + logWarning(log"Drop ${MDC(LogKeys.MESSAGE, msg)} because has not yet connected to driver") } } @@ -283,7 +288,7 @@ private[spark] class CoarseGrainedExecutorBackend( throwable: Throwable = null, notifyDriver: Boolean = true) = { if (stopping.compareAndSet(false, true)) { - val message = log"Executor self-exiting due to : ${MDC(REASON, reason)}" + val message = log"Executor self-exiting due to : ${MDC(LogKeys.REASON, reason)}" if (throwable != null) { logError(message, throwable) } else { @@ -305,14 +310,14 @@ private[spark] class CoarseGrainedExecutorBackend( private def decommissionSelf(): Unit = { if (!env.conf.get(DECOMMISSION_ENABLED)) { - logWarning(s"Receive decommission request, but decommission feature is disabled.") + logWarning("Receive decommission request, but decommission feature is disabled.") return } else if (decommissioned) { - logWarning(s"Executor $executorId already started decommissioning.") + logWarning(log"Executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} " + + log"already started decommissioning.") return } - val msg = s"Decommission executor $executorId." - logInfo(msg) + logInfo(log"Decommission executor ${MDC(LogKeys.EXECUTOR_ID, executorId)}.") try { decommissioned = true val migrationEnabled = env.conf.get(STORAGE_DECOMMISSION_ENABLED) && @@ -322,8 +327,8 @@ private[spark] class CoarseGrainedExecutorBackend( env.blockManager.decommissionBlockManager() } else if (env.conf.get(STORAGE_DECOMMISSION_ENABLED)) { logError(log"Storage decommissioning attempted but neither " + - log"${MDC(CONFIG, STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key)} or " + - log"${MDC(CONFIG2, STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED.key)} is enabled ") + log"${MDC(LogKeys.CONFIG, STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED.key)} or " + + log"${MDC(LogKeys.CONFIG2, STORAGE_DECOMMISSION_RDD_BLOCKS_ENABLED.key)} is enabled ") } if (executor != null) { executor.decommission() @@ -365,7 +370,8 @@ private[spark] class CoarseGrainedExecutorBackend( exitExecutor(0, ExecutorLossMessage.decommissionFinished, notifyDriver = true) } } else { - logInfo(s"Blocked from shutdown by ${executor.numRunningTasks} running tasks") + logInfo(log"Blocked from shutdown by" + + log" ${MDC(LogKeys.NUM_TASKS, executor.numRunningTasks)} running tasks") } Thread.sleep(sleep_time) } diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index a7657cd78cd9b..586a8a7db28a3 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -40,9 +40,10 @@ import org.slf4j.MDC import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.{Logging, MDC => LogMDC} -import org.apache.spark.internal.LogKey.{CLASS_NAME, ERROR, MAX_ATTEMPTS, TASK_ID, TASK_NAME, TIMEOUT} +import org.apache.spark.internal.{Logging, LogKeys, MDC => LogMDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ +import 
org.apache.spark.internal.config.{EXECUTOR_USER_CLASS_PATH_FIRST => EXECUTOR_USER_CLASS_PATH_FIRST_CONFIG} import org.apache.spark.internal.plugin.PluginContainer import org.apache.spark.memory.{SparkOutOfMemoryError, TaskMemoryManager} import org.apache.spark.metrics.source.JVMCPUSource @@ -81,10 +82,12 @@ private[spark] class Executor( resources: immutable.Map[String, ResourceInformation]) extends Logging { - logInfo(s"Starting executor ID $executorId on host $executorHostname") - logInfo(s"OS info ${System.getProperty("os.name")}, ${System.getProperty("os.version")}, " + - s"${System.getProperty("os.arch")}") - logInfo(s"Java version ${System.getProperty("java.version")}") + logInfo(log"Starting executor ID ${LogMDC(LogKeys.EXECUTOR_ID, executorId)}" + + log" on host ${LogMDC(HOST, executorHostname)}") + logInfo(log"OS info ${LogMDC(OS_NAME, System.getProperty("os.name"))}," + + log" ${LogMDC(OS_VERSION, System.getProperty("os.version"))}, " + + log"${LogMDC(OS_ARCH, System.getProperty("os.arch"))}") + logInfo(log"Java version ${LogMDC(JAVA_VERSION, System.getProperty("java.version"))}") private val executorShutdown = new AtomicBoolean(false) val stopHookReference = ShutdownHookManager.addShutdownHook( @@ -95,6 +98,13 @@ private[spark] class Executor( private[executor] val conf = env.conf + // SPARK-48131: Unify MDC key mdc.taskName and task_name in Spark 4.0 release. + private[executor] val taskNameMDCKey = if (conf.get(LEGACY_TASK_NAME_MDC_ENABLED)) { + "mdc.taskName" + } else { + LogKeys.TASK_NAME.name + } + // SPARK-40235: updateDependencies() uses a ReentrantLock instead of the `synchronized` keyword // so that tasks can exit quickly if they are interrupted while waiting on another task to // finish downloading dependencies. @@ -162,7 +172,7 @@ private[spark] class Executor( } // Whether to load classes in user jars before those in Spark jars - private val userClassPathFirst = conf.get(EXECUTOR_USER_CLASS_PATH_FIRST) + private val userClassPathFirst = conf.get(EXECUTOR_USER_CLASS_PATH_FIRST_CONFIG) // Whether to monitor killed / interrupted tasks private val taskReaperEnabled = conf.get(TASK_REAPER_ENABLED) @@ -212,7 +222,7 @@ private[spark] class Executor( if (sessionBasedRoot.isDirectory && sessionBasedRoot.exists()) { Utils.deleteRecursively(sessionBasedRoot) } - logInfo(s"Session evicted: ${state.sessionUUID}") + logInfo(log"Session evicted: ${LogMDC(SESSION_ID, state.sessionUUID)}") } }) .build[String, IsolatedSessionState] @@ -494,7 +504,8 @@ private[spark] class Executor( @volatile var task: Task[Any] = _ def kill(interruptThread: Boolean, reason: String): Unit = { - logInfo(s"Executor is trying to kill $taskName, reason: $reason") + logInfo(log"Executor is trying to kill ${LogMDC(TASK_NAME, taskName)}," + + log" reason: ${LogMDC(REASON, reason)}") reasonIfKilled = Some(reason) if (task != null) { synchronized { @@ -565,7 +576,7 @@ private[spark] class Executor( } else 0L Thread.currentThread.setContextClassLoader(isolatedSession.replClassLoader) val ser = env.closureSerializer.newInstance() - logInfo(s"Running $taskName") + logInfo(log"Running ${LogMDC(TASK_NAME, taskName)}") execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER) var taskStartTimeNs: Long = 0 var taskStartCpu: Long = 0 @@ -638,9 +649,10 @@ private[spark] class Executor( val freedMemory = taskMemoryManager.cleanUpAllAllocatedMemory() if (freedMemory > 0 && !threwException) { - val errMsg = s"Managed memory leak detected; size = $freedMemory bytes, $taskName" + val errMsg = log"Managed memory 
leak detected; size = " + + log"${LogMDC(NUM_BYTES, freedMemory)} bytes, ${LogMDC(TASK_NAME, taskName)}" if (conf.get(UNSAFE_EXCEPTION_ON_MEMORY_LEAK)) { - throw SparkException.internalError(errMsg, category = "EXECUTOR") + throw SparkException.internalError(errMsg.message, category = "EXECUTOR") } else { logWarning(errMsg) } @@ -648,10 +660,11 @@ private[spark] class Executor( if (releasedLocks.nonEmpty && !threwException) { val errMsg = - s"${releasedLocks.size} block locks were not released by $taskName\n" + - releasedLocks.mkString("[", ", ", "]") + log"${LogMDC(NUM_RELEASED_LOCKS, releasedLocks.size)} block locks" + + log" were not released by ${LogMDC(TASK_NAME, taskName)}\n" + + log" ${LogMDC(RELEASED_LOCKS, releasedLocks.mkString("[", ", ", "]"))})" if (conf.get(STORAGE_EXCEPTION_PIN_LEAK)) { - throw SparkException.internalError(errMsg, category = "EXECUTOR") + throw SparkException.internalError(errMsg.message, category = "EXECUTOR") } else { logInfo(errMsg) } @@ -727,9 +740,11 @@ private[spark] class Executor( // directSend = sending directly back to the driver val serializedResult: ByteBuffer = { if (maxResultSize > 0 && resultSize > maxResultSize) { - logWarning(s"Finished $taskName. Result is larger than maxResultSize " + - s"(${Utils.bytesToString(resultSize)} > ${Utils.bytesToString(maxResultSize)}), " + - s"dropping it.") + logWarning(log"Finished ${LogMDC(TASK_NAME, taskName)}. " + + log"Result is larger than maxResultSize " + + log"(${LogMDC(RESULT_SIZE_BYTES, Utils.bytesToString(resultSize))} > " + + log"${LogMDC(RESULT_SIZE_BYTES_MAX, Utils.bytesToString(maxResultSize))}), " + + log"dropping it.") ser.serialize(new IndirectTaskResult[Any](TaskResultBlockId(taskId), resultSize)) } else if (resultSize > maxDirectResultSize) { val blockId = TaskResultBlockId(taskId) @@ -737,10 +752,12 @@ private[spark] class Executor( blockId, serializedDirectResult, StorageLevel.MEMORY_AND_DISK_SER) - logInfo(s"Finished $taskName. $resultSize bytes result sent via BlockManager)") + logInfo(log"Finished ${LogMDC(TASK_NAME, taskName)}." + + log" ${LogMDC(NUM_BYTES, resultSize)} bytes result sent via BlockManager)") ser.serialize(new IndirectTaskResult[Any](blockId, resultSize)) } else { - logInfo(s"Finished $taskName. $resultSize bytes result sent to driver") + logInfo(log"Finished ${LogMDC(TASK_NAME, taskName)}." 
+ + log" ${LogMDC(NUM_BYTES, resultSize)} bytes result sent to driver") // toByteBuffer is safe here, guarded by maxDirectResultSize serializedDirectResult.toByteBuffer } @@ -752,7 +769,8 @@ private[spark] class Executor( execBackend.statusUpdate(taskId, TaskState.FINISHED, serializedResult) } catch { case t: TaskKilledException => - logInfo(s"Executor killed $taskName, reason: ${t.reason}") + logInfo(log"Executor killed ${LogMDC(TASK_NAME, taskName)}," + + log" reason: ${LogMDC(REASON, t.reason)}") val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs) // Here and below, put task metric peaks in an immutable.ArraySeq to expose them as an @@ -765,7 +783,8 @@ private[spark] class Executor( case _: InterruptedException | NonFatal(_) if task != null && task.reasonIfKilled.isDefined => val killReason = task.reasonIfKilled.getOrElse("unknown reason") - logInfo(s"Executor interrupted and killed $taskName, reason: $killReason") + logInfo(log"Executor interrupted and killed ${LogMDC(TASK_NAME, taskName)}," + + log" reason: ${LogMDC(REASON, killReason)}") val (accums, accUpdates) = collectAccumulatorsAndResetStatusOnFailure(taskStartTimeNs) val metricPeaks = metricsPoller.getTaskMetricPeaks(taskId).toImmutableArraySeq @@ -778,11 +797,12 @@ private[spark] class Executor( if (!t.isInstanceOf[FetchFailedException]) { // there was a fetch failure in the task, but some user code wrapped that exception // and threw something else. Regardless, we treat it as a fetch failure. - val fetchFailedCls = classOf[FetchFailedException].getName - logWarning(s"$taskName encountered a ${fetchFailedCls} and " + - s"failed, but the ${fetchFailedCls} was hidden by another " + - s"exception. Spark is handling this like a fetch failure and ignoring the " + - s"other exception: $t") + logWarning(log"${LogMDC(TASK_NAME, taskName)} encountered a " + + log"${LogMDC(CLASS_NAME, classOf[FetchFailedException].getName)} " + + log"and failed, but the " + + log"${LogMDC(CLASS_NAME, classOf[FetchFailedException].getName)} " + + log"was hidden by another exception. 
Spark is handling this like a fetch failure " + + log"and ignoring the other exception: ${LogMDC(ERROR, t)}") } setTaskFinishedAndClearInterruptStatus() plugins.foreach(_.onTaskFailed(reason)) @@ -910,7 +930,7 @@ private[spark] class Executor( try { mdc.foreach { case (key, value) => MDC.put(key, value) } // avoid overriding the takName by the user - MDC.put("mdc.taskName", taskName) + MDC.put(taskNameMDCKey, taskName) } catch { case _: NoSuchFieldError => logInfo("MDC is not supported.") } @@ -919,7 +939,7 @@ private[spark] class Executor( private def cleanMDCForTask(taskName: String, mdc: Seq[(String, String)]): Unit = { try { mdc.foreach { case (key, _) => MDC.remove(key) } - MDC.remove("mdc.taskName") + MDC.remove(taskNameMDCKey) } catch { case _: NoSuchFieldError => logInfo("MDC is not supported.") } @@ -994,12 +1014,14 @@ private[spark] class Executor( finished = true } else { val elapsedTimeMs = TimeUnit.NANOSECONDS.toMillis(elapsedTimeNs) - logWarning(s"Killed task $taskId is still running after $elapsedTimeMs ms") + logWarning(log"Killed task ${LogMDC(TASK_ID, taskId)} " + + log"is still running after ${LogMDC(TIME_UNITS, elapsedTimeMs)} ms") if (takeThreadDump) { try { taskRunner.theadDump().foreach { thread => if (thread.threadName == taskRunner.threadName) { - logWarning(s"Thread dump from task $taskId:\n${thread.toString}") + logWarning(log"Thread dump from task ${LogMDC(TASK_ID, taskId)}:\n" + + log"${LogMDC(THREAD, thread.toString)}") } } } catch { @@ -1019,9 +1041,8 @@ private[spark] class Executor( } else { // In non-local-mode, the exception thrown here will bubble up to the uncaught exception // handler and cause the executor JVM to exit. - throw SparkException.internalError( - s"Killing executor JVM because killed task $taskId could not be stopped within " + - s"$killTimeoutMs ms.", category = "EXECUTOR") + throw new KilledByTaskReaperException(s"Killing executor JVM because killed task " + + s"$taskId could not be stopped within $killTimeoutMs ms.") } } } finally { @@ -1065,8 +1086,10 @@ private[spark] class Executor( private def createClassLoader(urls: Array[URL], useStub: Boolean): MutableURLClassLoader = { logInfo( - s"Starting executor with user classpath (userClassPathFirst = $userClassPathFirst): " + - urls.mkString("'", ",", "'") + log"Starting executor with user classpath" + + log" (userClassPathFirst =" + + log" ${LogMDC(LogKeys.EXECUTOR_USER_CLASS_PATH_FIRST, userClassPathFirst)}): " + + log"${LogMDC(URLS, urls.mkString("'", ",", "'"))}" ) if (useStub) { @@ -1110,12 +1133,13 @@ private[spark] class Executor( sessionUUID: String): ClassLoader = { val classUri = sessionClassUri.getOrElse(conf.get("spark.repl.class.uri", null)) val classLoader = if (classUri != null) { - logInfo("Using REPL class URI: " + classUri) + logInfo(log"Using REPL class URI: ${LogMDC(LogKeys.URI, classUri)}") new ExecutorClassLoader(conf, env, classUri, parent, userClassPathFirst) } else { parent } - logInfo(s"Created or updated repl class loader $classLoader for $sessionUUID.") + logInfo(log"Created or updated repl class loader ${LogMDC(CLASS_LOADER, classLoader)}" + + log" for ${LogMDC(SESSION_ID, sessionUUID)}.") classLoader } @@ -1150,14 +1174,16 @@ private[spark] class Executor( // Fetch missing dependencies for ((name, timestamp) <- newFiles if state.currentFiles.getOrElse(name, -1L) < timestamp) { - logInfo(s"Fetching $name with timestamp $timestamp") + logInfo(log"Fetching ${LogMDC(FILE_NAME, name)} with" + + log" timestamp ${LogMDC(TIMESTAMP, timestamp)}") // Fetch file with 
useCache mode, close cache for local mode. Utils.fetchFile(name, root, conf, hadoopConf, timestamp, useCache = !isLocal) state.currentFiles(name) = timestamp } for ((name, timestamp) <- newArchives if state.currentArchives.getOrElse(name, -1L) < timestamp) { - logInfo(s"Fetching $name with timestamp $timestamp") + logInfo(log"Fetching ${LogMDC(ARCHIVE_NAME, name)} with" + + log" timestamp ${LogMDC(TIMESTAMP, timestamp)}") val sourceURI = new URI(name) val uriToDownload = Utils.getUriBuilder(sourceURI).fragment(null).build() val source = Utils.fetchFile(uriToDownload.toString, Utils.createTempDir(), conf, @@ -1166,7 +1192,9 @@ private[spark] class Executor( root, if (sourceURI.getFragment != null) sourceURI.getFragment else source.getName) logInfo( - s"Unpacking an archive $name from ${source.getAbsolutePath} to ${dest.getAbsolutePath}") + log"Unpacking an archive ${LogMDC(ARCHIVE_NAME, name)}" + + log" from ${LogMDC(SOURCE_PATH, source.getAbsolutePath)}" + + log" to ${LogMDC(DESTINATION_PATH, dest.getAbsolutePath)}") Utils.deleteRecursively(dest) Utils.unpack(source, dest) state.currentArchives(name) = timestamp @@ -1177,7 +1205,8 @@ private[spark] class Executor( .orElse(state.currentJars.get(localName)) .getOrElse(-1L) if (currentTimeStamp < timestamp) { - logInfo(s"Fetching $name with timestamp $timestamp") + logInfo(log"Fetching ${LogMDC(JAR_URL, name)} with" + + log" timestamp ${LogMDC(TIMESTAMP, timestamp)}") // Fetch file with useCache mode, close cache for local mode. Utils.fetchFile(name, root, conf, hadoopConf, timestamp, useCache = !isLocal) @@ -1185,7 +1214,8 @@ private[spark] class Executor( // Add it to our class loader val url = new File(root, localName).toURI.toURL if (!state.urlClassLoader.getURLs().contains(url)) { - logInfo(s"Adding $url to class loader ${state.sessionUUID}") + logInfo(log"Adding ${LogMDC(LogKeys.URL, url)} to" + + log" class loader ${LogMDC(UUID, state.sessionUUID)}") state.urlClassLoader.addURL(url) if (isStubbingEnabledForState(state.sessionUUID)) { renewClassLoader = true @@ -1259,7 +1289,7 @@ private[spark] class Executor( if (runner != null) { runner.theadDump() } else { - logWarning(s"Failed to dump thread for task $taskId") + logWarning(log"Failed to dump thread for task ${LogMDC(TASK_ID, taskId)}") None } } @@ -1297,3 +1327,5 @@ private[spark] object Executor { } } } + +class KilledByTaskReaperException(message: String) extends SparkException(message) diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorClassLoader.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorClassLoader.scala index c7047ddd278b2..48d7f150ad9bd 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorClassLoader.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorClassLoader.scala @@ -30,7 +30,7 @@ import org.apache.xbean.asm9.Opcodes._ import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.{Logging, LogKey, MDC} +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.util.ParentClassLoader /** @@ -183,8 +183,8 @@ class ExecutorClassLoader( None case e: Exception => // Something bad happened while checking if the class exists - logError(log"Failed to check existence of class ${MDC(LogKey.CLASS_NAME, name)} " + - log"on REPL class server at ${MDC(LogKey.URI, uri)}", e) + logError(log"Failed to check existence of class ${MDC(LogKeys.CLASS_NAME, name)} " + + log"on REPL class server at ${MDC(LogKeys.URI, uri)}", e) if 
(userClassPathFirst) { // Allow to try to load from "parentLoader" None diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala index 99858f785600d..5300598ef53eb 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala @@ -45,6 +45,10 @@ object ExecutorExitCode { */ val HEARTBEAT_FAILURE = 56 + /** The default uncaught exception handler was reached and the exception was thrown by + * TaskReaper. */ + val KILLED_BY_TASK_REAPER = 57 + def explainExitCode(exitCode: Int): String = { exitCode match { case UNCAUGHT_EXCEPTION => "Uncaught exception" @@ -59,6 +63,8 @@ object ExecutorExitCode { "ExternalBlockStore failed to create a local temporary directory." case HEARTBEAT_FAILURE => "Unable to send heartbeats to driver." + case KILLED_BY_TASK_REAPER => + "Executor killed by TaskReaper." case _ => "Unknown executor exit code (" + exitCode + ")" + ( if (exitCode > 128) { diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorLogUrlHandler.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorLogUrlHandler.scala index 0ddeef8e9a82d..2202489509fc4 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorLogUrlHandler.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorLogUrlHandler.scala @@ -21,7 +21,8 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.util.matching.Regex -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys private[spark] class ExecutorLogUrlHandler(logUrlPattern: Option[String]) extends Logging { import ExecutorLogUrlHandler._ @@ -82,8 +83,10 @@ private[spark] class ExecutorLogUrlHandler(logUrlPattern: Option[String]) extend allPatterns: Set[String], allAttributes: Set[String]): Unit = { if (informedForMissingAttributes.compareAndSet(false, true)) { - logInfo(s"Fail to renew executor log urls: $reason. Required: $allPatterns / " + - s"available: $allAttributes. Falling back to show app's original log urls.") + logInfo(log"Fail to renew executor log urls: ${MDC(LogKeys.REASON, reason)}." + + log" Required: ${MDC(LogKeys.REGEX, allPatterns)} / " + + log"available: ${MDC(LogKeys.ATTRIBUTE_MAP, allAttributes)}." 
+ + log" Falling back to show app's original log urls.") } } } diff --git a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala index dbfd02b7d3b34..263de0121f7c7 100644 --- a/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala @@ -328,16 +328,19 @@ private[spark] object TaskMetrics extends Logging { */ def fromAccumulators(accums: Seq[AccumulatorV2[_, _]]): TaskMetrics = { val tm = new TaskMetrics + val externalAccums = new java.util.ArrayList[AccumulatorV2[Any, Any]]() for (acc <- accums) { val name = acc.name + val tmpAcc = acc.asInstanceOf[AccumulatorV2[Any, Any]] if (name.isDefined && tm.nameToAccums.contains(name.get)) { val tmAcc = tm.nameToAccums(name.get).asInstanceOf[AccumulatorV2[Any, Any]] tmAcc.metadata = acc.metadata - tmAcc.merge(acc.asInstanceOf[AccumulatorV2[Any, Any]]) + tmAcc.merge(tmpAcc) } else { - tm._externalAccums.add(acc) + externalAccums.add(tmpAcc) } } + tm._externalAccums.addAll(externalAccums) tm } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala index 1f19e9444d383..f50cc0f88842a 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigBuilder.scala @@ -94,7 +94,7 @@ private[spark] class TypedConfigBuilder[T]( import ConfigHelpers._ def this(parent: ConfigBuilder, converter: String => T) = { - this(parent, converter, Option(_).map(_.toString).orNull) + this(parent, converter, { v: T => v.toString }) } /** Apply a transformation to the user-provided values of the config entry. */ @@ -157,6 +157,7 @@ private[spark] class TypedConfigBuilder[T]( /** Creates a [[ConfigEntry]] that has a default value. */ def createWithDefault(default: T): ConfigEntry[T] = { + assert(default != null, "Use createOptional.") // Treat "String" as a special case, so that both createWithDefault and createWithDefaultString // behave the same w.r.t. variable expansion of default values. default match { diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala index a295ef06a6376..17d3329e6b494 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigEntry.scala @@ -89,14 +89,14 @@ private[spark] abstract class ConfigEntry[T] ( def defaultValueString: String protected def readString(reader: ConfigReader): Option[String] = { - val values = Seq( - prependedKey.flatMap(reader.get(_)), - alternatives.foldLeft(reader.get(key))((res, nextKey) => res.orElse(reader.get(nextKey))) - ).flatten - if (values.nonEmpty) { - Some(values.mkString(prependSeparator)) - } else { - None + // SPARK-48678: performance optimization: this code could be expressed more succinctly + // using flatten and mkString, but doing so adds lots of Scala collections perf. overhead. 
+ val maybePrependedValue: Option[String] = prependedKey.flatMap(reader.get) + val maybeValue: Option[String] = alternatives + .foldLeft(reader.get(key))((res, nextKey) => res.orElse(reader.get(nextKey))) + (maybePrependedValue, maybeValue) match { + case (Some(prependedValue), Some(value)) => Some(s"$prependedValue$prependSeparator$value") + case _ => maybeValue.orElse(maybePrependedValue) } } diff --git a/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala b/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala index c1ab22150d024..8824d196489a8 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/ConfigReader.scala @@ -84,7 +84,9 @@ private[spark] class ConfigReader(conf: ConfigProvider) { def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { - if (input != null) { + // SPARK-48678: performance optimization: skip the costly regex processing + // if the string cannot possibly contain a variable reference: + if (input != null && input.contains("${")) { ConfigReader.REF_RE.replaceAllIn(input, { m => val prefix = m.group(1) val name = m.group(2) diff --git a/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala b/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala index b09fbd7a5bb28..0c2db21905d1f 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/Deploy.scala @@ -26,20 +26,6 @@ private[spark] object Deploy { .stringConf .createWithDefault("NONE") - object RecoverySerializer extends Enumeration { - val JAVA, KRYO = Value - } - - val RECOVERY_SERIALIZER = ConfigBuilder("spark.deploy.recoverySerializer") - .doc("Serializer for writing/reading objects to/from persistence engines; " + - "JAVA or KRYO. Java serializer has been the default mode since Spark 0.8.1." + - "KRYO serializer is a new fast and compact mode from Spark 4.0.0.") - .version("4.0.0") - .stringConf - .transform(_.toUpperCase(Locale.ROOT)) - .checkValues(RecoverySerializer.values.map(_.toString)) - .createWithDefault(RecoverySerializer.JAVA.toString) - val RECOVERY_COMPRESSION_CODEC = ConfigBuilder("spark.deploy.recoveryCompressionCodec") .doc("A compression codec for persistence engines. none (default), lz4, lzf, snappy, and " + "zstd. 
Currently, only FILESYSTEM mode supports this configuration.") diff --git a/core/src/main/scala/org/apache/spark/internal/config/History.scala b/core/src/main/scala/org/apache/spark/internal/config/History.scala index 2306856f9331e..64a8681ca2954 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/History.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/History.scala @@ -28,16 +28,19 @@ private[spark] object History { val HISTORY_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory") .version("1.1.0") + .doc("Directory where app logs are stored") .stringConf .createWithDefault(DEFAULT_LOG_DIR) val SAFEMODE_CHECK_INTERVAL_S = ConfigBuilder("spark.history.fs.safemodeCheck.interval") .version("1.6.0") + .doc("Interval between HDFS safemode checks for the event log directory") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("5s") val UPDATE_INTERVAL_S = ConfigBuilder("spark.history.fs.update.interval") .version("1.4.0") + .doc("How often(in seconds) to reload log data from storage") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("10s") @@ -53,16 +56,21 @@ private[spark] object History { val CLEANER_ENABLED = ConfigBuilder("spark.history.fs.cleaner.enabled") .version("1.4.0") + .doc("Whether the History Server should periodically clean up event logs from storage") .booleanConf .createWithDefault(false) val CLEANER_INTERVAL_S = ConfigBuilder("spark.history.fs.cleaner.interval") .version("1.4.0") + .doc("When spark.history.fs.cleaner.enabled=true, specifies how often the filesystem " + + "job history cleaner checks for files to delete.") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("1d") val MAX_LOG_AGE_S = ConfigBuilder("spark.history.fs.cleaner.maxAge") .version("1.4.0") + .doc("When spark.history.fs.cleaner.enabled=true, history files older than this will be " + + "deleted when the filesystem history cleaner runs.") .timeConf(TimeUnit.SECONDS) .createWithDefaultString("7d") @@ -96,6 +104,8 @@ private[spark] object History { val MAX_LOCAL_DISK_USAGE = ConfigBuilder("spark.history.store.maxDiskUsage") .version("2.3.0") + .doc("Maximum disk usage for the local directory where the cache application history " + + "information are stored.") .bytesConf(ByteUnit.BYTE) .createWithDefaultString("10g") @@ -145,60 +155,90 @@ private[spark] object History { val DRIVER_LOG_CLEANER_ENABLED = ConfigBuilder("spark.history.fs.driverlog.cleaner.enabled") .version("3.0.0") + .doc("Specifies whether the History Server should periodically clean up driver logs from " + + "storage.") .fallbackConf(CLEANER_ENABLED) - val DRIVER_LOG_CLEANER_INTERVAL = ConfigBuilder("spark.history.fs.driverlog.cleaner.interval") - .version("3.0.0") - .fallbackConf(CLEANER_INTERVAL_S) - val MAX_DRIVER_LOG_AGE_S = ConfigBuilder("spark.history.fs.driverlog.cleaner.maxAge") .version("3.0.0") + .doc(s"When ${DRIVER_LOG_CLEANER_ENABLED.key}=true, driver log files older than this will be " + + s"deleted when the driver log cleaner runs.") .fallbackConf(MAX_LOG_AGE_S) + val DRIVER_LOG_CLEANER_INTERVAL = ConfigBuilder("spark.history.fs.driverlog.cleaner.interval") + .version("3.0.0") + .doc(s" When ${DRIVER_LOG_CLEANER_ENABLED.key}=true, specifies how often the filesystem " + + s"driver log cleaner checks for files to delete. 
Files are only deleted if they are older " + + s"than ${MAX_DRIVER_LOG_AGE_S.key}.") + .fallbackConf(CLEANER_INTERVAL_S) + val HISTORY_SERVER_UI_ACLS_ENABLE = ConfigBuilder("spark.history.ui.acls.enable") .version("1.0.1") + .doc("Specifies whether ACLs should be checked to authorize users viewing the applications " + + "in the history server. If enabled, access control checks are performed regardless of " + + "what the individual applications had set for spark.ui.acls.enable. The application owner " + + "will always have authorization to view their own application and any users specified via " + + "spark.ui.view.acls and groups specified via spark.ui.view.acls.groups when the " + + "application was run will also have authorization to view that application. If disabled, " + + "no access control checks are made for any application UIs available through the history " + + "server.") .booleanConf .createWithDefault(false) val HISTORY_SERVER_UI_ADMIN_ACLS = ConfigBuilder("spark.history.ui.admin.acls") .version("2.1.1") + .doc("Comma separated list of users that have view access to all the Spark applications in " + + "history server.") .stringConf .toSequence .createWithDefault(Nil) val HISTORY_SERVER_UI_ADMIN_ACLS_GROUPS = ConfigBuilder("spark.history.ui.admin.acls.groups") .version("2.1.1") + .doc("Comma separated list of groups that have view access to all the Spark applications " + + "in history server.") .stringConf .toSequence .createWithDefault(Nil) val NUM_REPLAY_THREADS = ConfigBuilder("spark.history.fs.numReplayThreads") .version("2.0.0") + .doc("Number of threads that will be used by history server to process event logs.") .intConf .createWithDefaultFunction(() => Math.ceil(Runtime.getRuntime.availableProcessors() / 4f).toInt) val RETAINED_APPLICATIONS = ConfigBuilder("spark.history.retainedApplications") .version("1.0.0") + .doc("The number of applications to retain UI data for in the cache. If this cap is " + + "exceeded, then the oldest applications will be removed from the cache. If an application " + + "is not in the cache, it will have to be loaded from disk if it is accessed from the UI.") .intConf .createWithDefault(50) val PROVIDER = ConfigBuilder("spark.history.provider") .version("1.1.0") + .doc("Name of the class implementing the application history backend.") .stringConf - .createOptional + .createWithDefault("org.apache.spark.deploy.history.FsHistoryProvider") val KERBEROS_ENABLED = ConfigBuilder("spark.history.kerberos.enabled") .version("1.0.1") + .doc("Indicates whether the history server should use kerberos to login. 
This is required " + + "if the history server is accessing HDFS files on a secure Hadoop cluster.") .booleanConf .createWithDefault(false) val KERBEROS_PRINCIPAL = ConfigBuilder("spark.history.kerberos.principal") .version("1.0.1") + .doc(s"When ${KERBEROS_ENABLED.key}=true, specifies kerberos principal name for " + + s" the History Server.") .stringConf .createOptional val KERBEROS_KEYTAB = ConfigBuilder("spark.history.kerberos.keytab") .version("1.0.1") + .doc(s"When ${KERBEROS_ENABLED.key}=true, specifies location of the kerberos keytab file " + + s"for the History Server.") .stringConf .createOptional diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index b2cbb6f6deb69..9fcd9ba529c16 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -143,14 +143,25 @@ package object config { private[spark] val STRUCTURED_LOGGING_ENABLED = ConfigBuilder("spark.log.structuredLogging.enabled") - .doc("When true, the default log4j output format is structured JSON lines, and there will " + - "be Mapped Diagnostic Context (MDC) from Spark added to the logs. This is useful for log " + - "aggregation and analysis tools. When false, the default log4j output will be plain " + - "text and no MDC from Spark will be set.") + .doc("When true, Spark logs are output as structured JSON lines with added Spark " + + "Mapped Diagnostic Context (MDC), facilitating easier integration with log aggregation " + + "and analysis tools. When false, logs are plain text without MDC. This configuration " + + "does not apply to interactive environments such as spark-shell, spark-sql, and " + + "PySpark shell.") .version("4.0.0") .booleanConf .createWithDefault(true) + private[spark] val LEGACY_TASK_NAME_MDC_ENABLED = + ConfigBuilder("spark.log.legacyTaskNameMdc.enabled") + .doc("When true, the MDC (Mapped Diagnostic Context) key `mdc.taskName` will be set in the " + + "log output, which is the behavior of Spark version 3.1 through Spark 3.5 releases. " + + "When false, the logging framework will use `task_name` as the MDC key, " + + "aligning it with the naming convention of newer MDC keys introduced in Spark 4.0 release.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + private[spark] val DRIVER_LOG_LOCAL_DIR = ConfigBuilder("spark.driver.log.localDir") .doc("Specifies a local directory to write driver logs and enable Driver Log UI Tab.") @@ -1306,6 +1317,15 @@ package object config { s" be less than or equal to ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.") .createWithDefault(64 * 1024 * 1024) + private[spark] val CHECKPOINT_DIR = + ConfigBuilder("spark.checkpoint.dir") + .doc( + "Set the default directory for checkpointing. It can be overwritten by " + + "SparkContext.setCheckpointDir.") + .version("4.0.0") + .stringConf + .createOptional + private[spark] val CHECKPOINT_COMPRESS = ConfigBuilder("spark.checkpoint.compress") .doc("Whether to compress RDD checkpoints. Generally a good idea. Compression will use " + @@ -1443,8 +1463,7 @@ package object config { private[spark] val SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE = ConfigBuilder("spark.shuffle.unsafe.file.output.buffer") - .doc("The file system for this buffer size after each partition " + - "is written in unsafe shuffle writer. 
In KiB unless otherwise specified.") + .doc("(Deprecated since Spark 4.0, please use 'spark.shuffle.localDisk.file.output.buffer'.)") .version("2.3.0") .bytesConf(ByteUnit.KiB) .checkValue(v => v > 0 && v <= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024, @@ -1452,6 +1471,13 @@ package object config { s" ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH / 1024}.") .createWithDefaultString("32k") + private[spark] val SHUFFLE_LOCAL_DISK_FILE_OUTPUT_BUFFER_SIZE = + ConfigBuilder("spark.shuffle.localDisk.file.output.buffer") + .doc("The file system for this buffer size after each partition " + + "is written in all local disk shuffle writers. In KiB unless otherwise specified.") + .version("4.0.0") + .fallbackConf(SHUFFLE_UNSAFE_FILE_OUTPUT_BUFFER_SIZE) + private[spark] val SHUFFLE_DISK_WRITE_BUFFER_SIZE = ConfigBuilder("spark.shuffle.spill.diskWriteBufferSize") .doc("The buffer size, in bytes, to use when writing the sorted records to an on-disk file.") @@ -2011,6 +2037,13 @@ package object config { .intConf .createWithDefault(1) + private[spark] val IO_COMPRESSION_LZF_PARALLEL = + ConfigBuilder("spark.io.compression.lzf.parallel.enabled") + .doc("When true, LZF compression will use multiple threads to compress data in parallel.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + private[spark] val IO_WARNING_LARGEFILETHRESHOLD = ConfigBuilder("spark.io.warning.largeFileThreshold") .internal() @@ -2403,7 +2436,7 @@ package object config { s"count ${STAGE_MAX_CONSECUTIVE_ATTEMPTS.key}") .version("3.4.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) private[spark] val SCHEDULER_MAX_RETAINED_REMOVED_EXECUTORS = ConfigBuilder("spark.scheduler.maxRetainedRemovedDecommissionExecutors") diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapRedCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapRedCommitProtocol.scala index af0aa41518766..44f8d7cd63635 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapRedCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapRedCommitProtocol.scala @@ -20,6 +20,9 @@ package org.apache.spark.internal.io import org.apache.hadoop.mapred._ import org.apache.hadoop.mapreduce.{TaskAttemptContext => NewTaskAttemptContext} +import org.apache.spark.internal.LogKeys +import org.apache.spark.internal.MDC + /** * An [[FileCommitProtocol]] implementation backed by an underlying Hadoop OutputCommitter * (from the old mapred API). 
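The config hunks above introduce spark.shuffle.localDisk.file.output.buffer as a fallbackConf over the now-deprecated spark.shuffle.unsafe.file.output.buffer, and add spark.io.compression.lzf.parallel.enabled for multi-threaded LZF compression. A minimal sketch of setting these from user code; the key names are taken from the entries above, while the values are arbitrary illustrations rather than recommendations:

import org.apache.spark.SparkConf

// Keys come from the config entries added in the hunks above; values are illustrative only.
val conf = new SparkConf()
  .set("spark.shuffle.localDisk.file.output.buffer", "64k")   // new key; the deprecated unsafe.* key is still honored via fallbackConf
  .set("spark.io.compression.codec", "lzf")
  .set("spark.io.compression.lzf.parallel.enabled", "true")   // opt in to parallel LZF compression (PLZFOutputStream)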
@@ -32,7 +35,8 @@ class HadoopMapRedCommitProtocol(jobId: String, path: String) override def setupCommitter(context: NewTaskAttemptContext): OutputCommitter = { val config = context.getConfiguration.asInstanceOf[JobConf] val committer = config.getOutputCommitter - logInfo(s"Using output committer class ${committer.getClass.getCanonicalName}") + logInfo(log"Using output committer class" + + log" ${MDC(LogKeys.CLASS_NAME, committer.getClass.getCanonicalName)}") committer } } diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index 3a24da98ecc24..f245d2d4e4074 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -29,7 +29,8 @@ import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.mapred.SparkHadoopMapRedUtil /** @@ -252,7 +253,7 @@ class HadoopMapReduceCommitProtocol( committer.abortJob(jobContext, JobStatus.State.FAILED) } catch { case e: IOException => - logWarning(s"Exception while aborting ${jobContext.getJobID}", e) + logWarning(log"Exception while aborting ${MDC(JOB_ID, jobContext.getJobID)}", e) } try { if (hasValidPath) { @@ -261,7 +262,7 @@ class HadoopMapReduceCommitProtocol( } } catch { case e: IOException => - logWarning(s"Exception while aborting ${jobContext.getJobID}", e) + logWarning(log"Exception while aborting ${MDC(JOB_ID, jobContext.getJobID)}", e) } } @@ -292,7 +293,8 @@ class HadoopMapReduceCommitProtocol( committer.abortTask(taskContext) } catch { case e: IOException => - logWarning(s"Exception while aborting ${taskContext.getTaskAttemptID}", e) + logWarning(log"Exception while aborting " + + log"${MDC(TASK_ATTEMPT_ID, taskContext.getTaskAttemptID)}", e) } // best effort cleanup of other staged files try { @@ -302,7 +304,8 @@ class HadoopMapReduceCommitProtocol( } } catch { case e: IOException => - logWarning(s"Exception while aborting ${taskContext.getTaskAttemptID}", e) + logWarning(log"Exception while aborting " + + log"${MDC(TASK_ATTEMPT_ID, taskContext.getTaskAttemptID)}", e) } } } diff --git a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala index 95ea814042d35..db961b3c42f4c 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/SparkHadoopWriter.scala @@ -33,7 +33,7 @@ import org.apache.hadoop.mapreduce.task.{TaskAttemptContextImpl => NewTaskAttemp import org.apache.spark.{SerializableWritable, SparkConf, SparkException, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{JOB_ID, TASK_ATTEMPT_ID} +import org.apache.spark.internal.LogKeys.{DURATION, JOB_ID, TASK_ATTEMPT_ID} import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage import org.apache.spark.rdd.{HadoopRDD, RDD} import org.apache.spark.util.{SerializableConfiguration, SerializableJobConf, Utils} @@ -98,10 +98,11 @@ object SparkHadoopWriter extends Logging { iterator = iter) 
}) - logInfo(s"Start to commit write Job ${jobContext.getJobID}.") + logInfo(log"Start to commit write Job ${MDC(JOB_ID, jobContext.getJobID)}.") val (_, duration) = Utils .timeTakenMs { committer.commitJob(jobContext, ret.toImmutableArraySeq) } - logInfo(s"Write Job ${jobContext.getJobID} committed. Elapsed time: $duration ms.") + logInfo(log"Write Job ${MDC(JOB_ID, jobContext.getJobID)} committed." + + log" Elapsed time: ${MDC(DURATION, duration)} ms.") } catch { case cause: Throwable => logError(log"Aborting job ${MDC(JOB_ID, jobContext.getJobID)}.", cause) diff --git a/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala index 261e016ce9bf0..a0c07bd75f885 100644 --- a/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala +++ b/core/src/main/scala/org/apache/spark/internal/plugin/PluginContainer.scala @@ -22,7 +22,7 @@ import scala.util.{Either, Left, Right} import org.apache.spark.{SparkContext, SparkEnv, TaskFailedReason} import org.apache.spark.api.plugin._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config._ import org.apache.spark.resource.ResourceInformation import org.apache.spark.util.Utils @@ -56,7 +56,7 @@ private class DriverPluginContainer( sc.conf.set(s"${PluginContainer.EXTRA_CONF_PREFIX}$name.$k", v) } } - logInfo(s"Initialized driver component for plugin $name.") + logInfo(log"Initialized driver component for plugin ${MDC(LogKeys.CLASS_NAME, name)}.") Some((p.getClass().getName(), driverPlugin, ctx)) } else { None @@ -83,7 +83,7 @@ private class DriverPluginContainer( plugin.shutdown() } catch { case t: Throwable => - logInfo(s"Exception while shutting down plugin $name.", t) + logInfo(log"Exception while shutting down plugin ${MDC(LogKeys.CLASS_NAME, name)}.", t) } } } @@ -125,7 +125,7 @@ private class ExecutorPluginContainer( executorPlugin.init(ctx, extraConf) ctx.registerMetrics() - logInfo(s"Initialized executor component for plugin $name.") + logInfo(log"Initialized executor component for plugin ${MDC(LogKeys.CLASS_NAME, name)}.") Some(p.getClass().getName() -> executorPlugin) } else { None @@ -144,7 +144,7 @@ private class ExecutorPluginContainer( plugin.shutdown() } catch { case t: Throwable => - logInfo(s"Exception while shutting down plugin $name.", t) + logInfo(log"Exception while shutting down plugin ${MDC(LogKeys.CLASS_NAME, name)}.", t) } } } @@ -155,7 +155,8 @@ private class ExecutorPluginContainer( plugin.onTaskStart() } catch { case t: Throwable => - logInfo(s"Exception while calling onTaskStart on plugin $name.", t) + logInfo(log"Exception while calling onTaskStart on" + + log" plugin ${MDC(LogKeys.CLASS_NAME, name)}.", t) } } } @@ -166,7 +167,8 @@ private class ExecutorPluginContainer( plugin.onTaskSucceeded() } catch { case t: Throwable => - logInfo(s"Exception while calling onTaskSucceeded on plugin $name.", t) + logInfo(log"Exception while calling onTaskSucceeded on" + + log" plugin ${MDC(LogKeys.CLASS_NAME, name)}.", t) } } } @@ -177,7 +179,8 @@ private class ExecutorPluginContainer( plugin.onTaskFailed(failureReason) } catch { case t: Throwable => - logInfo(s"Exception while calling onTaskFailed on plugin $name.", t) + logInfo(log"Exception while calling onTaskFailed on" + + log" plugin ${MDC(LogKeys.CLASS_NAME, name)}.", t) } } } diff --git a/core/src/main/scala/org/apache/spark/internal/plugin/PluginEndpoint.scala 
b/core/src/main/scala/org/apache/spark/internal/plugin/PluginEndpoint.scala index bc45aefa560ed..6ff918979c9ed 100644 --- a/core/src/main/scala/org/apache/spark/internal/plugin/PluginEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/internal/plugin/PluginEndpoint.scala @@ -18,7 +18,8 @@ package org.apache.spark.internal.plugin import org.apache.spark.api.plugin.DriverPlugin -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rpc.{IsolatedThreadSafeRpcEndpoint, RpcCallContext, RpcEnv} case class PluginMessage(pluginName: String, message: AnyRef) @@ -36,13 +37,15 @@ private class PluginEndpoint( val reply = plugin.receive(message) if (reply != null) { logWarning( - s"Plugin $pluginName returned reply for one-way message of type " + - s"${message.getClass().getName()}.") + log"Plugin ${MDC(PLUGIN_NAME, pluginName)} " + + log"returned reply for one-way message of type " + + log"${MDC(CLASS_NAME, message.getClass().getName())}.") } } catch { case e: Exception => - logWarning(s"Error in plugin $pluginName when handling message of type " + - s"${message.getClass().getName()}.", e) + logWarning(log"Error in plugin ${MDC(PLUGIN_NAME, pluginName)} " + + log"when handling message of type " + + log"${MDC(CLASS_NAME, message.getClass().getName())}.", e) } case None => diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala index 07e694b6c5b03..233228a9c6d4c 100644 --- a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -22,6 +22,7 @@ import java.util.Locale import com.github.luben.zstd.{NoPool, RecyclingBufferPool, ZstdInputStreamNoFinalizer, ZstdOutputStreamNoFinalizer} import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} +import com.ning.compress.lzf.parallel.PLZFOutputStream import net.jpountz.lz4.{LZ4BlockInputStream, LZ4BlockOutputStream, LZ4Factory} import net.jpountz.xxhash.XXHashFactory import org.xerial.snappy.{Snappy, SnappyInputStream, SnappyOutputStream} @@ -100,8 +101,9 @@ private[spark] object CompressionCodec { * If it is already a short name, just return it. 
*/ def getShortName(codecName: String): String = { - if (shortCompressionCodecNames.contains(codecName)) { - codecName + val lowercasedCodec = codecName.toLowerCase(Locale.ROOT) + if (shortCompressionCodecNames.contains(lowercasedCodec)) { + lowercasedCodec } else { shortCompressionCodecNames .collectFirst { case (k, v) if v == codecName => k } @@ -170,9 +172,14 @@ class LZ4CompressionCodec(conf: SparkConf) extends CompressionCodec { */ @DeveloperApi class LZFCompressionCodec(conf: SparkConf) extends CompressionCodec { + private val parallelCompression = conf.get(IO_COMPRESSION_LZF_PARALLEL) override def compressedOutputStream(s: OutputStream): OutputStream = { - new LZFOutputStream(s).setFinishBlockOnFlush(true) + if (parallelCompression) { + new PLZFOutputStream(s) + } else { + new LZFOutputStream(s).setFinishBlockOnFlush(true) + } } override def compressedInputStream(s: InputStream): InputStream = new LZFInputStream(s) diff --git a/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala index c68999f34079d..0aaa222e6195e 100644 --- a/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala +++ b/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala @@ -25,7 +25,7 @@ import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter} import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.TASK_ATTEMPT_ID +import org.apache.spark.internal.LogKeys.{TASK_ATTEMPT_ID, TOTAL_TIME} import org.apache.spark.util.Utils object SparkHadoopMapRedUtil extends Logging { @@ -50,7 +50,8 @@ object SparkHadoopMapRedUtil extends Logging { def performCommit(): Unit = { try { val (_, timeCost) = Utils.timeTakenMs(committer.commitTask(mrTaskContext)) - logInfo(s"$mrTaskAttemptID: Committed. Elapsed time: $timeCost ms.") + logInfo(log"${MDC(TASK_ATTEMPT_ID, mrTaskAttemptID)}: Committed." 
+ + log" Elapsed time: ${MDC(TOTAL_TIME, timeCost)} ms.") } catch { case cause: IOException => logError( @@ -80,12 +81,13 @@ object SparkHadoopMapRedUtil extends Logging { if (canCommit) { performCommit() } else { - val message = - s"$mrTaskAttemptID: Not committed because the driver did not authorize commit" + val message = log"${MDC(TASK_ATTEMPT_ID, mrTaskAttemptID)}: Not committed because" + + log" the driver did not authorize commit" logInfo(message) // We need to abort the task so that the driver can reschedule new attempts, if necessary committer.abortTask(mrTaskContext) - throw new CommitDeniedException(message, ctx.stageId(), splitId, ctx.attemptNumber()) + throw new CommitDeniedException(message.message, ctx.stageId(), splitId, + ctx.attemptNumber()) } } else { // Speculation is disabled or a user has chosen to manually bypass the commit coordination @@ -93,7 +95,8 @@ object SparkHadoopMapRedUtil extends Logging { } } else { // Some other attempt committed the output, so we do nothing and signal success - logInfo(s"No need to commit output of task because needsTaskCommit=false: $mrTaskAttemptID") + logInfo(log"No need to commit output of task because needsTaskCommit=false:" + + log" ${MDC(TASK_ATTEMPT_ID, mrTaskAttemptID)}") } } } diff --git a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala index 4cffbb2a5701c..7098961d1649a 100644 --- a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala +++ b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala @@ -21,7 +21,8 @@ import javax.annotation.concurrent.GuardedBy import scala.collection.mutable -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ /** * Implements policies and bookkeeping for sharing an adjustable-sized pool of memory between tasks. 
@@ -136,7 +137,8 @@ private[memory] class ExecutionMemoryPool( // if we can't give it this much now, wait for other tasks to free up memory // (this happens if older tasks allocated lots of memory before N grew) if (toGrant < numBytes && curMem + toGrant < minMemoryPerTask) { - logInfo(s"TID $taskAttemptId waiting for at least 1/2N of $poolName pool to be free") + logInfo(log"TID ${MDC(TASK_ATTEMPT_ID, taskAttemptId)} waiting for at least 1/2N of" + + log" ${MDC(POOL_NAME, poolName)} pool to be free") lock.wait() } else { memoryForTask(taskAttemptId) += toGrant @@ -153,8 +155,9 @@ private[memory] class ExecutionMemoryPool( val curMem = memoryForTask.getOrElse(taskAttemptId, 0L) val memoryToFree = if (curMem < numBytes) { logWarning( - s"Internal error: release called on $numBytes bytes but task only has $curMem bytes " + - s"of memory from the $poolName pool") + log"Internal error: release called on ${MDC(NUM_BYTES, numBytes)} " + + log"bytes but task only has ${MDC(CURRENT_MEMORY_SIZE, curMem)} bytes " + + log"of memory from the ${MDC(MEMORY_POOL_NAME, poolName)} pool") curMem } else { numBytes diff --git a/core/src/main/scala/org/apache/spark/memory/StorageMemoryPool.scala b/core/src/main/scala/org/apache/spark/memory/StorageMemoryPool.scala index 0abdca99de1b2..24fcb5b17f388 100644 --- a/core/src/main/scala/org/apache/spark/memory/StorageMemoryPool.scala +++ b/core/src/main/scala/org/apache/spark/memory/StorageMemoryPool.scala @@ -20,7 +20,8 @@ package org.apache.spark.memory import javax.annotation.concurrent.GuardedBy import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.storage.BlockId import org.apache.spark.storage.memory.MemoryStore @@ -104,8 +105,8 @@ private[memory] class StorageMemoryPool( def releaseMemory(size: Long): Unit = lock.synchronized { if (size > _memoryUsed) { - logWarning(s"Attempted to release $size bytes of storage " + - s"memory when we only have ${_memoryUsed} bytes") + logWarning(log"Attempted to release ${MDC(NUM_BYTES, size)} bytes of storage " + + log"memory when we only have ${MDC(NUM_BYTES_USED, _memoryUsed)} bytes") _memoryUsed = 0 } else { _memoryUsed -= size diff --git a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala index 73805c11e0371..d4ec6ed8495af 100644 --- a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala +++ b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala @@ -18,7 +18,8 @@ package org.apache.spark.memory import org.apache.spark.{SparkConf, SparkIllegalArgumentException} -import org.apache.spark.internal.config +import org.apache.spark.internal.{config, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.Tests._ import org.apache.spark.storage.BlockId @@ -166,8 +167,9 @@ private[spark] class UnifiedMemoryManager( } if (numBytes > maxMemory) { // Fail fast if the block simply won't fit - logInfo(s"Will not store $blockId as the required space ($numBytes bytes) exceeds our " + - s"memory limit ($maxMemory bytes)") + logInfo(log"Will not store ${MDC(BLOCK_ID, blockId)} as the required space" + + log" (${MDC(NUM_BYTES, numBytes)} bytes) exceeds our" + + log" memory limit (${MDC(NUM_BYTES_MAX, maxMemory)} bytes)") return false } if (numBytes > storagePool.memoryFree) { diff --git 
a/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala b/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala index 50b7ddcb13ae1..965468ac2418f 100644 --- a/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala +++ b/core/src/main/scala/org/apache/spark/metrics/ExecutorMetricType.scala @@ -24,7 +24,8 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.SparkEnv import org.apache.spark.executor.ProcfsMetricsGetter -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.memory.MemoryManager /** @@ -157,10 +158,13 @@ case object GarbageCollectionMetrics extends ExecutorMetricType with Logging { } else if (!nonBuiltInCollectors.contains(mxBean.getName)) { nonBuiltInCollectors = mxBean.getName +: nonBuiltInCollectors // log it when first seen - logWarning(s"To enable non-built-in garbage collector(s) " + - s"$nonBuiltInCollectors, users should configure it(them) to " + - s"${config.EVENT_LOG_GC_METRICS_YOUNG_GENERATION_GARBAGE_COLLECTORS.key} or " + - s"${config.EVENT_LOG_GC_METRICS_OLD_GENERATION_GARBAGE_COLLECTORS.key}") + val youngGenerationGc = MDC(YOUNG_GENERATION_GC, + config.EVENT_LOG_GC_METRICS_YOUNG_GENERATION_GARBAGE_COLLECTORS.key) + val oldGenerationGc = MDC(OLD_GENERATION_GC, + config.EVENT_LOG_GC_METRICS_OLD_GENERATION_GARBAGE_COLLECTORS.key) + logWarning(log"To enable non-built-in garbage collector(s) " + + log"${MDC(NON_BUILT_IN_CONNECTORS, nonBuiltInCollectors)}, " + + log"users should configure it(them) to $youngGenerationGc or $oldGenerationGc") } else { // do nothing } diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala index 12df40c3476a0..a845feeb67ff0 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala @@ -26,7 +26,7 @@ import scala.util.matching.Regex import org.apache.spark.SparkConf import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.internal.config.METRICS_CONF import org.apache.spark.util.Utils diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala index 555083bb65d24..709ce0060e150 100644 --- a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -27,7 +27,7 @@ import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CLASS_NAME +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config._ import org.apache.spark.metrics.sink.{MetricsServlet, PrometheusServlet, Sink} import org.apache.spark.metrics.source.{Source, StaticSources} @@ -141,12 +141,13 @@ private[spark] class MetricsSystem private ( // Only Driver and Executor set spark.app.id and spark.executor.id. // Other instance types, e.g. Master and Worker, are not related to a specific application. 
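Most hunks in this part of the change are mechanical applications of one pattern: replace an s-interpolated message with the Spark-internal log interpolator and wrap every dynamic value in MDC(<LogKey>, value), so the value is attached as a structured field as well as appearing inline. A usage sketch of that call shape, mirroring the imports and LogKeys seen in the surrounding hunks (for example the MetricsSystem one just below); the class itself is invented for the example.

import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.internal.LogKeys.DEFAULT_NAME

// Invented class, shown only to isolate the call shape this PR applies everywhere.
class DefaultNameReporter extends Logging {
  def warnDefaultName(defaultName: String): Unit = {
    // before: logWarning(s"Using default name $defaultName for source because ...")
    logWarning(log"Using default name ${MDC(DEFAULT_NAME, defaultName)} " +
      log"for source because spark.executor.id is not set.")
  }
}

When the same text also feeds an exception, the diff builds a single log entry and passes its .message to the exception constructor, as the ResourceUtils hunks further down show.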
if (metricsNamespace.isEmpty) { - logWarning(s"Using default name $defaultName for source because neither " + - s"${METRICS_NAMESPACE.key} nor spark.app.id is set.") + logWarning(log"Using default name ${MDC(LogKeys.DEFAULT_NAME, defaultName)} " + + log"for source because neither " + + log"${MDC(LogKeys.CONFIG, METRICS_NAMESPACE.key)} nor spark.app.id is set.") } if (executorId.isEmpty) { - logWarning(s"Using default name $defaultName for source because spark.executor.id is " + - s"not set.") + logWarning(log"Using default name ${MDC(LogKeys.DEFAULT_NAME, defaultName)} " + + log"for source because spark.executor.id is not set.") } defaultName } @@ -189,7 +190,8 @@ private[spark] class MetricsSystem private ( registerSource(source) } catch { case e: Exception => - logError(log"Source class ${MDC(CLASS_NAME, classPath)} cannot be instantiated", e) + logError(log"Source class ${MDC(LogKeys.CLASS_NAME, classPath)} " + + log"cannot be instantiated", e) } } } @@ -229,7 +231,8 @@ private[spark] class MetricsSystem private ( } } catch { case e: Exception => - logError(log"Sink class ${MDC(CLASS_NAME, classPath)} cannot be instantiated") + logError(log"Sink class ${MDC(LogKeys.CLASS_NAME, classPath)} " + + log"cannot be instantiated") throw e } } diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala index c506b86b4563b..30b10d64882ac 100644 --- a/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala @@ -22,7 +22,8 @@ import java.util.concurrent.TimeUnit import com.codahale.metrics.{Metric, MetricFilter, MetricRegistry} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.PREFIX import org.apache.spark.metrics.MetricsSystem private[spark] object StatsdSink { @@ -69,7 +70,7 @@ private[spark] class StatsdSink( override def start(): Unit = { reporter.start(pollPeriod, pollUnit) - logInfo(s"StatsdSink started with prefix: '$prefix'") + logInfo(log"StatsdSink started with prefix: '${MDC(PREFIX, prefix)}'") } override def stop(): Unit = { diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala index aa0da153f7fa3..a922eb336c28f 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -23,7 +23,8 @@ import scala.jdk.CollectionConverters._ import scala.reflect.ClassTag import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.network.BlockDataManager import org.apache.spark.network.buffer.NioManagedBuffer import org.apache.spark.network.client.{RpcResponseCallback, StreamCallbackWithID, TransportClient} @@ -55,17 +56,21 @@ class NettyBlockRpcServer( BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) } catch { case e: IllegalArgumentException if e.getMessage.startsWith("Unknown message type") => - logWarning(s"This could be a corrupted RPC message (capacity: ${rpcMessage.capacity()}) " + - s"from ${client.getSocketAddress}. 
Please use `spark.authenticate.*` configurations " + - "in case of security incidents.") + logWarning(log"This could be a corrupted RPC message (capacity: " + + log"${MDC(RPC_MESSAGE_CAPACITY, rpcMessage.capacity())}) " + + log"from ${MDC(SOCKET_ADDRESS, client.getSocketAddress)}. " + + log"Please use `spark.authenticate.*` configurations " + + log"in case of security incidents.") throw e case _: IndexOutOfBoundsException | _: NegativeArraySizeException => // Netty may throw non-'IOException's for corrupted buffers. In this case, // we ignore the entire message with warnings because we cannot trust any contents. - logWarning(s"Ignored a corrupted RPC message (capacity: ${rpcMessage.capacity()}) " + - s"from ${client.getSocketAddress}. Please use `spark.authenticate.*` configurations " + - "in case of security incidents.") + logWarning(log"Ignored a corrupted RPC message (capacity: " + + log"${MDC(RPC_MESSAGE_CAPACITY, rpcMessage.capacity())}) " + + log"from ${MDC(SOCKET_ADDRESS, client.getSocketAddress)}. " + + log"Please use `spark.authenticate.*` configurations " + + log"in case of security incidents.") return } logTrace(s"Received request: $message") diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala index 6b785a07c7f43..7ceb50db5966a 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala @@ -30,7 +30,7 @@ import com.codahale.metrics.{Metric, MetricSet} import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.ExecutorDeadException -import org.apache.spark.internal.config +import org.apache.spark.internal.{config, LogKeys, MDC} import org.apache.spark.network._ import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.client.{RpcResponseCallback, TransportClientBootstrap} @@ -85,9 +85,11 @@ private[spark] class NettyBlockTransferService( appId = conf.getAppId if (hostName.equals(bindAddress)) { - logger.info(s"Server created on $hostName:${server.getPort}") + logger.info("Server created on {}:{}", + MDC(LogKeys.HOST, hostName), MDC(LogKeys.PORT, server.getPort)) } else { - logger.info(s"Server created on $hostName $bindAddress:${server.getPort}") + logger.info("Server created on {} {}:{}", MDC(LogKeys.HOST, hostName), + MDC(LogKeys.BIND_ADDRESS, bindAddress), MDC(LogKeys.PORT, server.getPort)) } } @@ -193,7 +195,11 @@ private[spark] class NettyBlockTransferService( } override def onFailure(e: Throwable): Unit = { - logger.error(s"Error while uploading $blockId${if (asStream) " as stream" else ""}", e) + if (asStream) { + logger.error(s"Error while uploading {} as stream", e, MDC.of(LogKeys.BLOCK_ID, blockId)) + } else { + logger.error(s"Error while uploading {}", e, MDC.of(LogKeys.BLOCK_ID, blockId)) + } result.failure(e) } } diff --git a/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala index fddd35b657479..850c07f460b75 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala @@ -22,6 +22,7 @@ import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ +import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.util.Utils private[spark] @@ -57,7 +58,11 @@ 
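The hunk that follows guards CartesianRDD.getPartitions against Int overflow: the cross-product size is computed as a Long and rejected before the Array is allocated if it exceeds Int.MaxValue. A stand-alone illustration of the same check; the object and the require-based error handling are invented, whereas the diff throws SparkCoreErrors.tooManyArrayElementsError.

object CrossProductSizeCheck {
  def checkedNumPartitions(numPartitionsRdd1: Int, numPartitionsRdd2: Int): Int = {
    // Multiply in Long space first; Int * Int would silently wrap around.
    val total: Long = numPartitionsRdd1.toLong * numPartitionsRdd2
    require(total <= Int.MaxValue, s"too many partitions: $total exceeds ${Int.MaxValue}")
    total.toInt
  }

  def main(args: Array[String]): Unit = {
    println(checkedNumPartitions(1000, 1000))      // 1000000
    // checkedNumPartitions(100000, 100000) fails: 10,000,000,000 does not fit in an Int
  }
}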
class CartesianRDD[T: ClassTag, U: ClassTag]( override def getPartitions: Array[Partition] = { // create the cross product split - val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) + val partitionNum: Long = numPartitionsInRdd2.toLong * rdd1.partitions.length + if (partitionNum > Int.MaxValue) { + throw SparkCoreErrors.tooManyArrayElementsError(partitionNum, Int.MaxValue) + } + val array = new Array[Partition](partitionNum.toInt) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index 908ce1b233c57..545eafe7a4449 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -37,7 +37,8 @@ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.rdd.HadoopRDD.HadoopMapPartitionsWithSplitRDD import org.apache.spark.scheduler.{HDFSCacheTaskLocation, HostTaskLocation} @@ -239,11 +240,13 @@ class HadoopRDD[K, V]( if (fileSplit.getLength > conf.get(IO_WARNING_LARGEFILETHRESHOLD)) { val codecFactory = new CompressionCodecFactory(jobConf) if (Utils.isFileSplittable(path, codecFactory)) { - logWarning(s"Loading one large file ${path.toString} with only one partition, " + - s"we can increase partition numbers for improving performance.") + logWarning(log"Loading one large file ${MDC(PATH, path.toString)} " + + log"with only one partition, " + + log"we can increase partition numbers for improving performance.") } else { - logWarning(s"Loading one large unsplittable file ${path.toString} with only one " + - s"partition, because the file is compressed by unsplittable compression codec.") + logWarning(log"Loading one large unsplittable file ${MDC(PATH, path.toString)} " + + log"with only one " + + log"partition, because the file is compressed by unsplittable compression codec.") } } } @@ -254,8 +257,8 @@ class HadoopRDD[K, V]( array } catch { case e: InvalidInputException if ignoreMissingFiles => - logWarning(s"${jobConf.get(FileInputFormat.INPUT_DIR)} doesn't exist and no" + - s" partitions returned from this path.", e) + logWarning(log"${MDC(PATH, jobConf.get(FileInputFormat.INPUT_DIR))} " + + log"doesn't exist and no partitions returned from this path.", e) Array.empty[Partition] case e: IOException if e.getMessage.startsWith("Not a file:") => val path = e.getMessage.split(":").map(_.trim).apply(2) @@ -267,7 +270,8 @@ class HadoopRDD[K, V]( val iter = new NextIterator[(K, V)] { private val split = theSplit.asInstanceOf[HadoopPartition] - logInfo("Input split: " + split.inputSplit) + logInfo(log"Task (TID ${MDC(TASK_ID, context.taskAttemptId())}) input split: " + + log"${MDC(INPUT_SPLIT, split.inputSplit)}") private val jobConf = getJobConf() private val inputMetrics = context.taskMetrics().inputMetrics @@ -310,13 +314,14 @@ class HadoopRDD[K, V]( inputFormat.getRecordReader(split.inputSplit.value, jobConf, Reporter.NULL) } catch { case e: FileNotFoundException if ignoreMissingFiles => - logWarning(s"Skipped missing file: 
${split.inputSplit}", e) + logWarning(log"Skipped missing file: ${MDC(PATH, split.inputSplit)}", e) finished = true null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e case e: IOException if ignoreCorruptFiles => - logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) + logWarning(log"Skipped the rest content in the corrupted file: " + + log"${MDC(PATH, split.inputSplit)}", e) finished = true null } @@ -336,12 +341,13 @@ class HadoopRDD[K, V]( finished = !reader.next(key, value) } catch { case e: FileNotFoundException if ignoreMissingFiles => - logWarning(s"Skipped missing file: ${split.inputSplit}", e) + logWarning(log"Skipped missing file: ${MDC(PATH, split.inputSplit)}", e) finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e case e: IOException if ignoreCorruptFiles => - logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) + logWarning(log"Skipped the rest content in the corrupted file: " + + log"${MDC(PATH, split.inputSplit)}", e) finished = true } if (!finished) { diff --git a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala index c41255491e976..8c10bcbc25a86 100644 --- a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala @@ -25,7 +25,8 @@ import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.java.JavaSparkContext.fakeClassTag import org.apache.spark.api.java.function.{Function => JFunction} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.NextIterator private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) extends Partition { @@ -93,7 +94,7 @@ class JdbcRDD[T: ClassTag]( stmt.setFetchSize(100) } - logInfo(s"statement fetch size set to: ${stmt.getFetchSize}") + logInfo(log"statement fetch size set to: ${MDC(FETCH_SIZE, stmt.getFetchSize)}") stmt.setLong(1, part.lower) stmt.setLong(2, part.upper) diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 7db8531e4a59a..bf539320b5985 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -37,7 +37,8 @@ import org.apache.spark._ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD import org.apache.spark.storage.StorageLevel @@ -168,11 +169,13 @@ class NewHadoopRDD[K, V]( if (fileSplit.getLength > conf.get(IO_WARNING_LARGEFILETHRESHOLD)) { val codecFactory = new CompressionCodecFactory(_conf) if (Utils.isFileSplittable(path, codecFactory)) { - logWarning(s"Loading one large file ${path.toString} with only one partition, " + - s"we can increase partition numbers for improving performance.") + logWarning(log"Loading one large file 
${MDC(PATH, path.toString)} " + + log"with only one partition, " + + log"we can increase partition numbers for improving performance.") } else { - logWarning(s"Loading one large unsplittable file ${path.toString} with only one " + - s"partition, because the file is compressed by unsplittable compression codec.") + logWarning(log"Loading one large unsplittable file ${MDC(PATH, path.toString)} " + + log"with only one " + + log"partition, because the file is compressed by unsplittable compression codec.") } } } @@ -185,8 +188,8 @@ class NewHadoopRDD[K, V]( result } catch { case e: InvalidInputException if ignoreMissingFiles => - logWarning(s"${_conf.get(FileInputFormat.INPUT_DIR)} doesn't exist and no" + - s" partitions returned from this path.", e) + logWarning(log"${MDC(PATH, _conf.get(FileInputFormat.INPUT_DIR))} " + + log"doesn't exist and no partitions returned from this path.", e) Array.empty[Partition] } } @@ -194,7 +197,8 @@ class NewHadoopRDD[K, V]( override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = { val iter = new Iterator[(K, V)] { private val split = theSplit.asInstanceOf[NewHadoopPartition] - logInfo("Input split: " + split.serializableHadoopSplit) + logInfo(log"Task (TID ${MDC(TASK_ID, context.taskAttemptId())}) input split: " + + log"${MDC(INPUT_SPLIT, split.serializableHadoopSplit)}") private val conf = getConf private val inputMetrics = context.taskMetrics().inputMetrics @@ -244,14 +248,15 @@ class NewHadoopRDD[K, V]( _reader } catch { case e: FileNotFoundException if ignoreMissingFiles => - logWarning(s"Skipped missing file: ${split.serializableHadoopSplit}", e) + logWarning(log"Skipped missing file: ${MDC(PATH, split.serializableHadoopSplit)}", e) finished = true null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e case e: IOException if ignoreCorruptFiles => logWarning( - s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", + log"Skipped the rest content in the corrupted file: " + + log"${MDC(PATH, split.serializableHadoopSplit)}", e) finished = true null @@ -273,13 +278,14 @@ class NewHadoopRDD[K, V]( finished = !reader.nextKeyValue } catch { case e: FileNotFoundException if ignoreMissingFiles => - logWarning(s"Skipped missing file: ${split.serializableHadoopSplit}", e) + logWarning(log"Skipped missing file: ${MDC(PATH, split.serializableHadoopSplit)}", e) finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e case e: IOException if ignoreCorruptFiles => logWarning( - s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", + log"Skipped the rest content in the corrupted file: " + + log"${MDC(PATH, split.serializableHadoopSplit)}", e) finished = true } diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index f5a731d134eaf..c0966dd5ede14 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -35,7 +35,8 @@ import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob, OutputFormat => NewO import org.apache.spark._ import org.apache.spark.Partitioner.defaultPartitioner import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import 
org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.SPECULATION_ENABLED import org.apache.spark.internal.io._ import org.apache.spark.partial.{BoundedDouble, PartialResult} @@ -1051,10 +1052,11 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val outputCommitterClass = hadoopConf.get("mapred.output.committer.class", "") if (speculationEnabled && outputCommitterClass.contains("Direct")) { val warningMessage = - s"$outputCommitterClass may be an output committer that writes data directly to " + - "the final location. Because speculation is enabled, this output committer may " + - "cause data loss (see the case in SPARK-10063). If possible, please use an output " + - "committer that does not have this behavior (e.g. FileOutputCommitter)." + log"${MDC(CLASS_NAME, outputCommitterClass)} " + + log"may be an output committer that writes data directly to " + + log"the final location. Because speculation is enabled, this output committer may " + + log"cause data loss (see the case in SPARK-10063). If possible, please use an output " + + log"committer that does not have this behavior (e.g. FileOutputCommitter)." logWarning(warningMessage) } diff --git a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala index 127bdf6d91812..a806b72766c6f 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala @@ -34,7 +34,7 @@ import scala.reflect.ClassTag import org.apache.spark.{Partition, TaskContext} import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.LogKey.{COMMAND, ERROR, PATH} +import org.apache.spark.internal.LogKeys.{COMMAND, ERROR, PATH} import org.apache.spark.internal.MDC import org.apache.spark.util.Utils diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index a48eaa253ad1d..ac93abf3fe7a0 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -36,7 +36,8 @@ import org.apache.spark.Partitioner._ import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} import org.apache.spark.api.java.JavaRDD import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.RDD_LIMIT_SCALE_UP_FACTOR import org.apache.spark.partial.BoundedDouble @@ -210,7 +211,7 @@ abstract class RDD[T: ClassTag]( * @return This RDD. */ def unpersist(blocking: Boolean = false): this.type = { - logInfo(s"Removing RDD $id from persistence list") + logInfo(log"Removing RDD ${MDC(RDD_ID, id)} from persistence list") sc.unpersistRDD(id, blocking) storageLevel = StorageLevel.NONE this @@ -643,7 +644,8 @@ abstract class RDD[T: ClassTag]( // this shouldn't happen often because we use a big multiplier for the initial size var numIters = 0 while (samples.length < num) { - logWarning(s"Needed to re-sample due to insufficient sample size. Repeat #$numIters") + logWarning(log"Needed to re-sample due to insufficient sample size. 
" + + log"Repeat #${MDC(NUM_ITERATIONS, numIters)}") samples = this.sample(withReplacement, fraction, rand.nextInt()).collect() numIters += 1 } diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala index 863bcd5b12d35..cc777659f541e 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala @@ -29,7 +29,8 @@ import org.apache.hadoop.fs.Path import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.{BUFFER_SIZE, CACHE_CHECKPOINT_PREFERRED_LOCS_EXPIRE_TIME, CHECKPOINT_COMPRESS} import org.apache.spark.io.CompressionCodec import org.apache.spark.util.{SerializableConfiguration, Utils} @@ -172,7 +173,7 @@ private[spark] object ReliableCheckpointRDD extends Logging { val checkpointDurationMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - checkpointStartTimeNs) - logInfo(s"Checkpointing took $checkpointDurationMs ms.") + logInfo(log"Checkpointing took ${MDC(TOTAL_TIME, checkpointDurationMs)} ms.") val newRDD = new ReliableCheckpointRDD[T]( sc, checkpointDirPath.toString, originalRDD.partitioner) @@ -219,7 +220,7 @@ private[spark] object ReliableCheckpointRDD extends Logging { } (catchBlock = { val deleted = fs.delete(tempOutputPath, false) if (!deleted) { - logInfo(s"Failed to delete tempOutputPath $tempOutputPath.") + logInfo(log"Failed to delete tempOutputPath ${MDC(TEMP_OUTPUT_PATH, tempOutputPath)}.") } }, finallyBlock = { serializeStream.close() @@ -227,14 +228,15 @@ private[spark] object ReliableCheckpointRDD extends Logging { if (!fs.rename(tempOutputPath, finalOutputPath)) { if (!fs.exists(finalOutputPath)) { - logInfo(s"Deleting tempOutputPath $tempOutputPath") + logInfo(log"Deleting tempOutputPath ${MDC(TEMP_OUTPUT_PATH, tempOutputPath)}") fs.delete(tempOutputPath, false) throw SparkCoreErrors.checkpointFailedToSaveError(ctx.attemptNumber(), finalOutputPath) } else { // Some other copy of this task must've finished before us and renamed it - logInfo(s"Final output path $finalOutputPath already exists; not overwriting it") + logInfo(log"Final output path" + + log" ${MDC(FINAL_OUTPUT_PATH, finalOutputPath)} already exists; not overwriting it") if (!fs.delete(tempOutputPath, false)) { - logWarning(s"Error deleting ${tempOutputPath}") + logWarning(log"Error deleting ${MDC(PATH, tempOutputPath)}") } } } @@ -261,7 +263,8 @@ private[spark] object ReliableCheckpointRDD extends Logging { logDebug(s"Written partitioner to $partitionerFilePath") } catch { case NonFatal(e) => - logWarning(s"Error writing partitioner $partitioner to $checkpointDirPath") + logWarning(log"Error writing partitioner ${MDC(PARTITIONER, partitioner)} to " + + log"${MDC(PATH, checkpointDirPath)}") } } @@ -298,8 +301,8 @@ private[spark] object ReliableCheckpointRDD extends Logging { logDebug("No partitioner file", e) None case NonFatal(e) => - logWarning(s"Error reading partitioner from $checkpointDirPath, " + - s"partitioner will not be recovered which may lead to performance loss", e) + logWarning(log"Error reading partitioner from ${MDC(PATH, checkpointDirPath)}, " + + log"partitioner will not be recovered which may lead to performance loss", e) None } } diff --git 
a/core/src/main/scala/org/apache/spark/rdd/ReliableRDDCheckpointData.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableRDDCheckpointData.scala index 0d1bc1425161e..b468a38fcf229 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableRDDCheckpointData.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableRDDCheckpointData.scala @@ -23,7 +23,8 @@ import org.apache.hadoop.fs.Path import org.apache.spark._ import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{NEW_RDD_ID, RDD_CHECKPOINT_DIR, RDD_ID} import org.apache.spark.internal.config.CLEANER_REFERENCE_TRACKING_CLEAN_CHECKPOINTS /** @@ -66,7 +67,8 @@ private[spark] class ReliableRDDCheckpointData[T: ClassTag](@transient private v } } - logInfo(s"Done checkpointing RDD ${rdd.id} to $cpDir, new parent is RDD ${newRDD.id}") + logInfo(log"Done checkpointing RDD ${MDC(RDD_ID, rdd.id)}" + + log" to ${MDC(RDD_CHECKPOINT_DIR, cpDir)}, new parent is RDD ${MDC(NEW_RDD_ID, newRDD.id)}") newRDD } diff --git a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 2f6ff0acdf024..118660ef69476 100644 --- a/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -23,7 +23,7 @@ import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.SequenceFileOutputFormat -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} /** * Extra functions available on RDDs of (key, value) pairs to create a Hadoop SequenceFile, @@ -58,8 +58,9 @@ class SequenceFileRDDFunctions[K: IsWritable: ClassTag, V: IsWritable: ClassTag] val convertKey = self.keyClass != _keyWritableClass val convertValue = self.valueClass != _valueWritableClass - logInfo("Saving as sequence file of type " + - s"(${_keyWritableClass.getSimpleName},${_valueWritableClass.getSimpleName})" ) + logInfo(log"Saving as sequence file of type " + + log"(${MDC(LogKeys.KEY, _keyWritableClass.getSimpleName)}," + + log"${MDC(LogKeys.VALUE, _valueWritableClass.getSimpleName)})") val format = classOf[SequenceFileOutputFormat[Writable, Writable]] val jobConf = new JobConf(self.context.hadoopConfiguration) if (!convertKey && !convertValue) { diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceDiscoveryScriptPlugin.scala b/core/src/main/scala/org/apache/spark/resource/ResourceDiscoveryScriptPlugin.scala index d861e91771673..51de7e2b9ac70 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceDiscoveryScriptPlugin.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceDiscoveryScriptPlugin.scala @@ -23,7 +23,8 @@ import java.util.Optional import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.resource.ResourceDiscoveryPlugin -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys import org.apache.spark.util.Utils.executeAndGetOutput /** @@ -44,7 +45,8 @@ class ResourceDiscoveryScriptPlugin extends ResourceDiscoveryPlugin with Logging val resourceName = request.id.resourceName val result = if (script.isPresent) { val scriptFile = new File(script.get) - logInfo(s"Discovering resources for 
$resourceName with script: $scriptFile") + logInfo(log"Discovering resources for ${MDC(LogKeys.RESOURCE_NAME, resourceName)}" + + log" with script: ${MDC(LogKeys.PATH, scriptFile)}") // check that script exists and try to execute if (scriptFile.exists()) { val output = executeAndGetOutput(Seq(script.get), new File(".")) diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala b/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala index e95dbe973691a..7dcde35de2518 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceProfile.scala @@ -26,7 +26,8 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.{SparkConf, SparkContext, SparkEnv, SparkException} import org.apache.spark.annotation.{Evolving, Since} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Python.PYSPARK_EXECUTOR_MEMORY import org.apache.spark.util.Utils @@ -221,8 +222,8 @@ class ResourceProfile( } taskResourcesToCheck -= rName } else { - logWarning(s"The executor resource config for resource: $rName was specified but " + - "no corresponding task resource request was specified.") + logWarning(log"The executor resource config for resource: ${MDC(RESOURCE_NAME, rName)} " + + log"was specified but no corresponding task resource request was specified.") } } if (taskResourcesToCheck.nonEmpty) { @@ -231,7 +232,7 @@ class ResourceProfile( } val limiting = if (taskLimit == -1) "cpu" else s"$limitingResource at $taskLimit tasks per executor" - logInfo(s"Limiting resource is $limiting") + logInfo(log"Limiting resource is ${MDC(RESOURCE, limiting)}") _executorResourceSlotsPerAddr = Some(numPartsPerResourceMap.toMap) _maxTasksPerExecutor = if (taskLimit == -1) Some(1) else Some(taskLimit) _limitingResource = Some(limitingResource) @@ -373,9 +374,9 @@ object ResourceProfile extends Logging { val defProf = new ResourceProfile(executorResources, taskResources) defProf.setToDefaultProfile() defaultProfile = Some(defProf) - logInfo("Default ResourceProfile created, executor resources: " + - s"${defProf.executorResources}, task resources: " + - s"${defProf.taskResources}") + logInfo(log"Default ResourceProfile created, executor resources: " + + log"${MDC(EXECUTOR_RESOURCES, defProf.executorResources)}, task resources: " + + log"${MDC(TASK_RESOURCES, defProf.taskResources)}") defProf } } diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala b/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala index 580a5b7bb07ac..6a6b5067f70f2 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala @@ -23,7 +23,8 @@ import scala.collection.mutable.HashMap import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.annotation.Evolving -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config.Tests._ import org.apache.spark.scheduler.{LiveListenerBus, SparkListenerResourceProfileAdded} import org.apache.spark.util.Utils @@ -140,7 +141,7 @@ private[spark] class ResourceProfileManager(sparkConf: SparkConf, if (putNewProfile) { // force the computation of 
maxTasks and limitingResource now so we don't have cost later rp.limitingResource(sparkConf) - logInfo(s"Added ResourceProfile id: ${rp.id}") + logInfo(log"Added ResourceProfile id: ${MDC(LogKeys.RESOURCE_PROFILE_ID, rp.id)}") listenerBus.post(SparkListenerResourceProfileAdded(rp)) } } diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala index 8718ce8ea0833..78c45cdc75418 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceUtils.scala @@ -28,7 +28,8 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.resource.ResourceDiscoveryPlugin -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.{EXECUTOR_CORES, RESOURCES_DISCOVERY_PLUGIN, SPARK_TASK_PREFIX} import org.apache.spark.internal.config.Tests.RESOURCES_WARNING_TESTING import org.apache.spark.util.ArrayImplicits._ @@ -454,13 +455,15 @@ private[spark] object ResourceUtils extends Logging { if (limitingResource.nonEmpty && !limitingResource.equals(ResourceProfile.CPUS)) { if ((taskCpus * maxTaskPerExec) < cores) { val resourceNumSlots = Math.floor(cores/taskCpus).toInt - val message = s"The configuration of cores (exec = ${cores} " + - s"task = ${taskCpus}, runnable tasks = ${resourceNumSlots}) will " + - s"result in wasted resources due to resource ${limitingResource} limiting the " + - s"number of runnable tasks per executor to: ${maxTaskPerExec}. Please adjust " + - "your configuration." + val message = log"The configuration of cores (exec = ${MDC(NUM_CORES, cores)} " + + log"task = ${MDC(NUM_TASK_CPUS, taskCpus)}, runnable tasks = " + + log"${MDC(NUM_RESOURCE_SLOTS, resourceNumSlots)}) will " + + log"result in wasted resources due to resource ${MDC(RESOURCE, limitingResource)} " + + log"limiting the number of runnable tasks per executor to: " + + log"${MDC(NUM_TASKS, maxTaskPerExec)}. Please adjust " + + log"your configuration." if (sparkConf.get(RESOURCES_WARNING_TESTING)) { - throw new SparkException(message) + throw new SparkException(message.message) } else { logWarning(message) } @@ -476,14 +479,16 @@ private[spark] object ResourceUtils extends Logging { val origTaskAmount = treq.amount val taskReqStr = s"${origTaskAmount}/${numParts}" val resourceNumSlots = (execAmount * numParts / taskAmount).toInt - val message = s"The configuration of resource: ${treq.resourceName} " + - s"(exec = ${execAmount}, task = ${taskReqStr}, " + - s"runnable tasks = ${resourceNumSlots}) will " + - s"result in wasted resources due to resource ${limitingResource} limiting the " + - s"number of runnable tasks per executor to: ${maxTaskPerExec}. Please adjust " + - "your configuration." + val message = log"The configuration of resource: " + + log"${MDC(RESOURCE_NAME, treq.resourceName)} " + + log"(exec = ${MDC(EXEC_AMOUNT, execAmount)}, " + + log"task = ${MDC(TASK_REQUIREMENTS, taskReqStr)}, " + + log"runnable tasks = ${MDC(NUM_RESOURCE_SLOTS, resourceNumSlots)}) will " + + log"result in wasted resources due to resource ${MDC(RESOURCE, limitingResource)} " + + log"limiting the number of runnable tasks per executor to: " + + log"${MDC(NUM_TASKS, maxTaskPerExec)}. Please adjust your configuration." 
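To make the warning assembled above concrete, here is the slot arithmetic it reports, with invented numbers: 16 cores at 1 CPU per task would allow 16 tasks, but 4 GPUs at 1 GPU per task cap the executor at 4 runnable tasks, leaving 12 cores idle. Just below, RESOURCES_WARNING_TESTING decides whether that message becomes an exception or only a logWarning.

object WastedResourceSlots {
  // Simplified form of the slot computation: how many tasks one resource allows per executor.
  def slots(execAmount: Long, amountPerTask: Double): Int =
    math.floor(execAmount / amountPerTask).toInt

  def main(args: Array[String]): Unit = {
    val cpuSlots = slots(execAmount = 16, amountPerTask = 1.0) // 16
    val gpuSlots = slots(execAmount = 4, amountPerTask = 1.0)  // 4, the limiting resource
    val runnableTasks = math.min(cpuSlots, gpuSlots)
    println(s"runnable tasks per executor = $runnableTasks, idle cores = ${cpuSlots - runnableTasks}")
  }
}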
if (sparkConf.get(RESOURCES_WARNING_TESTING)) { - throw new SparkException(message) + throw new SparkException(message.message) } else { logWarning(message) } diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala index 0e35842fece92..8acfef38659c0 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Dispatcher.scala @@ -25,7 +25,8 @@ import scala.jdk.CollectionConverters._ import scala.util.control.NonFatal import org.apache.spark.{SparkEnv, SparkException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.network.client.RpcResponseCallback import org.apache.spark.rpc._ @@ -123,7 +124,8 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte val name = iter.next postMessage(name, message, (e) => { e match { case e: RpcEnvStoppedException => logDebug(s"Message $message dropped. ${e.getMessage}") - case e: Throwable => logWarning(s"Message $message dropped. ${e.getMessage}") + case e: Throwable => + logWarning(log"Message ${MDC(MESSAGE, message)} dropped. ${MDC(ERROR, e.getMessage)}") }} )} } @@ -154,7 +156,8 @@ private[netty] class Dispatcher(nettyEnv: NettyRpcEnv, numUsableCores: Int) exte // cluster in spark shell. case re: RpcEnvStoppedException => logDebug(s"Message $message dropped. ${re.getMessage}") case e if SparkEnv.get.isStopped => - logWarning(s"Message $message dropped due to sparkEnv is stopped. ${e.getMessage}") + logWarning(log"Message ${MDC(MESSAGE, message)} dropped due to sparkEnv " + + log"is stopped. ${MDC(ERROR, e.getMessage)}") case e => throw e }) } diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala index b503c5a0f8089..0de67a65593b1 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala @@ -23,7 +23,7 @@ import scala.util.control.NonFatal import org.apache.spark.SparkException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.END_POINT +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rpc.{RpcAddress, RpcEndpoint, ThreadSafeRpcEndpoint} @@ -194,7 +194,8 @@ private[netty] class Inbox(val endpointName: String, val endpoint: RpcEndpoint) * Exposed for testing. 
*/ protected def onDrop(message: InboxMessage): Unit = { - logWarning(s"Drop $message because endpoint $endpointName is stopped") + logWarning(log"Drop ${MDC(MESSAGE, message)} " + + log"because endpoint ${MDC(END_POINT, endpointName)} is stopped") } /** diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/MessageLoop.scala b/core/src/main/scala/org/apache/spark/rpc/netty/MessageLoop.scala index 2d94ed5d05e1c..2fd1c6d7fe71e 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/MessageLoop.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/MessageLoop.scala @@ -23,7 +23,7 @@ import scala.util.control.NonFatal import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.ERROR +import org.apache.spark.internal.LogKeys.ERROR import org.apache.spark.internal.config.EXECUTOR_ID import org.apache.spark.internal.config.Network._ import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcEndpoint} diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala index 7909f2327cdf7..c2688610fe8b1 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala @@ -30,7 +30,8 @@ import scala.util.{DynamicVariable, Failure, Success, Try} import scala.util.control.NonFatal import org.apache.spark.{SecurityManager, SparkConf, SparkContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.EXECUTOR_ID import org.apache.spark.internal.config.Network._ import org.apache.spark.network.TransportContext @@ -216,7 +217,7 @@ private[netty] class NettyRpcEnv( if (!promise.tryFailure(e)) { e match { case e : RpcEnvStoppedException => logDebug(s"Ignored failure: $e") - case _ => logWarning(s"Ignored failure: $e") + case _ => logWarning(log"Ignored failure: ${MDC(ERROR, e)}") } } } @@ -225,7 +226,7 @@ private[netty] class NettyRpcEnv( case RpcFailure(e) => onFailure(e) case rpcReply => if (!promise.trySuccess(rpcReply)) { - logWarning(s"Ignored message: $reply") + logWarning(log"Ignored message: ${MDC(MESSAGE, reply)}") } } diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Outbox.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Outbox.scala index 205e6e966866f..b212a818ffc49 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/Outbox.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/Outbox.scala @@ -46,7 +46,7 @@ private[netty] case class OneWayOutboxMessage(content: ByteBuffer) extends Outbo override def onFailure(e: Throwable): Unit = { e match { case e1: RpcEnvStoppedException => logDebug(e1.getMessage) - case e1: Throwable => logWarning(s"Failed to send one-way RPC.", e1) + case e1: Throwable => logWarning(log"Failed to send one-way RPC.", e1) } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala index 271fc9ac92ba6..16e9211b54851 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/AsyncEventQueue.scala @@ -24,7 +24,7 @@ import com.codahale.metrics.{Gauge, Timer} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.EVENT_QUEUE +import 
org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.util.Utils @@ -187,8 +187,9 @@ private class AsyncEventQueue( if (lastReportTimestamp.compareAndSet(lastReportTime, curTime)) { val previous = new java.util.Date(lastReportTime) lastDroppedEventsCounter = droppedEventsCount - logWarning(s"Dropped $droppedCountIncreased events from $name since " + - s"${if (lastReportTime == 0) "the application started" else s"$previous"}.") + logWarning(log"Dropped ${MDC(NUM_EVENTS, droppedCountIncreased)} events from " + + log"${MDC(EVENT_NAME, name)} since " + + (if (lastReportTime == 0) log"the application started" else log"${MDC(TIME, previous)}")) } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 41cbd795b7e5e..f50e8bd25fec8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -37,7 +37,7 @@ import org.apache.spark.broadcast.Broadcast import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.{ACCUMULATOR_ID, CLASS_NAME, JOB_ID, PARTITION_ID, STAGE_ID, TASK_ID} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.{LEGACY_ABORT_STAGE_AFTER_KILL_TASKS, RDD_CACHE_VISIBILITY_TRACKING_ENABLED} import org.apache.spark.internal.config.Tests.TEST_NO_STAGE_RETRY import org.apache.spark.network.shuffle.{BlockStoreClient, MergeFinalizerListener} @@ -535,8 +535,9 @@ private[spark] class DAGScheduler( if (!mapOutputTracker.containsShuffle(shuffleDep.shuffleId)) { // Kind of ugly: need to register RDDs with the cache and map output tracker here // since we can't do it in the RDD constructor because # of partitions is unknown - logInfo(s"Registering RDD ${rdd.id} (${rdd.getCreationSite}) as input to " + - s"shuffle ${shuffleDep.shuffleId}") + logInfo(log"Registering RDD ${MDC(RDD_ID, rdd.id)} " + + log"(${MDC(CREATION_SITE, rdd.getCreationSite)}) as input to " + + log"shuffle ${MDC(SHUFFLE_ID, shuffleDep.shuffleId)}") mapOutputTracker.registerShuffle(shuffleDep.shuffleId, rdd.partitions.length, shuffleDep.partitioner.numPartitions) } @@ -1097,7 +1098,7 @@ private[spark] class DAGScheduler( * Cancel a job that is running or waiting in the queue. 
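The cancellation entry points instrumented in the methods that follow are normally reached through SparkContext; a usage sketch, assuming a Spark 3.5-or-later session (the application name and the group/tag strings are invented for the example).

import org.apache.spark.sql.SparkSession

object CancelJobsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("cancel-demo").getOrCreate()
    val sc = spark.sparkContext

    // Group-based cancellation: every job submitted from this thread carries the group id.
    sc.setJobGroup("nightly-etl", "nightly ETL jobs", interruptOnCancel = true)
    // ... submit actions ...
    sc.cancelJobGroup("nightly-etl")   // reaches DAGScheduler.cancelJobGroup below

    // Tag-based cancellation, independent of job groups.
    sc.addJobTag("ad-hoc")
    // ... submit actions ...
    sc.cancelJobsWithTag("ad-hoc")     // reaches DAGScheduler.cancelJobsWithTag below

    spark.stop()
  }
}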
*/ def cancelJob(jobId: Int, reason: Option[String]): Unit = { - logInfo("Asked to cancel job " + jobId) + logInfo(log"Asked to cancel job ${MDC(JOB_ID, jobId)}") eventProcessLoop.post(JobCancelled(jobId, reason)) } @@ -1106,7 +1107,8 @@ private[spark] class DAGScheduler( * @param cancelFutureJobs if true, future submitted jobs in this job group will be cancelled */ def cancelJobGroup(groupId: String, cancelFutureJobs: Boolean = false): Unit = { - logInfo(s"Asked to cancel job group $groupId with cancelFutureJobs=$cancelFutureJobs") + logInfo(log"Asked to cancel job group ${MDC(GROUP_ID, groupId)} with " + + log"cancelFutureJobs=${MDC(CANCEL_FUTURE_JOBS, cancelFutureJobs)}") eventProcessLoop.post(JobGroupCancelled(groupId, cancelFutureJobs)) } @@ -1115,7 +1117,7 @@ private[spark] class DAGScheduler( */ def cancelJobsWithTag(tag: String): Unit = { SparkContext.throwIfInvalidTag(tag) - logInfo(s"Asked to cancel jobs with tag $tag") + logInfo(log"Asked to cancel jobs with tag ${MDC(TAG, tag)}") eventProcessLoop.post(JobTagCancelled(tag)) } @@ -1209,7 +1211,7 @@ private[spark] class DAGScheduler( // If cancelFutureJobs is true, store the cancelled job group id into internal states. // When a job belonging to this job group is submitted, skip running it. if (cancelFutureJobs) { - logInfo(s"Add job group $groupId into cancelled job groups") + logInfo(log"Add job group ${MDC(GROUP_ID, groupId)} into cancelled job groups") cancelledJobGroups.add(groupId) } @@ -1221,7 +1223,8 @@ private[spark] class DAGScheduler( } } if (activeInGroup.isEmpty && !cancelFutureJobs) { - logWarning(s"Failed to cancel job group $groupId. Cannot find active jobs for it.") + logWarning(log"Failed to cancel job group ${MDC(GROUP_ID, groupId)}. " + + log"Cannot find active jobs for it.") } val jobIds = activeInGroup.map(_.jobId) jobIds.foreach(handleJobCancellation(_, @@ -1313,7 +1316,7 @@ private[spark] class DAGScheduler( if (jobGroupIdOpt.exists(cancelledJobGroups.contains(_))) { listener.jobFailed( SparkCoreErrors.sparkJobCancelledAsPartOfJobGroupError(jobId, jobGroupIdOpt.get)) - logInfo(s"Skip running a job that belongs to the cancelled job group ${jobGroupIdOpt.get}.") + logInfo(log"Skip running a job that belongs to the cancelled job group ${MDC(GROUP_ID, jobGroupIdOpt.get)}") return } @@ -1328,9 +1331,11 @@ private[spark] class DAGScheduler( val numCheckFailures = barrierJobIdToNumTasksCheckFailures.compute(jobId, (_: Int, value: Int) => value + 1) - logWarning(s"Barrier stage in job $jobId requires ${e.requiredConcurrentTasks} slots, " + - s"but only ${e.maxConcurrentTasks} are available. " + - s"Will retry up to ${maxFailureNumTasksCheck - numCheckFailures + 1} more times") + logWarning(log"Barrier stage in job ${MDC(JOB_ID, jobId)} " + + log"requires ${MDC(NUM_SLOTS, e.requiredConcurrentTasks)} slots, " + + log"but only ${MDC(MAX_SLOTS, e.maxConcurrentTasks)} are available. 
" + + log"Will retry up to ${MDC(NUM_RETRIES, maxFailureNumTasksCheck - numCheckFailures + 1)} " + + log"more times") if (numCheckFailures <= maxFailureNumTasksCheck) { messageScheduler.schedule( @@ -1350,7 +1355,7 @@ private[spark] class DAGScheduler( } case e: Exception => - logWarning("Creating new stage failed due to exception - job: " + jobId, e) + logWarning(log"Creating new stage failed due to exception - job: ${MDC(JOB_ID, jobId)}", e) listener.jobFailed(e) return } @@ -1359,11 +1364,13 @@ private[spark] class DAGScheduler( val job = new ActiveJob(jobId, finalStage, callSite, listener, artifacts, properties) clearCacheLocs() - logInfo("Got job %s (%s) with %d output partitions".format( - job.jobId, callSite.shortForm, partitions.length)) - logInfo("Final stage: " + finalStage + " (" + finalStage.name + ")") - logInfo("Parents of final stage: " + finalStage.parents) - logInfo("Missing parents: " + getMissingParentStages(finalStage)) + logInfo( + log"Got job ${MDC(JOB_ID, job.jobId)} (${MDC(CALL_SITE_SHORT_FORM, callSite.shortForm)}) " + + log"with ${MDC(NUM_PARTITIONS, partitions.length)} output partitions") + logInfo(log"Final stage: ${MDC(STAGE_ID, finalStage)} " + + log"(${MDC(STAGE_NAME, finalStage.name)})") + logInfo(log"Parents of final stage: ${MDC(STAGE_ID, finalStage.parents)}") + logInfo(log"Missing parents: ${MDC(MISSING_PARENT_STAGES, getMissingParentStages(finalStage))}") val jobSubmissionTime = clock.getTimeMillis() jobIdToActiveJob(jobId) = job @@ -1393,18 +1400,20 @@ private[spark] class DAGScheduler( finalStage = getOrCreateShuffleMapStage(dependency, jobId) } catch { case e: Exception => - logWarning("Creating new stage failed due to exception - job: " + jobId, e) + logWarning(log"Creating new stage failed due to exception - job: ${MDC(JOB_ID, jobId)}", e) listener.jobFailed(e) return } val job = new ActiveJob(jobId, finalStage, callSite, listener, artifacts, properties) clearCacheLocs() - logInfo("Got map stage job %s (%s) with %d output partitions".format( - jobId, callSite.shortForm, dependency.rdd.partitions.length)) - logInfo("Final stage: " + finalStage + " (" + finalStage.name + ")") - logInfo("Parents of final stage: " + finalStage.parents) - logInfo("Missing parents: " + getMissingParentStages(finalStage)) + logInfo(log"Got map stage job ${MDC(JOB_ID, jobId)} " + + log"(${MDC(CALL_SITE_SHORT_FORM, callSite.shortForm)}) with " + + log"${MDC(NUM_PARTITIONS, dependency.rdd.partitions.length)} output partitions") + logInfo(log"Final stage: ${MDC(STAGE_ID, finalStage)} " + + log"(${MDC(STAGE_NAME, finalStage.name)})") + logInfo(log"Parents of final stage: ${MDC(PARENT_STAGES, finalStage.parents.toString)}") + logInfo(log"Missing parents: ${MDC(MISSING_PARENT_STAGES, getMissingParentStages(finalStage))}") val jobSubmissionTime = clock.getTimeMillis() jobIdToActiveJob(jobId) = job @@ -1441,7 +1450,8 @@ private[spark] class DAGScheduler( val missing = getMissingParentStages(stage).sortBy(_.id) logDebug("missing: " + missing) if (missing.isEmpty) { - logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents") + logInfo(log"Submitting ${MDC(STAGE_ID, stage)} (${MDC(RDD_ID, stage.rdd)}), " + + log"which has no missing parents") submitMissingTasks(stage, jobId.get) } else { for (parent <- missing) { @@ -1492,13 +1502,16 @@ private[spark] class DAGScheduler( val shuffleId = stage.shuffleDep.shuffleId val shuffleMergeId = stage.shuffleDep.shuffleMergeId if (stage.shuffleDep.shuffleMergeEnabled) { - logInfo(s"Shuffle merge enabled before 
starting the stage for $stage with shuffle" + - s" $shuffleId and shuffle merge $shuffleMergeId with" + - s" ${stage.shuffleDep.getMergerLocs.size} merger locations") + logInfo(log"Shuffle merge enabled before starting the stage for ${MDC(STAGE_ID, stage)}" + + log" with shuffle ${MDC(SHUFFLE_ID, shuffleId)} and shuffle merge" + + log" ${MDC(SHUFFLE_MERGE_ID, shuffleMergeId)} with" + + log" ${MDC(NUM_MERGER_LOCATIONS, stage.shuffleDep.getMergerLocs.size.toString)} merger locations") } else { - logInfo(s"Shuffle merge disabled for $stage with shuffle $shuffleId" + - s" and shuffle merge $shuffleMergeId, but can get enabled later adaptively" + - s" once enough mergers are available") + logInfo(log"Shuffle merge disabled for ${MDC(STAGE_ID, stage)} with " + + log"shuffle ${MDC(SHUFFLE_ID, shuffleId)} and " + + log"shuffle merge ${MDC(SHUFFLE_MERGE_ID, shuffleMergeId)}, " + + log"but can get enabled later adaptively once enough " + + log"mergers are available") } } @@ -1555,8 +1568,8 @@ private[spark] class DAGScheduler( // merger locations but the corresponding shuffle map stage did not complete // successfully, we would still enable push for its retry. s.shuffleDep.setShuffleMergeAllowed(false) - logInfo(s"Push-based shuffle disabled for $stage (${stage.name}) since it" + - " is already shuffle merge finalized") + logInfo(log"Push-based shuffle disabled for ${MDC(STAGE_ID, stage)} " + + log"(${MDC(STAGE_NAME, stage.name)}) since it is already shuffle merge finalized") } } case s: ResultStage => @@ -1622,8 +1635,8 @@ private[spark] class DAGScheduler( } if (taskBinaryBytes.length > TaskSetManager.TASK_SIZE_TO_WARN_KIB * 1024) { - logWarning(s"Broadcasting large task binary with size " + - s"${Utils.bytesToString(taskBinaryBytes.length)}") + logWarning(log"Broadcasting large task binary with size " + + log"${MDC(NUM_BYTES, Utils.bytesToString(taskBinaryBytes.length))}") } taskBinary = sc.broadcast(taskBinaryBytes) } catch { @@ -1678,8 +1691,9 @@ private[spark] class DAGScheduler( } if (tasks.nonEmpty) { - logInfo(s"Submitting ${tasks.size} missing tasks from $stage (${stage.rdd}) (first 15 " + - s"tasks are for partitions ${tasks.take(15).map(_.partitionId)})") + logInfo(log"Submitting ${MDC(NUM_TASKS, tasks.size)} missing tasks from " + + log"${MDC(STAGE_ID, stage)} (${MDC(RDD_ID, stage.rdd)}) (first 15 tasks are " + + log"for partitions ${MDC(PARTITION_IDS, tasks.take(15).map(_.partitionId))})") val shuffleId = stage match { case s: ShuffleMapStage => Some(s.shuffleDep.shuffleId) case _: ResultStage => None @@ -1748,9 +1762,10 @@ private[spark] class DAGScheduler( case Some(accum) => accum.getClass.getName case None => "Unknown class" } - logError( - log"Failed to update accumulator ${MDC(ACCUMULATOR_ID, id)} (${MDC(CLASS_NAME, accumClassName)}) " + - log"for task ${MDC(PARTITION_ID, task.partitionId)}", e) + logError( + log"Failed to update accumulator ${MDC(ACCUMULATOR_ID, id)} " + + log"(${MDC(CLASS_NAME, accumClassName)}) for task " + + log"${MDC(PARTITION_ID, task.partitionId)}", e) } } } @@ -1791,8 +1806,10 @@ private[spark] class DAGScheduler( shouldInterruptThread.toBoolean } catch { case e: IllegalArgumentException => - logWarning(s"${SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL} in Job ${job.jobId} " + - s"is invalid: $shouldInterruptThread. Using 'false' instead", e) + logWarning(log"${MDC(CONFIG, SparkContext.SPARK_JOB_INTERRUPT_ON_CANCEL)} " + + log"in Job ${MDC(JOB_ID, job.jobId)} " + + log"is invalid: ${MDC(CONFIG2, shouldInterruptThread)}. 
" + + log"Using 'false' instead", e) false } } @@ -1921,8 +1938,8 @@ private[spark] class DAGScheduler( try { // killAllTaskAttempts will fail if a SchedulerBackend does not implement // killTask. - logInfo(s"Job ${job.jobId} is finished. Cancelling potential speculative " + - "or zombie tasks for this job") + logInfo(log"Job ${MDC(JOB_ID, job.jobId)} is finished. Cancelling " + + log"potential speculative or zombie tasks for this job") // ResultStage is only used by this job. It's safe to kill speculative or // zombie tasks in this stage. taskScheduler.killAllTaskAttempts( @@ -1931,7 +1948,8 @@ private[spark] class DAGScheduler( reason = "Stage finished") } catch { case e: UnsupportedOperationException => - logWarning(s"Could not cancel tasks for stage $stageId", e) + logWarning(log"Could not cancel tasks " + + log"for stage ${MDC(STAGE_ID, stageId)}", e) } listenerBus.post( SparkListenerJobEnd(job.jobId, clock.getTimeMillis(), JobSucceeded)) @@ -1948,7 +1966,7 @@ private[spark] class DAGScheduler( } } case None => - logInfo("Ignoring result from " + rt + " because its job has finished") + logInfo(log"Ignoring result from ${MDC(RESULT, rt)} because its job has finished") } case smt: ShuffleMapTask => @@ -1963,7 +1981,8 @@ private[spark] class DAGScheduler( logDebug("ShuffleMapTask finished on " + execId) if (executorFailureEpoch.contains(execId) && smt.epoch <= executorFailureEpoch(execId)) { - logInfo(s"Ignoring possibly bogus $smt completion from executor $execId") + logInfo(log"Ignoring possibly bogus ${MDC(STAGE_ID, smt)} completion from " + + log"executor ${MDC(EXECUTOR_ID, execId)}") } else { // The epoch of the task is acceptable (i.e., the task was launched after the most // recent failure we're aware of for the executor), so mark the task's output as @@ -1972,7 +1991,7 @@ private[spark] class DAGScheduler( shuffleStage.shuffleDep.shuffleId, smt.partitionId, status) } } else { - logInfo(s"Ignoring $smt completion from an older attempt of indeterminate stage") + logInfo(log"Ignoring ${MDC(TASK_NAME, smt)} completion from an older attempt of indeterminate stage") } if (runningStages.contains(shuffleStage) && shuffleStage.pendingPartitions.isEmpty) { @@ -1990,17 +2009,22 @@ private[spark] class DAGScheduler( val mapStage = shuffleIdToMapStage(shuffleId) if (failedStage.latestInfo.attemptNumber() != task.stageAttemptId) { - logInfo(s"Ignoring fetch failure from $task as it's from $failedStage attempt" + - s" ${task.stageAttemptId} and there is a more recent attempt for that stage " + - s"(attempt ${failedStage.latestInfo.attemptNumber()}) running") + logInfo(log"Ignoring fetch failure from " + + log"${MDC(TASK_ID, task)} as it's from " + + log"${MDC(STAGE_ID, failedStage)} attempt " + + log"${MDC(STAGE_ATTEMPT, task.stageAttemptId)} and there is a more recent attempt for " + + log"that stage (attempt " + + log"${MDC(NUM_ATTEMPT, failedStage.latestInfo.attemptNumber())}) running") } else { val ignoreStageFailure = ignoreDecommissionFetchFailure && isExecutorDecommissioningOrDecommissioned(taskScheduler, bmAddress) if (ignoreStageFailure) { - logInfo(s"Ignoring fetch failure from $task of $failedStage attempt " + - s"${task.stageAttemptId} when count ${config.STAGE_MAX_CONSECUTIVE_ATTEMPTS.key} " + - s"as executor ${bmAddress.executorId} is decommissioned and " + - s" ${config.STAGE_IGNORE_DECOMMISSION_FETCH_FAILURE.key}=true") + logInfo(log"Ignoring fetch failure from ${MDC(TASK_NAME, task)} of " + + log"${MDC(STAGE, failedStage)} attempt " + + log"${MDC(STAGE_ATTEMPT, 
task.stageAttemptId)} when count " + + log"${MDC(MAX_ATTEMPTS, config.STAGE_MAX_CONSECUTIVE_ATTEMPTS.key)} " + + log"as executor ${MDC(EXECUTOR_ID, bmAddress.executorId)} is decommissioned and " + + log"${MDC(CONFIG, config.STAGE_IGNORE_DECOMMISSION_FETCH_FAILURE.key)}=true") } else { failedStage.failedAttemptIds.add(task.stageAttemptId) } @@ -2013,8 +2037,10 @@ private[spark] class DAGScheduler( // multiple tasks running concurrently on different executors). In that case, it is // possible the fetch failure has already been handled by the scheduler. if (runningStages.contains(failedStage)) { - logInfo(s"Marking $failedStage (${failedStage.name}) as failed " + - s"due to a fetch failure from $mapStage (${mapStage.name})") + logInfo(log"Marking ${MDC(FAILED_STAGE, failedStage)} " + + log"(${MDC(FAILED_STAGE_NAME, failedStage.name)}) as failed " + + log"due to a fetch failure from ${MDC(STAGE, mapStage)} " + + log"(${MDC(STAGE_NAME, mapStage.name)})") markStageAsFinished(failedStage, errorMessage = Some(failureMessage), willRetry = !shouldAbortStage) } else { @@ -2142,9 +2168,9 @@ private[spark] class DAGScheduler( case _ => } - logInfo(s"The shuffle map stage $mapStage with indeterminate output was failed, " + - s"we will roll back and rerun below stages which include itself and all its " + - s"indeterminate child stages: $rollingBackStages") + logInfo(log"The shuffle map stage ${MDC(SHUFFLE_ID, mapStage)} with indeterminate output was failed, " + + log"we will roll back and rerun below stages which include itself and all its " + + log"indeterminate child stages: ${MDC(STAGES, rollingBackStages)}") } // We expect one executor failure to trigger many FetchFailures in rapid succession, @@ -2156,9 +2182,9 @@ private[spark] class DAGScheduler( // producing a resubmit for each failed stage makes debugging and logging a little // simpler while not producing an overwhelming number of scheduler events. logInfo( - s"Resubmitting $mapStage (${mapStage.name}) and " + - s"$failedStage (${failedStage.name}) due to fetch failure" - ) + log"Resubmitting ${MDC(STAGE, mapStage)} " + + log"(${MDC(STAGE_NAME, mapStage.name)}) and ${MDC(FAILED_STAGE, failedStage)} " + + log"(${MDC(FAILED_STAGE_NAME, failedStage.name)}) due to fetch failure") messageScheduler.schedule( new Runnable { override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages) @@ -2217,12 +2243,13 @@ private[spark] class DAGScheduler( // Always fail the current stage and retry all the tasks when a barrier task fail. 
val failedStage = stageIdToStage(task.stageId) if (failedStage.latestInfo.attemptNumber() != task.stageAttemptId) { - logInfo(s"Ignoring task failure from $task as it's from $failedStage attempt" + - s" ${task.stageAttemptId} and there is a more recent attempt for that stage " + - s"(attempt ${failedStage.latestInfo.attemptNumber()}) running") + logInfo(log"Ignoring task failure from ${MDC(TASK_NAME, task)} as it's from " + + log"${MDC(FAILED_STAGE, failedStage)} attempt ${MDC(STAGE_ATTEMPT, task.stageAttemptId)} " + + log"and there is a more recent attempt for that stage (attempt " + + log"${MDC(NUM_ATTEMPT, failedStage.latestInfo.attemptNumber())}) running") } else { - logInfo(s"Marking $failedStage (${failedStage.name}) as failed due to a barrier task " + - "failed.") + logInfo(log"Marking ${MDC(STAGE_ID, failedStage.id)} (${MDC(STAGE_NAME, failedStage.name)}) " + + log"as failed due to a barrier task failed.") val message = s"Stage failed because barrier task $task finished unsuccessfully.\n" + failure.toErrorString try { @@ -2236,7 +2263,7 @@ private[spark] class DAGScheduler( case e: UnsupportedOperationException => // Cannot continue with barrier stage if failed to cancel zombie barrier tasks. // TODO SPARK-24877 leave the zombie tasks and ignore their completion events. - logWarning(s"Could not kill all tasks for stage $stageId", e) + logWarning(log"Could not kill all tasks for stage ${MDC(STAGE_ID, stageId)}", e) abortStage(failedStage, "Could not kill zombie barrier tasks for stage " + s"$failedStage (${failedStage.name})", Some(e)) } @@ -2277,8 +2304,8 @@ private[spark] class DAGScheduler( val noResubmitEnqueued = !failedStages.contains(failedStage) failedStages += failedStage if (noResubmitEnqueued) { - logInfo(s"Resubmitting $failedStage (${failedStage.name}) due to barrier stage " + - "failure.") + logInfo(log"Resubmitting ${MDC(FAILED_STAGE, failedStage)} " + + log"(${MDC(FAILED_STAGE_NAME, failedStage.name)}) due to barrier stage failure.") messageScheduler.schedule(new Runnable { override def run(): Unit = eventProcessLoop.post(ResubmitFailedStages) }, DAGScheduler.RESUBMIT_TIMEOUT, TimeUnit.MILLISECONDS) @@ -2355,8 +2382,8 @@ private[spark] class DAGScheduler( // delay should be 0 and registerMergeResults should be true. assert(delay == 0 && registerMergeResults) if (task.getDelay(TimeUnit.NANOSECONDS) > 0 && task.cancel(false)) { - logInfo(s"$stage (${stage.name}) scheduled for finalizing shuffle merge immediately " + - s"after cancelling previously scheduled task.") + logInfo(log"${MDC(STAGE, stage)} (${MDC(STAGE_NAME, stage.name)}) scheduled " + + log"for finalizing shuffle merge immediately after cancelling previously scheduled task.") shuffleDep.setFinalizeTask( shuffleMergeFinalizeScheduler.schedule( new Runnable { @@ -2367,13 +2394,15 @@ private[spark] class DAGScheduler( ) ) } else { - logInfo(s"$stage (${stage.name}) existing scheduled task for finalizing shuffle merge" + - s"would either be in-progress or finished. No need to schedule shuffle merge" + - s" finalization again.") + logInfo( + log"${MDC(STAGE, stage)} (${MDC(STAGE_NAME, stage.name)}) existing scheduled task " + + log"for finalizing shuffle merge would either be in-progress or finished. " + + log"No need to schedule shuffle merge finalization again.") } case None => // If no previous finalization task is scheduled, schedule the finalization task. 
- logInfo(s"$stage (${stage.name}) scheduled for finalizing shuffle merge in $delay s") + logInfo(log"${MDC(STAGE, stage)} (${MDC(STAGE_NAME, stage.name)}) scheduled for " + + log"finalizing shuffle merge in ${MDC(DELAY, delay * 1000L)} ms") shuffleDep.setFinalizeTask( shuffleMergeFinalizeScheduler.schedule( new Runnable { @@ -2402,8 +2431,9 @@ private[spark] class DAGScheduler( private[scheduler] def finalizeShuffleMerge( stage: ShuffleMapStage, registerMergeResults: Boolean = true): Unit = { - logInfo(s"$stage (${stage.name}) finalizing the shuffle merge with registering merge " + - s"results set to $registerMergeResults") + logInfo( + log"${MDC(STAGE, stage)} (${MDC(STAGE_NAME, stage.name)}) finalizing the shuffle merge with" + + log" registering merge results set to ${MDC(REGISTER_MERGE_RESULTS, registerMergeResults)}") val shuffleId = stage.shuffleDep.shuffleId val shuffleMergeId = stage.shuffleDep.shuffleMergeId val numMergers = stage.shuffleDep.getMergerLocs.length @@ -2451,8 +2481,9 @@ private[spark] class DAGScheduler( } override def onShuffleMergeFailure(e: Throwable): Unit = { - logWarning(s"Exception encountered when trying to finalize shuffle " + - s"merge on ${shuffleServiceLoc.host} for shuffle $shuffleId", e) + logWarning(log"Exception encountered when trying to finalize shuffle " + + log"merge on ${MDC(HOST_PORT, shuffleServiceLoc.host)} " + + log"for shuffle ${MDC(SHUFFLE_ID, shuffleId)}", e) // Do not fail the future as this would cause dag scheduler to prematurely // give up on waiting for merge results from the remaining shuffle services // if one fails @@ -2472,8 +2503,9 @@ private[spark] class DAGScheduler( } catch { case _: TimeoutException => timedOut = true - logInfo(s"Timed out on waiting for merge results from all " + - s"$numMergers mergers for shuffle $shuffleId") + logInfo(log"Timed out on waiting for merge results from all " + + log"${MDC(NUM_MERGERS, numMergers)} mergers for " + + log"shuffle ${MDC(SHUFFLE_ID, shuffleId)}") } finally { if (timedOut || !registerMergeResults) { cancelFinalizeShuffleMergeFutures(scheduledFutures, @@ -2504,9 +2536,9 @@ private[spark] class DAGScheduler( private def processShuffleMapStageCompletion(shuffleStage: ShuffleMapStage): Unit = { markStageAsFinished(shuffleStage) logInfo("looking for newly runnable stages") - logInfo("running: " + runningStages) - logInfo("waiting: " + waitingStages) - logInfo("failed: " + failedStages) + logInfo(log"running: ${MDC(STAGES, runningStages)}") + logInfo(log"waiting: ${MDC(STAGES, waitingStages)}") + logInfo(log"failed: ${MDC(STAGES, failedStages)}") // This call to increment the epoch may not be strictly necessary, but it is retained // for now in order to minimize the changes in behavior from an earlier version of the @@ -2522,9 +2554,10 @@ private[spark] class DAGScheduler( if (!shuffleStage.isAvailable) { // Some tasks had failed; let's resubmit this shuffleStage. 
// TODO: Lower-level scheduler should also deal with this - logInfo("Resubmitting " + shuffleStage + " (" + shuffleStage.name + - ") because some of its tasks had failed: " + - shuffleStage.findMissingPartitions().mkString(", ")) + logInfo(log"Resubmitting ${MDC(STAGE, shuffleStage)} " + + log"(${MDC(STAGE_NAME, shuffleStage.name)}) " + + log"because some of its tasks had failed: " + + log"${MDC(PARTITION_IDS, shuffleStage.findMissingPartitions().mkString(", "))}") submitStage(shuffleStage) } else { markMapStageJobsAsFinished(shuffleStage) @@ -2596,7 +2629,7 @@ private[spark] class DAGScheduler( } private def handleResubmittedFailure(task: Task[_], stage: Stage): Unit = { - logInfo(s"Resubmitted $task, so marking it as still running.") + logInfo(log"Resubmitted ${MDC(TASK_NAME, task)}, so marking it as still running.") stage match { case sms: ShuffleMapStage => sms.pendingPartitions += task.partitionId @@ -2672,7 +2705,7 @@ private[spark] class DAGScheduler( if (!isShuffleMerger && (!executorFailureEpoch.contains(execId) || executorFailureEpoch(execId) < currentEpoch)) { executorFailureEpoch(execId) = currentEpoch - logInfo(s"Executor lost: $execId (epoch $currentEpoch)") + logInfo(log"Executor lost: ${MDC(EXECUTOR_ID, execId)} (epoch ${MDC(EPOCH, currentEpoch)})") if (pushBasedShuffleEnabled) { // Remove fetchFailed host in the shuffle push merger list for push based shuffle hostToUnregisterOutputs.foreach( @@ -2696,10 +2729,12 @@ private[spark] class DAGScheduler( if (remove) { hostToUnregisterOutputs match { case Some(host) => - logInfo(s"Shuffle files lost for host: $host (epoch $currentEpoch)") + logInfo(log"Shuffle files lost for host: ${MDC(HOST, host)} (epoch " + + log"${MDC(EPOCH, currentEpoch)}") mapOutputTracker.removeOutputsOnHost(host) case None => - logInfo(s"Shuffle files lost for executor: $execId (epoch $currentEpoch)") + logInfo(log"Shuffle files lost for executor: ${MDC(EXECUTOR_ID, execId)} " + + log"(epoch ${MDC(EPOCH, currentEpoch)})") mapOutputTracker.removeOutputsOnExecutor(execId) } } @@ -2721,7 +2756,8 @@ private[spark] class DAGScheduler( workerId: String, host: String, message: String): Unit = { - logInfo("Shuffle files lost for worker %s on host %s".format(workerId, host)) + logInfo(log"Shuffle files lost for worker ${MDC(WORKER_ID, workerId)} " + + log"on host ${MDC(HOST, host)}") mapOutputTracker.removeOutputsOnHost(host) clearCacheLocs() } @@ -2729,7 +2765,7 @@ private[spark] class DAGScheduler( private[scheduler] def handleExecutorAdded(execId: String, host: String): Unit = { // remove from executorFailureEpoch(execId) ? 
     if (executorFailureEpoch.contains(execId)) {
-      logInfo("Host added was in lost list earlier: " + host)
+      logInfo(log"Host added was in lost list earlier: ${MDC(HOST, host)}")
       executorFailureEpoch -= execId
     }
     shuffleFileLostEpoch -= execId
@@ -2742,10 +2778,10 @@ private[spark] class DAGScheduler(
     }.foreach { case (_, stage: ShuffleMapStage) =>
       configureShufflePushMergerLocations(stage)
       if (stage.shuffleDep.getMergerLocs.nonEmpty) {
-        logInfo(s"Shuffle merge enabled adaptively for $stage with shuffle" +
-          s" ${stage.shuffleDep.shuffleId} and shuffle merge" +
-          s" ${stage.shuffleDep.shuffleMergeId} with ${stage.shuffleDep.getMergerLocs.size}" +
-          s" merger locations")
+        logInfo(log"Shuffle merge enabled adaptively for ${MDC(STAGE, stage)} with shuffle" +
+          log" ${MDC(SHUFFLE_ID, stage.shuffleDep.shuffleId)} and shuffle merge" +
+          log" ${MDC(SHUFFLE_MERGE_ID, stage.shuffleDep.shuffleMergeId)} with " +
+          log"${MDC(NUM_MERGER_LOCATIONS, stage.shuffleDep.getMergerLocs.size)} merger locations")
       }
     }
   }
@@ -2765,7 +2801,7 @@ private[spark] class DAGScheduler(
           handleJobCancellation(jobId, Option(reasonStr))
         }
       case None =>
-        logInfo("No active jobs to kill for Stage " + stageId)
+        logInfo(log"No active jobs to kill for Stage ${MDC(STAGE_ID, stageId)}")
     }
   }
@@ -2788,11 +2824,12 @@ private[spark] class DAGScheduler(
       errorMessage: Option[String] = None,
       willRetry: Boolean = false): Unit = {
     val serviceTime = stage.latestInfo.submissionTime match {
-      case Some(t) => "%.03f".format((clock.getTimeMillis() - t) / 1000.0)
+      case Some(t) => clock.getTimeMillis() - t
       case _ => "Unknown"
     }
     if (errorMessage.isEmpty) {
-      logInfo("%s (%s) finished in %s s".format(stage, stage.name, serviceTime))
+      logInfo(log"${MDC(STAGE, stage)} (${MDC(STAGE_NAME, stage.name)}) " +
+        log"finished in ${MDC(TIME_UNITS, serviceTime)} ms")
       stage.latestInfo.completionTime = Some(clock.getTimeMillis())
       // Clear failure count for this stage, now that it's succeeded.
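
// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: the hunks above all follow the same
// migration, replacing s"..." interpolation with the structured log"..."
// interpolator plus MDC keys. A minimal sketch of the target pattern, assuming
// spark-core 4.x with org.apache.spark.internal.{Logging, MDC, LogKeys} on the
// classpath; the class and method names here are hypothetical.
import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.internal.LogKeys.{STAGE_ID, TIME_UNITS}

class StageTimingLogger extends Logging {
  def reportFinished(stageId: Int, submittedAtMs: Long, nowMs: Long): Unit = {
    // Raw milliseconds, matching the markStageAsFinished change just above.
    val elapsedMs = nowMs - submittedAtMs
    // Each ${MDC(KEY, value)} renders as plain text in the message and is also
    // carried as a key/value pair for the structured log output; multiple
    // log"..." parts are concatenated with `+`.
    logInfo(log"Stage ${MDC(STAGE_ID, stageId)} " +
      log"finished in ${MDC(TIME_UNITS, elapsedMs)} ms")
  }
}
// ---------------------------------------------------------------------------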
@@ -2802,7 +2839,8 @@ private[spark] class DAGScheduler( stage.clearFailures() } else { stage.latestInfo.stageFailed(errorMessage.get) - logInfo(s"$stage (${stage.name}) failed in $serviceTime s due to ${errorMessage.get}") + logInfo(log"${MDC(STAGE, stage)} (${MDC(STAGE_NAME, stage.name)}) failed in " + + log"${MDC(TIME_UNITS, serviceTime)} ms due to ${MDC(ERROR, errorMessage.get)}") } updateStageInfoForPushBasedShuffle(stage) if (!willRetry) { @@ -2848,7 +2886,8 @@ private[spark] class DAGScheduler( failJobAndIndependentStages(job, finalException) } if (dependentJobs.isEmpty) { - logInfo("Ignoring failure of " + failedStage + " because all jobs depending on it are done") + logInfo(log"Ignoring failure of ${MDC(FAILED_STAGE, failedStage)} because all jobs " + + log"depending on it are done") } } @@ -2892,7 +2931,7 @@ private[spark] class DAGScheduler( markStageAsFinished(stage, Some(reason)) } catch { case e: UnsupportedOperationException => - logWarning(s"Could not cancel tasks for stage $stageId", e) + logWarning(log"Could not cancel tasks for stage ${MDC(STAGE_ID, stageId)}", e) ableToCancelStages = false } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala index aaa9e5bdd9e1c..1606072153906 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala @@ -23,7 +23,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import org.apache.spark.{ExecutorAllocationClient, SparkConf, SparkContext} import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.HOST +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.{Clock, SystemClock, Utils} /** @@ -111,8 +111,8 @@ private[scheduler] class HealthTracker ( val execsToInclude = executorIdToExcludedStatus.filter(_._2.expiryTime < now).keys if (execsToInclude.nonEmpty) { // Include any executors that have been excluded longer than the excludeOnFailure timeout. - logInfo(s"Removing executors $execsToInclude from exclude list because the " + - s"the executors have reached the timed out") + logInfo(log"Removing executors ${MDC(EXECUTOR_IDS, execsToInclude)} from " + + log"exclude list because the executors have reached the timed out") execsToInclude.foreach { exec => val status = executorIdToExcludedStatus.remove(exec).get val failedExecsOnNode = nodeToExcludedExecs(status.node) @@ -128,8 +128,8 @@ private[scheduler] class HealthTracker ( val nodesToInclude = nodeIdToExcludedExpiryTime.filter(_._2 < now).keys if (nodesToInclude.nonEmpty) { // Include any nodes that have been excluded longer than the excludeOnFailure timeout. 
- logInfo(s"Removing nodes $nodesToInclude from exclude list because the " + - s"nodes have reached has timed out") + logInfo(log"Removing nodes ${MDC(NODES, nodesToInclude)} from exclude list because the " + + log"nodes have reached has timed out") nodesToInclude.foreach { node => nodeIdToExcludedExpiryTime.remove(node) // post both to keep backwards compatibility @@ -173,8 +173,8 @@ private[scheduler] class HealthTracker ( force = true) } case None => - logInfo(s"Not attempting to kill excluded executor id $exec " + - s"since allocation client is not defined.") + logInfo(log"Not attempting to kill excluded executor id ${MDC(EXECUTOR_ID, exec)}" + + log" since allocation client is not defined.") } } @@ -196,21 +196,23 @@ private[scheduler] class HealthTracker ( allocationClient match { case Some(a) => if (EXCLUDE_ON_FAILURE_DECOMMISSION_ENABLED) { - logInfo(s"Decommissioning all executors on excluded host $node " + - s"since ${config.EXCLUDE_ON_FAILURE_KILL_ENABLED.key} is set.") + logInfo(log"Decommissioning all executors on excluded host ${MDC(HOST, node)} " + + log"since ${MDC(CONFIG, config.EXCLUDE_ON_FAILURE_KILL_ENABLED.key)} " + + log"is set.") if (!a.decommissionExecutorsOnHost(node)) { logError(log"Decommissioning executors on ${MDC(HOST, node)} failed.") } } else { - logInfo(s"Killing all executors on excluded host $node " + - s"since ${config.EXCLUDE_ON_FAILURE_KILL_ENABLED.key} is set.") + logInfo(log"Killing all executors on excluded host ${MDC(HOST, node)} " + + log"since ${MDC(CONFIG, config.EXCLUDE_ON_FAILURE_KILL_ENABLED.key)} is set.") if (!a.killExecutorsOnHost(node)) { logError(log"Killing executors on node ${MDC(HOST, node)} failed.") } } case None => - logWarning(s"Not attempting to kill executors on excluded host $node " + - s"since allocation client is not defined.") + logWarning( + log"Not attempting to kill executors on excluded host ${MDC(HOST_PORT, node)} " + + log"since allocation client is not defined.") } } } @@ -230,7 +232,8 @@ private[scheduler] class HealthTracker ( if (conf.get(config.SHUFFLE_SERVICE_ENABLED)) { if (!nodeIdToExcludedExpiryTime.contains(host)) { - logInfo(s"excluding node $host due to fetch failure of external shuffle service") + logInfo(log"excluding node ${MDC(HOST, host)} due to fetch failure of " + + log"external shuffle service") nodeIdToExcludedExpiryTime.put(host, expiryTimeForNewExcludes) // post both to keep backwards compatibility @@ -241,7 +244,7 @@ private[scheduler] class HealthTracker ( updateNextExpiryTime() } } else if (!executorIdToExcludedStatus.contains(exec)) { - logInfo(s"Excluding executor $exec due to fetch failure") + logInfo(log"Excluding executor ${MDC(EXECUTOR_ID, exec)} due to fetch failure") executorIdToExcludedStatus.put(exec, ExcludedExecutor(host, expiryTimeForNewExcludes)) // We hardcoded number of failure tasks to 1 for fetch failure, because there's no @@ -279,8 +282,8 @@ private[scheduler] class HealthTracker ( // some of the logic around expiry times a little more confusing. But it also wouldn't be a // problem to re-exclude, with a later expiry time. 
if (newTotal >= MAX_FAILURES_PER_EXEC && !executorIdToExcludedStatus.contains(exec)) { - logInfo(s"Excluding executor id: $exec because it has $newTotal" + - s" task failures in successful task sets") + logInfo(log"Excluding executor id: ${MDC(EXECUTOR_ID, exec)} because it has " + + log"${MDC(TOTAL, newTotal)} task failures in successful task sets") val node = failuresInTaskSet.node executorIdToExcludedStatus.put(exec, ExcludedExecutor(node, expiryTimeForNewExcludes)) // post both to keep backwards compatibility @@ -298,8 +301,9 @@ private[scheduler] class HealthTracker ( // time. if (excludedExecsOnNode.size >= MAX_FAILED_EXEC_PER_NODE && !nodeIdToExcludedExpiryTime.contains(node)) { - logInfo(s"Excluding node $node because it has ${excludedExecsOnNode.size} " + - s"executors excluded: ${excludedExecsOnNode}") + logInfo(log"Excluding node ${MDC(HOST, node)} because it has " + + log"${MDC(NUM_EXECUTORS, excludedExecsOnNode.size)} executors " + + log"excluded: ${MDC(EXECUTOR_IDS, excludedExecsOnNode)}") nodeIdToExcludedExpiryTime.put(node, expiryTimeForNewExcludes) // post both to keep backwards compatibility listenerBus.post(SparkListenerNodeBlacklisted(now, node, excludedExecsOnNode.size)) @@ -437,10 +441,12 @@ private[spark] object HealthTracker extends Logging { val legacyKey = config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF.key conf.get(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF).exists { legacyTimeout => if (legacyTimeout == 0) { - logWarning(s"Turning off excludeOnFailure due to legacy configuration: $legacyKey == 0") + logWarning(log"Turning off excludeOnFailure due to legacy configuration: " + + log"${MDC(CONFIG, legacyKey)} == 0") false } else { - logWarning(s"Turning on excludeOnFailure due to legacy configuration: $legacyKey > 0") + logWarning(log"Turning on excludeOnFailure due to legacy configuration: " + + log"${MDC(CONFIG, legacyKey)} > 0") true } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala index bd0bff18ff578..7251eb2c86ea1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/LiveListenerBus.scala @@ -31,7 +31,7 @@ import com.codahale.metrics.{Counter, MetricRegistry, Timer} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CLASS_NAME, MAX_SIZE} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, MAX_SIZE} import org.apache.spark.internal.config._ import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source diff --git a/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala b/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala index cd5d6b8f9c90d..df28a97a349ea 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/OutputCommitCoordinator.scala @@ -20,7 +20,7 @@ package org.apache.spark.scheduler import scala.collection.mutable import org.apache.spark._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv} import org.apache.spark.util.{RpcUtils, ThreadUtils} @@ -44,10 +44,7 @@ private case class AskPermissionToCommitOutput( * This class was introduced in 
SPARK-4879; see that JIRA issue (and the associated pull requests) * for an extensive design discussion. */ -private[spark] class OutputCommitCoordinator( - conf: SparkConf, - isDriver: Boolean, - sc: Option[SparkContext] = None) extends Logging { +private[spark] class OutputCommitCoordinator(conf: SparkConf, isDriver: Boolean) extends Logging { // Initialized by SparkEnv var coordinatorRef: Option[RpcEndpointRef] = None @@ -124,7 +121,7 @@ private[spark] class OutputCommitCoordinator( stageStates.get(stage) match { case Some(state) => require(state.authorizedCommitters.length == maxPartitionId + 1) - logInfo(s"Reusing state from previous attempt of stage $stage.") + logInfo(log"Reusing state from previous attempt of stage ${MDC(LogKeys.STAGE_ID, stage)}") case _ => stageStates(stage) = new StageState(maxPartitionId + 1) @@ -151,17 +148,18 @@ private[spark] class OutputCommitCoordinator( case Success => // The task output has been committed successfully case _: TaskCommitDenied => - logInfo(s"Task was denied committing, stage: $stage.$stageAttempt, " + - s"partition: $partition, attempt: $attemptNumber") + logInfo(log"Task was denied committing, stage: ${MDC(LogKeys.STAGE_ID, stage)}." + + log"${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)}, " + + log"partition: ${MDC(LogKeys.PARTITION_ID, partition)}, " + + log"attempt: ${MDC(LogKeys.NUM_ATTEMPT, attemptNumber)}") case _ => // Mark the attempt as failed to exclude from future commit protocol val taskId = TaskIdentifier(stageAttempt, attemptNumber) stageState.failures.getOrElseUpdate(partition, mutable.Set()) += taskId if (stageState.authorizedCommitters(partition) == taskId) { - sc.foreach(_.dagScheduler.stageFailed(stage, s"Authorized committer " + - s"(attemptNumber=$attemptNumber, stage=$stage, partition=$partition) failed; " + - s"but task commit success, data duplication may happen. " + - s"reason=$reason")) + logDebug(s"Authorized committer (attemptNumber=$attemptNumber, stage=$stage, " + + s"partition=$partition) failed; clearing lock") + stageState.authorizedCommitters(partition) = null } } } @@ -182,8 +180,10 @@ private[spark] class OutputCommitCoordinator( attemptNumber: Int): Boolean = synchronized { stageStates.get(stage) match { case Some(state) if attemptFailed(state, stageAttempt, partition, attemptNumber) => - logInfo(s"Commit denied for stage=$stage.$stageAttempt, partition=$partition: " + - s"task attempt $attemptNumber already marked as failed.") + logInfo(log"Commit denied for stage=${MDC(LogKeys.STAGE_ID, stage)}." 
+ + log"${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)}, partition=" + + log"${MDC(LogKeys.PARTITION_ID, partition)}: task attempt " + + log"${MDC(LogKeys.NUM_ATTEMPT, attemptNumber)} already marked as failed.") false case Some(state) => val existing = state.authorizedCommitters(partition) diff --git a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala index 24c25d2377948..26c7486010c02 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala @@ -25,7 +25,7 @@ import com.fasterxml.jackson.core.JsonParseException import com.fasterxml.jackson.databind.exc.UnrecognizedPropertyException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{LINE, LINE_NUM, PATH} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.scheduler.ReplayListenerBus._ import org.apache.spark.util.JsonProtocol @@ -92,7 +92,7 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { // Ignore unknown events, parse through the event log file. // To avoid spamming, warnings are only displayed once for each unknown event. if (!unrecognizedEvents.contains(e.getMessage)) { - logWarning(s"Drop unrecognized event: ${e.getMessage}") + logWarning(log"Drop unrecognized event: ${MDC(ERROR, e.getMessage)}") unrecognizedEvents.add(e.getMessage) } logDebug(s"Drop incompatible event log: $currentLine") @@ -100,7 +100,7 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { // Ignore unrecognized properties, parse through the event log file. // To avoid spamming, warnings are only displayed once for each unrecognized property. 
if (!unrecognizedProperties.contains(e.getMessage)) { - logWarning(s"Drop unrecognized property: ${e.getMessage}") + logWarning(log"Drop unrecognized property: ${MDC(ERROR, e.getMessage)}") unrecognizedProperties.add(e.getMessage) } logDebug(s"Drop incompatible event log: $currentLine") @@ -111,8 +111,9 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging { if (!maybeTruncated || lineEntries.hasNext) { throw jpe } else { - logWarning(s"Got JsonParseException from log file $sourceName" + - s" at line $lineNumber, the file might not have finished writing cleanly.") + logWarning(log"Got JsonParseException from log file ${MDC(FILE_NAME, sourceName)}" + + log" at line ${MDC(LINE_NUM, lineNumber)}, " + + log"the file might not have finished writing cleanly.") } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala index 7e61dad3c141b..6f64dff3f39d6 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SchedulableBuilder.scala @@ -27,7 +27,8 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkContext import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.{SCHEDULER_ALLOCATION_FILE, SCHEDULER_MODE} import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.util.Utils @@ -79,20 +80,23 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, sc: SparkContext fileData = schedulerAllocFile.map { f => val filePath = new Path(f) val fis = filePath.getFileSystem(sc.hadoopConfiguration).open(filePath) - logInfo(s"Creating Fair Scheduler pools from $f") + logInfo(log"Creating Fair Scheduler pools from ${MDC(LogKeys.FILE_NAME, f)}") Some((fis, f)) }.getOrElse { val is = Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_SCHEDULER_FILE) if (is != null) { - logInfo(s"Creating Fair Scheduler pools from default file: $DEFAULT_SCHEDULER_FILE") + logInfo(log"Creating Fair Scheduler pools from default file: " + + log"${MDC(LogKeys.FILE_NAME, DEFAULT_SCHEDULER_FILE)}") Some((is, DEFAULT_SCHEDULER_FILE)) } else { val schedulingMode = SchedulingMode.withName(sc.conf.get(SCHEDULER_MODE)) rootPool.addSchedulable(new Pool( DEFAULT_POOL_NAME, schedulingMode, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)) - logInfo("Fair scheduler configuration not found, created default pool: " + - "%s, schedulingMode: %s, minShare: %d, weight: %d".format( - DEFAULT_POOL_NAME, schedulingMode, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)) + logInfo(log"Fair scheduler configuration not found, created default pool: " + + log"${MDC(LogKeys.DEFAULT_NAME, DEFAULT_POOL_NAME)}, " + + log"schedulingMode: ${MDC(LogKeys.SCHEDULING_MODE, schedulingMode)}, " + + log"minShare: ${MDC(LogKeys.MIN_SHARE, DEFAULT_MINIMUM_SHARE)}, " + + log"weight: ${MDC(LogKeys.WEIGHT, DEFAULT_WEIGHT)}") None } } @@ -121,8 +125,10 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, sc: SparkContext val pool = new Pool(DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) rootPool.addSchedulable(pool) - logInfo("Created default pool: %s, schedulingMode: %s, minShare: %d, weight: %d".format( - DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)) + logInfo(log"Created default pool: 
${MDC(LogKeys.POOL_NAME, DEFAULT_POOL_NAME)}, " + + log"schedulingMode: ${MDC(LogKeys.SCHEDULING_MODE, DEFAULT_SCHEDULING_MODE)}, " + + log"minShare: ${MDC(LogKeys.MIN_SHARE, DEFAULT_MINIMUM_SHARE)}, " + + log"weight: ${MDC(LogKeys.WEIGHT, DEFAULT_WEIGHT)}") } } @@ -141,8 +147,10 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, sc: SparkContext rootPool.addSchedulable(new Pool(poolName, schedulingMode, minShare, weight)) - logInfo("Created pool: %s, schedulingMode: %s, minShare: %d, weight: %d".format( - poolName, schedulingMode, minShare, weight)) + logInfo(log"Created pool: ${MDC(LogKeys.POOL_NAME, poolName)}, " + + log"schedulingMode: ${MDC(LogKeys.SCHEDULING_MODE, schedulingMode)}, " + + log"minShare: ${MDC(LogKeys.MIN_SHARE, minShare)}, " + + log"weight: ${MDC(LogKeys.WEIGHT, weight)}") } } @@ -154,9 +162,12 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, sc: SparkContext val xmlSchedulingMode = (poolNode \ SCHEDULING_MODE_PROPERTY).text.trim.toUpperCase(Locale.ROOT) - val warningMessage = s"Unsupported schedulingMode: $xmlSchedulingMode found in " + - s"Fair Scheduler configuration file: $fileName, using " + - s"the default schedulingMode: $defaultValue for pool: $poolName" + val warningMessage = log"Unsupported schedulingMode: " + + log"${MDC(XML_SCHEDULING_MODE, xmlSchedulingMode)} found in " + + log"Fair Scheduler configuration file: ${MDC(FILE_NAME, fileName)}, using " + + log"the default schedulingMode: " + + log"${MDC(LogKeys.SCHEDULING_MODE, defaultValue)} for pool: " + + log"${MDC(POOL_NAME, poolName)}" try { if (SchedulingMode.withName(xmlSchedulingMode) != SchedulingMode.NONE) { SchedulingMode.withName(xmlSchedulingMode) @@ -165,7 +176,7 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, sc: SparkContext defaultValue } } catch { - case e: NoSuchElementException => + case _: NoSuchElementException => logWarning(warningMessage) defaultValue } @@ -182,10 +193,12 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, sc: SparkContext try { data.toInt } catch { - case e: NumberFormatException => - logWarning(s"Error while loading fair scheduler configuration from $fileName: " + - s"$propertyName is blank or invalid: $data, using the default $propertyName: " + - s"$defaultValue for pool: $poolName") + case _: NumberFormatException => + logWarning(log"Error while loading fair scheduler configuration from " + + log"${MDC(FILE_NAME, fileName)}: " + + log"${MDC(PROPERTY_NAME, propertyName)} is blank or invalid: ${MDC(DATA, data)}, " + + log"using the default ${MDC(DEFAULT_NAME, propertyName)}: " + + log"${MDC(DEFAULT_VALUE, defaultValue)} for pool: ${MDC(POOL_NAME, poolName)}") defaultValue } } @@ -203,13 +216,18 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool, sc: SparkContext parentPool = new Pool(poolName, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) rootPool.addSchedulable(parentPool) - logWarning(s"A job was submitted with scheduler pool $poolName, which has not been " + - "configured. This can happen when the file that pools are read from isn't set, or " + - s"when that file doesn't contain $poolName. Created $poolName with default " + - s"configuration (schedulingMode: $DEFAULT_SCHEDULING_MODE, " + - s"minShare: $DEFAULT_MINIMUM_SHARE, weight: $DEFAULT_WEIGHT)") + logWarning(log"A job was submitted with scheduler pool " + + log"${MDC(SCHEDULER_POOL_NAME, poolName)}, which has not been " + + log"configured. 
This can happen when the file that pools are read from isn't set, or " + + log"when that file doesn't contain ${MDC(POOL_NAME, poolName)}. " + + log"Created ${MDC(CREATED_POOL_NAME, poolName)} with default " + + log"configuration (schedulingMode: " + + log"${MDC(LogKeys.SCHEDULING_MODE, DEFAULT_SCHEDULING_MODE)}, " + + log"minShare: ${MDC(MIN_SHARE, DEFAULT_MINIMUM_SHARE)}, " + + log"weight: ${MDC(WEIGHT, DEFAULT_WEIGHT)}") } parentPool.addSchedulable(manager) - logInfo("Added task set " + manager.name + " tasks to pool " + poolName) + logInfo(log"Added task set ${MDC(LogKeys.TASK_SET_MANAGER, manager.name)} tasks to pool " + + log"${MDC(LogKeys.POOL_NAME, poolName)}") } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/StatsReportListener.scala b/core/src/main/scala/org/apache/spark/scheduler/StatsReportListener.scala index 1f12b46412bc5..e46dde5561a26 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/StatsReportListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/StatsReportListener.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.util.{Distribution, Utils} @@ -46,7 +46,8 @@ class StatsReportListener extends SparkListener with Logging { override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = { implicit val sc = stageCompleted - this.logInfo(s"Finished stage: ${getStatusDetail(stageCompleted.stageInfo)}") + this.logInfo( + log"Finished stage: ${MDC(LogKeys.STAGE, getStatusDetail(stageCompleted.stageInfo))}") showMillisDistribution("task runtime:", (info, _) => info.duration, taskInfoMetrics.toSeq) // Shuffle write @@ -111,9 +112,9 @@ private[spark] object StatsReportListener extends Logging { def showDistribution(heading: String, d: Distribution, formatNumber: Double => String): Unit = { val stats = d.statCounter val quantiles = d.getQuantiles(probabilities).map(formatNumber) - logInfo(heading + stats) + logInfo(log"${MDC(LogKeys.DESCRIPTION, heading)}${MDC(LogKeys.STATS, stats)}") logInfo(percentilesHeader) - logInfo("\t" + quantiles.mkString("\t")) + logInfo(log"\t" + log"${MDC(LogKeys.QUANTILES, quantiles.mkString("\t"))}") } def showDistribution( diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala index e93bc0747349c..97c539bb05a58 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala @@ -26,7 +26,7 @@ import scala.util.control.NonFatal import org.apache.spark._ import org.apache.spark.TaskState.TaskState import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CLASS_LOADER +import org.apache.spark.internal.LogKeys.CLASS_LOADER import org.apache.spark.serializer.{SerializerHelper, SerializerInstance} import org.apache.spark.util.{LongAccumulator, ThreadUtils, Utils} diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 17c44926d6265..ec678256a708e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -33,8 +33,8 @@ import 
org.apache.spark.InternalAccumulator.{input, shuffleRead} import org.apache.spark.TaskState.TaskState import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.executor.ExecutorMetrics -import org.apache.spark.internal.{config, Logging, LogKey, MDC} -import org.apache.spark.internal.LogKey.{REASON, TASK_SET_NAME, TASK_STATE, TID} +import org.apache.spark.internal.{config, Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc.RpcEndpoint @@ -250,8 +250,9 @@ private[spark] class TaskSchedulerImpl( override def submitTasks(taskSet: TaskSet): Unit = { val tasks = taskSet.tasks - logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks " - + "resource profile " + taskSet.resourceProfileId) + logInfo(log"Adding task set " + taskSet.logId + + log" with ${MDC(LogKeys.NUM_TASKS, tasks.length)} tasks resource profile " + + log"${MDC(LogKeys.RESOURCE_PROFILE_ID, taskSet.resourceProfileId)}") this.synchronized { val manager = createTaskSetManager(taskSet, maxTaskFailures) val stage = taskSet.stageId @@ -306,9 +307,10 @@ private[spark] class TaskSchedulerImpl( stageId: Int, interruptThread: Boolean, reason: String): Unit = synchronized { - logInfo("Cancelling stage " + stageId) + logInfo(log"Canceling stage ${MDC(LogKeys.STAGE_ID, stageId)}") // Kill all running tasks for the stage. - logInfo(s"Killing all running tasks in stage $stageId: $reason") + logInfo(log"Killing all running tasks in stage ${MDC(LogKeys.STAGE_ID, stageId)}: " + + log"${MDC(LogKeys.REASON, reason)}") taskSetsByStageIdAndAttempt.get(stageId).foreach { attempts => attempts.foreach { case (_, tsm) => // There are two possible cases here: @@ -322,7 +324,8 @@ private[spark] class TaskSchedulerImpl( } } tsm.suspend() - logInfo("Stage %s.%s was cancelled".format(stageId, tsm.taskSet.stageAttemptId)) + logInfo(log"Stage ${MDC(LogKeys.STAGE_ID, stageId)}." 
+ + log"${MDC(LogKeys.STAGE_ATTEMPT, tsm.taskSet.stageAttemptId)} was cancelled") } } } @@ -331,13 +334,14 @@ private[spark] class TaskSchedulerImpl( taskId: Long, interruptThread: Boolean, reason: String): Boolean = synchronized { - logInfo(s"Killing task $taskId: $reason") + logInfo(log"Killing task ${MDC(LogKeys.TASK_ID, taskId)}: ${MDC(LogKeys.REASON, reason)}") val execId = taskIdToExecutorId.get(taskId) if (execId.isDefined) { backend.killTask(taskId, execId.get, interruptThread, reason) true } else { - logWarning(s"Could not kill task $taskId because no task with that ID was found.") + logWarning(log"Could not kill task ${MDC(TASK_ID, taskId)} " + + log"because no task with that ID was found.") false } } @@ -360,8 +364,9 @@ private[spark] class TaskSchedulerImpl( } noRejectsSinceLastReset -= manager.taskSet manager.parent.removeSchedulable(manager) - logInfo(s"Removed TaskSet ${manager.taskSet.id}, whose tasks have all completed, from pool" + - s" ${manager.parent.name}") + logInfo(log"Removed TaskSet " + manager.taskSet.logId + + log" whose tasks have all completed, from pool ${MDC(LogKeys.POOL_NAME, manager.parent.name)}" + ) } /** @@ -428,7 +433,8 @@ private[spark] class TaskSchedulerImpl( } catch { case e: TaskNotSerializableException => // scalastyle:off line.size.limit - logError(log"Resource offer failed, task set ${MDC(TASK_SET_NAME, taskSet.name)} was not serializable") + logError(log"Resource offer failed, task set " + + log"${MDC(LogKeys.TASK_SET_NAME, taskSet.name)} was not serializable") // scalastyle:on // Do not offer resources for this task, but don't throw an error to allow other // task sets to be submitted. @@ -557,9 +563,10 @@ private[spark] class TaskSchedulerImpl( // Skip the launch process. // TODO SPARK-24819 If the job requires more slots than available (both busy and free // slots), fail the job on submit. - logInfo(s"Skip current round of resource offers for barrier stage ${taskSet.stageId} " + - s"because the barrier taskSet requires ${taskSet.numTasks} slots, while the total " + - s"number of available slots is $numBarrierSlotsAvailable.") + logInfo(log"Skip current round of resource offers for barrier stage " + + log"${MDC(LogKeys.STAGE_ID, taskSet.stageId)} because the barrier taskSet requires " + + log"${MDC(LogKeys.TASK_SET_NAME, taskSet.numTasks)} slots, while the total " + + log"number of available slots is ${MDC(LogKeys.NUM_SLOTS, numBarrierSlotsAvailable)}.") } else { var launchedAnyTask = false var noDelaySchedulingRejects = true @@ -617,18 +624,18 @@ private[spark] class TaskSchedulerImpl( // in order to provision more executors to make them schedulable if (Utils.isDynamicAllocationEnabled(conf)) { if (!unschedulableTaskSetToExpiryTime.contains(taskSet)) { - logInfo("Notifying ExecutorAllocationManager to allocate more executors to" + - " schedule the unschedulable task before aborting" + - s" stage ${taskSet.stageId}.") + logInfo(log"Notifying ExecutorAllocationManager to allocate more executors to" + + log" schedule the unschedulable task before aborting" + + log" stage ${MDC(LogKeys.STAGE_ID, taskSet.stageId)}.") dagScheduler.unschedulableTaskSetAdded(taskSet.taskSet.stageId, taskSet.taskSet.stageAttemptId) updateUnschedulableTaskSetTimeoutAndStartAbortTimer(taskSet, taskIndex) } } else { // Abort Immediately - logInfo("Cannot schedule any task because all executors excluded from " + - "failures. No idle executors can be found to kill. 
Aborting stage " + - s"${taskSet.stageId}.") + logInfo(log"Cannot schedule any task because all executors excluded from " + + log"failures. No idle executors can be found to kill. Aborting stage " + + log"${MDC(LogKeys.STAGE_ID, taskSet.stageId)}.") taskSet.abortSinceCompletelyExcludedOnFailure(taskIndex) } } @@ -641,8 +648,8 @@ private[spark] class TaskSchedulerImpl( // non-excluded executor and the abort timer doesn't kick in because of a constant // submission of new TaskSets. See the PR for more details. if (unschedulableTaskSetToExpiryTime.nonEmpty) { - logInfo("Clearing the expiry times for all unschedulable taskSets as a task was " + - "recently scheduled.") + logInfo(log"Clearing the expiry times for all unschedulable taskSets as a task " + + log"was recently scheduled.") // Notify ExecutorAllocationManager as well as other subscribers that a task now // recently becomes schedulable dagScheduler.unschedulableTaskSetRemoved(taskSet.taskSet.stageId, @@ -662,20 +669,23 @@ private[spark] class TaskSchedulerImpl( // always reject the offered resources. As a result, the barrier taskset can't get // launched. And if we retry the resourceOffer, we'd go through the same path again // and get into the endless loop in the end. - val errorMsg = s"Fail resource offers for barrier stage ${taskSet.stageId} " + - s"because only ${barrierPendingLaunchTasks.length} out of a total number " + - s"of ${taskSet.numTasks} tasks got resource offers. We highly recommend " + - "you to use the non-legacy delay scheduling by setting " + - s"${LEGACY_LOCALITY_WAIT_RESET.key} to false to get rid of this error." - logWarning(errorMsg) - taskSet.abort(errorMsg) - throw SparkCoreErrors.sparkError(errorMsg) + val logMsg = log"Fail resource offers for barrier stage " + + log"${MDC(STAGE_ID, taskSet.stageId)} because only " + + log"${MDC(NUM_PENDING_LAUNCH_TASKS, barrierPendingLaunchTasks.length)} " + + log"out of a total number " + + log"of ${MDC(NUM_TASKS, taskSet.numTasks)} tasks got resource offers. " + + log"We highly recommend you to use the non-legacy delay scheduling by setting " + + log"${MDC(CONFIG, LEGACY_LOCALITY_WAIT_RESET.key)} to false " + + log"to get rid of this error." + logWarning(logMsg) + taskSet.abort(logMsg.message) + throw SparkCoreErrors.sparkError(logMsg.message) } else { val curTime = clock.getTimeMillis() if (curTime - taskSet.lastResourceOfferFailLogTime > TaskSetManager.BARRIER_LOGGING_INTERVAL) { - logInfo("Releasing the assigned resource offers since only partial tasks can " + - "be launched. Waiting for later round resource offers.") + logInfo(log"Releasing the assigned resource offers since only partial tasks can " + + log"be launched. 
Waiting for later round resource offers.") taskSet.lastResourceOfferFailLogTime = curTime } barrierPendingLaunchTasks.foreach { task => @@ -717,8 +727,8 @@ private[spark] class TaskSchedulerImpl( .mkString(",") addressesWithDescs.foreach(_._2.properties.setProperty("addresses", addressesStr)) - logInfo(s"Successfully scheduled all the ${addressesWithDescs.length} tasks for " + - s"barrier stage ${taskSet.stageId}.") + logInfo(log"Successfully scheduled all the ${MDC(LogKeys.NUM_TASKS, addressesWithDescs.length)} " + + log"tasks for barrier stage ${MDC(LogKeys.STAGE_ID, taskSet.stageId)}.") } taskSet.barrierPendingLaunchTasks.clear() } @@ -738,8 +748,8 @@ private[spark] class TaskSchedulerImpl( taskIndex: Int): Unit = { val timeout = conf.get(config.UNSCHEDULABLE_TASKSET_TIMEOUT) * 1000 unschedulableTaskSetToExpiryTime(taskSet) = clock.getTimeMillis() + timeout - logInfo(s"Waiting for $timeout ms for completely " + - s"excluded task to be schedulable again before aborting stage ${taskSet.stageId}.") + logInfo(log"Waiting for ${MDC(LogKeys.TIMEOUT, timeout)} ms for completely " + + log"excluded task to be schedulable again before aborting stage ${MDC(LogKeys.STAGE_ID, taskSet.stageId)}.") abortTimer.schedule( createUnschedulableTaskSetAbortTimer(taskSet, taskIndex), timeout, TimeUnit.MILLISECONDS) } @@ -751,8 +761,8 @@ private[spark] class TaskSchedulerImpl( override def run(): Unit = TaskSchedulerImpl.this.synchronized { if (unschedulableTaskSetToExpiryTime.contains(taskSet) && unschedulableTaskSetToExpiryTime(taskSet) <= clock.getTimeMillis()) { - logInfo("Cannot schedule any task because all executors excluded due to failures. " + - s"Wait time for scheduling expired. Aborting stage ${taskSet.stageId}.") + logInfo(log"Cannot schedule any task because all executors excluded due to failures. " + + log"Wait time for scheduling expired. Aborting stage ${MDC(LogKeys.STAGE_ID, taskSet.stageId)}.") taskSet.abortSinceCompletelyExcludedOnFailure(taskIndex) } else { this.cancel() @@ -807,9 +817,10 @@ private[spark] class TaskSchedulerImpl( taskSet.taskInfos(tid).launchSucceeded() } case None => - logError( - log"Ignoring update with state ${MDC(TASK_STATE, state)} for TID ${MDC(TID, tid)} because its task set is gone (this is " + - log"likely the result of receiving duplicate task finished status updates) or its executor has been marked as failed.") + logError(log"Ignoring update with state ${MDC(LogKeys.TASK_STATE, state)} for " + + log"TID ${MDC(LogKeys.TASK_ID, tid)} because its task set is gone (this is " + + log"likely the result of receiving duplicate task finished status updates) or its " + + log"executor has been marked as failed.") } } catch { case e: Exception => logError("Exception in statusUpdate", e) @@ -1024,7 +1035,8 @@ private[spark] class TaskSchedulerImpl( // one may be triggered by a dropped connection from the worker while another may be a // report of executor termination. We produce log messages for both so we // eventually report the termination reason. 
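
// ---------------------------------------------------------------------------
// Editor's note, not part of the patch: the barrier-stage abort above builds
// the structured message once and then reuses its rendered text via .message
// for the non-logging paths (abort reason, exception). A minimal sketch of that
// pattern, assuming spark-core 4.x; FlakyStore and save are hypothetical.
import org.apache.spark.SparkException
import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.internal.LogKeys.{NUM_RETRIES, PATH}

class FlakyStore extends Logging {
  def save(path: String, retriesLeft: Int): Unit = {
    if (retriesLeft == 0) {
      // Build the MessageWithContext once...
      val logMsg = log"Giving up writing to ${MDC(PATH, path)} after " +
        log"${MDC(NUM_RETRIES, 3)} retries"
      // ...log it with its structured context, then reuse the plain text.
      logWarning(logMsg)
      throw new SparkException(logMsg.message)
    }
  }
}
// ---------------------------------------------------------------------------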
- logError(log"Lost an executor ${MDC(LogKey.EXECUTOR_ID, executorId)} (already removed): ${MDC(REASON, reason)}") + logError(log"Lost an executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} " + + log"(already removed): ${MDC(LogKeys.REASON, reason)}") } } } @@ -1036,7 +1048,8 @@ private[spark] class TaskSchedulerImpl( } override def workerRemoved(workerId: String, host: String, message: String): Unit = { - logInfo(s"Handle removed worker $workerId: $message") + logInfo(log"Handle removed worker ${MDC(LogKeys.WORKER_ID, workerId)}: " + + log"${MDC(LogKeys.MESSAGE, message)}") dagScheduler.workerRemoved(workerId, host, message) } @@ -1047,12 +1060,15 @@ private[spark] class TaskSchedulerImpl( case LossReasonPending => logDebug(s"Executor $executorId on $hostPort lost, but reason not yet known.") case ExecutorKilled => - logInfo(s"Executor $executorId on $hostPort killed by driver.") + logInfo(log"Executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} on " + + log"${MDC(LogKeys.HOST_PORT, hostPort)} killed by driver.") case _: ExecutorDecommission => - logInfo(s"Executor $executorId on $hostPort is decommissioned" + - s"${getDecommissionDuration(executorId)}.") + logInfo(log"Executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} on " + + log"${MDC(LogKeys.HOST_PORT, hostPort)} is decommissioned" + + log"${MDC(DURATION, getDecommissionDuration(executorId))}.") case _ => - logError(log"Lost executor ${MDC(LogKey.EXECUTOR_ID, executorId)} on ${MDC(LogKey.HOST, hostPort)}: ${MDC(REASON, reason)}") + logError(log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} on " + + log"${MDC(LogKeys.HOST, hostPort)}: ${MDC(LogKeys.REASON, reason)}") } // return decommission duration in string or "" if decommission startTime not exists diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala index e03c4101709cd..2474a1342eb2e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala @@ -19,6 +19,9 @@ package org.apache.spark.scheduler import java.util.Properties +import org.apache.spark.internal.LogKeys.{STAGE_ATTEMPT, STAGE_ID} +import org.apache.spark.internal.MessageWithContext + /** * A set of tasks submitted together to the low-level TaskScheduler, usually representing * missing partitions of a particular stage. @@ -34,4 +37,12 @@ private[spark] class TaskSet( val id: String = s"$stageId.$stageAttemptId" override def toString: String = "TaskSet " + id + + // Identifier used in the structured logging framework. 
+ lazy val logId: MessageWithContext = { + val hashMap = new java.util.HashMap[String, String]() + hashMap.put(STAGE_ID.name, stageId.toString) + hashMap.put(STAGE_ATTEMPT.name, stageAttemptId.toString) + MessageWithContext(id, hashMap) + } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala index f479e5e32bc2f..c9aa74e0852be 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetExcludeList.scala @@ -19,8 +19,7 @@ package org.apache.spark.scheduler import scala.collection.mutable.{HashMap, HashSet} import org.apache.spark.SparkConf -import org.apache.spark.internal.Logging -import org.apache.spark.internal.config +import org.apache.spark.internal.{config, Logging, LogKeys, MDC} import org.apache.spark.util.Clock /** @@ -134,7 +133,8 @@ private[scheduler] class TaskSetExcludelist( val numFailures = execFailures.numUniqueTasksWithFailures if (numFailures >= MAX_FAILURES_PER_EXEC_STAGE) { if (excludedExecs.add(exec)) { - logInfo(s"Excluding executor ${exec} for stage $stageId") + logInfo(log"Excluding executor ${MDC(LogKeys.EXECUTOR_ID, exec)} for stage " + + log"${MDC(LogKeys.STAGE_ID, stageId)}") // This executor has been excluded for this stage. Let's check if it // the whole node should be excluded. val excludedExecutorsOnNode = @@ -149,7 +149,8 @@ private[scheduler] class TaskSetExcludelist( val numFailExec = excludedExecutorsOnNode.size if (numFailExec >= MAX_FAILED_EXEC_PER_NODE_STAGE) { if (excludedNodes.add(host)) { - logInfo(s"Excluding ${host} for stage $stageId") + logInfo(log"Excluding ${MDC(LogKeys.HOST, host)} for " + + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}") // SparkListenerNodeBlacklistedForStage is deprecated but post both events // to keep backward compatibility listenerBus.post( diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index 1418901e3442c..7dba4a6dc8fc4 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -30,8 +30,8 @@ import org.apache.spark.InternalAccumulator import org.apache.spark.InternalAccumulator.{input, shuffleRead} import org.apache.spark.TaskState.TaskState import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.{config, Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.scheduler.SchedulingMode._ import org.apache.spark.util.{AccumulatorV2, Clock, LongAccumulator, SystemClock, Utils} @@ -279,8 +279,9 @@ private[spark] class TaskSetManager( for (e <- set) { pendingTaskSetToAddTo.forExecutor.getOrElseUpdate(e, new ArrayBuffer) += index } - logInfo(s"Pending task $index has a cached location at ${e.host} " + - ", where there are executors " + set.mkString(",")) + logInfo(log"Pending task ${MDC(INDEX, index)} has a cached location at " + + log"${MDC(HOST, e.host)}, where there are executors " + + log"${MDC(EXECUTOR_IDS, set.mkString(","))}") case None => logDebug(s"Pending task $index has a cached location at ${e.host} " + ", but there are no executors alive there.") } @@ -542,9 +543,10 @@ private[spark] class TaskSetManager( if 
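// [Illustrative aside, not part of the patch] The new TaskSet.logId above is a prebuilt
// MessageWithContext: its rendered text is the "stageId.stageAttemptId" identifier and its
// context map carries STAGE_ID/STAGE_ATTEMPT, so TaskSetManager call sites later in this diff
// can splice it between log"..." fragments (e.g. log"Task ..." + taskSet.logId + log" failed ...")
// instead of wrapping both ids in MDC(...) at every call site. Equivalent standalone sketch
// (the object and method names are hypothetical; these are Spark-internal APIs):
import org.apache.spark.internal.LogKeys.{STAGE_ATTEMPT, STAGE_ID}
import org.apache.spark.internal.MessageWithContext

object TaskSetLogIdSketch {
  def logId(stageId: Int, stageAttemptId: Int): MessageWithContext = {
    val context = new java.util.HashMap[String, String]()
    context.put(STAGE_ID.name, stageId.toString)
    context.put(STAGE_ATTEMPT.name, stageAttemptId.toString)
    MessageWithContext(s"$stageId.$stageAttemptId", context)
  }
}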
(serializedTask.limit() > TaskSetManager.TASK_SIZE_TO_WARN_KIB * 1024 && !emittedTaskSizeWarning) { emittedTaskSizeWarning = true - logWarning(s"Stage ${task.stageId} contains a task of very large size " + - s"(${serializedTask.limit() / 1024} KiB). The maximum recommended task size is " + - s"${TaskSetManager.TASK_SIZE_TO_WARN_KIB} KiB.") + logWarning(log"Stage ${MDC(STAGE_ID, task.stageId)} contains a task of very large size " + + log"(${MDC(NUM_BYTES, serializedTask.limit() / 1024)} KiB). " + + log"The maximum recommended task size is " + + log"${MDC(NUM_BYTES_TO_WARN, TaskSetManager.TASK_SIZE_TO_WARN_KIB)} KiB.") } addRunningTask(taskId) @@ -552,10 +554,16 @@ private[spark] class TaskSetManager( // a good proxy to task serialization time. // val timeTaken = clock.getTime() - startTime val tName = taskName(taskId) - logInfo(s"Starting $tName ($host, executor ${info.executorId}, " + - s"partition ${task.partitionId}, $taskLocality, ${serializedTask.limit()} bytes) " + - (if (taskResourceAssignments.nonEmpty) s"taskResourceAssignments ${taskResourceAssignments}" - else "")) + logInfo(log"Starting ${MDC(TASK_NAME, tName)} (${MDC(HOST, host)}," + + log"executor ${MDC(LogKeys.EXECUTOR_ID, info.executorId)}, " + + log"partition ${MDC(PARTITION_ID, task.partitionId)}, " + + log"${MDC(TASK_LOCALITY, taskLocality)}, " + + log"${MDC(SIZE, serializedTask.limit())} bytes) " + + (if (taskResourceAssignments.nonEmpty) { + log"taskResourceAssignments ${MDC(TASK_RESOURCE_ASSIGNMENTS, taskResourceAssignments)}" + } else { + log"" + })) sched.dagScheduler.taskStarted(task, info) new TaskDescription( @@ -827,8 +835,11 @@ private[spark] class TaskSetManager( // Kill any other attempts for the same task (since those are unnecessary now that one // attempt completed successfully). for (attemptInfo <- taskAttempts(index) if attemptInfo.running) { - logInfo(s"Killing attempt ${attemptInfo.attemptNumber} for ${taskName(attemptInfo.taskId)}" + - s" on ${attemptInfo.host} as the attempt ${info.attemptNumber} succeeded on ${info.host}") + logInfo(log"Killing attempt ${MDC(NUM_ATTEMPT, attemptInfo.attemptNumber)} for " + + log"${MDC(TASK_NAME, taskName(attemptInfo.taskId))} on " + + log"${MDC(HOST, attemptInfo.host)} as the attempt " + + log"${MDC(TASK_ATTEMPT_ID, info.attemptNumber)} succeeded on " + + log"${MDC(HOST, info.host)}") killedByOtherAttempt += attemptInfo.taskId sched.backend.killTask( attemptInfo.taskId, @@ -838,8 +849,10 @@ private[spark] class TaskSetManager( } if (!successful(index)) { tasksSuccessful += 1 - logInfo(s"Finished ${taskName(info.taskId)} in ${info.duration} ms " + - s"on ${info.host} (executor ${info.executorId}) ($tasksSuccessful/$numTasks)") + logInfo(log"Finished ${MDC(TASK_NAME, taskName(info.taskId))} in " + + log"${MDC(DURATION, info.duration)} ms on ${MDC(HOST, info.host)} " + + log"(executor ${MDC(LogKeys.EXECUTOR_ID, info.executorId)}) " + + log"(${MDC(NUM_SUCCESSFUL_TASKS, tasksSuccessful)}/${MDC(NUM_TASKS, numTasks)})") // Mark successful and stop if all the tasks have succeeded. 
successful(index) = true numFailures(index) = 0 @@ -847,8 +860,9 @@ private[spark] class TaskSetManager( isZombie = true } } else { - logInfo(s"Ignoring task-finished event for ${taskName(info.taskId)} " + - s"because it has already completed successfully") + logInfo(log"Ignoring task-finished event for " + + log"${MDC(TASK_NAME, taskName(info.taskId))} " + + log"because it has already completed successfully") } // This method is called by "TaskSchedulerImpl.handleSuccessfulTask" which holds the // "TaskSchedulerImpl" lock until exiting. To avoid the SPARK-7655 issue, we should not @@ -936,7 +950,11 @@ private[spark] class TaskSetManager( copiesRunning(index) -= 1 var accumUpdates: Seq[AccumulatorV2[_, _]] = Seq.empty var metricPeaks: Array[Long] = Array.empty - val failureReason = s"Lost ${taskName(tid)} (${info.host} " + + val failureReason = log"Lost ${MDC(TASK_NAME, taskName(tid))} " + + log"(${MDC(HOST_PORT, info.host)} " + + log"executor ${MDC(LogKeys.EXECUTOR_ID, info.executorId)}): " + + log"${MDC(ERROR, reason.toErrorString)}" + val failureReasonString = s"Lost ${taskName(tid)} (${info.host} " + s"executor ${info.executorId}): ${reason.toErrorString}" val failureException: Option[Throwable] = reason match { case fetchFailed: FetchFailed => @@ -971,9 +989,11 @@ private[spark] class TaskSetManager( if (ef.className == classOf[TaskOutputFileAlreadyExistException].getName) { // If we can not write to output file in the task, there's no point in trying to // re-execute it. - logError(log"Task ${MDC(TASK_ID, info.id)} in stage ${MDC(STAGE_ID, taskSet.id)} " + - log"(TID ${MDC(TID, tid)}) can not write to output file: " + - log"${MDC(ERROR, ef.description)}; not retrying") + logError( + log"Task ${MDC(TASK_INDEX, info.index)}.${MDC(TASK_ATTEMPT_ID, info.attemptNumber)} " + + log"in stage ${MDC(STAGE_ID, taskSet.stageId)}." + + log"${MDC(STAGE_ATTEMPT, taskSet.stageAttemptId)} (TID ${MDC(TASK_ID, tid)}) " + + log"can not write to output file: ${MDC(ERROR, ef.description)}; not retrying") emptyTaskInfoAccumulablesAndNotifyDagScheduler(tid, tasks(index), reason, null, accumUpdates, metricPeaks) abort("Task %s in stage %s (TID %d) can not write to output file: %s".format( @@ -1001,8 +1021,10 @@ private[spark] class TaskSetManager( logWarning(failureReason) } else { logInfo( - s"Lost $task on ${info.host}, executor ${info.executorId}: " + - s"${ef.className} (${ef.description}) [duplicate $dupCount]") + log"Lost ${MDC(TASK_NAME, task)} on ${MDC(HOST, info.host)}, " + + log"executor ${MDC(LogKeys.EXECUTOR_ID, info.executorId)}: " + + log"${MDC(CLASS_NAME, ef.className)} " + + log"(${MDC(DESCRIPTION, ef.description)}) [duplicate ${MDC(COUNT, dupCount)}]") } ef.exception @@ -1014,12 +1036,12 @@ private[spark] class TaskSetManager( None case e: ExecutorLostFailure if !e.exitCausedByApp => - logInfo(s"${taskName(tid)} failed because while it was being computed, its executor " + - "exited for a reason unrelated to the task. Not counting this failure towards the " + - "maximum number of failures for the task.") + logInfo(log"${MDC(TASK_NAME, taskName(tid))} failed because while it was being computed," + + log" its executor exited for a reason unrelated to the task. 
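// [Illustrative aside, not part of the patch] The failure reason above now exists in two
// forms: the log"..." version is a MessageWithContext for the structured logWarning/logError
// sinks, while failureReasonString keeps the plain interpolation for APIs that still take a
// String (abort() and updateExcludedForFailedTask in the next hunks). A minimal sketch of the
// split, with a hypothetical class and method name:
import org.apache.spark.internal.{Logging, LogKeys, MDC}

class FailureReasonSketch extends Logging {
  def report(taskName: String, host: String, error: String): String = {
    val failureReason = log"Lost ${MDC(LogKeys.TASK_NAME, taskName)} " +
      log"(${MDC(LogKeys.HOST, host)}): ${MDC(LogKeys.ERROR, error)}"
    logWarning(failureReason)          // structured sink keeps the MDC key/value context
    s"Lost $taskName ($host): $error"  // plain String, returned for String-only APIs
  }
}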
" + + log"Not counting this failure towards the maximum number of failures for the task.") None - case e: TaskFailedReason => // TaskResultLost and others + case _: TaskFailedReason => // TaskResultLost and others logWarning(failureReason) None } @@ -1034,22 +1056,22 @@ private[spark] class TaskSetManager( if (!isZombie && reason.countTowardsTaskFailures) { assert (null != failureReason) taskSetExcludelistHelperOpt.foreach(_.updateExcludedForFailedTask( - info.host, info.executorId, index, failureReason)) + info.host, info.executorId, index, failureReasonString)) numFailures(index) += 1 if (numFailures(index) >= maxTaskFailures) { - logError(log"Task ${MDC(TASK_ID, index)} in stage ${MDC(STAGE_ID, taskSet.id)} failed " + - log"${MDC(MAX_ATTEMPTS, maxTaskFailures)} times; aborting job") + logError(log"Task ${MDC(TASK_INDEX, index)} in stage " + taskSet.logId + + log" failed ${MDC(MAX_ATTEMPTS, maxTaskFailures)} times; aborting job") abort("Task %d in stage %s failed %d times, most recent failure: %s\nDriver stacktrace:" - .format(index, taskSet.id, maxTaskFailures, failureReason), failureException) + .format(index, taskSet.id, maxTaskFailures, failureReasonString), failureException) return } } if (successful(index)) { - logInfo(s"${taskName(info.taskId)} failed, but the task will not" + - " be re-executed (either because the task failed with a shuffle data fetch failure," + - " so the previous stage needs to be re-run, or because a different copy of the task" + - " has already succeeded).") + logInfo(log"${MDC(LogKeys.TASK_NAME, taskName(info.taskId))} failed, but the task will not" + + log" be re-executed (either because the task failed with a shuffle data fetch failure," + + log" so the previous stage needs to be re-run, or because a different copy of the task" + + log" has already succeeded).") } else { addPendingTask(index) } @@ -1232,9 +1254,10 @@ private[spark] class TaskSetManager( if (speculated) { addPendingTask(index, speculatable = true) logInfo( - ("Marking task %d in stage %s (on %s) as speculatable because it ran more" + - " than %.0f ms(%d speculatable tasks in this taskset now)") - .format(index, taskSet.id, info.host, threshold, speculatableTasks.size + 1)) + log"Marking task ${MDC(TASK_INDEX, index)} in stage " + taskSet.logId + + log" (on ${MDC(HOST, info.host)}) as speculatable because it ran more than " + + log"${MDC(TIMEOUT, threshold)} ms(${MDC(NUM_TASKS, speculatableTasks.size + 1)}" + + log"speculatable tasks in this taskset now)") speculatableTasks += index sched.dagScheduler.speculativeTaskSubmitted(tasks(index), index) } @@ -1281,7 +1304,8 @@ private[spark] class TaskSetManager( if (foundTasks) { val elapsedMs = clock.getTimeMillis() - timeMs if (elapsedMs > minTimeToSpeculation) { - logWarning(s"Time to checkSpeculatableTasks ${elapsedMs}ms > ${minTimeToSpeculation}ms") + logWarning(log"Time to checkSpeculatableTasks ${MDC(TIME_UNITS, elapsedMs)}ms > " + + log"${MDC(MIN_TIME, minTimeToSpeculation)}ms") } } foundTasks diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index 06cfb53e2dede..deaa1b4e47906 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -33,7 +33,8 @@ import org.apache.spark.deploy.security.HadoopDelegationTokenManager import 
org.apache.spark.errors.SparkCoreErrors import org.apache.spark.executor.ExecutorLogUrlHandler import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.ERROR +import org.apache.spark.internal.LogKeys +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Network._ import org.apache.spark.resource.ResourceProfile @@ -41,6 +42,7 @@ import org.apache.spark.rpc._ import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend.ENDPOINT_NAME +import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS import org.apache.spark.status.api.v1.ThreadStackTrace import org.apache.spark.util.{RpcUtils, SerializableBuffer, ThreadUtils, Utils} import org.apache.spark.util.ArrayImplicits._ @@ -181,8 +183,9 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp makeOffers(executorId) case None => // Ignoring the update since we don't know about the executor. - logWarning(s"Ignored task status update ($taskId state $state) " + - s"from unknown executor with ID $executorId") + logWarning(log"Ignored task status update (${MDC(TASK_ID, taskId)} " + + log"state ${MDC(TASK_STATE, state)}) " + + log"from unknown executor with ID ${MDC(LogKeys.EXECUTOR_ID, executorId)}") } } @@ -199,7 +202,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp KillTask(taskId, executorId, interruptThread, reason)) case None => // Ignoring the task kill since the executor is not registered. - logWarning(s"Attempted to kill task $taskId for unknown executor $executorId.") + logWarning(log"Attempted to kill task ${MDC(TASK_ID, taskId)} " + + log"for unknown executor ${MDC(LogKeys.EXECUTOR_ID, executorId)}.") } case KillExecutorsOnHost(host) => @@ -255,7 +259,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp // If the cluster manager gives us an executor on an excluded node (because it // already started allocating those resources before we informed it of our exclusion, // or if it ignored our exclusion), then we reject that executor immediately. 
- logInfo(s"Rejecting $executorId as it has been excluded.") + logInfo(log"Rejecting ${MDC(LogKeys.EXECUTOR_ID, executorId)} as it has been excluded.") context.sendFailure( new IllegalStateException(s"Executor is excluded due to failures: $executorId")) } else { @@ -266,8 +270,10 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } else { context.senderAddress } - logInfo(s"Registered executor $executorRef ($executorAddress) with ID $executorId, " + - s" ResourceProfileId $resourceProfileId") + logInfo(log"Registered executor ${MDC(LogKeys.RPC_ENDPOINT_REF, executorRef)} " + + log"(${MDC(LogKeys.RPC_ADDRESS, executorAddress)}) " + + log"with ID ${MDC(LogKeys.EXECUTOR_ID, executorId)}, " + + log"ResourceProfileId ${MDC(LogKeys.RESOURCE_PROFILE_ID, resourceProfileId)}") addressToExecutorId(executorAddress) = executorId totalCoreCount.addAndGet(cores) totalRegisteredExecutors.addAndGet(1) @@ -321,7 +327,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp case UpdateExecutorsLogLevel(logLevel) => currentLogLevel = Some(logLevel) - logInfo(s"Asking each executor to refresh the log level to $logLevel") + logInfo(log"Asking each executor to refresh the log level to " + + log"${MDC(LogKeys.LOG_LEVEL, logLevel)}") for ((_, executorData) <- executorDataMap) { executorData.executorEndpoint.send(UpdateExecutorLogLevel(logLevel)) } @@ -340,7 +347,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp // Do not change this code without running the K8s integration suites case ExecutorDecommissioning(executorId) => - logWarning(s"Received executor $executorId decommissioned message") + logWarning(log"Received executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} " + + log"decommissioned message") context.reply( decommissionExecutor( executorId, @@ -493,7 +501,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp // forever. Therefore, we should also post `SparkListenerExecutorRemoved` here. 
listenerBus.post(SparkListenerExecutorRemoved( System.currentTimeMillis(), executorId, reason.toString)) - logInfo(s"Asked to remove non-existent executor $executorId") + logInfo( + log"Asked to remove non-existent executor ${MDC(LogKeys.EXECUTOR_ID, executorId)}") } } @@ -522,7 +531,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp } if (shouldDisable) { - logInfo(s"Disabling executor $executorId.") + logInfo(log"Disabling executor ${MDC(LogKeys.EXECUTOR_ID, executorId)}.") scheduler.executorLost(executorId, LossReasonPending) } @@ -566,7 +575,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp return executorsToDecommission.toImmutableArraySeq } - logInfo(s"Decommission executors: ${executorsToDecommission.mkString(", ")}") + logInfo(log"Decommission executors: " + + log"${MDC(LogKeys.EXECUTOR_IDS, executorsToDecommission.mkString(", "))}") // If we don't want to replace the executors we are decommissioning if (adjustTargetNumExecutors) { @@ -585,7 +595,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp if (!triggeredByExecutor) { executorsToDecommission.foreach { executorId => - logInfo(s"Notify executor $executorId to decommission.") + logInfo(log"Notify executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} to decommission.") executorDataMap(executorId).executorEndpoint.send(DecommissionExecutor) } } @@ -597,7 +607,9 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp executorsToDecommission.filter(executorsPendingDecommission.contains) } if (stragglers.nonEmpty) { - logInfo(s"${stragglers.toList} failed to decommission in ${cleanupInterval}, killing.") + logInfo( + log"${MDC(LogKeys.EXECUTOR_IDS, stragglers.toList)} failed to decommission in " + + log"${MDC(LogKeys.INTERVAL, cleanupInterval)}, killing.") killExecutors(stragglers.toImmutableArraySeq, false, false, true) } } @@ -714,13 +726,14 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp override def isReady(): Boolean = { if (sufficientResourcesRegistered()) { - logInfo("SchedulerBackend is ready for scheduling beginning after " + - s"reached minRegisteredResourcesRatio: $minRegisteredRatio") + logInfo(log"SchedulerBackend is ready for scheduling beginning after " + + log"reached minRegisteredResourcesRatio: ${MDC(LogKeys.MIN_SIZE, minRegisteredRatio)}") return true } if ((System.nanoTime() - createTimeNs) >= maxRegisteredWaitingTimeNs) { - logInfo("SchedulerBackend is ready for scheduling beginning after waiting " + - s"maxRegisteredResourcesWaitingTime: $maxRegisteredWaitingTimeNs(ns)") + logInfo(log"SchedulerBackend is ready for scheduling beginning after waiting " + + log"maxRegisteredResourcesWaitingTime: " + + log"${MDC(LogKeys.TIMEOUT, maxRegisteredWaitingTimeNs / NANOS_PER_MILLIS.toDouble)}(ms)") return true } false @@ -797,7 +810,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp "Attempted to request a negative number of additional executor(s) " + s"$numAdditionalExecutors from the cluster manager. 
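// [Illustrative aside, not part of the patch] The removed isReady() message printed the raw
// nanosecond budget ("$maxRegisteredWaitingTimeNs(ns)"); the replacement divides by
// NANOS_PER_MILLIS so the TIMEOUT field is reported in milliseconds. Sketch of the conversion
// (the constant's value, 1,000,000 ns per ms, is assumed here):
object WaitingTimeLogSketch {
  private val NanosPerMillis = 1000000L
  def toMillis(maxRegisteredWaitingTimeNs: Long): Double =
    maxRegisteredWaitingTimeNs / NanosPerMillis.toDouble  // e.g. 30000000000L -> 30000.0
}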
Please specify a positive number!") } - logInfo(s"Requesting $numAdditionalExecutors additional executor(s) from the cluster manager") + logInfo(log"Requesting ${MDC(LogKeys.NUM_EXECUTORS, numAdditionalExecutors)} additional " + + log"executor(s) from the cluster manager") val response = synchronized { val defaultProf = scheduler.sc.resourceProfileManager.defaultResourceProfile @@ -947,12 +961,13 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp adjustTargetNumExecutors: Boolean, countFailures: Boolean, force: Boolean): Seq[String] = { - logInfo(s"Requesting to kill executor(s) ${executorIds.mkString(", ")}") + logInfo( + log"Requesting to kill executor(s) ${MDC(LogKeys.EXECUTOR_IDS, executorIds.mkString(", "))}") val response = withLock { val (knownExecutors, unknownExecutors) = executorIds.partition(executorDataMap.contains) unknownExecutors.foreach { id => - logWarning(s"Executor to kill $id does not exist!") + logWarning(log"Executor to kill ${MDC(LogKeys.EXECUTOR_ID, id)} does not exist!") } // If an executor is already pending to be removed, do not kill it again (SPARK-9795) @@ -962,7 +977,8 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp .filter { id => force || !scheduler.isExecutorBusy(id) } executorsToKill.foreach { id => executorsPendingToRemove(id) = !countFailures } - logInfo(s"Actual list of executor(s) to be killed is ${executorsToKill.mkString(", ")}") + logInfo(log"Actual list of executor(s) to be killed is " + + log"${MDC(LogKeys.EXECUTOR_IDS, executorsToKill.mkString(", "))}") // If we do not wish to replace the executors we kill, sync the target number of executors // with the cluster manager to avoid allocating new ones. When computing the new target, @@ -1003,7 +1019,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * @return whether the decommission request is acknowledged. */ final override def decommissionExecutorsOnHost(host: String): Boolean = { - logInfo(s"Requesting to kill any and all executors on host $host") + logInfo(log"Requesting to kill any and all executors on host ${MDC(LogKeys.HOST, host)}") // A potential race exists if a new executor attempts to register on a host // that is on the exclude list and is no longer valid. To avoid this race, // all executor registration and decommissioning happens in the event loop. This way, either @@ -1019,7 +1035,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp * @return whether the kill request is acknowledged. */ final override def killExecutorsOnHost(host: String): Boolean = { - logInfo(s"Requesting to kill any and all executors on host $host") + logInfo(log"Requesting to kill any and all executors on host ${MDC(LogKeys.HOST, host)}") // A potential race exists if a new executor attempts to register on a host // that is on the exclude list and is no longer valid. To avoid this race, // all executor registration and killing happens in the event loop. 
This way, either diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index f92756105977c..f4caecd7d6741 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -27,8 +27,7 @@ import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.deploy.{ApplicationDescription, Command} import org.apache.spark.deploy.client.{StandaloneAppClient, StandaloneAppClientListener} import org.apache.spark.executor.ExecutorExitCode -import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.REASON +import org.apache.spark.internal.{config, Logging, LogKeys, MDC} import org.apache.spark.internal.config.EXECUTOR_REMOVE_DELAY import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle} @@ -145,7 +144,7 @@ private[spark] class StandaloneSchedulerBackend( } override def connected(appId: String): Unit = { - logInfo("Connected to Spark cluster with app ID " + appId) + logInfo(log"Connected to Spark cluster with app ID ${MDC(LogKeys.APP_ID, appId)}") this.appId = appId notifyContext() launcherBackend.setAppId(appId) @@ -162,7 +161,7 @@ private[spark] class StandaloneSchedulerBackend( notifyContext() if (!stopping.get) { launcherBackend.setState(SparkAppHandle.State.KILLED) - logError(log"Application has been killed. Reason: ${MDC(REASON, reason)}") + logError(log"Application has been killed. Reason: ${MDC(LogKeys.REASON, reason)}") try { scheduler.error(reason) } finally { @@ -174,8 +173,9 @@ private[spark] class StandaloneSchedulerBackend( override def executorAdded(fullId: String, workerId: String, hostPort: String, cores: Int, memory: Int): Unit = { - logInfo("Granted executor ID %s on hostPort %s with %d core(s), %s RAM".format( - fullId, hostPort, cores, Utils.megabytesToString(memory))) + logInfo(log"Granted executor ID ${MDC(LogKeys.EXECUTOR_ID, fullId)} on hostPort " + + log"${MDC(LogKeys.HOST_PORT, hostPort)} with ${MDC(LogKeys.NUM_CORES, cores)} core(s), " + + log"${MDC(LogKeys.MEMORY_SIZE, Utils.megabytesToString(memory))} RAM") } override def executorRemoved( @@ -192,23 +192,28 @@ private[spark] class StandaloneSchedulerBackend( case Some(code) => ExecutorExited(code, exitCausedByApp = true, message) case None => ExecutorProcessLost(message, workerHost, causedByApp = workerHost.isEmpty) } - logInfo("Executor %s removed: %s".format(fullId, message)) + logInfo( + log"Executor ${MDC(LogKeys.EXECUTOR_ID, fullId)} removed: ${MDC(LogKeys.MESSAGE, message)}") removeExecutor(fullId.split("/")(1), reason) } override def executorDecommissioned(fullId: String, decommissionInfo: ExecutorDecommissionInfo): Unit = { - logInfo(s"Asked to decommission executor $fullId") + logInfo(log"Asked to decommission executor ${MDC(LogKeys.EXECUTOR_ID, fullId)}") val execId = fullId.split("/")(1) decommissionExecutors( Array((execId, decommissionInfo)), adjustTargetNumExecutors = false, triggeredByExecutor = false) - logInfo("Executor %s decommissioned: %s".format(fullId, decommissionInfo)) + logInfo( + log"Executor ${MDC(LogKeys.EXECUTOR_ID, fullId)} " + + log"decommissioned: ${MDC(LogKeys.DESCRIPTION, decommissionInfo)}" + ) } override def workerRemoved(workerId: String, host: String, message: String): Unit = { - logInfo("Worker %s removed: 
%s".format(workerId, message)) + logInfo(log"Worker ${MDC(LogKeys.WORKER_ID, workerId)} removed: " + + log"${MDC(LogKeys.MESSAGE, message)}") removeWorker(workerId, host, message) } @@ -349,8 +354,8 @@ private[spark] class StandaloneSchedulerBackend( _executorRemoveDelay, TimeUnit.MILLISECONDS) } catch { case _: RejectedExecutionException if stopping.get() => - logWarning( - "Skipping onDisconnected RemoveExecutor call because the scheduler is stopping") + logWarning("Skipping onDisconnected RemoveExecutor call " + + "because the scheduler is stopping") } } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala index c389b0c988f4d..57505c87f879e 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitor.scala @@ -25,7 +25,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark._ import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config._ import org.apache.spark.resource.ResourceProfile.UNKNOWN_RESOURCE_PROFILE_ID import org.apache.spark.scheduler._ @@ -342,7 +342,8 @@ private[spark] class ExecutorMonitor( override def onExecutorAdded(event: SparkListenerExecutorAdded): Unit = { val exec = ensureExecutorIsTracked(event.executorId, event.executorInfo.resourceProfileId) exec.updateRunningTasks(0) - logInfo(s"New executor ${event.executorId} has registered (new total is ${executors.size()})") + logInfo(log"New executor ${MDC(LogKeys.EXECUTOR_ID, event.executorId)} has registered " + + log"(new total is ${MDC(LogKeys.COUNT, executors.size())})") } private def decrementExecResourceProfileCount(rpId: Int): Unit = { @@ -365,11 +366,14 @@ private[spark] class ExecutorMonitor( } else { metrics.exitedUnexpectedly.inc() } - logInfo(s"Executor ${event.executorId} is removed. Remove reason statistics: (" + - s"gracefully decommissioned: ${metrics.gracefullyDecommissioned.getCount()}, " + - s"decommision unfinished: ${metrics.decommissionUnfinished.getCount()}, " + - s"driver killed: ${metrics.driverKilled.getCount()}, " + - s"unexpectedly exited: ${metrics.exitedUnexpectedly.getCount()}).") + // scalastyle:off line.size.limit + logInfo(log"Executor ${MDC(LogKeys.EXECUTOR_ID, event.executorId)} is removed. 
" + + log"Remove reason statistics: (gracefully decommissioned: " + + log"${MDC(LogKeys.NUM_DECOMMISSIONED, metrics.gracefullyDecommissioned.getCount())}, " + + log"decommission unfinished: ${MDC(LogKeys.NUM_UNFINISHED_DECOMMISSIONED, metrics.decommissionUnfinished.getCount())}, " + + log"driver killed: ${MDC(LogKeys.NUM_EXECUTORS_KILLED, metrics.driverKilled.getCount())}, " + + log"unexpectedly exited: ${MDC(LogKeys.NUM_EXECUTORS_EXITED, metrics.exitedUnexpectedly.getCount())}).") + // scalastyle:on line.size.limit if (!removed.pendingRemoval || !removed.decommissioning) { nextTimeout.set(Long.MinValue) } diff --git a/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala index a00fe2a06899f..298669327a39c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalSchedulerBackend.scala @@ -142,6 +142,7 @@ private[spark] class LocalSchedulerBackend( Map.empty))) launcherBackend.setAppId(appId) launcherBackend.setState(SparkAppHandle.State.RUNNING) + reviveOffers() } override def stop(): Unit = { diff --git a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala index 409223132a626..1ee46d51ce70b 100644 --- a/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala +++ b/core/src/main/scala/org/apache/spark/security/CryptoStreamUtils.scala @@ -31,7 +31,8 @@ import org.apache.commons.crypto.random._ import org.apache.commons.crypto.stream._ import org.apache.spark.SparkConf -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.network.util.{CryptoUtils, JavaUtils} @@ -131,8 +132,8 @@ private[spark] object CryptoStreamUtils extends Logging { val initialIVFinish = System.nanoTime() val initialIVTime = TimeUnit.NANOSECONDS.toMillis(initialIVFinish - initialIVStart) if (initialIVTime > 2000) { - logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " + - s"used by CryptoStream") + logWarning(log"It costs ${MDC(TIME_UNITS, initialIVTime)} milliseconds " + + log"to create the Initialization Vector used by CryptoStream") } iv } diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 5a0b2ba3735c5..ec5d53e91b3e0 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -40,7 +40,7 @@ import org.roaringbitmap.RoaringBitmap import org.apache.spark._ import org.apache.spark.api.python.PythonBroadcast import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CLASS_NAME +import org.apache.spark.internal.LogKeys.CLASS_NAME import org.apache.spark.internal.config.Kryo._ import org.apache.spark.internal.io.FileCommitProtocol._ import org.apache.spark.network.util.ByteUnit @@ -227,10 +227,7 @@ class KryoSerializer(conf: SparkConf) // scalastyle:on - kryo.register(None.getClass) - kryo.register(Nil.getClass) kryo.register(Utils.classForName("scala.collection.immutable.ArraySeq$ofRef")) - kryo.register(Utils.classForName("scala.collection.immutable.$colon$colon")) 
kryo.register(Utils.classForName("scala.collection.immutable.Map$EmptyMap$")) kryo.register(Utils.classForName("scala.math.Ordering$Reverse")) kryo.register(Utils.classForName("scala.reflect.ClassTag$GenericClassTag")) diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index b878c88c43b03..30bc1382fb021 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -21,13 +21,16 @@ import java.io._ import java.nio.ByteBuffer import java.nio.channels.Channels import java.nio.file.Files +import java.util.{Collections, Map => JMap} import scala.collection.mutable.ArrayBuffer +import com.google.common.cache.CacheBuilder + import org.apache.spark.{SecurityManager, SparkConf, SparkEnv, SparkException} import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.{config, Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.io.NioBufferedFileInputStream import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.client.StreamCallbackWithID @@ -38,6 +41,7 @@ import org.apache.spark.serializer.SerializerManager import org.apache.spark.shuffle.IndexShuffleBlockResolver.NOOP_REDUCE_ID import org.apache.spark.storage._ import org.apache.spark.util.Utils +import org.apache.spark.util.collection.OpenHashSet /** * Create and maintain the shuffle blocks' mapping between logic block and physical file location. @@ -53,10 +57,23 @@ import org.apache.spark.util.Utils private[spark] class IndexShuffleBlockResolver( conf: SparkConf, // var for testing - var _blockManager: BlockManager = null) + var _blockManager: BlockManager, + val taskIdMapsForShuffle: JMap[Int, OpenHashSet[Long]]) extends ShuffleBlockResolver with Logging with MigratableResolver { + def this(conf: SparkConf) = { + this(conf, null, Collections.emptyMap()) + } + + def this(conf: SparkConf, _blockManager: BlockManager) = { + this(conf, _blockManager, Collections.emptyMap()) + } + + def this(conf: SparkConf, taskIdMapsForShuffle: JMap[Int, OpenHashSet[Long]]) = { + this(conf, null, taskIdMapsForShuffle) + } + private lazy val blockManager = Option(_blockManager).getOrElse(SparkEnv.get.blockManager) private val transportConf = { @@ -76,13 +93,21 @@ private[spark] class IndexShuffleBlockResolver( override def getStoredShuffles(): Seq[ShuffleBlockInfo] = { val allBlocks = blockManager.diskBlockManager.getAllBlocks() allBlocks.flatMap { - case ShuffleIndexBlockId(shuffleId, mapId, _) => + case ShuffleIndexBlockId(shuffleId, mapId, _) + if Option(shuffleIdsToSkip.getIfPresent(shuffleId)).isEmpty => Some(ShuffleBlockInfo(shuffleId, mapId)) case _ => None } } + private val shuffleIdsToSkip = + CacheBuilder.newBuilder().maximumSize(1000).build[java.lang.Integer, java.lang.Boolean]() + + override def addShuffleToSkip(shuffleId: ShuffleId): Unit = { + shuffleIdsToSkip.put(shuffleId, true) + } + private def getShuffleBytesStored(): Long = { val shuffleFiles: Seq[File] = getStoredShuffles().map { si => getDataFile(si.shuffleId, si.mapId) @@ -162,17 +187,17 @@ private[spark] class IndexShuffleBlockResolver( def removeDataByMap(shuffleId: Int, mapId: Long): Unit = { var file = getDataFile(shuffleId, mapId) if (file.exists() 
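// [Illustrative aside, not part of the patch] The shuffleIdsToSkip cache added to
// IndexShuffleBlockResolver above is a bounded Guava cache of shuffle ids that should not be
// migrated; getStoredShuffles() now drops any id present in it. Minimal standalone sketch of
// the same pattern (object and method names are hypothetical):
import com.google.common.cache.CacheBuilder

object ShuffleSkipSketch {
  private val shuffleIdsToSkip =
    CacheBuilder.newBuilder().maximumSize(1000).build[java.lang.Integer, java.lang.Boolean]()

  def addShuffleToSkip(shuffleId: Int): Unit = shuffleIdsToSkip.put(shuffleId, true)

  // Mirrors the filter in getStoredShuffles(): only ids absent from the cache are offered
  // for migration.
  def shouldMigrate(shuffleId: Int): Boolean =
    Option(shuffleIdsToSkip.getIfPresent(shuffleId)).isEmpty
}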
&& !file.delete()) { - logWarning(s"Error deleting data ${file.getPath()}") + logWarning(log"Error deleting data ${MDC(PATH, file.getPath())}") } file = getIndexFile(shuffleId, mapId) if (file.exists() && !file.delete()) { - logWarning(s"Error deleting index ${file.getPath()}") + logWarning(log"Error deleting index ${MDC(PATH, file.getPath())}") } file = getChecksumFile(shuffleId, mapId, conf.get(config.SHUFFLE_CHECKSUM_ALGORITHM)) if (file.exists() && !file.delete()) { - logWarning(s"Error deleting checksum ${file.getPath()}") + logWarning(log"Error deleting checksum ${MDC(PATH, file.getPath())}") } } @@ -275,12 +300,27 @@ private[spark] class IndexShuffleBlockResolver( throw SparkCoreErrors.failedRenameTempFileError(fileTmp, file) } } + blockId match { + case ShuffleIndexBlockId(shuffleId, mapId, _) => + val mapTaskIds = taskIdMapsForShuffle.computeIfAbsent( + shuffleId, _ => new OpenHashSet[Long](8) + ) + mapTaskIds.add(mapId) + + case ShuffleDataBlockId(shuffleId, mapId, _) => + val mapTaskIds = taskIdMapsForShuffle.computeIfAbsent( + shuffleId, _ => new OpenHashSet[Long](8) + ) + mapTaskIds.add(mapId) + + case _ => // Unreachable + } blockManager.reportBlockStatus(blockId, BlockStatus(StorageLevel.DISK_ONLY, 0, diskSize)) } override def onFailure(streamId: String, cause: Throwable): Unit = { // the framework handles the connection itself, we just need to do local cleanup - logWarning(s"Error while uploading $blockId", cause) + logWarning(log"Error while uploading ${MDC(BLOCK_ID, blockId)}", cause) channel.close() fileTmp.delete() } @@ -318,8 +358,9 @@ private[spark] class IndexShuffleBlockResolver( } } catch { case _: Exception => // If we can't load the blocks ignore them. - logWarning(s"Failed to resolve shuffle block ${shuffleBlockInfo}. " + - "This is expected to occur if a block is removed after decommissioning has started.") + logWarning(log"Failed to resolve shuffle block " + + log"${MDC(SHUFFLE_BLOCK_INFO, shuffleBlockInfo)}. " + + log"This is expected to occur if a block is removed after decommissioning has started.") List.empty[(BlockId, ManagedBuffer)] } } @@ -425,8 +466,8 @@ private[spark] class IndexShuffleBlockResolver( if (checksumTmp.exists()) { try { if (!checksumTmp.delete()) { - logError(s"Failed to delete temporary checksum file " + - s"at ${checksumTmp.getAbsolutePath}") + logError(log"Failed to delete temporary checksum file at " + + log"${MDC(LogKeys.PATH, checksumTmp.getAbsolutePath)}") } } catch { case e: Exception => @@ -475,7 +516,8 @@ private[spark] class IndexShuffleBlockResolver( if (propagateError) { throw SparkCoreErrors.failedRenameTempFileError(tmpFile, targetFile) } else { - logWarning(s"fail to rename file $tmpFile to $targetFile") + logWarning(log"fail to rename file ${MDC(TEMP_FILE, tmpFile)} " + + log"to ${MDC(TARGET_PATH, targetFile)}") } } } diff --git a/core/src/main/scala/org/apache/spark/shuffle/MigratableResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/MigratableResolver.scala index 9908281deed84..19835d515fec2 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/MigratableResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/MigratableResolver.scala @@ -35,6 +35,11 @@ trait MigratableResolver { */ def getStoredShuffles(): Seq[ShuffleBlockInfo] + /** + * Mark a shuffle that should not be migrated. + */ + def addShuffleToSkip(shuffleId: Int): Unit = {} + /** * Write a provided shuffle block as a stream. Used for block migrations. 
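// [Illustrative aside, not part of the patch] The blockId match added above records the map
// task id of every migrated shuffle index/data block in the shared taskIdMapsForShuffle map
// (shuffle id -> OpenHashSet of map ids), presumably so the receiving shuffle manager can
// later clean those files up the same way it cleans up locally written ones. Sketch of the
// bookkeeping (OpenHashSet is package-private, so this assumes code inside the
// org.apache.spark package tree; names are hypothetical):
import java.util.concurrent.ConcurrentHashMap
import org.apache.spark.util.collection.OpenHashSet

object MigratedShuffleBookkeepingSketch {
  private val taskIdMapsForShuffle = new ConcurrentHashMap[Int, OpenHashSet[Long]]()

  def recordMigratedMapOutput(shuffleId: Int, mapId: Long): Unit = {
    val mapTaskIds =
      taskIdMapsForShuffle.computeIfAbsent(shuffleId, _ => new OpenHashSet[Long](8))
    mapTaskIds.add(mapId)
  }
}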
* Up to the implementation to support STORAGE_REMOTE_SHUFFLE_MAX_DISK diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala index bd03934cada45..4e3191e44fbdf 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockPusher.scala @@ -27,7 +27,8 @@ import scala.util.control.NonFatal import org.apache.spark.{SecurityManager, ShuffleDependency, SparkConf, SparkContext, SparkEnv} import org.apache.spark.annotation.Since import org.apache.spark.executor.{CoarseGrainedExecutorBackend, ExecutorBackend} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.launcher.SparkLauncher import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer} @@ -248,7 +249,8 @@ private[spark] class ShuffleBlockPusher(conf: SparkConf) extends Logging { if (!errorHandler.shouldLogError(exception)) { logTrace(s"Pushing block $blockId to $address failed.", exception) } else { - logWarning(s"Pushing block $blockId to $address failed.", exception) + logWarning(log"Pushing block ${MDC(BLOCK_ID, blockId)} " + + log"to ${MDC(HOST_PORT, address)} failed.", exception) } handleResult(PushResult(blockId, exception)) } @@ -329,9 +331,9 @@ private[spark] class ShuffleBlockPusher(conf: SparkConf) extends Logging { unreachableBlockMgrs.add(address) removed += pushRequests.dequeueAll(req => req.address == address).length removed += deferredPushRequests.remove(address).map(_.length).getOrElse(0) - logWarning(s"Received a ConnectException from $address. " + - s"Dropping $removed push-requests and " + - s"not pushing any more blocks to this address.") + logWarning(log"Received a ConnectException from ${MDC(HOST_PORT, address)}. 
" + + log"Dropping ${MDC(NUM_REQUESTS, removed)} push-requests and " + + log"not pushing any more blocks to this address.") } } if (pushResult.failure != null && !errorHandler.shouldRetryError(pushResult.failure)) { @@ -360,7 +362,8 @@ private[spark] class ShuffleBlockPusher(conf: SparkConf) extends Logging { case Some(cb: CoarseGrainedExecutorBackend) => cb.notifyDriverAboutPushCompletion(shuffleId, shuffleMergeId, mapIndex) case Some(eb: ExecutorBackend) => - logWarning(s"Currently $eb doesn't support push-based shuffle") + logWarning(log"Currently ${MDC(EXECUTOR_BACKEND, eb)} " + + log"doesn't support push-based shuffle") case None => } pushCompletionNotified = true diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala index 1ab77643c0364..be42af092f24a 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleWriteProcessor.scala @@ -18,7 +18,8 @@ package org.apache.spark.shuffle import org.apache.spark.{ShuffleDependency, SparkEnv, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{NUM_MERGER_LOCATIONS, SHUFFLE_ID, STAGE_ID} import org.apache.spark.scheduler.MapStatus /** @@ -72,8 +73,10 @@ private[spark] class ShuffleWriteProcessor extends Serializable with Logging { if (!dep.shuffleMergeFinalized) { manager.shuffleBlockResolver match { case resolver: IndexShuffleBlockResolver => - logInfo(s"Shuffle merge enabled with ${dep.getMergerLocs.size} merger locations " + - s" for stage ${context.stageId()} with shuffle ID ${dep.shuffleId}") + logInfo(log"Shuffle merge enabled with" + + log" ${MDC(NUM_MERGER_LOCATIONS, dep.getMergerLocs.size)} merger locations" + + log" for stage ${MDC(STAGE_ID, context.stageId())}" + + log" with shuffle ID ${MDC(SHUFFLE_ID, dep.shuffleId)}") logDebug(s"Starting pushing blocks for the task ${context.taskAttemptId()}") val dataFile = resolver.getDataFile(dep.shuffleId, mapId) new ShuffleBlockPusher(SparkEnv.get.conf) diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala index 344020935f211..efffda43695cc 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala @@ -87,7 +87,8 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager private lazy val shuffleExecutorComponents = loadShuffleExecutorComponents(conf) - override val shuffleBlockResolver = new IndexShuffleBlockResolver(conf) + override val shuffleBlockResolver = + new IndexShuffleBlockResolver(conf, taskIdMapsForShuffle = taskIdMapsForShuffle) /** * Obtains a [[ShuffleHandle]] to pass to tasks. 
@@ -176,7 +177,7 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager metrics, shuffleExecutorComponents) case other: BaseShuffleHandle[K @unchecked, V @unchecked, _] => - new SortShuffleWriter(other, mapId, context, shuffleExecutorComponents) + new SortShuffleWriter(other, mapId, context, metrics, shuffleExecutorComponents) } } diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala index 8613fe11a4c2f..3be7d24f7e4ec 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleWriter.scala @@ -21,6 +21,7 @@ import org.apache.spark._ import org.apache.spark.internal.{config, Logging} import org.apache.spark.scheduler.MapStatus import org.apache.spark.shuffle.{BaseShuffleHandle, ShuffleWriter} +import org.apache.spark.shuffle.ShuffleWriteMetricsReporter import org.apache.spark.shuffle.api.ShuffleExecutorComponents import org.apache.spark.util.collection.ExternalSorter @@ -28,6 +29,7 @@ private[spark] class SortShuffleWriter[K, V, C]( handle: BaseShuffleHandle[K, V, C], mapId: Long, context: TaskContext, + writeMetrics: ShuffleWriteMetricsReporter, shuffleExecutorComponents: ShuffleExecutorComponents) extends ShuffleWriter[K, V] with Logging { @@ -46,8 +48,6 @@ private[spark] class SortShuffleWriter[K, V, C]( private var partitionLengths: Array[Long] = _ - private val writeMetrics = context.taskMetrics().shuffleWriteMetrics - /** Write a bunch of records to this task's output */ override def write(records: Iterator[Product2[K, V]]): Unit = { sorter = if (dep.mapSideCombine) { @@ -67,7 +67,7 @@ private[spark] class SortShuffleWriter[K, V, C]( // (see SPARK-3570). 
val mapOutputWriter = shuffleExecutorComponents.createMapOutputWriter( dep.shuffleId, mapId, dep.partitioner.numPartitions) - sorter.writePartitionedMapOutput(dep.shuffleId, mapId, mapOutputWriter) + sorter.writePartitionedMapOutput(dep.shuffleId, mapId, mapOutputWriter, writeMetrics) partitionLengths = mapOutputWriter.commitAllPartitions(sorter.getChecksums).getPartitionLengths mapStatus = MapStatus(blockManager.shuffleServerId, partitionLengths, mapId) } diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala index 24f4ff1bd6728..5c93bf4bf77a0 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala @@ -26,7 +26,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark._ import org.apache.spark.executor.{ExecutorMetrics, TaskMetrics} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.CPUS_PER_TASK import org.apache.spark.internal.config.Status._ import org.apache.spark.resource.ResourceProfile.CPUS @@ -662,7 +662,7 @@ private[spark] class AppStatusListener( case e: TaskFailedReason => // All other failure cases Some(e.toErrorString) case other => - logInfo(s"Unhandled task end reason: $other") + logInfo(log"Unhandled task end reason: ${MDC(LogKeys.REASON, other)}") None } task.errorMessage = errorMessage diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index 109a9a2e3eb94..87f876467c30e 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -26,7 +26,7 @@ import scala.jdk.CollectionConverters._ import org.apache.spark.{JobExecutionStatus, SparkConf, SparkContext} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.internal.config.Status.LIVE_UI_LOCAL_STORE_DIR import org.apache.spark.status.AppStatusUtils.getQuantilesValue import org.apache.spark.status.api.v1 @@ -861,7 +861,7 @@ private[spark] object AppStatusStore extends Logging { def createStorePath(rootDir: String): Option[File] = { try { val localDir = Utils.createDirectory(rootDir, "spark-ui") - logInfo(s"Created spark ui store directory at $rootDir") + logInfo(log"Created spark ui store directory at ${MDC(PATH, rootDir)}") Some(localDir) } catch { case e: IOException => diff --git a/core/src/main/scala/org/apache/spark/status/KVUtils.scala b/core/src/main/scala/org/apache/spark/status/KVUtils.scala index 821686803f469..e334626413dc0 100644 --- a/core/src/main/scala/org/apache/spark/status/KVUtils.scala +++ b/core/src/main/scala/org/apache/spark/status/KVUtils.scala @@ -31,7 +31,8 @@ import org.rocksdb.RocksDBException import org.apache.spark.SparkConf import org.apache.spark.deploy.history.{FsHistoryProvider, FsHistoryProviderMetadata} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.History import org.apache.spark.internal.config.History.HYBRID_STORE_DISK_BACKEND import org.apache.spark.internal.config.History.HybridStoreDiskBackend @@ -154,7 +155,7 @@ private[spark] object KVUtils extends Logging { open(dbPath, 
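// [Illustrative aside, not part of the patch] With the change above, SortShuffleWriter no
// longer pulls its metrics from TaskContext; the ShuffleWriteMetricsReporter is injected by
// the caller and threaded through to writePartitionedMapOutput. The call shape from
// SortShuffleManager.getWriter now looks roughly like the following (sketch only; `handle`,
// `mapId`, `context`, `metrics` and `shuffleExecutorComponents` are the surrounding
// parameters in that method):
//
//   new SortShuffleWriter(handle, mapId, context, metrics, shuffleExecutorComponents)
//
// which lets callers supply any reporter instead of the writer being hard-wired to
// context.taskMetrics().shuffleWriteMetrics.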
metadata, conf, live) case dbExc @ (_: NativeDB.DBException | _: RocksDBException) => // Get rid of the corrupted data and re-create it. - logWarning(s"Failed to load disk store $dbPath :", dbExc) + logWarning(log"Failed to load disk store ${MDC(PATH, dbPath)} :", dbExc) Utils.deleteRecursively(dbPath) open(dbPath, metadata, conf, live) } diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala index 7a0c69e294883..6ae1dce57f31c 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -510,7 +510,7 @@ case class StackTrace(elems: Seq[String]) { override def toString: String = elems.mkString def html: NodeSeq = { - val withNewLine = elems.foldLeft(NodeSeq.Empty) { (acc, elem) => + val withNewLine = elems.map(_.stripLineEnd).foldLeft(NodeSeq.Empty) { (acc, elem) => if (acc.isEmpty) { acc :+ Text(elem) } else { diff --git a/core/src/main/scala/org/apache/spark/storage/BlockId.scala b/core/src/main/scala/org/apache/spark/storage/BlockId.scala index 585d9a886b473..6eb015d56b2c7 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockId.scala @@ -170,6 +170,11 @@ case class StreamBlockId(streamId: Int, uniqueId: Long) extends BlockId { override def name: String = "input-" + streamId + "-" + uniqueId } +@DeveloperApi +case class PythonStreamBlockId(streamId: Int, uniqueId: Long) extends BlockId { + override def name: String = "python-stream-" + streamId + "-" + uniqueId +} + /** Id associated with temporary local data managed as blocks. Not serializable. */ private[spark] case class TempLocalBlockId(id: UUID) extends BlockId { override def name: String = "temp_local_" + id @@ -213,6 +218,7 @@ object BlockId { val BROADCAST = "broadcast_([0-9]+)([_A-Za-z0-9]*)".r val TASKRESULT = "taskresult_([0-9]+)".r val STREAM = "input-([0-9]+)-([0-9]+)".r + val PYTHON_STREAM = "python-stream-([0-9]+)-([0-9]+)".r val TEMP_LOCAL = "temp_local_([-A-Fa-f0-9]+)".r val TEMP_SHUFFLE = "temp_shuffle_([-A-Fa-f0-9]+)".r val TEST = "test_(.*)".r @@ -250,6 +256,8 @@ object BlockId { TaskResultBlockId(taskId.toLong) case STREAM(streamId, uniqueId) => StreamBlockId(streamId.toInt, uniqueId.toLong) + case PYTHON_STREAM(streamId, uniqueId) => + PythonStreamBlockId(streamId.toInt, uniqueId.toLong) case TEMP_LOCAL(uuid) => TempLocalBlockId(UUID.fromString(uuid)) case TEMP_SHUFFLE(uuid) => diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 31669e688a197..8655b72310795 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -42,7 +42,7 @@ import org.apache.spark._ import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.executor.DataReadMethod import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.{BLOCK_ID, COUNT, SLEEP_TIME} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.{Network, RDD_CACHE_VISIBILITY_TRACKING_ENABLED, Tests} import org.apache.spark.memory.{MemoryManager, MemoryMode} import org.apache.spark.metrics.source.Source @@ -305,7 +305,7 @@ private[spark] class BlockManager( // This is a lazy val so someone can migrating RDDs even if they don't have a MigratableResolver // for shuffles. 
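// [Illustrative aside, not part of the patch] The new PythonStreamBlockId round-trips through
// BlockId.apply like the other block id types, because its name matches the PYTHON_STREAM
// regex registered above. Sketch (the object name is hypothetical):
import org.apache.spark.storage.{BlockId, PythonStreamBlockId}

object PythonStreamBlockIdSketch {
  def roundTrip(): Unit = {
    val id = PythonStreamBlockId(streamId = 1, uniqueId = 7L)
    assert(id.name == "python-stream-1-7")
    assert(BlockId("python-stream-1-7") == id)  // parsed back by BlockId.apply
  }
}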
Used in BlockManagerDecommissioner & block puts. - private[storage] lazy val migratableResolver: MigratableResolver = { + lazy val migratableResolver: MigratableResolver = { shuffleManager.shuffleBlockResolver.asInstanceOf[MigratableResolver] } @@ -423,7 +423,7 @@ private[spark] class BlockManager( saveSerializedValuesToMemoryStore(readToByteBuffer()) } if (!putSucceeded && level.useDisk) { - logWarning(s"Persisting block $blockId to disk instead.") + logWarning(log"Persisting block ${MDC(BLOCK_ID, blockId)} to disk instead.") saveToDiskStore() } } else if (level.useDisk) { @@ -535,7 +535,7 @@ private[spark] class BlockManager( val priorityClass = conf.get(config.STORAGE_REPLICATION_POLICY) val clazz = Utils.classForName(priorityClass) val ret = clazz.getConstructor().newInstance().asInstanceOf[BlockReplicationPolicy] - logInfo(s"Using $priorityClass for block replication policy") + logInfo(log"Using ${MDC(CLASS_NAME, priorityClass)} for block replication policy") ret } @@ -547,7 +547,7 @@ private[spark] class BlockManager( // the registration with the ESS. Therefore, this registration should be prior to // the BlockManager registration. See SPARK-39647. if (externalShuffleServiceEnabled) { - logInfo(s"external shuffle service port = $externalShuffleServicePort") + logInfo(log"external shuffle service port = ${MDC(PORT, externalShuffleServicePort)}") shuffleServerId = BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort) if (!isDriver && !(Utils.isTesting && conf.get(Tests.TEST_SKIP_ESS_REGISTER))) { @@ -585,7 +585,7 @@ private[spark] class BlockManager( } } - logInfo(s"Initialized BlockManager: $blockManagerId") + logInfo(log"Initialized BlockManager: ${MDC(BLOCK_MANAGER_ID, blockManagerId)}") } def shuffleMetricsSource: Source = { @@ -646,7 +646,7 @@ private[spark] class BlockManager( * will be made then. */ private def reportAllBlocks(): Unit = { - logInfo(s"Reporting ${blockInfoManager.size} blocks to the master.") + logInfo(log"Reporting ${MDC(NUM_BLOCKS, blockInfoManager.size)} blocks to the master.") for ((blockId, info) <- blockInfoManager.entries) { val status = getCurrentBlockStatus(blockId, info) if (info.tellMaster && !tryToReportBlockStatus(blockId, status)) { @@ -664,7 +664,7 @@ private[spark] class BlockManager( */ def reregister(): Unit = { // TODO: We might need to rate limit re-registering. - logInfo(s"BlockManager $blockManagerId re-registering with master") + logInfo(log"BlockManager ${MDC(BLOCK_MANAGER_ID, blockManagerId)} re-registering with master") val id = master.registerBlockManager(blockManagerId, diskBlockManager.localDirsString, maxOnHeapMemory, maxOffHeapMemory, storageEndpoint, isReRegister = true) if (id.executorId != BlockManagerId.INVALID_EXECUTOR_ID) { @@ -875,7 +875,7 @@ private[spark] class BlockManager( droppedMemorySize: Long = 0L): Unit = { val needReregister = !tryToReportBlockStatus(blockId, status, droppedMemorySize) if (needReregister) { - logInfo(s"Got told to re-register updating block $blockId") + logInfo(log"Got told to re-register updating block ${MDC(BLOCK_ID, blockId)}") // Re-registering will report our new block for free. asyncReregister() } @@ -1139,8 +1139,9 @@ private[spark] class BlockManager( None } } - logInfo(s"Read $blockId from the disk of a same host executor is " + - (if (res.isDefined) "successful." else "failed.")) + logInfo( + log"Read ${MDC(BLOCK_ID, blockId)} from the disk of a same host executor is " + + log"${MDC(STATUS, if (res.isDefined) "successful." 
else "failed.")}") res }.orElse { fetchRemoteManagedBuffer(blockId, blockSize, locationsAndStatus).map(bufferTransformer) @@ -1214,14 +1215,16 @@ private[spark] class BlockManager( // Give up trying anymore locations. Either we've tried all of the original locations, // or we've refreshed the list of locations from the master, and have still // hit failures after trying locations from the refreshed list. - logWarning(s"Failed to fetch remote block $blockId " + - s"from [${locations.mkString(", ")}] after $totalFailureCount fetch failures. " + - s"Most recent failure cause:", e) + logWarning(log"Failed to fetch remote block ${MDC(BLOCK_ID, blockId)} " + + log"from [${MDC(BLOCK_MANAGER_IDS, locations.mkString(", "))}] " + + log"after ${MDC(NUM_FAILURES, totalFailureCount)} fetch failures. " + + log"Most recent failure cause:", e) return None } - logWarning(s"Failed to fetch remote block $blockId " + - s"from $loc (failed attempt $runningFailureCount)", e) + logWarning(log"Failed to fetch remote block ${MDC(BLOCK_ID, blockId)} " + + log"from ${MDC(BLOCK_MANAGER_ID, loc)} " + + log"(failed attempt ${MDC(NUM_FAILURES, runningFailureCount)}", e) // If there is a large number of executors then locations list can contain a // large number of stale entries causing a large number of retries that may @@ -1306,12 +1309,12 @@ private[spark] class BlockManager( def get[T: ClassTag](blockId: BlockId): Option[BlockResult] = { val local = getLocalValues(blockId) if (local.isDefined) { - logInfo(s"Found block $blockId locally") + logInfo(log"Found block ${MDC(BLOCK_ID, blockId)} locally") return local } val remote = getRemoteValues[T](blockId) if (remote.isDefined) { - logInfo(s"Found block $blockId remotely") + logInfo(log"Found block ${MDC(BLOCK_ID, blockId)} remotely") return remote } None @@ -1335,7 +1338,8 @@ private[spark] class BlockManager( // SPARK-27666. When a task completes, Spark automatically releases all the blocks locked // by this task. We should not release any locks for a task that is already completed. if (taskContext.isDefined && taskContext.get.isCompleted()) { - logWarning(s"Task ${taskAttemptId.get} already completed, not releasing lock for $blockId") + logWarning(log"Task ${MDC(TASK_ATTEMPT_ID, taskAttemptId.get)} " + + log"already completed, not releasing lock for ${MDC(BLOCK_ID, blockId)}") } else { blockInfoManager.unlock(blockId, taskAttemptId) } @@ -1544,7 +1548,8 @@ private[spark] class BlockManager( if (blockInfoManager.lockNewBlockForWriting(blockId, newInfo, keepReadLock)) { newInfo } else { - logWarning(s"Block $blockId already exists on this machine; not re-adding it") + logWarning(log"Block ${MDC(BLOCK_ID, blockId)} " + + log"already exists on this machine; not re-adding it") return None } } @@ -1562,7 +1567,7 @@ private[spark] class BlockManager( blockInfoManager.unlock(blockId) } } else { - logWarning(s"Putting block $blockId failed") + logWarning(log"Putting block ${MDC(BLOCK_ID, blockId)} failed") removeBlockInternal(blockId, tellMaster = false) } res @@ -1570,7 +1575,8 @@ private[spark] class BlockManager( // Since removeBlockInternal may throw exception, // we should print exception first to show root cause. 
case NonFatal(e) => - logWarning(s"Putting block $blockId failed due to exception $e.") + logWarning(log"Putting block ${MDC(BLOCK_ID, blockId)} " + + log"failed due to exception ${MDC(ERROR, e)}.") throw e } finally { // This cleanup is performed in a finally block rather than a `catch` to avoid having to @@ -1631,7 +1637,7 @@ private[spark] class BlockManager( case Left(iter) => // Not enough space to unroll this block; drop to disk if applicable if (level.useDisk) { - logWarning(s"Persisting block $blockId to disk instead.") + logWarning(log"Persisting block ${MDC(BLOCK_ID, blockId)} to disk instead.") diskStore.put(blockId) { channel => val out = Channels.newOutputStream(channel) serializerManager.dataSerializeStream(blockId, out, iter)(classTag) @@ -1648,7 +1654,7 @@ private[spark] class BlockManager( case Left(partiallySerializedValues) => // Not enough space to unroll this block; drop to disk if applicable if (level.useDisk) { - logWarning(s"Persisting block $blockId to disk instead.") + logWarning(log"Persisting block ${MDC(BLOCK_ID, blockId)} to disk instead.") diskStore.put(blockId) { channel => val out = Channels.newOutputStream(channel) partiallySerializedValues.finishWritingToStream(out) @@ -1815,7 +1821,8 @@ private[spark] class BlockManager( existingReplicas: Set[BlockManagerId], maxReplicas: Int, maxReplicationFailures: Option[Int] = None): Boolean = { - logInfo(s"Using $blockManagerId to pro-actively replicate $blockId") + logInfo(log"Using ${MDC(BLOCK_MANAGER_ID, blockManagerId)} to pro-actively replicate " + + log"${MDC(BLOCK_ID, blockId)}") blockInfoManager.lockForReading(blockId).forall { info => val data = doGetLocalBytes(blockId, info) val storageLevel = StorageLevel( @@ -1904,7 +1911,9 @@ private[spark] class BlockManager( throw e // Everything else we may retry case NonFatal(e) => - logWarning(s"Failed to replicate $blockId to $peer, failure #$numFailures", e) + logWarning(log"Failed to replicate ${MDC(BLOCK_ID, blockId)} " + + log"to ${MDC(PEER, peer)}, " + + log"failure #${MDC(NUM_FAILURES, numFailures)}", e) peersFailedToReplicateTo += peer // we have a failed replication, so we get the list of peers again // we don't want peers we have already replicated to and the ones that @@ -1925,8 +1934,9 @@ private[spark] class BlockManager( logDebug(s"Replicating $blockId of ${data.size} bytes to " + s"${peersReplicatedTo.size} peer(s) took ${(System.nanoTime - startTime) / 1e6} ms") if (peersReplicatedTo.size < numPeersToReplicateTo) { - logWarning(s"Block $blockId replicated to only " + - s"${peersReplicatedTo.size} peer(s) instead of $numPeersToReplicateTo peers") + logWarning(log"Block ${MDC(BLOCK_ID, blockId)} replicated to only " + + log"${MDC(NUM_PEERS_REPLICATED_TO, peersReplicatedTo.size)} peer(s) " + + log"instead of ${MDC(NUM_PEERS_TO_REPLICATE_TO, numPeersToReplicateTo)} peers") return false } @@ -1969,14 +1979,14 @@ private[spark] class BlockManager( private[storage] override def dropFromMemory[T: ClassTag]( blockId: BlockId, data: () => Either[Array[T], ChunkedByteBuffer]): StorageLevel = { - logInfo(s"Dropping block $blockId from memory") + logInfo(log"Dropping block ${MDC(BLOCK_ID, blockId)} from memory") val info = blockInfoManager.assertBlockIsLockedForWriting(blockId) var blockIsUpdated = false val level = info.level // Drop to disk, if storage level requires if (level.useDisk && !diskStore.contains(blockId)) { - logInfo(s"Writing block $blockId to disk") + logInfo(log"Writing block ${MDC(BLOCK_ID, blockId)} to disk") data() match { case Left(elements) 
=> diskStore.put(blockId) { channel => @@ -1999,7 +2009,8 @@ private[spark] class BlockManager( if (blockIsRemoved) { blockIsUpdated = true } else { - logWarning(s"Block $blockId could not be dropped from memory as it does not exist") + logWarning(log"Block ${MDC(BLOCK_ID, blockId)} " + + log"could not be dropped from memory as it does not exist") } val status = getCurrentBlockStatus(blockId, info) @@ -2019,7 +2030,7 @@ private[spark] class BlockManager( */ def removeRdd(rddId: Int): Int = { // TODO: Avoid a linear scan by creating another mapping of RDD.id to blocks. - logInfo(s"Removing RDD $rddId") + logInfo(log"Removing RDD ${MDC(RDD_ID, rddId)}") val blocksToRemove = blockInfoManager.entries.flatMap(_._1.asRDDId).filter(_.rddId == rddId) blocksToRemove.foreach { blockId => removeBlock(blockId, tellMaster = false) } blocksToRemove.size @@ -2083,7 +2094,8 @@ private[spark] class BlockManager( blockInfoManager.lockForWriting(blockId) match { case None => // The block has already been removed; do nothing. - logWarning(s"Asked to remove block $blockId, which does not exist") + logWarning(log"Asked to remove block ${MDC(BLOCK_ID, blockId)}, " + + log"which does not exist") case Some(info) => removeBlockInternal(blockId, tellMaster = tellMaster && info.tellMaster) addUpdatedBlockStatusToTaskMetrics(blockId, BlockStatus.empty) @@ -2106,7 +2118,8 @@ private[spark] class BlockManager( val removedFromMemory = memoryStore.remove(blockId) val removedFromDisk = diskStore.remove(blockId) if (!removedFromMemory && !removedFromDisk) { - logWarning(s"Block $blockId could not be removed as it was not found on disk or in memory") + logWarning(log"Block ${MDC(BLOCK_ID, blockId)} " + + log"could not be removed as it was not found on disk or in memory") } blockInfoManager.removeBlock(blockId) @@ -2118,7 +2131,7 @@ private[spark] class BlockManager( } } finally { if (!hasRemoveBlock) { - logWarning(s"Block $blockId was not removed normally.") + logWarning(log"Block ${MDC(BLOCK_ID, blockId)} was not removed normally.") blockInfoManager.removeBlock(blockId) } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index 5b4ecef233f8f..19807453ee28c 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.spark._ import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.SHUFFLE_BLOCK_INFO +import org.apache.spark.internal.LogKeys._ import org.apache.spark.shuffle.ShuffleBlockInfo import org.apache.spark.storage.BlockManagerMessages.ReplicateBlock import org.apache.spark.util.{ThreadUtils, Utils} @@ -73,13 +73,15 @@ private[storage] class BlockManagerDecommissioner( private def allowRetry(shuffleBlock: ShuffleBlockInfo, failureNum: Int): Boolean = { if (failureNum < maxReplicationFailuresForDecommission) { - logInfo(s"Add $shuffleBlock back to migration queue for " + - s"retry ($failureNum / $maxReplicationFailuresForDecommission)") + logInfo(log"Add ${MDC(SHUFFLE_BLOCK_INFO, shuffleBlock)} back to migration queue for " + + log" retry (${MDC(FAILURES, failureNum)} / " + + log"${MDC(MAX_ATTEMPTS, maxReplicationFailuresForDecommission)})") // The block needs to retry so we should not mark it as finished 
shufflesToMigrate.add((shuffleBlock, failureNum)) } else { - logWarning(s"Give up migrating $shuffleBlock since it's been " + - s"failed for $maxReplicationFailuresForDecommission times") + logWarning(log"Give up migrating ${MDC(SHUFFLE_BLOCK_INFO, shuffleBlock)} " + + log"since it's been failed for " + + log"${MDC(MAX_ATTEMPTS, maxReplicationFailuresForDecommission)} times") false } } @@ -97,7 +99,7 @@ private[storage] class BlockManagerDecommissioner( } override def run(): Unit = { - logInfo(s"Starting shuffle block migration thread for $peer") + logInfo(log"Starting shuffle block migration thread for ${MDC(PEER, peer)}") // Once a block fails to transfer to an executor stop trying to transfer more blocks while (keepRunning) { try { @@ -106,10 +108,12 @@ private[storage] class BlockManagerDecommissioner( var isTargetDecommissioned = false // We only migrate a shuffle block when both index file and data file exist. if (blocks.isEmpty) { - logInfo(s"Ignore deleted shuffle block $shuffleBlockInfo") + logInfo(log"Ignore deleted shuffle block ${MDC(SHUFFLE_BLOCK_INFO, shuffleBlockInfo)}") } else { - logInfo(s"Got migration sub-blocks $blocks. Trying to migrate $shuffleBlockInfo " + - s"to $peer ($retryCount / $maxReplicationFailuresForDecommission)") + logInfo(log"Got migration sub-blocks ${MDC(BLOCK_IDS, blocks)}. Trying to migrate " + + log"${MDC(SHUFFLE_BLOCK_INFO, shuffleBlockInfo)} to ${MDC(PEER, peer)} " + + log"(${MDC(NUM_RETRY, retryCount)} / " + + log"${MDC(MAX_ATTEMPTS, maxReplicationFailuresForDecommission)}") // Migrate the components of the blocks. try { val startTime = System.currentTimeMillis() @@ -129,9 +133,10 @@ private[storage] class BlockManagerDecommissioner( logDebug(s"Migrated sub-block $blockId") } } - logInfo(s"Migrated $shuffleBlockInfo (" + - s"size: ${Utils.bytesToString(blocks.map(b => b._2.size()).sum)}) to $peer " + - s"in ${System.currentTimeMillis() - startTime} ms") + logInfo(log"Migrated ${MDC(SHUFFLE_BLOCK_INFO, shuffleBlockInfo)} (" + + log"size: ${MDC(SIZE, Utils.bytesToString(blocks.map(b => b._2.size()).sum))}) " + + log"to ${MDC(PEER, peer)} in " + + log"${MDC(DURATION, System.currentTimeMillis() - startTime)} ms") } catch { case e @ ( _ : IOException | _ : SparkException) => // If a block got deleted before netty opened the file handle, then trying to @@ -140,7 +145,8 @@ private[storage] class BlockManagerDecommissioner( // could also happen with manually managed shuffles or a GC event on the // driver a no longer referenced RDD with shuffle files. 
if (bm.migratableResolver.getMigrationBlocks(shuffleBlockInfo).size < blocks.size) { - logWarning(s"Skipping block $shuffleBlockInfo, block deleted.") + logWarning(log"Skipping block ${MDC(SHUFFLE_BLOCK_INFO, shuffleBlockInfo)}, " + + log"block deleted.") } else if (fallbackStorage.isDefined // Confirm peer is not the fallback BM ID because fallbackStorage would already // have been used in the try-block above so there's no point trying again @@ -165,7 +171,7 @@ private[storage] class BlockManagerDecommissioner( if (keepRunning) { numMigratedShuffles.incrementAndGet() } else { - logWarning(s"Stop migrating shuffle blocks to $peer") + logWarning(log"Stop migrating shuffle blocks to ${MDC(PEER, peer)}") val newRetryCount = if (isTargetDecommissioned) { retryCount @@ -179,7 +185,11 @@ private[storage] class BlockManagerDecommissioner( } } catch { case _: InterruptedException => - logInfo(s"Stop shuffle block migration${if (keepRunning) " unexpectedly"}.") + if (keepRunning) { + logInfo("Stop shuffle block migration unexpectedly.") + } else { + logInfo("Stop shuffle block migration.") + } keepRunning = false case NonFatal(e) => keepRunning = false @@ -232,12 +242,16 @@ private[storage] class BlockManagerDecommissioner( logInfo("Attempting to migrate all cached RDD blocks") rddBlocksLeft = decommissionRddCacheBlocks() lastRDDMigrationTime = startTime - logInfo(s"Finished current round RDD blocks migration, " + - s"waiting for ${sleepInterval}ms before the next round migration.") + logInfo(log"Finished current round RDD blocks migration, " + + log"waiting for ${MDC(SLEEP_TIME, sleepInterval)}ms before the next round migration.") Thread.sleep(sleepInterval) } catch { case _: InterruptedException => - logInfo(s"Stop RDD blocks migration${if (!stopped && !stoppedRDD) " unexpectedly"}.") + if (!stopped && !stoppedRDD) { + logInfo("Stop RDD blocks migration unexpectedly.") + } else { + logInfo("Stop RDD blocks migration.") + } stoppedRDD = true case NonFatal(e) => logError("Error occurred during RDD blocks migration.", e) @@ -263,8 +277,9 @@ private[storage] class BlockManagerDecommissioner( val startTime = System.nanoTime() shuffleBlocksLeft = refreshMigratableShuffleBlocks() lastShuffleMigrationTime = startTime - logInfo(s"Finished current round refreshing migratable shuffle blocks, " + - s"waiting for ${sleepInterval}ms before the next round refreshing.") + logInfo(log"Finished current round refreshing migratable shuffle blocks, " + + log"waiting for ${MDC(SLEEP_TIME, sleepInterval)}ms before the " + + log"next round refreshing.") Thread.sleep(sleepInterval) } catch { case _: InterruptedException if stopped => @@ -300,8 +315,9 @@ private[storage] class BlockManagerDecommissioner( shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate val remainedShuffles = migratingShuffles.size - numMigratedShuffles.get() - logInfo(s"${newShufflesToMigrate.size} of ${localShuffles.size} local shuffles " + - s"are added. In total, $remainedShuffles shuffles are remained.") + logInfo(log"${MDC(COUNT, newShufflesToMigrate.size)} of " + + log"${MDC(TOTAL, localShuffles.size)} local shuffles are added. " + + log"In total, ${MDC(NUM_REMAINED, remainedShuffles)} shuffles are remained.") // Update the threads doing migrations val livePeerSet = bm.getPeers(false).toSet @@ -348,10 +364,11 @@ private[storage] class BlockManagerDecommissioner( // Refresh peers and validate we have somewhere to move blocks. 
if (replicateBlocksInfo.nonEmpty) { - logInfo(s"Need to replicate ${replicateBlocksInfo.size} RDD blocks " + - "for block manager decommissioning") + logInfo( + log"Need to replicate ${MDC(NUM_REPLICAS, replicateBlocksInfo.size)} RDD blocks " + + log"for block manager decommissioning") } else { - logWarning(s"Asked to decommission RDD cache blocks, but no blocks to migrate") + logWarning("Asked to decommission RDD cache blocks, but no blocks to migrate") return false } @@ -362,8 +379,8 @@ private[storage] class BlockManagerDecommissioner( (replicateBlock.blockId, replicatedSuccessfully) }.filterNot(_._2).map(_._1) if (blocksFailedReplication.nonEmpty) { - logWarning("Blocks failed replication in cache decommissioning " + - s"process: ${blocksFailedReplication.mkString(",")}") + logWarning(log"Blocks failed replication in cache decommissioning " + + log"process: ${MDC(BLOCK_IDS, blocksFailedReplication.mkString(","))}") return true } false @@ -376,11 +393,12 @@ private[storage] class BlockManagerDecommissioner( blockToReplicate.maxReplicas, maxReplicationFailures = Some(maxReplicationFailuresForDecommission)) if (replicatedSuccessfully) { - logInfo(s"Block ${blockToReplicate.blockId} migrated successfully, Removing block now") + logInfo(log"Block ${MDC(BLOCK_ID, blockToReplicate.blockId)} migrated " + + log"successfully, Removing block now") bm.removeBlock(blockToReplicate.blockId) - logInfo(s"Block ${blockToReplicate.blockId} removed") + logInfo(log"Block ${MDC(BLOCK_ID, blockToReplicate.blockId)} removed") } else { - logWarning(s"Failed to migrate block ${blockToReplicate.blockId}") + logWarning(log"Failed to migrate block ${MDC(BLOCK_ID, blockToReplicate.blockId)}") } replicatedSuccessfully } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index 2b961317e01d9..276bd63e14237 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -23,7 +23,8 @@ import scala.concurrent.Future import org.apache.spark.SparkConf import org.apache.spark.errors.SparkCoreErrors -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.storage.BlockManagerMessages._ import org.apache.spark.util.{RpcUtils, ThreadUtils} @@ -41,7 +42,7 @@ class BlockManagerMaster( /** Remove a dead executor from the driver endpoint. This is only called on the driver side. 
*/ def removeExecutor(execId: String): Unit = { tell(RemoveExecutor(execId)) - logInfo("Removed " + execId + " successfully in removeExecutor") + logInfo(log"Removed ${MDC(EXECUTOR_ID, execId)} successfully in removeExecutor") } /** Decommission block managers corresponding to given set of executors @@ -61,7 +62,7 @@ class BlockManagerMaster( */ def removeExecutorAsync(execId: String): Unit = { driverEndpoint.ask[Boolean](RemoveExecutor(execId)) - logInfo("Removal of executor " + execId + " requested") + logInfo(log"Removal of executor ${MDC(EXECUTOR_ID, execId)} requested") } /** @@ -76,7 +77,7 @@ class BlockManagerMaster( maxOffHeapMemSize: Long, storageEndpoint: RpcEndpointRef, isReRegister: Boolean = false): BlockManagerId = { - logInfo(s"Registering BlockManager $id") + logInfo(log"Registering BlockManager ${MDC(BLOCK_MANAGER_ID, id)}") val updatedId = driverEndpoint.askSync[BlockManagerId]( RegisterBlockManager( id, @@ -89,9 +90,9 @@ class BlockManagerMaster( ) if (updatedId.executorId == BlockManagerId.INVALID_EXECUTOR_ID) { assert(isReRegister, "Got invalid executor id from non re-register case") - logInfo(s"Re-register BlockManager $id failed") + logInfo(log"Re-register BlockManager ${MDC(BLOCK_MANAGER_ID, id)} failed") } else { - logInfo(s"Registered BlockManager $updatedId") + logInfo(log"Registered BlockManager ${MDC(BLOCK_MANAGER_ID, updatedId)}") } updatedId } @@ -189,7 +190,8 @@ class BlockManagerMaster( def removeRdd(rddId: Int, blocking: Boolean): Unit = { val future = driverEndpoint.askSync[Future[Seq[Int]]](RemoveRdd(rddId)) future.failed.foreach(e => - logWarning(s"Failed to remove RDD $rddId - ${e.getMessage}", e) + logWarning(log"Failed to remove RDD ${MDC(RDD_ID, rddId)} - " + + log"${MDC(ERROR, e.getMessage)}", e) )(ThreadUtils.sameThread) if (blocking) { // the underlying Futures will timeout anyway, so it's safe to use infinite timeout here @@ -201,7 +203,8 @@ class BlockManagerMaster( def removeShuffle(shuffleId: Int, blocking: Boolean): Unit = { val future = driverEndpoint.askSync[Future[Seq[Boolean]]](RemoveShuffle(shuffleId)) future.failed.foreach(e => - logWarning(s"Failed to remove shuffle $shuffleId - ${e.getMessage}", e) + logWarning(log"Failed to remove shuffle ${MDC(SHUFFLE_ID, shuffleId)} - " + + log"${MDC(ERROR, e.getMessage)}", e) )(ThreadUtils.sameThread) if (blocking) { // the underlying Futures will timeout anyway, so it's safe to use infinite timeout here @@ -214,8 +217,9 @@ class BlockManagerMaster( val future = driverEndpoint.askSync[Future[Seq[Int]]]( RemoveBroadcast(broadcastId, removeFromMaster)) future.failed.foreach(e => - logWarning(s"Failed to remove broadcast $broadcastId" + - s" with removeFromMaster = $removeFromMaster - ${e.getMessage}", e) + logWarning(log"Failed to remove broadcast ${MDC(BROADCAST_ID, broadcastId)}" + + log" with removeFromMaster = ${MDC(REMOVE_FROM_MASTER, removeFromMaster)} - " + + log"${MDC(ERROR, e.getMessage)}", e) )(ThreadUtils.sameThread) if (blocking) { // the underlying Futures will timeout anyway, so it's safe to use infinite timeout here diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 5bb4e096c029c..73f89ea0e86e5 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -32,7 +32,7 @@ import com.google.common.cache.CacheBuilder import 
org.apache.spark.{MapOutputTrackerMaster, SparkConf, SparkContext, SparkEnv} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.{BLOCK_MANAGER_ID, EXECUTOR_ID, OLD_BLOCK_MANAGER_ID} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.RDD_CACHE_VISIBILITY_TRACKING_ENABLED import org.apache.spark.network.shuffle.{ExternalBlockStoreClient, RemoteBlockPushResolver} import org.apache.spark.rpc.{IsolatedThreadSafeRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv} @@ -110,7 +110,7 @@ class BlockManagerMasterEndpoint( val clazz = Utils.classForName(topologyMapperClassName) val mapper = clazz.getConstructor(classOf[SparkConf]).newInstance(conf).asInstanceOf[TopologyMapper] - logInfo(s"Using $topologyMapperClassName for getting topology information") + logInfo(log"Using ${MDC(CLASS_NAME, topologyMapperClassName)} for getting topology information") mapper } @@ -218,7 +218,8 @@ class BlockManagerMasterEndpoint( // executor is notified(see BlockManager.decommissionSelf), so we don't need to send the // notification here. val bms = executorIds.flatMap(blockManagerIdByExecutor.get) - logInfo(s"Mark BlockManagers (${bms.mkString(", ")}) as being decommissioning.") + logInfo(log"Mark BlockManagers (${MDC(BLOCK_MANAGER_IDS, bms.mkString(", "))}) as " + + log"being decommissioning.") decommissioningBlockManagerSet ++= bms context.reply(true) @@ -314,8 +315,9 @@ class BlockManagerMasterEndpoint( defaultValue: T): PartialFunction[Throwable, T] = { case e: IOException => if (!SparkContext.getActive.map(_.isStopped).getOrElse(true)) { - logWarning(s"Error trying to remove $blockType $blockId" + - s" from block manager $bmId", e) + logWarning(log"Error trying to remove ${MDC(BLOCK_TYPE, blockType)} " + + log"${MDC(BLOCK_ID, blockId)}" + + log" from block manager ${MDC(BLOCK_MANAGER_ID, bmId)}", e) } defaultValue @@ -333,8 +335,9 @@ class BlockManagerMasterEndpoint( false } if (!isAlive) { - logWarning(s"Error trying to remove $blockType $blockId. " + - s"The executor $executorId may have been lost.", t) + logWarning(log"Error trying to remove ${MDC(BLOCK_TYPE, blockType)} " + + log"${MDC(BLOCK_ID, blockId)}. " + + log"The executor ${MDC(EXECUTOR_ID, executorId)} may have been lost.", t) defaultValue } else { throw t @@ -516,7 +519,7 @@ class BlockManagerMasterEndpoint( // etc.) as replication doesn't make much sense in that context. 
if (locations.isEmpty) { blockLocations.remove(blockId) - logWarning(s"No more replicas available for $blockId !") + logWarning(log"No more replicas available for ${MDC(BLOCK_ID, blockId)}!") } else if (proactivelyReplicate && (blockId.isRDD || blockId.isInstanceOf[TestBlockId])) { // As a heuristic, assume single executor failure to find out the number of replicas that // existed before failure @@ -533,7 +536,7 @@ class BlockManagerMasterEndpoint( } listenerBus.post(SparkListenerBlockManagerRemoved(System.currentTimeMillis(), blockManagerId)) - logInfo(s"Removing block manager $blockManagerId") + logInfo(log"Removing block manager ${MDC(BLOCK_MANAGER_ID, blockManagerId)}") } @@ -549,7 +552,7 @@ class BlockManagerMasterEndpoint( } private def removeExecutor(execId: String): Unit = { - logInfo("Trying to remove executor " + execId + " from BlockManagerMaster.") + logInfo(log"Trying to remove executor ${MDC(EXECUTOR_ID, execId)} from BlockManagerMaster.") blockManagerIdByExecutor.get(execId).foreach(removeBlockManager) } @@ -705,8 +708,9 @@ class BlockManagerMasterEndpoint( removeExecutor(id.executorId) case None => } - logInfo("Registering block manager %s with %s RAM, %s".format( - id.hostPort, Utils.bytesToString(maxOnHeapMemSize + maxOffHeapMemSize), id)) + logInfo(log"Registering block manager ${MDC(HOST_PORT, id.hostPort)} with " + + log"${MDC(MEMORY_SIZE, Utils.bytesToString(maxOnHeapMemSize + maxOffHeapMemSize))} RAM, " + + log"${MDC(BLOCK_MANAGER_ID, id)}") blockManagerIdByExecutor(id.executorId) = id @@ -736,8 +740,8 @@ class BlockManagerMasterEndpoint( assert(!blockManagerInfo.contains(id), "BlockManager re-registration shouldn't succeed when the executor is lost") - logInfo(s"BlockManager ($id) re-registration is rejected since " + - s"the executor (${id.executorId}) has been lost") + logInfo(log"BlockManager (${MDC(BLOCK_MANAGER_ID, id)}) re-registration is rejected since " + + log"the executor (${MDC(EXECUTOR_ID, id.executorId)}) has been lost") // Use "invalid" as the return executor id to indicate the block manager that // re-registration failed. 
It's a bit hacky but fine since the returned block @@ -1055,26 +1059,30 @@ private[spark] class BlockManagerInfo( _blocks.put(blockId, blockStatus) _remainingMem -= memSize if (blockExists) { - logInfo(s"Updated $blockId in memory on ${blockManagerId.hostPort}" + - s" (current size: ${Utils.bytesToString(memSize)}," + - s" original size: ${Utils.bytesToString(originalMemSize)}," + - s" free: ${Utils.bytesToString(_remainingMem)})") + logInfo(log"Updated ${MDC(BLOCK_ID, blockId)} in memory on " + + log"${MDC(HOST_PORT, blockManagerId.hostPort)} (current size: " + + log"${MDC(CURRENT_MEMORY_SIZE, Utils.bytesToString(memSize))}, original " + + log"size: ${MDC(ORIGINAL_MEMORY_SIZE, Utils.bytesToString(originalMemSize))}, " + + log"free: ${MDC(FREE_MEMORY_SIZE, Utils.bytesToString(_remainingMem))})") } else { - logInfo(s"Added $blockId in memory on ${blockManagerId.hostPort}" + - s" (size: ${Utils.bytesToString(memSize)}," + - s" free: ${Utils.bytesToString(_remainingMem)})") + logInfo(log"Added ${MDC(BLOCK_ID, blockId)} in memory on " + + log"${MDC(HOST_PORT, blockManagerId.hostPort)} " + + log"(size: ${MDC(CURRENT_MEMORY_SIZE, Utils.bytesToString(memSize))}, " + + log"free: ${MDC(FREE_MEMORY_SIZE, Utils.bytesToString(_remainingMem))})") } } if (storageLevel.useDisk) { blockStatus = BlockStatus(storageLevel, memSize = 0, diskSize = diskSize) _blocks.put(blockId, blockStatus) if (blockExists) { - logInfo(s"Updated $blockId on disk on ${blockManagerId.hostPort}" + - s" (current size: ${Utils.bytesToString(diskSize)}," + - s" original size: ${Utils.bytesToString(originalDiskSize)})") + logInfo(log"Updated ${MDC(BLOCK_ID, blockId)} on disk on " + + log"${MDC(HOST_PORT, blockManagerId.hostPort)} " + + log"(current size: ${MDC(CURRENT_DISK_SIZE, Utils.bytesToString(diskSize))}," + + log" original size: ${MDC(ORIGINAL_DISK_SIZE, Utils.bytesToString(originalDiskSize))})") } else { - logInfo(s"Added $blockId on disk on ${blockManagerId.hostPort}" + - s" (size: ${Utils.bytesToString(diskSize)})") + logInfo(log"Added ${MDC(BLOCK_ID, blockId)} on disk on " + + log"${MDC(HOST_PORT, blockManagerId.hostPort)} (size: " + + log"${MDC(CURRENT_DISK_SIZE, Utils.bytesToString(diskSize))})") } } @@ -1090,13 +1098,15 @@ private[spark] class BlockManagerInfo( blockStatus.remove(blockId) } if (originalLevel.useMemory) { - logInfo(s"Removed $blockId on ${blockManagerId.hostPort} in memory" + - s" (size: ${Utils.bytesToString(originalMemSize)}," + - s" free: ${Utils.bytesToString(_remainingMem)})") + logInfo(log"Removed ${MDC(BLOCK_ID, blockId)} on " + + log"${MDC(HOST_PORT, blockManagerId.hostPort)} in memory " + + log"(size: ${MDC(ORIGINAL_MEMORY_SIZE, Utils.bytesToString(originalMemSize))}, " + + log"free: ${MDC(FREE_MEMORY_SIZE, Utils.bytesToString(_remainingMem))})") } if (originalLevel.useDisk) { - logInfo(s"Removed $blockId on ${blockManagerId.hostPort} on disk" + - s" (size: ${Utils.bytesToString(originalDiskSize)})") + logInfo(log"Removed ${MDC(BLOCK_ID, blockId)} on " + + log"${MDC(HOST_PORT, blockManagerId.hostPort)} on disk" + + log" (size: ${MDC(ORIGINAL_DISK_SIZE, Utils.bytesToString(originalDiskSize))})") } } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala index 1fccbd16ced5b..686ac1eb786e0 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala @@ -21,7 +21,7 
@@ import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService, Futu import org.apache.spark.{MapOutputTracker, SparkEnv} import org.apache.spark.internal.{Logging, MDC, MessageWithContext} -import org.apache.spark.internal.LogKey.{BLOCK_ID, BROADCAST_ID, RDD_ID, SHUFFLE_ID} +import org.apache.spark.internal.LogKeys.{BLOCK_ID, BROADCAST_ID, RDD_ID, SHUFFLE_ID} import org.apache.spark.rpc.{IsolatedThreadSafeRpcEndpoint, RpcCallContext, RpcEnv} import org.apache.spark.storage.BlockManagerMessages._ import org.apache.spark.util.{ThreadUtils, Utils} diff --git a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala index 893b5605414e4..5186cbfa217cc 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockReplicationPolicy.scala @@ -21,7 +21,8 @@ import scala.collection.mutable import scala.util.Random import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ /** * ::DeveloperApi:: @@ -120,7 +121,8 @@ class RandomBlockReplicationPolicy BlockReplicationUtils.getRandomSample(peers, numReplicas, random) } else { if (peers.size < numReplicas) { - logWarning(s"Expecting ${numReplicas} replicas with only ${peers.size} peer/s.") + logWarning(log"Expecting ${MDC(NUM_REPLICAS, numReplicas)} " + + log"replicas with only ${MDC(NUM_PEERS, peers.size)} peer/s.") } random.shuffle(peers).toList } diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index 4c0b5f4a14f64..72d8dc0b19d21 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -31,7 +31,7 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.executor.ExecutorExitCode import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.{MERGE_DIR_NAME, PATH} +import org.apache.spark.internal.LogKeys.{MERGE_DIR_NAME, PATH} import org.apache.spark.network.shuffle.ExecutorDiskUtils import org.apache.spark.storage.DiskBlockManager.ATTEMPT_ID_KEY import org.apache.spark.storage.DiskBlockManager.MERGE_DIR_KEY @@ -252,7 +252,7 @@ private[spark] class DiskBlockManager( Utils.getConfiguredLocalDirs(conf).flatMap { rootDir => try { val localDir = Utils.createDirectory(rootDir, "blockmgr") - logInfo(s"Created local directory at $localDir") + logInfo(log"Created local directory at ${MDC(PATH, localDir)}") Some(localDir) } catch { case e: IOException => @@ -290,7 +290,7 @@ private[spark] class DiskBlockManager( } } } - logInfo(s"Merge directory and its sub dirs get created at $mergeDir") + logInfo(log"Merge directory and its sub dirs get created at ${MDC(PATH, mergeDir)}") } catch { case e: IOException => logError( @@ -325,8 +325,8 @@ private[spark] class DiskBlockManager( logDebug(s"Created directory at ${dirToCreate.getAbsolutePath} with permission 770") } catch { case e: SecurityException => - logWarning(s"Failed to create directory ${dirToCreate.getAbsolutePath} " + - s"with permission 770", e) + logWarning(log"Failed to create directory ${MDC(PATH, dirToCreate.getAbsolutePath)} " + + log"with permission 770", e) created = null; } } diff --git 
a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala index 0b6e33ff5fb37..efcdb7fa8c69e 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala @@ -25,7 +25,7 @@ import java.util.zip.Checksum import org.apache.spark.SparkException import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ERROR, PATH} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.io.MutableCheckedOutputStream import org.apache.spark.serializer.{SerializationStream, SerializerInstance, SerializerManager} import org.apache.spark.shuffle.ShuffleWriteMetricsReporter @@ -126,6 +126,12 @@ private[spark] class DiskBlockObjectWriter( */ private var numRecordsCommitted = 0L + // For testing only. + private[storage] def getSerializerWrappedStream: OutputStream = bs + + // For testing only. + private[storage] def getSerializationStream: SerializationStream = objOut + /** * Set the checksum that the checksumOutputStream should use */ @@ -174,19 +180,36 @@ private[spark] class DiskBlockObjectWriter( * Should call after committing or reverting partial writes. */ private def closeResources(): Unit = { - if (initialized) { - Utils.tryWithSafeFinally { - mcs.manualClose() - } { - channel = null - mcs = null - bs = null - fos = null - ts = null - objOut = null - initialized = false - streamOpen = false - hasBeenClosed = true + try { + if (streamOpen) { + Utils.tryWithSafeFinally { + if (null != objOut) objOut.close() + bs = null + } { + objOut = null + if (null != bs) bs.close() + bs = null + } + } + } catch { + case e: IOException => + logInfo(log"Exception occurred while closing the output stream" + + log"${MDC(ERROR, e.getMessage)}") + } finally { + if (initialized) { + Utils.tryWithSafeFinally { + mcs.manualClose() + } { + channel = null + mcs = null + bs = null + fos = null + ts = null + objOut = null + initialized = false + streamOpen = false + hasBeenClosed = true + } } } } @@ -297,7 +320,7 @@ private[spark] class DiskBlockObjectWriter( } } { if (!Files.deleteIfExists(file.toPath)) { - logWarning(s"Error deleting $file") + logWarning(log"Error deleting ${MDC(FILE_NAME, file)}") } } } diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index 54c5d0b2dce71..1498b224b0c92 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -30,7 +30,8 @@ import io.netty.channel.DefaultFileRegion import org.apache.commons.io.FileUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException} -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.network.buffer.ManagedBuffer import org.apache.spark.network.util.{AbstractFileRegion, JavaUtils} import org.apache.spark.security.CryptoStreamUtils @@ -62,7 +63,7 @@ private[spark] class DiskStore( */ def put(blockId: BlockId)(writeFunc: WritableByteChannel => Unit): Unit = { if (contains(blockId)) { - logWarning(s"Block $blockId is already present in the disk store") + logWarning(log"Block ${MDC(BLOCK_ID, blockId)} is already present in the disk store") try { diskManager.getFile(blockId).delete() } catch { 
@@ -133,7 +134,7 @@ private[spark] class DiskStore( if (file.exists()) { val ret = file.delete() if (!ret) { - logWarning(s"Error deleting ${file.getPath()}") + logWarning(log"Error deleting ${MDC(PATH, file.getPath())}") } ret } else { @@ -148,6 +149,7 @@ private[spark] class DiskStore( def moveFileToBlock(sourceFile: File, blockSize: Long, targetBlockId: BlockId): Unit = { blockSizes.put(targetBlockId, blockSize) val targetFile = diskManager.getFile(targetBlockId.name) + logDebug(s"${sourceFile.getPath()} -> ${targetFile.getPath()}") FileUtils.moveFile(sourceFile, targetFile) } diff --git a/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala index 161120393490f..0f2bfaede4454 100644 --- a/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala +++ b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala @@ -28,7 +28,8 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.{STORAGE_DECOMMISSION_FALLBACK_STORAGE_CLEANUP, STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH} import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.util.JavaUtils @@ -84,7 +85,7 @@ private[storage] class FallbackStorage(conf: SparkConf) extends Logging { } } case r => - logWarning(s"Unsupported Resolver: ${r.getClass.getName}") + logWarning(log"Unsupported Resolver: ${MDC(CLASS_NAME, r.getClass.getName)}") } } @@ -138,10 +139,10 @@ private[spark] object FallbackStorage extends Logging { // The fallback directory for this app may not be created yet. if (fallbackFileSystem.exists(fallbackPath)) { if (fallbackFileSystem.delete(fallbackPath, true)) { - logInfo(s"Succeed to clean up: $fallbackUri") + logInfo(log"Succeed to clean up: ${MDC(URI, fallbackUri)}") } else { // Clean-up can fail due to the permission issues. - logWarning(s"Failed to clean up: $fallbackUri") + logWarning(log"Failed to clean up: ${MDC(URI, fallbackUri)}") } } } @@ -158,7 +159,7 @@ private[spark] object FallbackStorage extends Logging { * Read a ManagedBuffer. 
*/ def read(conf: SparkConf, blockId: BlockId): ManagedBuffer = { - logInfo(s"Read $blockId") + logInfo(log"Read ${MDC(BLOCK_ID, blockId)}") val fallbackPath = new Path(conf.get(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH).get) val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf) val fallbackFileSystem = FileSystem.get(fallbackPath.toUri, hadoopConf) diff --git a/core/src/main/scala/org/apache/spark/storage/PushBasedFetchHelper.scala b/core/src/main/scala/org/apache/spark/storage/PushBasedFetchHelper.scala index 31958af84e54b..8a3ca3066961c 100644 --- a/core/src/main/scala/org/apache/spark/storage/PushBasedFetchHelper.scala +++ b/core/src/main/scala/org/apache/spark/storage/PushBasedFetchHelper.scala @@ -29,7 +29,7 @@ import org.roaringbitmap.RoaringBitmap import org.apache.spark.MapOutputTracker import org.apache.spark.MapOutputTracker.SHUFFLE_PUSH_MAP_ID import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{HOST, PORT, REDUCE_ID, SHUFFLE_ID, SHUFFLE_MERGE_ID} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.network.shuffle.{BlockStoreClient, MergedBlockMeta, MergedBlocksMetaListener} import org.apache.spark.shuffle.ShuffleReadMetricsReporter import org.apache.spark.storage.BlockManagerId.SHUFFLE_MERGER_IDENTIFIER @@ -246,8 +246,9 @@ private class PushBasedFetchHelper( case Failure(throwable) => // If we see an exception with getting the local dirs for push-merged-local blocks, // we fallback to fetch the original blocks. We do not report block fetch failure. - logWarning(s"Error while fetching the merged dirs for push-merged-local " + - s"blocks: ${pushMergedLocalBlocks.mkString(", ")}. Fetch the original blocks instead", + logWarning(log"Error while fetching the merged dirs for push-merged-local " + + log"blocks: ${MDC(BLOCK_IDS, pushMergedLocalBlocks.mkString(", "))}. " + + log"Fetch the original blocks instead", throwable) pushMergedLocalBlocks.foreach { blockId => @@ -280,8 +281,8 @@ private class PushBasedFetchHelper( // If we see an exception with reading a push-merged-local meta, we fallback to // fetch the original blocks. We do not report block fetch failure // and will continue with the remaining local block read. - logWarning(s"Error occurred while fetching push-merged-local meta, " + - s"prepare to fetch the original blocks", e) + logWarning(log"Error occurred while fetching push-merged-local meta, " + + log"prepare to fetch the original blocks", e) iterator.addToResultsQueue( FallbackOnPushMergedFailureResult(blockId, blockManagerId, 0, isNetworkReqDone = false)) } @@ -315,7 +316,8 @@ private class PushBasedFetchHelper( blockId: BlockId, address: BlockManagerId): Unit = { assert(blockId.isInstanceOf[ShuffleMergedBlockId] || blockId.isInstanceOf[ShuffleBlockChunkId]) - logWarning(s"Falling back to fetch the original blocks for push-merged block $blockId") + logWarning(log"Falling back to fetch the original blocks for push-merged block " + + log"${MDC(BLOCK_ID, blockId)}") shuffleMetrics.incMergedFetchFallbackCount(1) // Increase the blocks processed since we will process another block in the next iteration of // the while loop in ShuffleBlockFetcherIterator.next(). 
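Note on the pattern applied throughout the storage hunks above: each s"..." message becomes a log"..." message whose variables are wrapped in MDC(<LogKey>, value), so every value appears both in the rendered text and as a structured key/value field. The following is a minimal sketch of the call-site conversion, not part of the patch; the class and method names are illustrative, while the imports and log keys are the ones the hunks above already use.

    package org.apache.spark.storage

    import org.apache.spark.internal.{Logging, MDC}
    import org.apache.spark.internal.LogKeys.{BLOCK_ID, NUM_FAILURES}

    // Illustrative helper only -- it exists to show the call-site shape, not a class touched by this patch.
    private[spark] class FetchFailureLogger extends Logging {
      def onFetchFailure(blockId: BlockId, failures: Int, e: Throwable): Unit = {
        // Before the conversion: plain string interpolation, no structured context.
        // logWarning(s"Failed to fetch block $blockId after $failures failures", e)

        // After the conversion: the rendered text is the same, but BLOCK_ID and
        // NUM_FAILURES also travel as structured key/value fields.
        logWarning(log"Failed to fetch block ${MDC(BLOCK_ID, blockId)} after " +
          log"${MDC(NUM_FAILURES, failures)} failures", e)
      }
    }

Longer messages are built by concatenating log"..." fragments with +, exactly as the hunks above do, so line-wrapping a message does not change which keys are attached to it.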
@@ -340,7 +342,8 @@ private class PushBasedFetchHelper( // Fallback for all the pending fetch requests val pendingShuffleChunks = iterator.removePendingChunks(shuffleChunkId, address) pendingShuffleChunks.foreach { pendingBlockId => - logInfo(s"Falling back immediately for shuffle chunk $pendingBlockId") + logInfo( + log"Falling back immediately for shuffle chunk ${MDC(BLOCK_ID, pendingBlockId)}") shuffleMetrics.incMergedFetchFallbackCount(1) val bitmapOfPendingChunk: RoaringBitmap = chunksMetaMap.remove(pendingBlockId).get chunkBitmap.or(bitmapOfPendingChunk) diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index d22ce3dbed772..ff1799d8ff3e1 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -37,7 +37,7 @@ import org.apache.spark.{MapOutputTracker, SparkException, TaskContext} import org.apache.spark.MapOutputTracker.SHUFFLE_PUSH_MAP_ID import org.apache.spark.errors.SparkCoreErrors import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{BLOCK_ID, ERROR, HOST, MAX_ATTEMPTS, PORT} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer} import org.apache.spark.network.shuffle._ import org.apache.spark.network.shuffle.checksum.{Cause, ShuffleChecksumHelper} @@ -249,7 +249,7 @@ final class ShuffleBlockFetcherIterator( } shuffleFilesSet.foreach { file => if (!file.delete()) { - logWarning("Failed to cleanup shuffle fetch temp file " + file.path()) + logWarning(log"Failed to cleanup shuffle fetch temp file ${MDC(PATH, file.path())}") } } } @@ -342,8 +342,8 @@ final class ShuffleBlockFetcherIterator( if (isNettyOOMOnShuffle.compareAndSet(false, true)) { // The fetcher can fail remaining blocks in batch for the same error. So we only // log the warning once to avoid flooding the logs. 
- logInfo(s"Block $blockId has failed $failureTimes times " + - s"due to Netty OOM, will retry") + logInfo(log"Block ${MDC(BLOCK_ID, blockId)} has failed " + + log"${MDC(FAILURES, failureTimes)} times due to Netty OOM, will retry") } remainingBlocks -= blockId deferredBlocks += blockId @@ -448,14 +448,17 @@ final class ShuffleBlockFetcherIterator( s"the number of host-local blocks ${numHostLocalBlocks} " + s"the number of push-merged-local blocks ${pushMergedLocalBlocks.size} " + s"+ the number of remote blocks ${numRemoteBlocks} ") - logInfo(s"Getting $blocksToFetchCurrentIteration " + - s"(${Utils.bytesToString(totalBytes)}) non-empty blocks including " + - s"${localBlocks.size} (${Utils.bytesToString(localBlockBytes)}) local and " + - s"${numHostLocalBlocks} (${Utils.bytesToString(hostLocalBlockBytes)}) " + - s"host-local and ${pushMergedLocalBlocks.size} " + - s"(${Utils.bytesToString(pushMergedLocalBlockBytes)}) " + - s"push-merged-local and $numRemoteBlocks (${Utils.bytesToString(remoteBlockBytes)}) " + - s"remote blocks") + logInfo( + log"Getting ${MDC(NUM_BLOCKS, blocksToFetchCurrentIteration)} " + + log"(${MDC(TOTAL_SIZE, Utils.bytesToString(totalBytes))}) non-empty blocks including " + + log"${MDC(NUM_LOCAL_BLOCKS, localBlocks.size)} " + + log"(${MDC(LOCAL_BLOCKS_SIZE, Utils.bytesToString(localBlockBytes))}) local and " + + log"${MDC(NUM_HOST_LOCAL_BLOCKS, numHostLocalBlocks)} " + + log"(${MDC(HOST_LOCAL_BLOCKS_SIZE, Utils.bytesToString(hostLocalBlockBytes))}) " + + log"host-local and ${MDC(NUM_PUSH_MERGED_LOCAL_BLOCKS, pushMergedLocalBlocks.size)} " + + log"(${MDC(PUSH_MERGED_LOCAL_BLOCKS_SIZE, Utils.bytesToString(pushMergedLocalBlockBytes))})" + + log" push-merged-local and ${MDC(NUM_REMOTE_BLOCKS, numRemoteBlocks)} " + + log"(${MDC(REMOTE_BLOCKS_SIZE, Utils.bytesToString(remoteBlockBytes))}) remote blocks") this.hostLocalBlocks ++= hostLocalBlocksByExecutor.values .flatMap { infos => infos.map(info => (info._1, info._3)) } collectedRemoteRequests @@ -719,8 +722,10 @@ final class ShuffleBlockFetcherIterator( val numDeferredRequest = deferredFetchRequests.values.map(_.size).sum val numFetches = remoteRequests.size - fetchRequests.size - numDeferredRequest - logInfo(s"Started $numFetches remote fetches in ${Utils.getUsedTimeNs(startTimeNs)}" + - (if (numDeferredRequest > 0 ) s", deferred $numDeferredRequest requests" else "")) + logInfo(log"Started ${MDC(COUNT, numFetches)} remote fetches in " + + log"${MDC(DURATION, Utils.getUsedTimeNs(startTimeNs))}" + + (if (numDeferredRequest > 0) log", deferred ${MDC(NUM_REQUESTS, numDeferredRequest)} requests" + else log"")) // Get Local Blocks fetchLocalBlocks(localBlocks) @@ -846,8 +851,10 @@ final class ShuffleBlockFetcherIterator( // uses are shared by the UnsafeShuffleWriter (both writers use DiskBlockObjectWriter // which returns a zero-size from commitAndGet() in case no records were written // since the last call. 
- val msg = s"Received a zero-size buffer for block $blockId from $address " + - s"(expectedApproxSize = $size, isNetworkReqDone=$isNetworkReqDone)" + val msg = log"Received a zero-size buffer for block ${MDC(BLOCK_ID, blockId)} " + + log"from ${MDC(URI, address)} " + + log"(expectedApproxSize = ${MDC(NUM_BYTES, size)}, " + + log"isNetworkReqDone=${MDC(IS_NETWORK_REQUEST_DONE, isNetworkReqDone)})" if (blockId.isShuffleChunk) { // Zero-size block may come from nodes with hardware failures, For shuffle chunks, // the original shuffle blocks that belong to that zero-size shuffle chunk is @@ -859,7 +866,7 @@ final class ShuffleBlockFetcherIterator( result = null null } else { - throwFetchFailedException(blockId, mapIndex, address, new IOException(msg)) + throwFetchFailedException(blockId, mapIndex, address, new IOException(msg.message)) } } else { try { @@ -945,7 +952,8 @@ final class ShuffleBlockFetcherIterator( } } else { // It's the first time this block is detected corrupted - logWarning(s"got an corrupted block $blockId from $address, fetch again", e) + logWarning(log"got an corrupted block ${MDC(BLOCK_ID, blockId)} " + + log"from ${MDC(URI, address)}, fetch again", e) corruptedBlocks += blockId fetchRequests += FetchRequest( address, Array(FetchBlockInfo(blockId, size, mapIndex))) @@ -1033,8 +1041,8 @@ final class ShuffleBlockFetcherIterator( // If we see an exception with reading push-merged-local index file, we fallback // to fetch the original blocks. We do not report block fetch failure // and will continue with the remaining local block read. - logWarning(s"Error occurred while reading push-merged-local index, " + - s"prepare to fetch the original blocks", e) + logWarning("Error occurred while reading push-merged-local index, " + + "prepare to fetch the original blocks", e) pushBasedFetchHelper.initiateFallbackFetchForPushMergedBlock( shuffleBlockId, pushBasedFetchHelper.localShuffleMergerBlockMgrId) } @@ -1138,14 +1146,16 @@ final class ShuffleBlockFetcherIterator( case otherCause => s"Block $blockId is corrupted due to $otherCause" } - logInfo(s"Finished corruption diagnosis in $duration ms. $diagnosisResponse") + logInfo(log"Finished corruption diagnosis in ${MDC(DURATION, duration)} ms. " + + log"${MDC(STATUS, diagnosisResponse)}") diagnosisResponse case shuffleBlockChunk: ShuffleBlockChunkId => // TODO SPARK-36284 Add shuffle checksum support for push-based shuffle - val diagnosisResponse = s"BlockChunk $shuffleBlockChunk is corrupted but corruption " + + logWarning(log"BlockChunk ${MDC(SHUFFLE_BLOCK_INFO, shuffleBlockChunk)} " + + log"is corrupted but corruption diagnosis is skipped due to lack of shuffle " + + log"checksum support for push-based shuffle.") + s"BlockChunk $shuffleBlockChunk is corrupted but corruption " + s"diagnosis is skipped due to lack of shuffle checksum support for push-based shuffle." 
- logWarning(diagnosisResponse) - diagnosisResponse case unexpected: BlockId => throw SparkException.internalError( s"Unexpected type of BlockId, $unexpected", category = "STORAGE") @@ -1273,7 +1283,8 @@ final class ShuffleBlockFetcherIterator( originalLocalBlocks, originalHostLocalBlocksByExecutor, originalMergedLocalBlocks) // Add the remote requests into our queue in a random order fetchRequests ++= Utils.randomize(originalRemoteReqs) - logInfo(s"Created ${originalRemoteReqs.size} fallback remote requests for push-merged") + logInfo(log"Created ${MDC(NUM_REQUESTS, originalRemoteReqs.size)} fallback remote requests " + + log"for push-merged") // fetch all the fallback blocks that are local. fetchLocalBlocks(originalLocalBlocks) // Merged local blocks should be empty during fallback diff --git a/core/src/main/scala/org/apache/spark/storage/TopologyMapper.scala b/core/src/main/scala/org/apache/spark/storage/TopologyMapper.scala index 3c2c4b46dc4ca..f1dca53c7e3b1 100644 --- a/core/src/main/scala/org/apache/spark/storage/TopologyMapper.scala +++ b/core/src/main/scala/org/apache/spark/storage/TopologyMapper.scala @@ -19,7 +19,8 @@ package org.apache.spark.storage import org.apache.spark.SparkConf import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.Utils /** @@ -78,7 +79,7 @@ class FileBasedTopologyMapper(conf: SparkConf) extends TopologyMapper(conf) with if (topology.isDefined) { logDebug(s"$hostname -> ${topology.get}") } else { - logWarning(s"$hostname does not have any topology information") + logWarning(log"${MDC(HOST_PORT, hostname)} does not have any topology information") } topology } diff --git a/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala index 48d2ef68b41ab..6746bbd490c42 100644 --- a/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/memory/MemoryStore.scala @@ -30,7 +30,8 @@ import scala.util.control.NonFatal import com.google.common.io.ByteStreams import org.apache.spark.{SparkConf, SparkException, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.{STORAGE_UNROLL_MEMORY_THRESHOLD, UNROLL_MEMORY_CHECK_PERIOD, UNROLL_MEMORY_GROWTH_FACTOR} import org.apache.spark.memory.{MemoryManager, MemoryMode} import org.apache.spark.serializer.{SerializationStream, SerializerManager} @@ -110,12 +111,14 @@ private[spark] class MemoryStore( } if (maxMemory < unrollMemoryThreshold) { - logWarning(s"Max memory ${Utils.bytesToString(maxMemory)} is less than the initial memory " + - s"threshold ${Utils.bytesToString(unrollMemoryThreshold)} needed to store a block in " + - s"memory. Please configure Spark with more memory.") + logWarning(log"Max memory ${MDC(NUM_BYTES, Utils.bytesToString(maxMemory))} " + + log"is less than the initial memory " + + log"threshold ${MDC(MAX_SIZE, Utils.bytesToString(unrollMemoryThreshold))} " + + log"needed to store a block in memory. 
Please configure Spark with more memory.") } - logInfo("MemoryStore started with capacity %s".format(Utils.bytesToString(maxMemory))) + logInfo(log"MemoryStore started with capacity " + + log"${MDC(MEMORY_SIZE, Utils.bytesToString(maxMemory))}") /** Total storage memory used including unroll memory, in bytes. */ private def memoryUsed: Long = memoryManager.storageMemoryUsed @@ -156,8 +159,9 @@ private[spark] class MemoryStore( entries.synchronized { entries.put(blockId, entry) } - logInfo("Block %s stored as bytes in memory (estimated size %s, free %s)".format( - blockId, Utils.bytesToString(size), Utils.bytesToString(maxMemory - blocksMemoryUsed))) + logInfo(log"Block ${MDC(BLOCK_ID, blockId)} stored as bytes in memory " + + log"(estimated size ${MDC(SIZE, Utils.bytesToString(size))}, " + + log"free ${MDC(MEMORY_SIZE, Utils.bytesToString(maxMemory - blocksMemoryUsed))})") true } else { false @@ -213,8 +217,9 @@ private[spark] class MemoryStore( reserveUnrollMemoryForThisTask(blockId, initialMemoryThreshold, memoryMode) if (!keepUnrolling) { - logWarning(s"Failed to reserve initial memory threshold of " + - s"${Utils.bytesToString(initialMemoryThreshold)} for computing block $blockId in memory.") + logWarning(log"Failed to reserve initial memory threshold of " + + log"${MDC(NUM_BYTES, Utils.bytesToString(initialMemoryThreshold))} " + + log"for computing block ${MDC(BLOCK_ID, blockId)} in memory.") } else { unrollMemoryUsedByThisBlock += initialMemoryThreshold } @@ -247,7 +252,8 @@ private[spark] class MemoryStore( // SPARK-45025 - if a thread interrupt was received, we log a warning and return used memory // to avoid getting killed by task reaper eventually. if (shouldCheckThreadInterruption && Thread.currentThread().isInterrupted) { - logInfo(s"Failed to unroll block=$blockId since thread interrupt was received") + logInfo( + log"Failed to unroll block=${MDC(BLOCK_ID, blockId)} since thread interrupt was received") Left(unrollMemoryUsedByThisBlock) } else if (keepUnrolling) { // Make sure that we have enough memory to store the block. By this point, it is possible that @@ -276,8 +282,9 @@ private[spark] class MemoryStore( entries.put(blockId, entry) } - logInfo("Block %s stored as values in memory (estimated size %s, free %s)".format(blockId, - Utils.bytesToString(entry.size), Utils.bytesToString(maxMemory - blocksMemoryUsed))) + logInfo(log"Block ${MDC(BLOCK_ID, blockId)} stored as values in memory " + + log"(estimated size ${MDC(MEMORY_SIZE, Utils.bytesToString(entry.size))}, free " + + log"${MDC(FREE_MEMORY_SIZE, Utils.bytesToString(maxMemory - blocksMemoryUsed))})") Right(entry.size) } else { // We ran out of space while unrolling the values for this block @@ -348,9 +355,10 @@ private[spark] class MemoryStore( // Initial per-task memory to request for unrolling blocks (bytes). val initialMemoryThreshold = unrollMemoryThreshold val chunkSize = if (initialMemoryThreshold > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { - logWarning(s"Initial memory threshold of ${Utils.bytesToString(initialMemoryThreshold)} " + - s"is too large to be set as chunk size. Chunk size has been capped to " + - s"${Utils.bytesToString(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH)}") + logWarning(log"Initial memory threshold of " + + log"${MDC(NUM_BYTES, Utils.bytesToString(initialMemoryThreshold))} " + + log"is too large to be set as chunk size. 
Chunk size has been capped to " + + log"${MDC(MAX_SIZE, Utils.bytesToString(ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH))}") ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH } else { initialMemoryThreshold.toInt @@ -517,8 +525,8 @@ private[spark] class MemoryStore( if (freedMemory >= space) { var lastSuccessfulBlock = -1 try { - logInfo(s"${selectedBlocks.size} blocks selected for dropping " + - s"(${Utils.bytesToString(freedMemory)} bytes)") + logInfo(log"${MDC(NUM_BLOCKS, selectedBlocks.size)} blocks selected for dropping " + + log"(${MDC(MEMORY_SIZE, Utils.bytesToString(freedMemory))} bytes)") selectedBlocks.indices.foreach { idx => val blockId = selectedBlocks(idx) val entry = entries.synchronized { @@ -533,8 +541,9 @@ private[spark] class MemoryStore( } lastSuccessfulBlock = idx } - logInfo(s"After dropping ${selectedBlocks.size} blocks, " + - s"free memory is ${Utils.bytesToString(maxMemory - blocksMemoryUsed)}") + logInfo( + log"After dropping ${MDC(NUM_BLOCKS, selectedBlocks.size)} blocks, free memory is " + + log"${MDC(FREE_MEMORY_SIZE, Utils.bytesToString(maxMemory - blocksMemoryUsed))}") freedMemory } finally { // like BlockManager.doPut, we use a finally rather than a catch to avoid having to deal @@ -549,7 +558,7 @@ private[spark] class MemoryStore( } } else { blockId.foreach { id => - logInfo(s"Will not store $id") + logInfo(log"Will not store ${MDC(BLOCK_ID, id)}") } selectedBlocks.foreach { id => blockInfoManager.unlock(id) @@ -645,11 +654,11 @@ private[spark] class MemoryStore( */ private def logMemoryUsage(): Unit = { logInfo( - s"Memory use = ${Utils.bytesToString(blocksMemoryUsed)} (blocks) + " + - s"${Utils.bytesToString(currentUnrollMemory)} (scratch space shared across " + - s"$numTasksUnrolling tasks(s)) = ${Utils.bytesToString(memoryUsed)}. " + - s"Storage limit = ${Utils.bytesToString(maxMemory)}." - ) + log"Memory use = ${MDC(CURRENT_MEMORY_SIZE, Utils.bytesToString(blocksMemoryUsed))} " + + log"(blocks) + ${MDC(FREE_MEMORY_SIZE, Utils.bytesToString(currentUnrollMemory))} " + + log"(scratch space shared across ${MDC(NUM_TASKS, numTasksUnrolling)} " + + log"task(s)) = ${MDC(STORAGE_MEMORY_SIZE, Utils.bytesToString(memoryUsed))}. " + + log"Storage limit = ${MDC(MAX_MEMORY_SIZE, Utils.bytesToString(maxMemory))}.") } /** @@ -660,8 +669,8 @@ private[spark] class MemoryStore( */ private def logUnrollFailureMessage(blockId: BlockId, finalVectorSize: Long): Unit = { logWarning( - s"Not enough space to cache $blockId in memory! " + - s"(computed ${Utils.bytesToString(finalVectorSize)} so far)" + log"Not enough space to cache ${MDC(BLOCK_ID, blockId)} in memory! 
" + + log"(computed ${MDC(NUM_BYTES, Utils.bytesToString(finalVectorSize))} so far)" ) logMemoryUsage() } diff --git a/core/src/main/scala/org/apache/spark/ui/DriverLogPage.scala b/core/src/main/scala/org/apache/spark/ui/DriverLogPage.scala index 3102115159994..8b4eebc26b3ba 100644 --- a/core/src/main/scala/org/apache/spark/ui/DriverLogPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/DriverLogPage.scala @@ -22,7 +22,7 @@ import jakarta.servlet.http.HttpServletRequest import org.apache.spark.SparkConf import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{LOG_TYPE, PATH} +import org.apache.spark.internal.LogKeys.{LOG_TYPE, PATH} import org.apache.spark.internal.config.DRIVER_LOG_LOCAL_DIR import org.apache.spark.util.Utils import org.apache.spark.util.logging.DriverLogger.DRIVER_LOG_FILE diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 5e567a891d587..f503be908c072 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -40,7 +40,9 @@ import org.json4s.JValue import org.json4s.jackson.JsonMethods.{pretty, render} import org.apache.spark.{SecurityManager, SparkConf, SSLOptions} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.UI._ import org.apache.spark.util.Utils @@ -84,7 +86,8 @@ private[spark] object JettyUtils extends Logging { case e: IllegalArgumentException => response.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage) case e: Exception => - logWarning(s"GET ${request.getRequestURI} failed: $e", e) + logWarning(log"GET ${MDC(LogKeys.URI, request.getRequestURI)} failed: " + + log"${MDC(ERROR, e)}", e) throw e } } @@ -247,7 +250,8 @@ private[spark] object JettyUtils extends Logging { poolSize: Int = 200): ServerInfo = { val stopTimeout = conf.get(UI_JETTY_STOP_TIMEOUT) - logInfo(s"Start Jetty $hostName:$port for $serverName") + logInfo(log"Start Jetty ${MDC(HOST, hostName)}:${MDC(PORT, port)}" + + log" for ${MDC(SERVER_NAME, serverName)}") // Start the server first, with no connectors. 
val pool = new QueuedThreadPool(poolSize) if (serverName.nonEmpty) { @@ -555,7 +559,9 @@ private[spark] case class ServerInfo( */ private def addFilters(handler: ServletContextHandler, securityMgr: SecurityManager): Unit = { conf.get(UI_FILTERS).foreach { filter => - logInfo(s"Adding filter to ${handler.getContextPath()}: $filter") + logInfo(log"Adding filter to" + + log" ${MDC(SERVLET_CONTEXT_HANDLER_PATH, handler.getContextPath())}:" + + log" ${MDC(UI_FILTER, filter)}") val oldParams = conf.getOption(s"spark.$filter.params").toSeq .flatMap(Utils.stringToSeq) .flatMap { param => diff --git a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index ddf451c16f3a2..b8d422c9d9fbb 100644 --- a/core/src/main/scala/org/apache/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -24,7 +24,7 @@ import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.{SecurityManager, SparkConf, SparkContext} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CLASS_NAME +import org.apache.spark.internal.LogKeys.{CLASS_NAME, WEB_URL} import org.apache.spark.internal.config.DRIVER_LOG_LOCAL_DIR import org.apache.spark.internal.config.UI._ import org.apache.spark.scheduler._ @@ -164,7 +164,7 @@ private[spark] class SparkUI private ( /** Stop the server behind this web interface. Only valid after bind(). */ override def stop(): Unit = { super.stop() - logInfo(s"Stopped Spark web UI at $webUrl") + logInfo(log"Stopped Spark web UI at ${MDC(WEB_URL, webUrl)}") } override def withSparkUI[T](appId: String, attemptId: Option[String])(fn: SparkUI => T): T = { diff --git a/core/src/main/scala/org/apache/spark/ui/WebUI.scala b/core/src/main/scala/org/apache/spark/ui/WebUI.scala index baeed322e8ad3..60d4e5db99d7e 100644 --- a/core/src/main/scala/org/apache/spark/ui/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/WebUI.scala @@ -30,7 +30,7 @@ import org.json4s.JsonAST.{JNothing, JValue} import org.apache.spark.{SecurityManager, SparkConf, SSLOptions} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CLASS_NAME +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.ui.JettyUtils._ import org.apache.spark.util.Utils @@ -156,7 +156,8 @@ private[spark] abstract class WebUI( serverInfo = Some(server) val hostName = Option(conf.getenv("SPARK_LOCAL_IP")) .getOrElse(if (Utils.preferIPv6) "[::]" else "0.0.0.0") - logInfo(s"Bound $className to $hostName, and started at $webUrl") + logInfo(log"Bound ${MDC(CLASS_NAME, className)} to ${MDC(HOST, hostName)}," + + log" and started at ${MDC(WEB_URL, webUrl)}") } catch { case e: Exception => logError(log"Failed to bind ${MDC(CLASS_NAME, className)}", e) diff --git a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala index 9a035e0f1e834..ee7f67233bbd5 100644 --- a/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala +++ b/core/src/main/scala/org/apache/spark/ui/scope/RDDOperationGraph.scala @@ -25,7 +25,8 @@ import scala.xml.Utility import org.apache.commons.text.StringEscapeUtils -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rdd.DeterministicLevel import org.apache.spark.scheduler.StageInfo import 
org.apache.spark.storage.StorageLevel @@ -214,7 +215,8 @@ private[spark] object RDDOperationGraph extends Logging { case (true, false) => outgoingEdges += e case (false, true) => incomingEdges += e // should never happen - case _ => logWarning(s"Found an orphan edge in stage ${stage.stageId}: $e") + case _ => logWarning(log"Found an orphan edge in stage " + + log"${MDC(STAGE_ID, stage.stageId)}: ${MDC(ERROR, e)}") } } diff --git a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala index c6d8073a0c2fa..3237a321f1c3e 100644 --- a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala +++ b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala @@ -23,7 +23,8 @@ import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicLong import org.apache.spark.{InternalAccumulator, SparkContext, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.scheduler.AccumulableInfo import org.apache.spark.util.AccumulatorContext.internOption @@ -276,7 +277,8 @@ private[spark] object AccumulatorContext extends Logging { // Since we are storing weak references, warn when the underlying data is not valid. val acc = ref.get if (acc eq null) { - logWarning(s"Attempted to access garbage collected accumulator $id") + logWarning(log"Attempted to access garbage collected accumulator " + + log"${MDC(ACCUMULATOR_ID, id)}") } Option(acc) } diff --git a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala index 14851d8772895..8526a21254586 100644 --- a/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/DependencyUtils.scala @@ -26,7 +26,9 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.SparkSubmit -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.util.ArrayImplicits._ @@ -223,10 +225,10 @@ private[spark] object DependencyUtils extends Logging { if (file.exists()) { loader.addURL(file.toURI.toURL) } else { - logWarning(s"Local jar $file does not exist, skipping.") + logWarning(log"Local jar ${MDC(FILE_NAME, file)} does not exist, skipping.") } case _ => - logWarning(s"Skip remote jar $uri.") + logWarning(log"Skip remote jar ${MDC(LogKeys.URI, uri)}.") } } diff --git a/core/src/main/scala/org/apache/spark/util/Distribution.scala b/core/src/main/scala/org/apache/spark/util/Distribution.scala index 49aab5575f843..07ea9720b0b8d 100644 --- a/core/src/main/scala/org/apache/spark/util/Distribution.scala +++ b/core/src/main/scala/org/apache/spark/util/Distribution.scala @@ -19,8 +19,6 @@ package org.apache.spark.util import java.io.PrintStream -import scala.collection.immutable.IndexedSeq - /** * Util for getting some stats from a small sample of numeric values, with some handy * summary functions. 
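The hunks above all apply the same structured-logging migration: an s-interpolated message becomes a log-interpolated message whose dynamic values are wrapped in MDC(<LogKey>, value), so they are emitted as structured context in addition to plain text. Below is a minimal sketch of that pattern with an illustrative class, reusing the BLOCK_ID and NUM_BYTES keys that already appear in this diff; note that adjacent log fragments are concatenated verbatim, so each fragment has to carry its own separating spaces.

    import org.apache.spark.internal.{Logging, MDC}
    import org.apache.spark.internal.LogKeys._
    import org.apache.spark.storage.BlockId

    class ExampleStore extends Logging {
      def report(blockId: BlockId, bytes: Long): Unit = {
        // Before: logInfo(s"Stored block $blockId ($bytes bytes)")
        logInfo(log"Stored block ${MDC(BLOCK_ID, blockId)} " +
          log"(${MDC(NUM_BYTES, bytes)} bytes)")
      }
    }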
diff --git a/core/src/main/scala/org/apache/spark/util/EventLoop.scala b/core/src/main/scala/org/apache/spark/util/EventLoop.scala index eaa9ef517294e..b9de661b63c4f 100644 --- a/core/src/main/scala/org/apache/spark/util/EventLoop.scala +++ b/core/src/main/scala/org/apache/spark/util/EventLoop.scala @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.AtomicBoolean import scala.util.control.NonFatal import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.EVENT_LOOP +import org.apache.spark.internal.LogKeys.EVENT_LOOP /** * An event loop to receive events from the caller and process all events in the event thread. It diff --git a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala index a4b9ba7bb0169..f8f5bb4f72a40 100644 --- a/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/HadoopFSUtils.scala @@ -28,7 +28,8 @@ import org.apache.hadoop.fs.viewfs.ViewFileSystem import org.apache.hadoop.hdfs.DistributedFileSystem import org.apache.spark._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.util.ArrayImplicits._ @@ -86,7 +87,7 @@ private[spark] object HadoopFSUtils extends Logging { path: Path, hadoopConf: Configuration, filter: PathFilter): Seq[(Path, Seq[FileStatus])] = { - logInfo(s"Listing $path with listFiles API") + logInfo(log"Listing ${MDC(PATH, path)} with listFiles API") try { val prefixLength = path.toString.length val remoteIter = path.getFileSystem(hadoopConf).listFiles(path, true) @@ -99,7 +100,8 @@ private[spark] object HadoopFSUtils extends Logging { Seq((path, statues.toImmutableArraySeq)) } catch { case _: FileNotFoundException => - logWarning(s"The root directory $path was not found. Was it deleted very recently?") + logWarning(log"The root directory ${MDC(PATH, path)} " + + log"was not found. Was it deleted very recently?") Seq((path, Seq.empty[FileStatus])) } } @@ -132,8 +134,9 @@ private[spark] object HadoopFSUtils extends Logging { } } - logInfo(s"Listing leaf files and directories in parallel under ${paths.length} paths." + - s" The first several paths are: ${paths.take(10).mkString(", ")}.") + logInfo(log"Listing leaf files and directories in parallel under " + + log"${MDC(NUM_PATHS, paths.length)} paths." + + log" The first several paths are: ${MDC(PATHS, paths.take(10).mkString(", "))}.") HiveCatalogMetrics.incrementParallelListingJobCount(1) val serializableConfiguration = new SerializableConfiguration(hadoopConf) @@ -235,7 +238,8 @@ private[spark] object HadoopFSUtils extends Logging { // InMemoryFileIndex construction. However, it's still a net improvement to detect and // fail-fast on the non-root cases. For more info see the SPARK-27676 review discussion. case _: FileNotFoundException if isRootPath || ignoreMissingFiles => - logWarning(s"The directory $path was not found. Was it deleted very recently?") + logWarning(log"The directory ${MDC(PATH, path)} " + + log"was not found. 
Was it deleted very recently?") Array.empty[FileStatus] } @@ -323,8 +327,8 @@ private[spark] object HadoopFSUtils extends Logging { } if (missingFiles.nonEmpty) { - logWarning( - s"the following files were missing during file scan:\n ${missingFiles.mkString("\n ")}") + logWarning(log"the following files were missing during file scan:\n " + + log"${MDC(PATHS, missingFiles.mkString("\n "))}") } resolvedLeafStatuses.toImmutableArraySeq diff --git a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala index 814201d8c959c..4f01cd6ac2136 100644 --- a/core/src/main/scala/org/apache/spark/util/ListenerBus.scala +++ b/core/src/main/scala/org/apache/spark/util/ListenerBus.scala @@ -27,7 +27,7 @@ import com.codahale.metrics.Timer import org.apache.spark.SparkEnv import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.LISTENER +import org.apache.spark.internal.LogKeys.{EVENT, LISTENER, TOTAL_TIME} import org.apache.spark.scheduler.EventLoggingListener import org.apache.spark.scheduler.SparkListenerEnvironmentUpdate @@ -132,8 +132,9 @@ private[spark] trait ListenerBus[L <: AnyRef, E] extends Logging { if (maybeTimerContext != null) { val elapsed = maybeTimerContext.stop() if (logSlowEventEnabled && elapsed > logSlowEventThreshold) { - logInfo(s"Process of event ${redactEvent(event)} by listener ${listenerName} took " + - s"${elapsed / 1000000000d}s.") + logInfo(log"Process of event ${MDC(EVENT, redactEvent(event))} by " + + log"listener ${MDC(LISTENER, listenerName)} took " + + log"${MDC(TOTAL_TIME, elapsed / 1000000d)}ms.") } } } diff --git a/core/src/main/scala/org/apache/spark/util/PeriodicCheckpointer.scala b/core/src/main/scala/org/apache/spark/util/PeriodicCheckpointer.scala index f01645d82303e..7a98c4830db92 100644 --- a/core/src/main/scala/org/apache/spark/util/PeriodicCheckpointer.scala +++ b/core/src/main/scala/org/apache/spark/util/PeriodicCheckpointer.scala @@ -23,7 +23,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.SparkContext -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ /** @@ -184,9 +185,9 @@ private[spark] object PeriodicCheckpointer extends Logging { val fs = path.getFileSystem(conf) fs.delete(path, true) } catch { - case e: Exception => - logWarning("PeriodicCheckpointer could not remove old checkpoint file: " + - checkpointFile) + case _: Exception => + logWarning(log"PeriodicCheckpointer could not remove old checkpoint file: " + + log"${MDC(FILE_NAME, checkpointFile)}") } } } diff --git a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala index b9dece19f2651..993352c6a6379 100644 --- a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala +++ b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.fs.FileSystem import org.apache.spark.SparkConf import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.internal.config.SPARK_SHUTDOWN_TIMEOUT_MS diff --git a/core/src/main/scala/org/apache/spark/util/SignalUtils.scala b/core/src/main/scala/org/apache/spark/util/SignalUtils.scala index 775dc44fc1a13..b41166a50efd2 100644 --- 
a/core/src/main/scala/org/apache/spark/util/SignalUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/SignalUtils.scala @@ -25,7 +25,8 @@ import org.apache.commons.lang3.SystemUtils import org.slf4j.Logger import sun.misc.{Signal, SignalHandler} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC, MessageWithContext} +import org.apache.spark.internal.LogKeys._ /** * Contains utilities for working with posix signals. @@ -58,7 +59,7 @@ private[spark] object SignalUtils extends Logging { */ def register(signal: String)(action: => Boolean): Unit = { if (SystemUtils.IS_OS_UNIX) { - register(signal, s"Failed to register signal handler for $signal", + register(signal, log"Failed to register signal handler for ${MDC(SIGNAL, signal)}", logStackTrace = true)(action) } } @@ -74,12 +75,12 @@ private[spark] object SignalUtils extends Logging { */ def register( signal: String, - failMessage: String, + failMessage: MessageWithContext, logStackTrace: Boolean = true)( action: => Boolean): Unit = synchronized { try { val handler = handlers.getOrElseUpdate(signal, { - logInfo(s"Registering signal handler for $signal") + logInfo(log"Registering signal handler for ${MDC(SIGNAL, signal)}") new ActionHandler(new Signal(signal)) }) handler.register(action) diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 1447a3e752de7..88fe64859a214 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -151,11 +151,11 @@ object SizeEstimator extends Logging { // TODO: We could use reflection on the VMOption returned ? getVMMethod.invoke(bean, "UseCompressedOops").toString.contains("true") } catch { - case e: Exception => + case _: Exception => // Guess whether they've enabled UseCompressedOops based on whether maxMemory < 32 GB val guess = Runtime.getRuntime.maxMemory < (32L*1024*1024*1024) - val guessInWords = if (guess) "yes" else "not" - logWarning("Failed to check whether UseCompressedOops is set; assuming " + guessInWords) + logWarning(log"Failed to check whether UseCompressedOops is set; " + + log"assuming " + (if (guess) log"yes" else log"not")) guess } } diff --git a/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala b/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala index 74f1474f9cf78..c1ea4f929101f 100644 --- a/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala +++ b/core/src/main/scala/org/apache/spark/util/SparkUncaughtExceptionHandler.scala @@ -17,8 +17,9 @@ package org.apache.spark.util +import org.apache.spark.executor.{ExecutorExitCode, KilledByTaskReaperException} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.THREAD +import org.apache.spark.internal.LogKeys.THREAD /** * The default uncaught exception handler for Spark daemons. It terminates the whole process for @@ -56,6 +57,8 @@ private[spark] class SparkUncaughtExceptionHandler(val exitOnUncaughtException: // SPARK-24294: This is defensive code, in case that SparkFatalException is // misused and uncaught. 
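The SignalUtils change above moves the failure message from a plain String to a MessageWithContext built with the log interpolator. A hedged usage sketch follows, assuming the caller mixes in Logging so the interpolator and the SIGNAL key are in scope; the signal name, handler body, and return value are illustrative only.

    import org.apache.spark.internal.{Logging, MDC}
    import org.apache.spark.internal.LogKeys.SIGNAL
    import org.apache.spark.util.SignalUtils

    object ExampleSignalSetup extends Logging {
      def install(): Unit = {
        SignalUtils.register(
          "TERM",
          log"Failed to register signal handler for ${MDC(SIGNAL, "TERM")}",
          logStackTrace = false) {
          // Illustrative action; the Boolean result keeps the existing register(signal)(action) contract.
          false
        }
      }
    }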
System.exit(SparkExitCode.OOM) + case _: KilledByTaskReaperException if exitOnUncaughtException => + System.exit(ExecutorExitCode.KILLED_BY_TASK_REAPER) case _ if exitOnUncaughtException => System.exit(SparkExitCode.UNCAUGHT_EXCEPTION) case _ => diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index e4167c43ab9f6..7f61b3f0b2c24 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -231,7 +231,7 @@ private[spark] object ThreadUtils { /** * Run a piece of code in a new thread and return the result. Exception in the new thread is * thrown in the caller thread with an adjusted stack trace that removes references to this - * method for clarity. The exception stack traces will be like the following + * method for clarity. The exception stack traces will be like the following: * * SomeException: exception-message * at CallerClass.body-method (sourcefile.scala) @@ -261,31 +261,51 @@ private[spark] object ThreadUtils { exception match { case Some(realException) => - // Remove the part of the stack that shows method calls into this helper method - // This means drop everything from the top until the stack element - // ThreadUtils.runInNewThread(), and then drop that as well (hence the `drop(1)`). - val baseStackTrace = Thread.currentThread().getStackTrace().dropWhile( - ! _.getClassName.contains(this.getClass.getSimpleName)).drop(1) - - // Remove the part of the new thread stack that shows methods call from this helper method - val extraStackTrace = realException.getStackTrace.takeWhile( - ! _.getClassName.contains(this.getClass.getSimpleName)) - - // Combine the two stack traces, with a place holder just specifying that there - // was a helper method used, without any further details of the helper - val placeHolderStackElem = new StackTraceElement( - s"... run in separate thread using ${ThreadUtils.getClass.getName.stripSuffix("$")} ..", - " ", "", -1) - val finalStackTrace = extraStackTrace ++ Seq(placeHolderStackElem) ++ baseStackTrace - - // Update the stack trace and rethrow the exception in the caller thread - realException.setStackTrace(finalStackTrace) - throw realException + throw wrapCallerStacktrace(realException, dropStacks = 2) case None => result } } + /** + * Adjust the exception stack trace to wrap it with the caller-side thread stack trace. + * The exception stack traces will be like the following: + * + * SomeException: exception-message + * at CallerClass.body-method (sourcefile.scala) + * at ... run in separate thread using org.apache.spark.util.ThreadUtils ... () + * at CallerClass.caller-method (sourcefile.scala) + * ... + */ + def wrapCallerStacktrace[T <: Throwable]( + realException: T, + combineMessage: String = + s"run in separate thread using ${ThreadUtils.getClass.getName.stripSuffix("$")}", + dropStacks: Int = 1): T = { + require(dropStacks >= 0, "dropStacks must be zero or positive") + val simpleName = this.getClass.getSimpleName + // Remove the part of the stack that shows method calls into this helper method + // This means drop everything from the top until the stack element + // ThreadUtils.wrapCallerStacktrace(), and then drop that as well (hence dropStacks defaults to 1). + // A larger dropStacks lets the caller drop additional helper frames. 
+ val baseStackTrace = Thread.currentThread().getStackTrace + .dropWhile(!_.getClassName.contains(simpleName)) + .drop(dropStacks) + + // Remove the part of the new thread stack that shows methods call from this helper method + val extraStackTrace = realException.getStackTrace + .takeWhile(!_.getClassName.contains(simpleName)) + + // Combine the two stack traces, with a place holder just specifying that there + // was a helper method used, without any further details of the helper + val placeHolderStackElem = new StackTraceElement(s"... $combineMessage ..", " ", "", -1) + val finalStackTrace = extraStackTrace ++ Seq(placeHolderStackElem) ++ baseStackTrace + + // Update the stack trace and rethrow the exception in the caller thread + realException.setStackTrace(finalStackTrace) + realException + } + /** * Construct a new ForkJoinPool with a specified max parallelism and name prefix. */ diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index af91a4b32c6fc..a37aedfcb635a 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -69,7 +69,8 @@ import org.slf4j.Logger import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC, MessageWithContext} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Streaming._ import org.apache.spark.internal.config.Tests.IS_TESTING @@ -400,14 +401,14 @@ private[spark] object Utils "Untarring behavior will be deprecated at spark.files and " + "SparkContext.addFile. Consider using spark.archives or SparkContext.addArchive " + "instead.") - logInfo("Untarring " + fileName) + logInfo(log"Untarring ${MDC(FILE_NAME, fileName)}") executeAndGetOutput(Seq("tar", "-xzf", fileName), targetDir) } else if (fileName.endsWith(".tar")) { logWarning( "Untarring behavior will be deprecated at spark.files and " + "SparkContext.addFile. Consider using spark.archives or SparkContext.addArchive " + "instead.") - logInfo("Untarring " + fileName) + logInfo(log"Untarring ${MDC(FILE_NAME, fileName)}") executeAndGetOutput(Seq("tar", "-xf", fileName), targetDir) } } @@ -444,7 +445,8 @@ private[spark] object Utils // TODO(SPARK-38632): should keep file permissions. Java implementation doesn't. 
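With the refactor above, runInNewThread delegates the stack splicing to the new wrapCallerStacktrace helper, passing dropStacks = 2 so that both the helper frame and the runInNewThread frame are dropped from the caller-side trace. A rough sketch of the caller-visible behavior, assuming the existing runInNewThread(threadName)(body) signature:

    import org.apache.spark.util.ThreadUtils

    // Any exception thrown by the body is rethrown here in the calling thread, with the body's
    // frames on top, the caller's frames below, and a placeholder frame of the form
    // "... run in separate thread using org.apache.spark.util.ThreadUtils .." in between.
    val answer: Int = ThreadUtils.runInNewThread("example-worker") {
      require(2 + 2 == 4, "illustrative body")
      42
    }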
unTarUsingJava(source, dest) } else { - logWarning(s"Cannot unpack $source, just copying it to $dest.") + logWarning(log"Cannot unpack ${MDC(LogKeys.FILE_NAME, source)}, " + + log"just copying it to ${MDC(FILE_NAME2, dest)}.") copyRecursive(source, dest) } } @@ -501,7 +503,7 @@ private[spark] object Utils fileOverwrite: Boolean): Unit = { val tempFile = File.createTempFile("fetchFileTemp", null, new File(destFile.getParentFile.getAbsolutePath)) - logInfo(s"Fetching $url to $tempFile") + logInfo(log"Fetching ${MDC(LogKeys.URL, url)} to ${MDC(FILE_ABSOLUTE_PATH, tempFile)}") try { val out = new FileOutputStream(tempFile) @@ -543,7 +545,8 @@ private[spark] object Utils if (!filesEqualRecursive(sourceFile, destFile)) { if (fileOverwrite) { logInfo( - s"File $destFile exists and does not match contents of $url, replacing it with $url" + log"File ${MDC(DESTINATION_PATH, destFile)} exists and does not match contents of" + + log" ${MDC(LogKeys.URL, url)}, replacing it with ${MDC(LogKeys.URL2, url)}" ) if (!destFile.delete()) { throw new SparkException( @@ -561,10 +564,8 @@ private[spark] object Utils // Do nothing if the file contents are the same, i.e. this file has been copied // previously. logInfo( - "%s has been previously copied to %s".format( - sourceFile.getAbsolutePath, - destFile.getAbsolutePath - ) + log"${MDC(SOURCE_PATH, sourceFile.getAbsolutePath)} has been previously" + + log" copied to ${MDC(DESTINATION_PATH, destFile.getAbsolutePath)}" ) return } @@ -574,7 +575,8 @@ private[spark] object Utils if (removeSourceFile) { Files.move(sourceFile.toPath, destFile.toPath) } else { - logInfo(s"Copying ${sourceFile.getAbsolutePath} to ${destFile.getAbsolutePath}") + logInfo(log"Copying ${MDC(SOURCE_PATH, sourceFile.getAbsolutePath)}" + + log" to ${MDC(DESTINATION_PATH, destFile.getAbsolutePath)}") copyRecursive(sourceFile, destFile) } } @@ -797,8 +799,10 @@ private[spark] object Utils } if (uris.nonEmpty) { logWarning( - "The configured local directories are not expected to be URIs; however, got suspicious " + - s"values [${uris.mkString(", ")}]. Please check your configured local directories.") + log"The configured local directories are not expected to be URIs; " + + log"however, got suspicious values [" + + log"${MDC(LogKeys.URIS, uris.mkString(", "))}]. " + + log"Please check your configured local directories.") } configuredLocalDirs.flatMap { root => @@ -889,16 +893,17 @@ private[spark] object Utils // because of Inet6Address.toHostName may add interface at the end if it knows about it val strippedAddress = InetAddress.getByAddress(addr.getAddress) // We've found an address that looks reasonable! 
- logWarning("Your hostname, " + InetAddress.getLocalHost.getHostName + " resolves to" + - " a loopback address: " + address.getHostAddress + "; using " + - strippedAddress.getHostAddress + " instead (on interface " + ni.getName + ")") + logWarning(log"Your hostname, ${MDC(HOST, InetAddress.getLocalHost.getHostName)}, " + + log"resolves to a loopback address: ${MDC(HOST_PORT, address.getHostAddress)}; " + + log"using ${MDC(HOST_PORT2, strippedAddress.getHostAddress)} instead (on interface " + + log"${MDC(NETWORK_IF, ni.getName)})") logWarning("Set SPARK_LOCAL_IP if you need to bind to another address") return strippedAddress } } - logWarning("Your hostname, " + InetAddress.getLocalHost.getHostName + " resolves to" + - " a loopback address: " + address.getHostAddress + ", but we couldn't find any" + - " external IP address!") + logWarning(log"Your hostname, ${MDC(HOST, InetAddress.getLocalHost.getHostName)}, " + + log"resolves to a loopback address: ${MDC(HOST_PORT, address.getHostAddress)}, " + + log"but we couldn't find any external IP address!") logWarning("Set SPARK_LOCAL_IP if you need to bind to another address") } address @@ -1197,7 +1202,7 @@ private[spark] object Utils val process = builder.start() if (redirectStderr) { val threadName = "redirect stderr for command " + command(0) - def log(s: String): Unit = logInfo(s) + def log(s: String): Unit = logInfo(log"${MDC(LINE, s)}") processStreamByLine(threadName, process.getErrorStream, log) } process @@ -1330,7 +1335,7 @@ private[spark] object Utils case t: Throwable => if (originalThrowable != t) { originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in catch: ${t.getMessage}", t) + logWarning(log"Suppressing exception in catch: ${MDC(ERROR, t.getMessage)}", t) } } throw originalThrowable @@ -1340,7 +1345,7 @@ private[spark] object Utils } catch { case t: Throwable if (originalThrowable != null && originalThrowable != t) => originalThrowable.addSuppressed(t) - logWarning(s"Suppressing exception in finally: ${t.getMessage}", t) + logWarning(log"Suppressing exception in finally: ${MDC(ERROR, t.getMessage)}", t) throw originalThrowable } } @@ -2129,6 +2134,18 @@ private[spark] object Utils (base + offset - 1024) % (65536 - 1024) + 1024 } + /** + * Attempt to start a service on the given port, or fail after a number of attempts. + * Use a shared configuration for the maximum number of port retries. + */ + def startServiceOnPort[T]( + startPort: Int, + startService: Int => (T, Int), + conf: SparkConf, + serviceName: String = ""): (T, Int) = { + startServiceOnPort(startPort, startService, portMaxRetries(conf), serviceName) + } + /** * Attempt to start a service on the given port, or fail after a number of attempts. * Each subsequent attempt uses 1 + the port used in the previous attempt (unless the port is 0). @@ -2136,21 +2153,20 @@ private[spark] object Utils * @param startPort The initial port to start the service on. * @param startService Function to start service on a given port. * This is expected to throw java.net.BindException on port collision. - * @param conf A SparkConf used to get the maximum number of retries when binding to a port. + * @param maxRetries The maximum number of retries when binding to a port. * @param serviceName Name of the service. 
* @return (service: T, port: Int) */ def startServiceOnPort[T]( startPort: Int, startService: Int => (T, Int), - conf: SparkConf, - serviceName: String = ""): (T, Int) = { + maxRetries: Int, + serviceName: String): (T, Int) = { require(startPort == 0 || (1024 <= startPort && startPort < 65536), "startPort should be between 1024 and 65535 (inclusive), or 0 for a random free port.") val serviceString = if (serviceName.isEmpty) "" else s" '$serviceName'" - val maxRetries = portMaxRetries(conf) for (offset <- 0 to maxRetries) { // Do not increment port if startPort is 0, which is treated as a special port val tryPort = if (startPort == 0) { @@ -2160,7 +2176,8 @@ private[spark] object Utils } try { val (service, port) = startService(tryPort) - logInfo(s"Successfully started service$serviceString on port $port.") + logInfo(log"Successfully started service${MDC(SERVICE_NAME, serviceString)}" + + log" on port ${MDC(PORT, port)}.") return (service, port) } catch { case e: Exception if isBindCollision(e) => @@ -2185,11 +2202,13 @@ private[spark] object Utils if (startPort == 0) { // As startPort 0 is for a random free port, it is most possibly binding address is // not correct. - logWarning(s"Service$serviceString could not bind on a random free port. " + - "You may check whether configuring an appropriate binding address.") + logWarning(log"Service${MDC(SERVICE_NAME, serviceString)} " + + log"could not bind on a random free port. " + + log"You may check whether configuring an appropriate binding address.") } else { - logWarning(s"Service$serviceString could not bind on port $tryPort. " + - s"Attempting port ${tryPort + 1}.") + logWarning(log"Service${MDC(SERVICE_NAME, serviceString)} " + + log"could not bind on port ${MDC(PORT, tryPort)}. " + + log"Attempting port ${MDC(PORT2, tryPort + 1)}.") } } } @@ -2212,6 +2231,9 @@ private[spark] object Utils case e: NativeIoException => (e.getMessage != null && e.getMessage.startsWith("bind() failed: ")) || isBindCollision(e.getCause) + case e: IOException => + (e.getMessage != null && e.getMessage.startsWith("Failed to bind to address")) || + isBindCollision(e.getCause) case e: Exception => isBindCollision(e.getCause) case _ => false } @@ -2454,9 +2476,9 @@ private[spark] object Utils (isShuffleServiceAndYarn || isTesting) && ioEncryptionDisabled && serializerIsSupported } if (!canDoPushBasedShuffle) { - logWarning("Push-based shuffle can only be enabled when the application is submitted " + - "to run in YARN mode, with external shuffle service enabled, IO encryption disabled, " + - "and relocation of serialized objects supported.") + logWarning(log"Push-based shuffle can only be enabled when the application is submitted " + + log"to run in YARN mode, with external shuffle service enabled, IO encryption " + + log"disabled, and relocation of serialized objects supported.") } canDoPushBasedShuffle @@ -2517,15 +2539,15 @@ private[spark] object Utils */ def getDynamicAllocationInitialExecutors(conf: SparkConf): Int = { if (conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS) < conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) { - logWarning(s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key} less than " + - s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " + - "please update your configs.") + logWarning(log"${MDC(CONFIG, DYN_ALLOCATION_INITIAL_EXECUTORS.key)} less than " + + log"${MDC(CONFIG2, DYN_ALLOCATION_MIN_EXECUTORS.key)} is invalid, ignoring its setting, " + + log"please update your configs.") } if (conf.get(EXECUTOR_INSTANCES).getOrElse(0) < 
conf.get(DYN_ALLOCATION_MIN_EXECUTORS)) { - logWarning(s"${EXECUTOR_INSTANCES.key} less than " + - s"${DYN_ALLOCATION_MIN_EXECUTORS.key} is invalid, ignoring its setting, " + - "please update your configs.") + logWarning(log"${MDC(CONFIG, EXECUTOR_INSTANCES.key)} less than " + + log"${MDC(CONFIG2, DYN_ALLOCATION_MIN_EXECUTORS.key)} is invalid, ignoring its setting, " + + log"please update your configs.") } val initialExecutors = Seq( @@ -2533,9 +2555,10 @@ private[spark] object Utils conf.get(DYN_ALLOCATION_INITIAL_EXECUTORS), conf.get(EXECUTOR_INSTANCES).getOrElse(0)).max - logInfo(s"Using initial executors = $initialExecutors, max of " + - s"${DYN_ALLOCATION_INITIAL_EXECUTORS.key}, ${DYN_ALLOCATION_MIN_EXECUTORS.key} and " + - s"${EXECUTOR_INSTANCES.key}") + logInfo(log"Using initial executors = ${MDC(NUM_EXECUTORS, initialExecutors)}, max of " + + log"${MDC(CONFIG, DYN_ALLOCATION_INITIAL_EXECUTORS.key)}, " + + log"${MDC(CONFIG2, DYN_ALLOCATION_MIN_EXECUTORS.key)} and" + + log" ${MDC(CONFIG3, EXECUTOR_INSTANCES.key)}") initialExecutors } @@ -2723,7 +2746,7 @@ private[spark] object Utils e.getCause() match { case uoe: UnsupportedOperationException => logDebug(s"Extension $name not being initialized.", uoe) - logInfo(s"Extension $name not being initialized.") + logInfo(log"Extension ${MDC(CLASS_NAME, name)} not being initialized.") None case null => throw e @@ -2747,8 +2770,8 @@ private[spark] object Utils // To handle master URLs, e.g., k8s://host:port. if (!masterWithoutK8sPrefix.contains("://")) { val resolvedURL = s"https://$masterWithoutK8sPrefix" - logInfo("No scheme specified for kubernetes master URL, so defaulting to https. Resolved " + - s"URL is $resolvedURL.") + logInfo(log"No scheme specified for kubernetes master URL, so defaulting to https." 
+ + log" Resolved URL is ${MDC(LogKeys.URL, resolvedURL)}.") return s"k8s://$resolvedURL" } @@ -2758,7 +2781,7 @@ private[spark] object Utils case Some("https") => masterWithoutK8sPrefix case Some("http") => - logWarning("Kubernetes master URL uses HTTP instead of HTTPS.") + logWarning(log"Kubernetes master URL uses HTTP instead of HTTPS.") masterWithoutK8sPrefix case _ => throw new IllegalArgumentException("Invalid Kubernetes master scheme: " + masterScheme @@ -3001,7 +3024,7 @@ private[spark] object Utils entry = in.getNextEntry() } in.close() // so that any error in closing does not get ignored - logInfo(s"Unzipped from $dfsZipFile\n\t${files.mkString("\n\t")}") + logInfo(log"Unzipped from ${MDC(PATH, dfsZipFile)}\n\t${MDC(PATHS, files.mkString("\n\t"))}") } finally { // Close everything no matter what happened IOUtils.closeQuietly(in) @@ -3118,7 +3141,8 @@ private[spark] class CallerContext( context } else { val finalContext = context.substring(0, len) - logWarning(s"Truncated Spark caller context from $context to $finalContext") + logWarning(log"Truncated Spark caller context from ${MDC(CONTEXT, context)} " + + log"to ${MDC(FINAL_CONTEXT, finalContext)}") finalContext } } diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala index 8224472b75458..16a2f4fb6cad9 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala @@ -29,7 +29,8 @@ import com.google.common.io.ByteStreams import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.ShuffleWriteMetrics -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.serializer.{DeserializationStream, Serializer, SerializerManager} import org.apache.spark.storage.{BlockId, BlockManager} import org.apache.spark.util.CompletionIterator @@ -544,7 +545,7 @@ class ExternalAppendOnlyMap[K, V, C]( } if (file.exists()) { if (!file.delete()) { - logWarning(s"Error deleting ${file}") + logWarning(log"Error deleting ${MDC(FILE_NAME, file)}") } } } @@ -565,8 +566,9 @@ class ExternalAppendOnlyMap[K, V, C]( if (hasSpilled) { false } else { - logInfo(s"Task ${context.taskAttemptId()} force spilling in-memory map to disk and " + - s"it will release ${org.apache.spark.util.Utils.bytesToString(getUsed())} memory") + logInfo(log"Task ${MDC(TASK_ATTEMPT_ID, context.taskAttemptId())} force spilling" + + log" in-memory map to disk and it will release " + + log"${MDC(NUM_BYTES, org.apache.spark.util.Utils.bytesToString(getUsed()))} memory") val nextUpstream = spillMemoryIteratorToDisk(upstream) assert(!upstream.hasNext) hasSpilled = true diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala index 77aec10a6b126..393cdbbef0a5a 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalSorter.scala @@ -28,9 +28,10 @@ import com.google.common.io.ByteStreams import org.apache.spark._ import org.apache.spark.executor.ShuffleWriteMetrics -import org.apache.spark.internal.{config, Logging} +import org.apache.spark.internal.{config, Logging, MDC} 
+import org.apache.spark.internal.LogKeys.{NUM_BYTES, TASK_ATTEMPT_ID} import org.apache.spark.serializer._ -import org.apache.spark.shuffle.ShufflePartitionPairsWriter +import org.apache.spark.shuffle.{ShufflePartitionPairsWriter, ShuffleWriteMetricsReporter} import org.apache.spark.shuffle.api.{ShuffleMapOutputWriter, ShufflePartitionWriter} import org.apache.spark.shuffle.checksum.ShuffleChecksumSupport import org.apache.spark.storage.{BlockId, DiskBlockObjectWriter, ShuffleBlockId} @@ -693,7 +694,8 @@ private[spark] class ExternalSorter[K, V, C]( def writePartitionedMapOutput( shuffleId: Int, mapId: Long, - mapOutputWriter: ShuffleMapOutputWriter): Unit = { + mapOutputWriter: ShuffleMapOutputWriter, + writeMetrics: ShuffleWriteMetricsReporter): Unit = { if (spills.isEmpty) { // Case where we only have in-memory data val collection = if (aggregator.isDefined) map else buffer @@ -710,7 +712,7 @@ private[spark] class ExternalSorter[K, V, C]( serializerManager, serInstance, blockId, - context.taskMetrics().shuffleWriteMetrics, + writeMetrics, if (partitionChecksums.nonEmpty) partitionChecksums(partitionId) else null) while (it.hasNext && it.nextPartition() == partitionId) { it.writeNext(partitionPairsWriter) @@ -734,7 +736,7 @@ private[spark] class ExternalSorter[K, V, C]( serializerManager, serInstance, blockId, - context.taskMetrics().shuffleWriteMetrics, + writeMetrics, if (partitionChecksums.nonEmpty) partitionChecksums(id) else null) if (elements.hasNext) { for (elem <- elements) { @@ -815,8 +817,9 @@ private[spark] class ExternalSorter[K, V, C]( false } else { val inMemoryIterator = new WritablePartitionedIterator[K, C](upstream) - logInfo(s"Task ${TaskContext.get().taskAttemptId()} force spilling in-memory map to disk " + - s"and it will release ${org.apache.spark.util.Utils.bytesToString(getUsed())} memory") + logInfo(log"Task ${MDC(TASK_ATTEMPT_ID, TaskContext.get().taskAttemptId())}" + + log" force spilling in-memory map to disk and it will release" + + log" ${MDC(NUM_BYTES, org.apache.spark.util.Utils.bytesToString(getUsed()))} memory") val spillFile = spillMemoryIteratorToDisk(inMemoryIterator) forceSpillFiles += spillFile val spillReader = new SpillReader(spillFile) diff --git a/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala b/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala index ffb13db515822..840fb59d410a6 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala @@ -33,7 +33,8 @@ import org.apache.logging.log4j.core.layout.PatternLayout import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config._ import org.apache.spark.network.util.JavaUtils import org.apache.spark.util.{ThreadUtils, Utils} @@ -79,7 +80,7 @@ private[spark] class DriverLogger(conf: SparkConf) extends Logging { val fa = log4jFileAppender() logger.addAppender(fa) fa.start() - logInfo(s"Added a local log appender at: $localLogFile") + logInfo(log"Added a local log appender at: ${MDC(FILE_NAME, localLogFile)}") } def startSync(hadoopConf: Configuration): Unit = { @@ -144,7 +145,7 @@ private[spark] class DriverLogger(conf: SparkConf) extends Logging { threadpool = ThreadUtils.newDaemonSingleThreadScheduledExecutor("dfsSyncThread") threadpool.scheduleWithFixedDelay(this, 
UPLOAD_INTERVAL_IN_SECS, UPLOAD_INTERVAL_IN_SECS, TimeUnit.SECONDS) - logInfo(s"Started driver log file sync to: ${dfsLogFile}") + logInfo(log"Started driver log file sync to: ${MDC(PATH, dfsLogFile)}") } def run(): Unit = { @@ -229,8 +230,8 @@ private[spark] object DriverLogger extends Logging { // Return None because we don't need DFS-related logic in SparkContext and DfsAsyncWriter None } else { - logWarning(s"Driver logs are not persisted because" + - s" ${DRIVER_LOG_DFS_DIR.key} is not configured") + logWarning(log"Driver logs are not persisted because" + + log" ${MDC(CONFIG, DRIVER_LOG_DFS_DIR.key)} is not configured") None } } else { diff --git a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala index 1dadf15da40fa..202c919362951 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/FileAppender.scala @@ -21,7 +21,7 @@ import java.io.{File, FileOutputStream, InputStream, IOException} import org.apache.spark.SparkConf import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.{IntParam, Utils} /** @@ -138,20 +138,24 @@ private[spark] object FileAppender extends Logging { def createTimeBasedAppender(): FileAppender = { val validatedParams: Option[(Long, String)] = rollingInterval match { case "daily" => - logInfo(s"Rolling executor logs enabled for $file with daily rolling") + logInfo(log"Rolling executor logs enabled for ${MDC(FILE_NAME, file)} with daily rolling") Some((24 * 60 * 60 * 1000L, "--yyyy-MM-dd")) case "hourly" => - logInfo(s"Rolling executor logs enabled for $file with hourly rolling") + logInfo(log"Rolling executor logs enabled for ${MDC(FILE_NAME, file)}" + + log" with hourly rolling") Some((60 * 60 * 1000L, "--yyyy-MM-dd--HH")) case "minutely" => - logInfo(s"Rolling executor logs enabled for $file with rolling every minute") + logInfo(log"Rolling executor logs enabled for ${MDC(FILE_NAME, file)}" + + log" with rolling every minute") Some((60 * 1000L, "--yyyy-MM-dd--HH-mm")) case IntParam(seconds) => - logInfo(s"Rolling executor logs enabled for $file with rolling $seconds seconds") + logInfo(log"Rolling executor logs enabled for ${MDC(FILE_NAME, file)}" + + log" with rolling ${MDC(TIME_UNITS, seconds)} seconds") Some((seconds * 1000L, "--yyyy-MM-dd--HH-mm-ss")) case _ => - logWarning(s"Illegal interval for rolling executor logs [$rollingInterval], " + - s"rolling logs not enabled") + logWarning(log"Illegal interval for rolling executor logs [" + + log"${MDC(TIME_UNITS, rollingInterval)}], " + + log"rolling logs not enabled") None } validatedParams.map { @@ -167,12 +171,14 @@ private[spark] object FileAppender extends Logging { def createSizeBasedAppender(): FileAppender = { rollingSizeBytes match { case IntParam(bytes) => - logInfo(s"Rolling executor logs enabled for $file with rolling every $bytes bytes") + logInfo(log"Rolling executor logs enabled for ${MDC(FILE_NAME, file)}" + + log" with rolling every ${MDC(NUM_BYTES, bytes)} bytes") new RollingFileAppender( inputStream, file, new SizeBasedRollingPolicy(bytes), conf, closeStreams = closeStreams) case _ => logWarning( - s"Illegal size [$rollingSizeBytes] for rolling executor logs, rolling logs not enabled") + log"Illegal size [${MDC(NUM_BYTES, rollingSizeBytes)}] " + + log"for rolling executor logs, rolling logs not enabled") new 
FileAppender(inputStream, file, closeStreams = closeStreams) } } diff --git a/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala b/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala index f8f144f6e3885..6927c119a91c5 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/RollingFileAppender.scala @@ -25,7 +25,7 @@ import org.apache.commons.io.IOUtils import org.apache.spark.SparkConf import org.apache.spark.internal.{config, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys._ import org.apache.spark.util.ArrayImplicits._ /** @@ -118,7 +118,7 @@ private[spark] class RollingFileAppender( if (activeFile.exists) { if (!rolloverFileExist(rolloverFile)) { rotateFile(activeFile, rolloverFile) - logInfo(s"Rolled over $activeFile to $rolloverFile") + logInfo(log"Rolled over ${MDC(FILE_NAME, activeFile)} to ${MDC(FILE_NAME2, rolloverFile)}") } else { // In case the rollover file name clashes, make a unique file name. // The resultant file names are long and ugly, so this is used only @@ -132,12 +132,13 @@ private[spark] class RollingFileAppender( i += 1 } while (i < 10000 && rolloverFileExist(altRolloverFile)) - logWarning(s"Rollover file $rolloverFile already exists, " + - s"rolled over $activeFile to file $altRolloverFile") + logWarning(log"Rollover file ${MDC(FILE_NAME, rolloverFile)} already exists, " + + log"rolled over ${MDC(FILE_NAME2, activeFile)} " + + log"to file ${MDC(FILE_NAME3, altRolloverFile)}") rotateFile(activeFile, altRolloverFile) } } else { - logWarning(s"File $activeFile does not exist") + logWarning(log"File ${MDC(FILE_NAME, activeFile)} does not exist") } } @@ -152,7 +153,8 @@ private[spark] class RollingFileAppender( val filesToBeDeleted = rolledoverFiles.take( math.max(0, rolledoverFiles.length - maxRetainedFiles)) filesToBeDeleted.foreach { file => - logInfo(s"Deleting file executor log file ${file.getAbsolutePath}") + logInfo(log"Deleting file executor log file" + + log" ${MDC(FILE_ABSOLUTE_PATH, file.getAbsolutePath)}") file.delete() } } catch { diff --git a/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala b/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala index 5327ecd3e56a9..310e895930943 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/RollingPolicy.scala @@ -20,7 +20,8 @@ package org.apache.spark.util.logging import java.text.SimpleDateFormat import java.util.{Calendar, Locale} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ /** * Defines the policy based on which [[org.apache.spark.util.logging.RollingFileAppender]] will @@ -53,8 +54,9 @@ private[spark] class TimeBasedRollingPolicy( import TimeBasedRollingPolicy._ if (checkIntervalConstraint && rolloverIntervalMillis < MINIMUM_INTERVAL_SECONDS * 1000L) { - logWarning(s"Rolling interval [${rolloverIntervalMillis/1000L} seconds] is too small. " + - s"Setting the interval to the acceptable minimum of $MINIMUM_INTERVAL_SECONDS seconds.") + logWarning(log"Rolling interval [${MDC(TIME_UNITS, rolloverIntervalMillis)} " + + log"ms] is too small. 
Setting the interval to the acceptable minimum of " + + log"${MDC(MIN_TIME, MINIMUM_INTERVAL_SECONDS * 1000)} ms.") rolloverIntervalMillis = MINIMUM_INTERVAL_SECONDS * 1000L } @@ -103,8 +105,9 @@ private[spark] class SizeBasedRollingPolicy( import SizeBasedRollingPolicy._ if (checkSizeConstraint && rolloverSizeBytes < MINIMUM_SIZE_BYTES) { - logWarning(s"Rolling size [$rolloverSizeBytes bytes] is too small. " + - s"Setting the size to the acceptable minimum of $MINIMUM_SIZE_BYTES bytes.") + logWarning(log"Rolling size [${MDC(NUM_BYTES, rolloverSizeBytes)} bytes] is too small. " + + log"Setting the size to the acceptable minimum of ${MDC(MIN_SIZE, MINIMUM_SIZE_BYTES)} " + + log"bytes.") rolloverSizeBytes = MINIMUM_SIZE_BYTES } diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index 063d391bb4bfd..d95d648c2d732 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -314,7 +314,8 @@ public void writeWithoutSpilling() throws Exception { @Test public void writeChecksumFileWithoutSpill() throws Exception { - IndexShuffleBlockResolver blockResolver = new IndexShuffleBlockResolver(conf, blockManager); + IndexShuffleBlockResolver blockResolver = + new IndexShuffleBlockResolver(conf, blockManager, Collections.emptyMap()); ShuffleChecksumBlockId checksumBlockId = new ShuffleChecksumBlockId(0, 0, IndexShuffleBlockResolver.NOOP_REDUCE_ID()); String checksumAlgorithm = conf.get(package$.MODULE$.SHUFFLE_CHECKSUM_ALGORITHM()); @@ -344,7 +345,8 @@ public void writeChecksumFileWithoutSpill() throws Exception { @Test public void writeChecksumFileWithSpill() throws Exception { - IndexShuffleBlockResolver blockResolver = new IndexShuffleBlockResolver(conf, blockManager); + IndexShuffleBlockResolver blockResolver = + new IndexShuffleBlockResolver(conf, blockManager, Collections.emptyMap()); ShuffleChecksumBlockId checksumBlockId = new ShuffleChecksumBlockId(0, 0, IndexShuffleBlockResolver.NOOP_REDUCE_ID()); String checksumAlgorithm = conf.get(package$.MODULE$.SHUFFLE_CHECKSUM_ALGORITHM()); diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index 874f4896bb01e..7a39ba4ab382b 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -669,4 +669,20 @@ class CheckpointStorageSuite extends SparkFunSuite with LocalSparkContext { assert(rdd.firstParent.isInstanceOf[ReliableCheckpointRDD[_]]) } } + + test("SPARK-48268: checkpoint directory via configuration") { + withTempDir { checkpointDir => + val conf = new SparkConf() + .set("spark.checkpoint.dir", checkpointDir.toString) + .set(UI_ENABLED.key, "false") + sc = new SparkContext("local", "test", conf) + val parCollection = sc.makeRDD(1 to 4) + val flatMappedRDD = parCollection.flatMap(x => 1 to x) + flatMappedRDD.checkpoint() + assert(flatMappedRDD.dependencies.head.rdd === parCollection) + val result = flatMappedRDD.collect() + assert(flatMappedRDD.dependencies.head.rdd != parCollection) + assert(flatMappedRDD.collect() === result) + } + } } diff --git a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala index c15fdf098bb56..58cf14e969e50 100644 --- 
a/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala +++ b/core/src/test/scala/org/apache/spark/JobCancellationSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark import java.util.concurrent.{Semaphore, TimeUnit} import java.util.concurrent.atomic.AtomicInteger +import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} // scalastyle:off executioncontextglobal import scala.concurrent.ExecutionContext.Implicits.global @@ -29,9 +30,10 @@ import scala.concurrent.duration._ import org.scalatest.BeforeAndAfter import org.scalatest.matchers.must.Matchers +import org.apache.spark.executor.ExecutorExitCode import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Deploy._ -import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart} +import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorRemoved, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted, SparkListenerTaskEnd, SparkListenerTaskStart} import org.apache.spark.util.ThreadUtils /** @@ -429,12 +431,20 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft .set(TASK_REAPER_KILL_TIMEOUT.key, "5s") sc = new SparkContext("local-cluster[2,1,1024]", "test", conf) - // Add a listener to release the semaphore once any tasks are launched. + // Add a listener to release a semaphore once any tasks are launched, and another semaphore + // once an executor is removed. val sem = new Semaphore(0) + val semExec = new Semaphore(0) + val execLossReason = new ArrayBuffer[String]() sc.addSparkListener(new SparkListener { override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = { sem.release() } + + override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = { + execLossReason += executorRemoved.reason + semExec.release() + } }) // jobA is the one to be cancelled. @@ -455,6 +465,9 @@ class JobCancellationSuite extends SparkFunSuite with Matchers with BeforeAndAft sc.cancelJobGroup("jobA") val e = intercept[SparkException] { ThreadUtils.awaitResult(jobA, 15.seconds) }.getCause assert(e.getMessage contains "cancel") + semExec.acquire(2) + val expectedReason = s"Command exited with code ${ExecutorExitCode.KILLED_BY_TASK_REAPER}" + assert(execLossReason == Seq(expectedReason, expectedReason)) // Once A is cancelled, job B should finish fairly quickly. 
assert(ThreadUtils.awaitResult(jobB, 1.minute) === 100) diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 7aec8eeaad423..26dc218c30c74 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -1110,4 +1110,59 @@ class MapOutputTrackerSuite extends SparkFunSuite with LocalSparkContext { rpcEnv.shutdown() } } + + test( + "SPARK-48394: mapIdToMapIndex should cleanup unused mapIndexes after removeOutputsByFilter" + ) { + val rpcEnv = createRpcEnv("test") + val tracker = newTrackerMaster() + try { + tracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, tracker, conf)) + tracker.registerShuffle(0, 1, 1) + tracker.registerMapOutput(0, 0, MapStatus(BlockManagerId("exec-1", "hostA", 1000), + Array(2L), 0)) + tracker.removeOutputsOnHost("hostA") + assert(tracker.shuffleStatuses(0).mapIdToMapIndex.filter(_._2 == 0).size == 0) + } finally { + tracker.stop() + rpcEnv.shutdown() + } + } + + test("SPARK-48394: mapIdToMapIndex should cleanup unused mapIndexes after unregisterMapOutput") { + val rpcEnv = createRpcEnv("test") + val tracker = newTrackerMaster() + try { + tracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, tracker, conf)) + tracker.registerShuffle(0, 1, 1) + tracker.registerMapOutput(0, 0, MapStatus(BlockManagerId("exec-1", "hostA", 1000), + Array(2L), 0)) + tracker.unregisterMapOutput(0, 0, BlockManagerId("exec-1", "hostA", 1000)) + assert(tracker.shuffleStatuses(0).mapIdToMapIndex.filter(_._2 == 0).size == 0) + } finally { + tracker.stop() + rpcEnv.shutdown() + } + } + + test("SPARK-48394: mapIdToMapIndex should cleanup unused mapIndexes after registerMapOutput") { + val rpcEnv = createRpcEnv("test") + val tracker = newTrackerMaster() + try { + tracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, tracker, conf)) + tracker.registerShuffle(0, 1, 1) + tracker.registerMapOutput(0, 0, MapStatus(BlockManagerId("exec-1", "hostA", 1000), + Array(2L), 0)) + // Another task also finished working on partition 0. 
+ tracker.registerMapOutput(0, 0, MapStatus(BlockManagerId("exec-2", "hostB", 1000), + Array(2L), 1)) + assert(tracker.shuffleStatuses(0).mapIdToMapIndex.filter(_._2 == 0).size == 1) + } finally { + tracker.stop() + rpcEnv.shutdown() + } + } } diff --git a/core/src/test/scala/org/apache/spark/MapStatusesSerDeserBenchmark.scala b/core/src/test/scala/org/apache/spark/MapStatusesSerDeserBenchmark.scala index ca85ffda4e602..75f952d063d33 100644 --- a/core/src/test/scala/org/apache/spark/MapStatusesSerDeserBenchmark.scala +++ b/core/src/test/scala/org/apache/spark/MapStatusesSerDeserBenchmark.scala @@ -123,7 +123,6 @@ object MapStatusesSerDeserBenchmark extends BenchmarkBase { } override def afterAll(): Unit = { - tracker.stop() if (sc != null) { sc.stop() } diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index 1492543c56bbc..231cfdc3f32fc 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -20,11 +20,8 @@ package org.apache.spark import java.io.File import java.nio.charset.StandardCharsets import java.nio.file.Files -import java.util.Locale -import scala.jdk.CollectionConverters._ import scala.util.Properties.lineSeparator -import scala.util.matching.Regex import com.fasterxml.jackson.annotation.JsonInclude.Include import com.fasterxml.jackson.core.JsonParser.Feature.STRICT_DUPLICATE_DETECTION @@ -48,19 +45,13 @@ class SparkThrowableSuite extends SparkFunSuite { SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ "core/testOnly *SparkThrowableSuite -- -t \"Error classes are correctly formatted\"" }}} - - To regenerate the error class document. Run: - {{{ - SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ - "core/testOnly *SparkThrowableSuite -- -t \"Error classes match with document\"" - }}} */ private val regenerateCommand = "SPARK_GENERATE_GOLDEN_FILES=1 build/sbt " + "\"core/testOnly *SparkThrowableSuite -- -t \\\"Error classes match with document\\\"\"" private val errorJsonFilePath = getWorkspaceFilePath( // Note that though we call them "error classes" here, the proper name is "error conditions", - // hence why the name of the JSON file different. We will address this inconsistency as part + // hence why the name of the JSON file is different. 
We will address this inconsistency as part // of this ticket: https://issues.apache.org/jira/browse/SPARK-47429 "common", "utils", "src", "main", "resources", "error", "error-conditions.json") @@ -173,219 +164,6 @@ class SparkThrowableSuite extends SparkFunSuite { checkIfUnique(messageFormats) } - test("Error classes match with document") { - val errors = errorReader.errorInfoMap - - // the black list of error class name which should not add quote - val contentQuoteBlackList = Seq( - "INCOMPLETE_TYPE_DEFINITION.MAP", - "INCOMPLETE_TYPE_DEFINITION.STRUCT") - - def quoteParameter(content: String, errorName: String): String = { - if (contentQuoteBlackList.contains(errorName)) { - content - } else { - "<(.*?)>".r.replaceAllIn(content, (m: Regex.Match) => { - val matchStr = m.group(1) - if (matchStr.nonEmpty) { - s"`<$matchStr>`" - } else { - m.matched - } - }).replaceAll("%(.*?)\\$", "`\\%$1\\$`") - } - } - - val sqlStates = IOUtils.toString(getWorkspaceFilePath("docs", - "sql-error-conditions-sqlstates.md").toUri, StandardCharsets.UTF_8).split("\n") - .filter(_.startsWith("##")).map(s => { - - val errorHeader = s.split("[`|:|#|\\s]+").filter(_.nonEmpty) - val sqlState = errorHeader(1) - (sqlState, errorHeader.head.toLowerCase(Locale.ROOT) + "-" + sqlState + "-" + - errorHeader.takeRight(errorHeader.length - 2).mkString("-").toLowerCase(Locale.ROOT)) - }).toMap - - def getSqlState(sqlState: Option[String]): String = { - if (sqlState.isDefined) { - val prefix = sqlState.get.substring(0, 2) - if (sqlStates.contains(prefix)) { - s"[SQLSTATE: ${sqlState.get}](sql-error-conditions-sqlstates.html#${sqlStates(prefix)})" - } else { - "SQLSTATE: " + sqlState.get - } - } else { - "SQLSTATE: none assigned" - } - } - - def getErrorPath(error: String): String = { - s"sql-error-conditions-${error.toLowerCase(Locale.ROOT).replaceAll("_", "-")}-error-class" - } - - def getHeader(title: String): String = { - s"""--- - |layout: global - |title: $title - |displayTitle: $title - |license: | - | Licensed to the Apache Software Foundation (ASF) under one or more - | contributor license agreements. See the NOTICE file distributed with - | this work for additional information regarding copyright ownership. - | The ASF licenses this file to You under the Apache License, Version 2.0 - | (the "License"); you may not use this file except in compliance with - | the License. You may obtain a copy of the License at - | - | http://www.apache.org/licenses/LICENSE-2.0 - | - | Unless required by applicable law or agreed to in writing, software - | distributed under the License is distributed on an "AS IS" BASIS, - | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - | See the License for the specific language governing permissions and - | limitations under the License. 
- |--- - | - |""".stripMargin - } - - def orphanedGoldenFiles(): Iterable[File] = { - val subErrorFileNames = errors.filter(_._2.subClass.isDefined).map(error => { - getErrorPath(error._1) + ".md" - }).toSet - - val docsDir = getWorkspaceFilePath("docs") - val orphans = FileUtils.listFiles(docsDir.toFile, Array("md"), false).asScala.filter { f => - (f.getName.startsWith("sql-error-conditions-") && f.getName.endsWith("-error-class.md")) && - !subErrorFileNames.contains(f.getName) - } - orphans - } - - val sqlErrorParentDocContent = errors.toSeq.filter(!_._1.startsWith("_LEGACY_ERROR")) - .sortBy(_._1).map(error => { - val name = error._1 - val info = error._2 - if (info.subClass.isDefined) { - val title = s"[$name](${getErrorPath(name)}.html)" - s"""|### $title - | - |${getSqlState(info.sqlState)} - | - |${quoteParameter(info.messageTemplate, name)} - | - |For more details see $title - |""".stripMargin - } else { - s"""|### $name - | - |${getSqlState(info.sqlState)} - | - |${quoteParameter(info.messageTemplate, name)} - |""".stripMargin - } - }).mkString("\n") - - val sqlErrorParentDoc = - s"""${getHeader("Error Conditions")} - | - |This is a list of common, named error conditions returned by Spark SQL. - | - |Also see [SQLSTATE Codes](sql-error-conditions-sqlstates.html). - | - |$sqlErrorParentDocContent""".stripMargin - - errors.filter(_._2.subClass.isDefined).foreach(error => { - val name = error._1 - val info = error._2 - - val subErrorContent = info.subClass.get.toSeq.sortBy(_._1).map(subError => { - s"""|## ${subError._1} - | - |${quoteParameter(subError._2.messageTemplate, s"$name.${subError._1}")} - |""".stripMargin - }).mkString("\n") - - val subErrorDoc = - s"""${getHeader(name + " error class")} - | - |${getSqlState(info.sqlState)} - | - |${quoteParameter(info.messageTemplate, name)} - | - |This error class has the following derived error classes: - | - |$subErrorContent - |""".stripMargin - - val errorDocPath = getWorkspaceFilePath("docs", getErrorPath(name) + ".md") - val errorsInDoc = if (errorDocPath.toFile.exists()) { - IOUtils.toString(errorDocPath.toUri, StandardCharsets.UTF_8) - } else { - "" - } - if (regenerateGoldenFiles) { - if (subErrorDoc.trim != errorsInDoc.trim) { - logInfo(s"Regenerating sub error class document $errorDocPath") - if (errorDocPath.toFile.exists()) { - Files.delete(errorDocPath) - } - FileUtils.writeStringToFile( - errorDocPath.toFile, - subErrorDoc + lineSeparator, - StandardCharsets.UTF_8) - } - } else { - assert(subErrorDoc.trim == errorsInDoc.trim, - "The error class document is not up to date. " + - s"Please regenerate it by running `$regenerateCommand`") - } - }) - - val parentDocPath = getWorkspaceFilePath("docs", "sql-error-conditions.md") - val commonErrorsInDoc = if (parentDocPath.toFile.exists()) { - IOUtils.toString(parentDocPath.toUri, StandardCharsets.UTF_8) - } else { - "" - } - if (regenerateGoldenFiles) { - if (sqlErrorParentDoc.trim != commonErrorsInDoc.trim) { - logInfo(s"Regenerating error class document $parentDocPath") - if (parentDocPath.toFile.exists()) { - Files.delete(parentDocPath) - } - FileUtils.writeStringToFile( - parentDocPath.toFile, - sqlErrorParentDoc, - StandardCharsets.UTF_8) - } - } else { - assert(sqlErrorParentDoc.trim == commonErrorsInDoc.trim, - "The error class document is not up to date. 
" + - s"Please regenerate it by running `$regenerateCommand`") - } - - val orphans = orphanedGoldenFiles() - if (regenerateGoldenFiles) { - if (orphans.nonEmpty) { - logInfo(s"Orphaned error class documents (${orphans.size}) is not empty, " + - "executing cleanup operation.") - orphans.foreach { f => - FileUtils.deleteQuietly(f) - logInfo(s"Cleanup orphaned error document: ${f.getName}.") - } - } else { - logInfo("Orphaned error class documents is empty") - } - } else { - assert(orphans.isEmpty, - "Exist orphaned error class documents. " + - s"Please regenerate it by running `$regenerateCommand`") - } - } - test("Round trip") { val tmpFile = File.createTempFile("rewritten", ".json") val mapper = JsonMapper.builder() diff --git a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala index a8ede31f1d30d..77f5268f79cae 100644 --- a/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/RPackageUtilsSuite.scala @@ -127,7 +127,7 @@ class RPackageUtilsSuite RPackageUtils.checkAndBuildRPackage(jar.getAbsolutePath, new BufferPrintStream, verbose = true) val output = lineBuffer.mkString("\n") - assert(output.contains(RPackageUtils.RJarDoc)) + assert(output.contains(RPackageUtils.RJarDoc.message)) } } diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala index f55c00d7d61a5..40d8eae644a07 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala @@ -1107,12 +1107,49 @@ class SparkSubmitSuite "--master", "local", unusedJar.toString) val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path)) - assert(appArgs.propertiesFile != null) - assert(appArgs.propertiesFile.startsWith(path)) appArgs.executorMemory should be ("3g") } } + test("SPARK-48392: load spark-defaults.conf when --load-spark-defaults is set") { + forConfDir(Map("spark.executor.memory" -> "3g", "spark.driver.memory" -> "3g")) { path => + withPropertyFile("spark-conf.properties", + Map("spark.executor.cores" -> "16", "spark.driver.memory" -> "4g")) { propsFile => + val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val args = Seq( + "--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"), + "--name", "testApp", + "--master", "local", + "--properties-file", propsFile, + "--load-spark-defaults", + unusedJar.toString) + val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path)) + appArgs.executorCores should be("16") + appArgs.executorMemory should be("3g") + appArgs.driverMemory should be("4g") + } + } + } + + test("SPARK-48392: should skip spark-defaults.conf when --load-spark-defaults is not set") { + forConfDir(Map("spark.executor.memory" -> "3g", "spark.driver.memory" -> "3g")) { path => + withPropertyFile("spark-conf.properties", + Map("spark.executor.cores" -> "16", "spark.driver.memory" -> "4g")) { propsFile => + val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + val args = Seq( + "--class", SimpleApplicationTest.getClass.getName.stripSuffix("$"), + "--name", "testApp", + "--master", "local", + "--properties-file", propsFile, + unusedJar.toString) + val appArgs = new SparkSubmitArguments(args, env = Map("SPARK_CONF_DIR" -> path)) + appArgs.executorCores should be("16") + appArgs.driverMemory should be("4g") + 
appArgs.executorMemory should be(null) + } + } + } + test("support glob path") { withTempDir { tmpJarDir => withTempDir { tmpFileDir => @@ -1623,6 +1660,22 @@ class SparkSubmitSuite } } + private def withPropertyFile(fileName: String, conf: Map[String, String])(f: String => Unit) = { + withTempDir { tmpDir => + val props = new java.util.Properties() + val propsFile = File.createTempFile(fileName, "", tmpDir) + val propsOutputStream = new FileOutputStream(propsFile) + try { + conf.foreach { case (k, v) => props.put(k, v) } + props.store(propsOutputStream, "") + } finally { + propsOutputStream.close() + } + + f(propsFile.getPath) + } + } + private def updateConfWithFakeS3Fs(conf: Configuration): Unit = { conf.set("fs.s3a.impl", classOf[TestFileSystem].getCanonicalName) conf.set("fs.s3a.impl.disable.cache", "true") @@ -1694,40 +1747,31 @@ class SparkSubmitSuite val infixDelimFromFile = s"${delimKey}infixDelimFromFile" -> s"${CR}blah${LF}" val nonDelimSpaceFromFile = s"${delimKey}nonDelimSpaceFromFile" -> " blah\f" - val testProps = Seq(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile, + val testProps = Map(leadingDelimKeyFromFile, trailingDelimKeyFromFile, infixDelimFromFile, nonDelimSpaceFromFile) - val props = new java.util.Properties() - val propsFile = File.createTempFile("test-spark-conf", ".properties", - Utils.createTempDir()) - val propsOutputStream = new FileOutputStream(propsFile) - try { - testProps.foreach { case (k, v) => props.put(k, v) } - props.store(propsOutputStream, "test whitespace") - } finally { - propsOutputStream.close() - } + withPropertyFile("test-spark-conf.properties", testProps) { propsFile => + val clArgs = Seq( + "--class", "org.SomeClass", + "--conf", s"${lineFeedFromCommandLine._1}=${lineFeedFromCommandLine._2}", + "--conf", "spark.master=yarn", + "--properties-file", propsFile, + "thejar.jar") - val clArgs = Seq( - "--class", "org.SomeClass", - "--conf", s"${lineFeedFromCommandLine._1}=${lineFeedFromCommandLine._2}", - "--conf", "spark.master=yarn", - "--properties-file", propsFile.getPath, - "thejar.jar") + val appArgs = new SparkSubmitArguments(clArgs) + val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) - val appArgs = new SparkSubmitArguments(clArgs) - val (_, _, conf, _) = submit.prepareSubmitEnvironment(appArgs) + Seq( + lineFeedFromCommandLine, + leadingDelimKeyFromFile, + trailingDelimKeyFromFile, + infixDelimFromFile + ).foreach { case (k, v) => + conf.get(k) should be (v) + } - Seq( - lineFeedFromCommandLine, - leadingDelimKeyFromFile, - trailingDelimKeyFromFile, - infixDelimFromFile - ).foreach { case (k, v) => - conf.get(k) should be (v) + conf.get(nonDelimSpaceFromFile._1) should be ("blah") } - - conf.get(nonDelimSpaceFromFile._1) should be ("blah") } test("get a Spark configuration from arguments") { diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala index 5903ae71ec66e..2b9b110a41424 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets._ import com.google.common.io.Files import org.apache.spark._ +import org.apache.spark.internal.config.{ConfigEntry, History} import org.apache.spark.internal.config.History._ import org.apache.spark.internal.config.Tests._ @@ -52,4 +53,16 @@ class 
HistoryServerArgumentsSuite extends SparkFunSuite { assert(conf.get("spark.test.CustomPropertyB") === "notblah") } } + + test("SPARK-48471: all history configurations should have documentations") { + val configs = History.getClass.getDeclaredFields + .filter(f => classOf[ConfigEntry[_]].isAssignableFrom(f.getType)) + .map { f => + f.setAccessible(true) + f.get(History).asInstanceOf[ConfigEntry[_]] + } + configs.foreach { config => + assert(config.doc.nonEmpty, s"Config ${config.key} doesn't have documentation") + } + } } diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index 0d739b647eab9..91a93bbe01d7f 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -626,28 +626,6 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with } test("access history application defaults to the last attempt id") { - - def getRedirectUrl(url: URL): (Int, String) = { - val connection = url.openConnection().asInstanceOf[HttpURLConnection] - connection.setRequestMethod("GET") - connection.setUseCaches(false) - connection.setDefaultUseCaches(false) - connection.setInstanceFollowRedirects(false) - connection.connect() - val code = connection.getResponseCode() - val location = connection.getHeaderField("Location") - (code, location) - } - - def buildPageAttemptUrl(appId: String, attemptId: Option[Int]): URL = { - attemptId match { - case Some(id) => - new URL(s"http://$localhost:$port/history/$appId/$id") - case None => - new URL(s"http://$localhost:$port/history/$appId") - } - } - val oneAttemptAppId = "local-1430917381534" HistoryServerSuite.getUrl(buildPageAttemptUrl(oneAttemptAppId, None)) @@ -668,6 +646,42 @@ abstract class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with } } + test("Redirect URLs should end with a slash") { + val oneAttemptAppId = "local-1430917381534" + val multiAttemptAppid = "local-1430917381535" + + val url = buildPageAttemptUrl(oneAttemptAppId, None) + val (code, location) = getRedirectUrl(url) + assert(code === 302, s"Unexpected status code $code for $url") + assert(location === url.toString + "/") + + val url2 = buildPageAttemptUrl(multiAttemptAppid, None) + val (code2, location2) = getRedirectUrl(url2) + assert(code2 === 302, s"Unexpected status code $code2 for $url2") + assert(location2 === url2.toString + "/2/") + } + + def getRedirectUrl(url: URL): (Int, String) = { + val connection = url.openConnection().asInstanceOf[HttpURLConnection] + connection.setRequestMethod("GET") + connection.setUseCaches(false) + connection.setDefaultUseCaches(false) + connection.setInstanceFollowRedirects(false) + connection.connect() + val code = connection.getResponseCode() + val location = connection.getHeaderField("Location") + (code, location) + } + + def buildPageAttemptUrl(appId: String, attemptId: Option[Int]): URL = { + attemptId match { + case Some(id) => + new URL(s"http://$localhost:$port/history/$appId/$id") + case None => + new URL(s"http://$localhost:$port/history/$appId") + } + } + def getContentAndCode(path: String, port: Int = port): (Int, Option[String], Option[String]) = { HistoryServerSuite.getContentAndCode(new URL(s"http://$localhost:$port/api/v1/$path")) } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineBenchmark.scala 
b/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineBenchmark.scala index 34a447efe5281..2a06ee5ed947b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineBenchmark.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineBenchmark.scala @@ -29,7 +29,7 @@ import org.apache.spark.deploy.{DeployTestUtils, DriverDescription} import org.apache.spark.internal.config.Deploy.ZOOKEEPER_URL import org.apache.spark.io.CompressionCodec import org.apache.spark.resource.ResourceUtils.{FPGA, GPU} -import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} +import org.apache.spark.serializer.JavaSerializer import org.apache.spark.util.Utils @@ -49,7 +49,7 @@ import org.apache.spark.util.Utils object PersistenceEngineBenchmark extends BenchmarkBase { val conf = new SparkConf() - val serializers = Seq(new JavaSerializer(conf), new KryoSerializer(conf)) + val serializers = Seq(new JavaSerializer(conf)) val zkTestServer = new TestingServer(findFreePort(conf)) override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { diff --git a/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineSuite.scala index 01b7e46eb2a8f..6839afdeeff8e 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/PersistenceEngineSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.internal.config.Deploy.ZOOKEEPER_URL import org.apache.spark.io.CompressionCodec import org.apache.spark.rpc.{RpcEndpoint, RpcEnv} -import org.apache.spark.serializer.{JavaSerializer, KryoSerializer, Serializer} +import org.apache.spark.serializer.{JavaSerializer, Serializer} import org.apache.spark.util.Utils class PersistenceEngineSuite extends SparkFunSuite { @@ -103,18 +103,6 @@ class PersistenceEngineSuite extends SparkFunSuite { } } - test("SPARK-46205: Support KryoSerializer in FileSystemPersistenceEngine") { - withTempDir { dir => - val conf = new SparkConf() - val serializer = new KryoSerializer(conf) - val engine = new FileSystemPersistenceEngine(dir.getAbsolutePath, serializer) - engine.persist("test_1", "test_1_value") - engine.read[String]("test_1") - engine.unpersist("test_1") - engine.close() - } - } - test("SPARK-46216: FileSystemPersistenceEngine with compression") { val conf = new SparkConf() CompressionCodec.ALL_COMPRESSION_CODECS.foreach { c => diff --git a/core/src/test/scala/org/apache/spark/deploy/master/RecoverySuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/RecoverySuite.scala index 5e2939738cdfb..18b22e7352c92 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/RecoverySuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/RecoverySuite.scala @@ -38,7 +38,7 @@ import org.apache.spark.io.LZ4CompressionCodec import org.apache.spark.resource.{ResourceInformation, ResourceProfile, ResourceRequirement} import org.apache.spark.resource.ResourceUtils.{FPGA, GPU} import org.apache.spark.rpc.{RpcAddress, RpcEndpoint, RpcEndpointRef, RpcEnv} -import org.apache.spark.serializer.KryoSerializer +import org.apache.spark.serializer.JavaSerializer class RecoverySuite extends MasterSuiteBase { test("can use a custom recovery mode factory") { @@ -474,26 +474,6 @@ class RecoverySuite extends MasterSuiteBase { } } - test("SPARK-46205: Recovery with Kryo Serializer") { - val 
conf = new SparkConf(loadDefaults = false) - conf.set(RECOVERY_MODE, "FILESYSTEM") - conf.set(RECOVERY_SERIALIZER, "Kryo") - conf.set(RECOVERY_DIRECTORY, System.getProperty("java.io.tmpdir")) - - var master: Master = null - try { - master = makeAliveMaster(conf) - val e = master.invokePrivate(_persistenceEngine()).asInstanceOf[FileSystemPersistenceEngine] - assert(e.serializer.isInstanceOf[KryoSerializer]) - } finally { - if (master != null) { - master.rpcEnv.shutdown() - master.rpcEnv.awaitTermination() - master = null - } - } - } - test("SPARK-46216: Recovery without compression") { val conf = new SparkConf(loadDefaults = false) conf.set(RECOVERY_MODE, "FILESYSTEM") @@ -536,14 +516,13 @@ class RecoverySuite extends MasterSuiteBase { test("SPARK-46258: Recovery with RocksDB") { val conf = new SparkConf(loadDefaults = false) conf.set(RECOVERY_MODE, "ROCKSDB") - conf.set(RECOVERY_SERIALIZER, "Kryo") conf.set(RECOVERY_DIRECTORY, System.getProperty("java.io.tmpdir")) var master: Master = null try { master = makeAliveMaster(conf) val e = master.invokePrivate(_persistenceEngine()).asInstanceOf[RocksDBPersistenceEngine] - assert(e.serializer.isInstanceOf[KryoSerializer]) + assert(e.serializer.isInstanceOf[JavaSerializer]) } finally { if (master != null) { master.rpcEnv.shutdown() diff --git a/core/src/test/scala/org/apache/spark/deploy/master/ui/ReadOnlyMasterWebUISuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/ui/ReadOnlyMasterWebUISuite.scala index d7f05754a7cd1..20ff932eb01a3 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/ui/ReadOnlyMasterWebUISuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/ui/ReadOnlyMasterWebUISuite.scala @@ -17,7 +17,9 @@ package org.apache.spark.deploy.master.ui -import jakarta.servlet.http.HttpServletResponse.SC_METHOD_NOT_ALLOWED +import scala.io.Source + +import jakarta.servlet.http.HttpServletResponse.{SC_METHOD_NOT_ALLOWED, SC_OK} import org.mockito.Mockito.{mock, when} import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} @@ -74,4 +76,14 @@ class ReadOnlyMasterWebUISuite extends SparkFunSuite { val body = convPostDataToString(hostnames.map(("host", _))) assert(sendHttpRequest(url, "POST", body).getResponseCode === SC_METHOD_NOT_ALLOWED) } + + test("SPARK-47894: /environment") { + val url = s"http://${Utils.localHostNameForURI()}:${masterWebUI.boundPort}/environment" + val conn = sendHttpRequest(url, "GET", "") + assert(conn.getResponseCode === SC_OK) + val result = Source.fromInputStream(conn.getInputStream).mkString + assert(result.contains("Runtime Information")) + assert(result.contains("Spark Properties")) + assert(result.contains("Hadoop Properties")) + } } diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala index 26c1be259ad23..1d258888fa0bc 100644 --- a/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileInputFormatSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.input import java.io.{DataOutputStream, File, FileOutputStream} -import scala.collection.immutable.IndexedSeq - import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} /** diff --git a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala index 0fc0b7536067e..9caf778de3848 100644 --- 
a/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala +++ b/core/src/test/scala/org/apache/spark/input/WholeTextFileRecordReaderSuite.scala @@ -21,8 +21,6 @@ import java.io.DataOutputStream import java.io.File import java.io.FileOutputStream -import scala.collection.immutable.IndexedSeq - import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.Text import org.apache.hadoop.io.compress.{CompressionCodecFactory, GzipCodec} diff --git a/core/src/test/scala/org/apache/spark/internal/config/ConfigEntrySuite.scala b/core/src/test/scala/org/apache/spark/internal/config/ConfigEntrySuite.scala index 38063c47ec96a..ae99735084056 100644 --- a/core/src/test/scala/org/apache/spark/internal/config/ConfigEntrySuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/config/ConfigEntrySuite.scala @@ -196,12 +196,6 @@ class ConfigEntrySuite extends SparkFunSuite { assert(conversionError.getMessage === s"${conversionTest.key} should be double, but was abc") } - test("default value handling is null-safe") { - val conf = new SparkConf() - val stringConf = ConfigBuilder(testKey("string")).stringConf.createWithDefault(null) - assert(conf.get(stringConf) === null) - } - test("variable expansion of spark config entries") { val env = Map("ENV1" -> "env1") val conf = new SparkConfWithEnv(env) @@ -220,7 +214,7 @@ class ConfigEntrySuite extends SparkFunSuite { val refConf = ConfigBuilder(testKey("configReferenceTest")) .stringConf - .createWithDefault(null) + .createWithDefault("") def ref(entry: ConfigEntry[_]): String = "${" + entry.key + "}" @@ -250,12 +244,6 @@ class ConfigEntrySuite extends SparkFunSuite { // Make sure SparkConf's env override works. conf.set(refConf, "${env:ENV1}") assert(conf.get(refConf) === env("ENV1")) - - // Conf with null default value is not expanded. 
- val nullConf = ConfigBuilder(testKey("nullString")) - .stringConf - .createWithDefault(null) - testEntryRef(nullConf, ref(nullConf)) } test("conf entry : default function") { diff --git a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala index cdbe5553bc95d..79fa8d21bf3f1 100644 --- a/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala +++ b/core/src/test/scala/org/apache/spark/internal/plugin/PluginContainerSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.internal.plugin import java.io.File import java.nio.charset.StandardCharsets import java.util.{Map => JMap} +import java.util.concurrent.CountDownLatch import java.util.concurrent.atomic.AtomicInteger import scala.concurrent.duration._ @@ -40,6 +41,7 @@ import org.apache.spark.memory.MemoryMode import org.apache.spark.resource.ResourceInformation import org.apache.spark.resource.ResourceUtils.GPU import org.apache.spark.resource.TestResourceIDs.{DRIVER_GPU_ID, EXECUTOR_GPU_ID, WORKER_GPU_ID} +import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent} import org.apache.spark.util.Utils class PluginContainerSuite extends SparkFunSuite with LocalSparkContext { @@ -256,6 +258,40 @@ class PluginContainerSuite extends SparkFunSuite with LocalSparkContext { } } } + + test("The plugin should be shutdown before the listener bus is stopped") { + + val conf = new SparkConf() + .setAppName(getClass().getName()) + .set(SparkLauncher.SPARK_MASTER, "local[1]") + .set(PLUGINS, Seq(classOf[TestSparkPlugin].getName())) + + val sc = new SparkContext(conf) + + val countDownLatch = new CountDownLatch(1) + sc.addSparkListener(new SparkListener { + + override def onOtherEvent(event: SparkListenerEvent): Unit = { + event match { + case _: TestSparkPluginEvent => + // Count down upon receiving the event sent from the plugin during shutdown. + countDownLatch.countDown() + } + } + }) + + TestSparkPlugin.driverPluginShutdownHook = () => { + // The listener bus should still be active when the plugin is shutdown + sc.listenerBus.post(TestSparkPluginEvent()) + } + + // Stop the context + sc.stop() + countDownLatch.await() + // The listener should receive the event posted by the plugin on shutdown. + // If the listener bus is stopped before the plugin is shutdown, + // then the event will be dropped and won't be delivered to the listener. 
+ } } class MemoryOverridePlugin extends SparkPlugin { @@ -392,6 +428,12 @@ private class TestDriverPlugin extends DriverPlugin { case other => throw new IllegalArgumentException(s"unknown: $other") } + override def shutdown(): Unit = { + if (TestSparkPlugin.driverPluginShutdownHook != null) { + TestSparkPlugin.driverPluginShutdownHook() + } + } + } private class TestExecutorPlugin extends ExecutorPlugin { @@ -420,9 +462,12 @@ private class TestExecutorPlugin extends ExecutorPlugin { } } +case class TestSparkPluginEvent() extends SparkListenerEvent + private object TestSparkPlugin { var driverPlugin: TestDriverPlugin = _ var driverContext: PluginContext = _ + var driverPluginShutdownHook: () => Unit = _ var executorPlugin: TestExecutorPlugin = _ var executorContext: PluginContext = _ @@ -432,6 +477,7 @@ private object TestSparkPlugin { def reset(): Unit = { driverPlugin = null driverContext = null + driverPluginShutdownHook = null executorPlugin = null executorContext = null extraConf = null diff --git a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala index 729fcecff1207..5c09a1f965b9e 100644 --- a/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala +++ b/core/src/test/scala/org/apache/spark/io/CompressionCodecSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream} +import java.util.Locale import com.google.common.io.ByteStreams @@ -160,4 +161,18 @@ class CompressionCodecSuite extends SparkFunSuite { ByteStreams.readFully(concatenatedBytes, decompressed) assert(decompressed.toSeq === (0 to 127)) } + + test("SPARK-48506: CompressionCodec getShortName is case insensitive for short names") { + CompressionCodec.shortCompressionCodecNames.foreach { case (shortName, codecClass) => + assert(CompressionCodec.getShortName(shortName) === shortName) + assert(CompressionCodec.getShortName(shortName.toUpperCase(Locale.ROOT)) === shortName) + assert(CompressionCodec.getShortName(codecClass) === shortName) + checkError( + exception = intercept[SparkIllegalArgumentException] { + CompressionCodec.getShortName(codecClass.toUpperCase(Locale.ROOT)) + }, + errorClass = "CODEC_SHORT_NAME_NOT_FOUND", + parameters = Map("codecName" -> codecClass.toUpperCase(Locale.ROOT))) + } + } } diff --git a/core/src/test/scala/org/apache/spark/io/LZFBenchmark.scala b/core/src/test/scala/org/apache/spark/io/LZFBenchmark.scala new file mode 100644 index 0000000000000..1934bd5169703 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/io/LZFBenchmark.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.io + +import java.io.{ByteArrayOutputStream, ObjectOutputStream} +import java.lang.management.ManagementFactory + +import org.apache.spark.SparkConf +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.internal.config.IO_COMPRESSION_LZF_PARALLEL + +/** + * Benchmark for LZF codec performance. + * {{{ + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class <this class> <spark core test jar> + * 2. build/sbt "core/Test/runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "core/Test/runMain <this class>" + * Results will be written to "benchmarks/LZFBenchmark-results.txt". + * }}} + */ +object LZFBenchmark extends BenchmarkBase { + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + runBenchmark("Benchmark LZFCompressionCodec") { + compressSmallObjects() + compressLargeObjects() + } + } + + private def compressSmallObjects(): Unit = { + val N = 256_000_000 + val benchmark = new Benchmark("Compress small objects", N, output = output) + Seq(true, false).foreach { parallel => + val conf = new SparkConf(false).set(IO_COMPRESSION_LZF_PARALLEL, parallel) + val condition = if (parallel) "in parallel" else "single-threaded" + benchmark.addCase(s"Compression $N int values $condition") { _ => + val os = new LZFCompressionCodec(conf).compressedOutputStream(new ByteArrayOutputStream()) + for (i <- 1 until N) { + os.write(i) + } + os.close() + } + } + benchmark.run() + } + + private def compressLargeObjects(): Unit = { + val N = 1024 + val data: Array[Byte] = (1 until 128 * 1024 * 1024).map(_.toByte).toArray + val benchmark = new Benchmark(s"Compress large objects", N, output = output) + + // com.ning.compress.lzf.parallel.PLZFOutputStream.getNThreads + def getNThreads: Int = { + var nThreads = Runtime.getRuntime.availableProcessors + val jmx = ManagementFactory.getOperatingSystemMXBean + if (jmx != null) { + val loadAverage = jmx.getSystemLoadAverage.toInt + if (nThreads > 1 && loadAverage >= 1) nThreads = Math.max(1, nThreads - loadAverage) + } + nThreads + } + Seq(true, false).foreach { parallel => + val conf = new SparkConf(false).set(IO_COMPRESSION_LZF_PARALLEL, parallel) + val condition = if (parallel) s"in $getNThreads threads" else "single-threaded" + benchmark.addCase(s"Compression $N array values $condition") { _ => + val baos = new ByteArrayOutputStream() + val zcos = new LZFCompressionCodec(conf).compressedOutputStream(baos) + val oos = new ObjectOutputStream(zcos) + 1 to N foreach { _ => + oos.writeObject(data) + } + oos.close() + } + } + benchmark.run() + } +} diff --git a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala index c94e2d76b9f8c..7c5db914cd5ba 100644 --- a/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala +++ b/core/src/test/scala/org/apache/spark/rdd/RDDSuite.scala @@ -914,6 +914,22 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { assert(c.cartesian[Int](a).collect().toList.sorted === c_cartesian_a) } + test("SPARK-48656: number of cartesian partitions overflow") { + val numSlices: Int = 65536 + val rdd1 = sc.parallelize(Seq(1, 2, 3), numSlices = numSlices) + val rdd2 = sc.parallelize(Seq(1, 2, 3), numSlices = numSlices) + checkError( + exception = intercept[SparkIllegalArgumentException] { + rdd1.cartesian(rdd2).partitions + }, + errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.INITIALIZE", + sqlState = "54000", + parameters = Map( + "numberOfElements" -> (numSlices.toLong *
numSlices.toLong).toString, + "maxRoundedArrayLength" -> Int.MaxValue.toString) + ) + } + test("intersection") { val all = sc.parallelize(1 to 10) val evens = sc.parallelize(2 to 10 by 2) @@ -1317,7 +1333,9 @@ class RDDSuite extends SparkFunSuite with SharedSparkContext with Eventually { val thrown = intercept[IllegalStateException] { block } - assert(thrown.getMessage.contains("stopped")) + assert(thrown.getMessage.contains("Cannot call methods on a stopped SparkContext")) + assert(thrown.getMessage.contains("This stopped SparkContext was created at:")) + assert(thrown.getMessage.contains("And it was stopped at:")) } assertFails { sc.parallelize(1 to 100) } assertFails { sc.textFile("/nonexistent-path") } diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala index fcacd223814c7..ecc91560714d1 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorIntegrationSuite.scala @@ -19,8 +19,9 @@ package org.apache.spark.scheduler import org.apache.hadoop.mapred.{FileOutputCommitter, TaskAttemptContext} import org.scalatest.concurrent.{Signaler, ThreadSignaler, TimeLimits} +import org.scalatest.time.{Seconds, Span} -import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkException, SparkFunSuite, TaskContext} +import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite, TaskContext} /** * Integration tests for the OutputCommitCoordinator. @@ -44,15 +45,13 @@ class OutputCommitCoordinatorIntegrationSuite sc = new SparkContext("local[2, 4]", "test", conf) } - test("SPARK-39195: exception thrown in OutputCommitter.commitTask()") { + test("exception thrown in OutputCommitter.commitTask()") { // Regression test for SPARK-10381 - val e = intercept[SparkException] { + failAfter(Span(60, Seconds)) { withTempDir { tempDir => sc.parallelize(1 to 4, 2).map(_.toString).saveAsTextFile(tempDir.getAbsolutePath + "/out") } - }.getCause.getMessage - assert(e.contains("failed; but task commit success, data duplication may happen.") && - e.contains("Intentional exception")) + } } } diff --git a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala index f1a4b97c2981d..46b95177e7719 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/OutputCommitCoordinatorSuite.scala @@ -87,12 +87,11 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { isLocal: Boolean, listenerBus: LiveListenerBus): SparkEnv = { outputCommitCoordinator = - spy[OutputCommitCoordinator]( - new OutputCommitCoordinator(conf, isDriver = true, Option(this))) + spy[OutputCommitCoordinator](new OutputCommitCoordinator(conf, isDriver = true)) // Use Mockito.spy() to maintain the default infrastructure everywhere else. // This mocking allows us to control the coordinator responses in test cases. 
SparkEnv.createDriverEnv(conf, isLocal, listenerBus, - SparkContext.numDriverCores(master), this, Some(outputCommitCoordinator)) + SparkContext.numDriverCores(master), Some(outputCommitCoordinator)) } } // Use Mockito.spy() to maintain the default infrastructure everywhere else @@ -190,9 +189,12 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { // The authorized committer now fails, clearing the lock outputCommitCoordinator.taskCompleted(stage, stageAttempt, partition, attemptNumber = authorizedCommitter, reason = TaskKilled("test")) - // A new task should not be allowed to become stage failed because of potential data duplication - assert(!outputCommitCoordinator.canCommit(stage, stageAttempt, partition, + // A new task should now be allowed to become the authorized committer + assert(outputCommitCoordinator.canCommit(stage, stageAttempt, partition, nonAuthorizedCommitter + 2)) + // There can only be one authorized committer + assert(!outputCommitCoordinator.canCommit(stage, stageAttempt, partition, + nonAuthorizedCommitter + 3)) } test("SPARK-19631: Do not allow failed attempts to be authorized for committing") { @@ -226,8 +228,7 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { assert(outputCommitCoordinator.canCommit(stage, 2, partition, taskAttempt)) // Commit the 1st attempt, fail the 2nd attempt, make sure 3rd attempt cannot commit, - // then fail the 1st attempt and since stage failed because of potential data duplication, - // make sure fail the 4th attempt. + // then fail the 1st attempt and make sure the 4th one can commit again. stage += 1 outputCommitCoordinator.stageStart(stage, maxPartitionId = 1) assert(outputCommitCoordinator.canCommit(stage, 1, partition, taskAttempt)) @@ -236,9 +237,7 @@ class OutputCommitCoordinatorSuite extends SparkFunSuite with BeforeAndAfter { assert(!outputCommitCoordinator.canCommit(stage, 3, partition, taskAttempt)) outputCommitCoordinator.taskCompleted(stage, 1, partition, taskAttempt, ExecutorLostFailure("0", exitCausedByApp = true, None)) - // A new task should not be allowed to become the authorized committer since stage failed - // because of potential data duplication - assert(!outputCommitCoordinator.canCommit(stage, 4, partition, taskAttempt)) + assert(outputCommitCoordinator.canCommit(stage, 4, partition, taskAttempt)) } test("SPARK-24589: Make sure stage state is cleaned up") { diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala index 9e52b5e15143b..99402abb16cac 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/SortShuffleWriterSuite.scala @@ -85,6 +85,7 @@ class SortShuffleWriterSuite shuffleHandle, mapId = 1, context, + context.taskMetrics().shuffleWriteMetrics, shuffleExecutorComponents) writer.write(Iterator.empty) writer.stop(success = true) @@ -102,6 +103,7 @@ class SortShuffleWriterSuite shuffleHandle, mapId = 2, context, + context.taskMetrics().shuffleWriteMetrics, shuffleExecutorComponents) writer.write(records.iterator) writer.stop(success = true) @@ -158,6 +160,7 @@ class SortShuffleWriterSuite shuffleHandle, mapId = 0, context, + context.taskMetrics().shuffleWriteMetrics, new LocalDiskShuffleExecutorComponents( conf, shuffleBlockResolver._blockManager, shuffleBlockResolver)) writer.write(records.iterator) diff --git 
a/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala index 3db7527262568..7ab2cb864234f 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala @@ -71,7 +71,7 @@ class LocalDiskShuffleMapOutputWriterSuite extends SparkFunSuite { partitionSizesInMergedFile = null conf = new SparkConf() .set("spark.app.id", "example.spark.app") - .set("spark.shuffle.unsafe.file.output.buffer", "16k") + .set("spark.shuffle.localDisk.file.output.buffer", "16k") when(blockResolver.getDataFile(anyInt, anyLong)).thenReturn(mergedOutputFile) when(blockResolver.createTempFile(any(classOf[File]))) .thenAnswer { invocationOnMock => diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala index ba665600a1cb7..febe1ac4bb4cf 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerDecommissionIntegrationSuite.scala @@ -17,12 +17,14 @@ package org.apache.spark.storage +import java.io.File import java.util.concurrent.{ConcurrentHashMap, ConcurrentLinkedQueue, Semaphore, TimeUnit} import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ import scala.jdk.CollectionConverters._ +import org.apache.commons.io.FileUtils import org.scalatest.concurrent.Eventually import org.apache.spark._ @@ -353,4 +355,78 @@ class BlockManagerDecommissionIntegrationSuite extends SparkFunSuite with LocalS import scala.language.reflectiveCalls assert(listener.removeReasonValidated) } + + test("SPARK-46957: Migrated shuffle files should be able to cleanup from executor") { + + val sparkTempDir = System.getProperty("java.io.tmpdir") + + def shuffleFiles: Seq[File] = { + FileUtils + .listFiles(new File(sparkTempDir), Array("data", "index"), true) + .asScala + .toSeq + } + + val existingShuffleFiles = shuffleFiles + + val conf = new SparkConf() + .setAppName("SPARK-46957") + .setMaster("local-cluster[2,1,1024]") + .set(config.DECOMMISSION_ENABLED, true) + .set(config.STORAGE_DECOMMISSION_ENABLED, true) + .set(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, true) + sc = new SparkContext(conf) + TestUtils.waitUntilExecutorsUp(sc, 2, 60000) + val shuffleBlockUpdates = new ArrayBuffer[BlockId]() + var isDecommissionedExecutorRemoved = false + val execToDecommission = sc.getExecutorIds().head + sc.addSparkListener(new SparkListener { + override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { + if (blockUpdated.blockUpdatedInfo.blockId.isShuffle) { + shuffleBlockUpdates += blockUpdated.blockUpdatedInfo.blockId + } + } + + override def onExecutorRemoved(executorRemoved: SparkListenerExecutorRemoved): Unit = { + assert(execToDecommission === executorRemoved.executorId) + isDecommissionedExecutorRemoved = true + } + }) + + // Run a job to create shuffle data + val result = sc.parallelize(1 to 1000, 10) + .map { i => (i % 2, i) } + .reduceByKey(_ + _).collect() + + assert(result.head === (0, 250500)) + assert(result.tail.head === (1, 250000)) + sc.schedulerBackend + .asInstanceOf[StandaloneSchedulerBackend] + .decommissionExecutor( + execToDecommission, + 
ExecutorDecommissionInfo("test", None), + adjustTargetNumExecutors = true + ) + + eventually(timeout(1.minute), interval(10.milliseconds)) { + assert(isDecommissionedExecutorRemoved) + // Ensure there are shuffle data have been migrated + assert(shuffleBlockUpdates.size >= 2) + } + + val shuffleId = shuffleBlockUpdates + .find(_.isInstanceOf[ShuffleIndexBlockId]) + .map(_.asInstanceOf[ShuffleIndexBlockId].shuffleId) + .get + + val newShuffleFiles = shuffleFiles.diff(existingShuffleFiles) + assert(newShuffleFiles.size >= shuffleBlockUpdates.size) + + // Remove the shuffle data + sc.shuffleDriverComponents.removeShuffle(shuffleId, true) + + eventually(timeout(1.minute), interval(10.milliseconds)) { + assert(newShuffleFiles.intersect(shuffleFiles).isEmpty) + } + } } diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockObjectWriterSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockObjectWriterSuite.scala index 70a57eed07acd..4352436c872fe 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskBlockObjectWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockObjectWriterSuite.scala @@ -16,11 +16,14 @@ */ package org.apache.spark.storage -import java.io.File +import java.io.{File, InputStream, OutputStream} +import java.nio.ByteBuffer + +import scala.reflect.ClassTag import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.executor.ShuffleWriteMetrics -import org.apache.spark.serializer.{JavaSerializer, SerializerManager} +import org.apache.spark.serializer.{DeserializationStream, JavaSerializer, SerializationStream, Serializer, SerializerInstance, SerializerManager} import org.apache.spark.util.Utils class DiskBlockObjectWriterSuite extends SparkFunSuite { @@ -43,10 +46,14 @@ class DiskBlockObjectWriterSuite extends SparkFunSuite { private def createWriter(): (DiskBlockObjectWriter, File, ShuffleWriteMetrics) = { val file = new File(tempDir, "somefile") val conf = new SparkConf() - val serializerManager = new SerializerManager(new JavaSerializer(conf), conf) + val serializerManager = new CustomSerializerManager(new JavaSerializer(conf), conf, None) val writeMetrics = new ShuffleWriteMetrics() val writer = new DiskBlockObjectWriter( - file, serializerManager, new JavaSerializer(new SparkConf()).newInstance(), 1024, true, + file, + serializerManager, + new CustomJavaSerializer(new SparkConf()).newInstance(), + 1024, + true, writeMetrics) (writer, file, writeMetrics) } @@ -196,9 +203,76 @@ class DiskBlockObjectWriterSuite extends SparkFunSuite { for (i <- 1 to 500) { writer.write(i, i) } + + val bs = writer.getSerializerWrappedStream.asInstanceOf[OutputStreamWithCloseDetecting] + val objOut = writer.getSerializationStream.asInstanceOf[SerializationStreamWithCloseDetecting] + writer.closeAndDelete() assert(!file.exists()) assert(writeMetrics.bytesWritten == 0) assert(writeMetrics.recordsWritten == 0) + assert(bs.isClosed) + assert(objOut.isClosed) + } +} + +trait CloseDetecting { + var isClosed = false +} + +class OutputStreamWithCloseDetecting(outputStream: OutputStream) + extends OutputStream + with CloseDetecting { + override def write(b: Int): Unit = outputStream.write(b) + + override def close(): Unit = { + isClosed = true + outputStream.close() + } +} + +class CustomSerializerManager( + defaultSerializer: Serializer, + conf: SparkConf, + encryptionKey: Option[Array[Byte]]) + extends SerializerManager(defaultSerializer, conf, encryptionKey) { + override def wrapStream(blockId: BlockId, s: 
OutputStream): OutputStream = { + new OutputStreamWithCloseDetecting(wrapForCompression(blockId, wrapForEncryption(s))) + } +} + +class CustomJavaSerializer(conf: SparkConf) extends JavaSerializer(conf) { + + override def newInstance(): SerializerInstance = { + new CustomJavaSerializerInstance(super.newInstance()) } } + +class SerializationStreamWithCloseDetecting(serializationStream: SerializationStream) + extends SerializationStream with CloseDetecting { + + override def close(): Unit = { + isClosed = true + serializationStream.close() + } + + override def writeObject[T: ClassTag](t: T): SerializationStream = + serializationStream.writeObject(t) + + override def flush(): Unit = serializationStream.flush() +} + +class CustomJavaSerializerInstance(instance: SerializerInstance) extends SerializerInstance { + override def serializeStream(s: OutputStream): SerializationStream = + new SerializationStreamWithCloseDetecting(instance.serializeStream(s)) + + override def serialize[T: ClassTag](t: T): ByteBuffer = instance.serialize(t) + + override def deserialize[T: ClassTag](bytes: ByteBuffer): T = instance.deserialize(bytes) + + override def deserialize[T: ClassTag](bytes: ByteBuffer, loader: ClassLoader): T = + instance.deserialize(bytes, loader) + + override def deserializeStream(s: InputStream): DeserializationStream = + instance.deserializeStream(s) +} diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index 02900e14b1f67..ca77d2c7b7097 100644 --- a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -392,9 +392,11 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT configureMockTransfer(Map()) val iterator = createShuffleBlockIteratorWithDefaults( - Map(hostLocalBmId -> toBlockList(hostLocalBlocks.keys, 1L, 1)) + Map(hostLocalBmId -> toBlockList(hostLocalBlocks.keys, 1L, 1)), + blockManager = Some(blockManager) ) intercept[FetchFailedException] { iterator.next() } + verify(mockExternalBlockStoreClient, times(1)).getHostLocalDirs(any(), any(), any(), any()) } test("Hit maxBytesInFlight limitation before maxBlocksInFlightPerAddress") { diff --git a/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala b/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala index 9e23b25493dfe..4843409661554 100644 --- a/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/SparkUncaughtExceptionHandlerSuite.scala @@ -22,6 +22,7 @@ import java.io.File import scala.util.Try import org.apache.spark.SparkFunSuite +import org.apache.spark.executor.{ExecutorExitCode, KilledByTaskReaperException} class SparkUncaughtExceptionHandlerSuite extends SparkFunSuite { @@ -33,6 +34,8 @@ class SparkUncaughtExceptionHandlerSuite extends SparkFunSuite { (ThrowableTypes.RuntimeException, false, 0), (ThrowableTypes.OutOfMemoryError, true, SparkExitCode.OOM), (ThrowableTypes.OutOfMemoryError, false, SparkExitCode.OOM), + (ThrowableTypes.KilledByTaskReaperException, true, ExecutorExitCode.KILLED_BY_TASK_REAPER), + (ThrowableTypes.KilledByTaskReaperException, false, 0), (ThrowableTypes.SparkFatalRuntimeException, true, SparkExitCode.UNCAUGHT_EXCEPTION), (ThrowableTypes.SparkFatalRuntimeException, false, 0), 
(ThrowableTypes.SparkFatalOutOfMemoryError, true, SparkExitCode.OOM), @@ -64,6 +67,8 @@ object ThrowableTypes extends Enumeration { val RuntimeException = ThrowableTypesVal("RuntimeException", new RuntimeException) val OutOfMemoryError = ThrowableTypesVal("OutOfMemoryError", new OutOfMemoryError) + val KilledByTaskReaperException = ThrowableTypesVal("KilledByTaskReaperException", + new KilledByTaskReaperException("dummy message")) val SparkFatalRuntimeException = ThrowableTypesVal("SparkFatalException(RuntimeException)", new SparkFatalException(new RuntimeException)) val SparkFatalOutOfMemoryError = ThrowableTypesVal("SparkFatalException(OutOfMemoryError)", diff --git a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala index d907fe1a27c83..04f661db691e5 100644 --- a/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/ThreadUtilsSuite.scala @@ -119,11 +119,46 @@ class ThreadUtilsSuite extends SparkFunSuite { runInNewThread("thread-name") { throw new IllegalArgumentException(uniqueExceptionMessage) } } assert(exception.getMessage === uniqueExceptionMessage) - assert(exception.getStackTrace.mkString("\n").contains( + val stacktrace = exception.getStackTrace.mkString("\n") + assert(stacktrace.contains( "... run in separate thread using org.apache.spark.util.ThreadUtils ..."), "stack trace does not contain expected place holder" ) - assert(exception.getStackTrace.mkString("\n").contains("ThreadUtils.scala") === false, + assert(!stacktrace.contains("ThreadUtils.scala"), + "stack trace contains unexpected references to ThreadUtils" + ) + } + + test("SPARK-47833: wrapCallerStacktrace") { + var runnerThreadName: String = null + var exception: Throwable = null + val t = new Thread() { + override def run(): Unit = { + runnerThreadName = Thread.currentThread().getName + internalMethod() + } + private def internalMethod(): Unit = { + throw new RuntimeException(s"Error occurred on $runnerThreadName") + } + } + t.setDaemon(true) + t.setUncaughtExceptionHandler { case (_, e) => exception = e } + t.start() + t.join() + + ThreadUtils.wrapCallerStacktrace(exception, s"run in separate thread: $runnerThreadName") + + val stacktrace = exception.getStackTrace.mkString("\n") + assert(stacktrace.contains("internalMethod"), + "stack trace does not contain real exception stack trace" + ) + assert(stacktrace.contains(s"... 
run in separate thread: $runnerThreadName ..."), + "stack trace does not contain expected place holder" + ) + assert(stacktrace.contains("org.scalatest.Suite.run"), + "stack trace does not contain caller stack trace" + ) + assert(!stacktrace.contains("ThreadUtils.scala"), "stack trace contains unexpected references to ThreadUtils" ) } diff --git a/dev/.scalafmt.conf b/dev/.scalafmt.conf index 9a01136dfaf88..bb16145f4df7d 100644 --- a/dev/.scalafmt.conf +++ b/dev/.scalafmt.conf @@ -27,4 +27,4 @@ danglingParentheses.preset = false docstrings.style = Asterisk maxColumn = 98 runner.dialect = scala213 -version = 3.8.0 +version = 3.8.2 diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml index 834265f48aa8c..677381704427c 100644 --- a/dev/checkstyle-suppressions.xml +++ b/dev/checkstyle-suppressions.xml @@ -64,4 +64,8 @@ files="src/main/java/org/apache/spark/sql/api/java/*"/> + + diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index 7add947428160..c4023a84ee3cf 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -71,6 +71,12 @@ + + + + + + + + + + diff --git a/dev/connect-gen-protos.sh b/dev/connect-gen-protos.sh index 9ae3bac45933d..33b06167d67cc 100755 --- a/dev/connect-gen-protos.sh +++ b/dev/connect-gen-protos.sh @@ -76,7 +76,7 @@ for f in `find gen/proto/python -name "*.py*"`; do sed -e "s/DESCRIPTOR, 'spark.connect/DESCRIPTOR, 'pyspark.sql.connect.proto/g" $f > $f.tmp mv $f.tmp $f elif [[ $f == *.pyi ]]; then - sed -e 's/import spark.connect./import pyspark.sql.connect.proto./g' -e 's/spark.connect./pyspark.sql.connect.proto./g' $f > $f.tmp + sed -e 's/import spark.connect./import pyspark.sql.connect.proto./g' -e 's/spark.connect./pyspark.sql.connect.proto./g' -e '/ *@typing_extensions\.final/d' $f > $f.tmp mv $f.tmp $f fi diff --git a/dev/create-release/do-release-docker.sh b/dev/create-release/do-release-docker.sh index c44d0193069b8..132f6b78c3db6 100755 --- a/dev/create-release/do-release-docker.sh +++ b/dev/create-release/do-release-docker.sh @@ -84,8 +84,8 @@ if [ ! -z "$RELEASE_STEP" ] && [ "$RELEASE_STEP" = "finalize" ]; then error "Exiting." fi - if [ -z "$PYPI_PASSWORD" ]; then - stty -echo && printf "PyPi password: " && read PYPI_PASSWORD && printf '\n' && stty echo + if [ -z "$PYPI_API_TOKEN" ]; then + stty -echo && printf "PyPi API token: " && read PYPI_API_TOKEN && printf '\n' && stty echo fi fi @@ -142,7 +142,7 @@ GIT_NAME=$GIT_NAME GIT_EMAIL=$GIT_EMAIL GPG_KEY=$GPG_KEY ASF_PASSWORD=$ASF_PASSWORD -PYPI_PASSWORD=$PYPI_PASSWORD +PYPI_API_TOKEN=$PYPI_API_TOKEN GPG_PASSPHRASE=$GPG_PASSPHRASE RELEASE_STEP=$RELEASE_STEP USER=$USER diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 62d172ef74ca4..19589b951a6e1 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -80,6 +80,9 @@ done export LC_ALL=C.UTF-8 export LANG=C.UTF-8 +export PYSPARK_PYTHON=/usr/local/bin/python +export PYSPARK_DRIVER_PYTHON=/usr/local/bin/python + # Commit ref to checkout when building GIT_REF=${GIT_REF:-master} @@ -95,8 +98,8 @@ init_java init_maven_sbt if [[ "$1" == "finalize" ]]; then - if [[ -z "$PYPI_PASSWORD" ]]; then - error 'The environment variable PYPI_PASSWORD is not set. Exiting.' + if [[ -z "$PYPI_API_TOKEN" ]]; then + error 'The environment variable PYPI_API_TOKEN is not set. Exiting.' 
fi git config --global user.name "$GIT_NAME" @@ -104,31 +107,36 @@ if [[ "$1" == "finalize" ]]; then # Create the git tag for the new release echo "Creating the git tag for the new release" - rm -rf spark - git clone "https://$ASF_USERNAME:$ASF_PASSWORD@$ASF_SPARK_REPO" -b master - cd spark - git tag "v$RELEASE_VERSION" "$RELEASE_TAG" - git push origin "v$RELEASE_VERSION" - cd .. - rm -rf spark - echo "git tag v$RELEASE_VERSION created" + if check_for_tag "v$RELEASE_VERSION"; then + echo "v$RELEASE_VERSION already exists. Skip creating it." + else + rm -rf spark + git clone "https://$ASF_USERNAME:$ASF_PASSWORD@$ASF_SPARK_REPO" -b master + cd spark + git tag "v$RELEASE_VERSION" "$RELEASE_TAG" + git push origin "v$RELEASE_VERSION" + cd .. + rm -rf spark + echo "git tag v$RELEASE_VERSION created" + fi # download PySpark binary from the dev directory and upload to PyPi. echo "Uploading PySpark to PyPi" svn co --depth=empty "$RELEASE_STAGING_LOCATION/$RELEASE_TAG-bin" svn-spark cd svn-spark - svn update "pyspark-$RELEASE_VERSION.tar.gz" - svn update "pyspark-$RELEASE_VERSION.tar.gz.asc" - TWINE_USERNAME=spark-upload TWINE_PASSWORD="$PYPI_PASSWORD" twine upload \ + PYSPARK_VERSION=`echo "$RELEASE_VERSION" | sed -e "s/-/./" -e "s/preview/dev/"` + svn update "pyspark-$PYSPARK_VERSION.tar.gz" + svn update "pyspark-$PYSPARK_VERSION.tar.gz.asc" + twine upload -u __token__ -p $PYPI_API_TOKEN \ --repository-url https://upload.pypi.org/legacy/ \ - "pyspark-$RELEASE_VERSION.tar.gz" \ - "pyspark-$RELEASE_VERSION.tar.gz.asc" - svn update "pyspark-connect-$RELEASE_VERSION.tar.gz" - svn update "pyspark-connect-$RELEASE_VERSION.tar.gz.asc" - TWINE_USERNAME=spark-upload TWINE_PASSWORD="$PYPI_PASSWORD" twine upload \ + "pyspark-$PYSPARK_VERSION.tar.gz" \ + "pyspark-$PYSPARK_VERSION.tar.gz.asc" + svn update "pyspark_connect-$PYSPARK_VERSION.tar.gz" + svn update "pyspark_connect-$PYSPARK_VERSION.tar.gz.asc" + twine upload -u __token__ -p $PYPI_API_TOKEN \ --repository-url https://upload.pypi.org/legacy/ \ - "pyspark-connect-$RELEASE_VERSION.tar.gz" \ - "pyspark-connect-$RELEASE_VERSION.tar.gz.asc" + "pyspark_connect-$PYSPARK_VERSION.tar.gz" \ + "pyspark_connect-$PYSPARK_VERSION.tar.gz.asc" cd .. rm -rf svn-spark echo "PySpark uploaded" @@ -194,6 +202,8 @@ fi PUBLISH_SCALA_2_12=1 if [[ $SPARK_VERSION > "3.5.99" ]]; then PUBLISH_SCALA_2_12=0 + # There is no longer scala-2.13 profile since 4.0.0 + SCALA_2_13_PROFILES="" fi SCALA_2_12_PROFILES="-Pscala-2.12" @@ -201,7 +211,7 @@ SCALA_2_12_PROFILES="-Pscala-2.12" HIVE_PROFILES="-Phive -Phive-thriftserver" # Profiles for publishing snapshots and release to Maven Central # We use Apache Hive 2.3 for publishing -PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud" +PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud -Pjvm-profiler" # Profiles for building binary releases BASE_RELEASE_PROFILES="$BASE_PROFILES -Psparkr" @@ -309,7 +319,7 @@ if [[ "$1" == "package" ]]; then --detach-sig $PYTHON_DIST_NAME shasum -a 512 $PYTHON_DIST_NAME > $PYTHON_DIST_NAME.sha512 - PYTHON_CONNECT_DIST_NAME=pyspark-connect-$PYSPARK_VERSION.tar.gz + PYTHON_CONNECT_DIST_NAME=pyspark_connect-$PYSPARK_VERSION.tar.gz cp spark-$SPARK_VERSION-bin-$NAME/python/dist/$PYTHON_CONNECT_DIST_NAME . 
echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \ @@ -345,21 +355,25 @@ if [[ "$1" == "package" ]]; then declare -A BINARY_PKGS_EXTRA BINARY_PKGS_EXTRA["hadoop3"]="withpip,withr" - if [[ $PUBLISH_SCALA_2_13 = 1 ]]; then - key="hadoop3-scala2.13" + # This is dead code as Scala 2.12 is no longer supported, but we keep it as a template for + # adding new Scala version support in the future. This secondary Scala version only has one + # binary package to avoid doubling the number of final packages. It doesn't build PySpark and + # SparkR as the primary Scala version will build them. + if [[ $PUBLISH_SCALA_2_12 = 1 ]]; then + key="hadoop3-scala2.12" args="-Phadoop-3 $HIVE_PROFILES" extra="" - if ! make_binary_release "$key" "$SCALA_2_13_PROFILES $args" "$extra" "2.13"; then + if ! make_binary_release "$key" "$SCALA_2_12_PROFILES $args" "$extra" "2.12"; then error "Failed to build $key package. Check logs for details." fi fi - if [[ $PUBLISH_SCALA_2_12 = 1 ]]; then + if [[ $PUBLISH_SCALA_2_13 = 1 ]]; then echo "Packages to build: ${!BINARY_PKGS_ARGS[@]}" for key in ${!BINARY_PKGS_ARGS[@]}; do args=${BINARY_PKGS_ARGS[$key]} extra=${BINARY_PKGS_EXTRA[$key]} - if ! make_binary_release "$key" "$SCALA_2_12_PROFILES $args" "$extra" "2.12"; then + if ! make_binary_release "$key" "$SCALA_2_13_PROFILES $args" "$extra" "2.13"; then error "Failed to build $key package. Check logs for details." fi done @@ -374,8 +388,8 @@ if [[ "$1" == "package" ]]; then echo "Copying release tarballs" cp spark-* "svn-spark/${DEST_DIR_NAME}-bin/" - cp pyspark-* "svn-spark/${DEST_DIR_NAME}-bin/" - cp SparkR_* "svn-spark/${DEST_DIR_NAME}-bin/" + cp pyspark* "svn-spark/${DEST_DIR_NAME}-bin/" + cp SparkR* "svn-spark/${DEST_DIR_NAME}-bin/" svn add "svn-spark/${DEST_DIR_NAME}-bin" cd svn-spark diff --git a/dev/create-release/release-util.sh b/dev/create-release/release-util.sh index 0394fb49c2fa0..b5edbf40d487d 100755 --- a/dev/create-release/release-util.sh +++ b/dev/create-release/release-util.sh @@ -128,6 +128,9 @@ function get_release_info { RC_COUNT=1 fi + if [ "$GIT_BRANCH" = "master" ]; then + RELEASE_VERSION="$RELEASE_VERSION-preview1" + fi export NEXT_VERSION export RELEASE_VERSION=$(read_config "Release" "$RELEASE_VERSION") diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index f51b24d583947..ca9e10bebfc53 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -15,74 +15,123 @@ # limitations under the License. # -# Image for building Spark releases. Based on Ubuntu 20.04. -# -# Includes: -# * Java 17 -# * Ivy -# * Python (3.8.5) -# * R-base/R-base-dev (4.0.3) -# * Ruby (2.7.0) -# -# You can test it as below: -# cd dev/create-release/spark-rm -# docker build -t spark-rm --build-arg UID=$UID . +# Image for building Spark releases. Based on Ubuntu 22.04. +FROM ubuntu:jammy-20240227 -FROM ubuntu:20.04 +ENV FULL_REFRESH_DATE 20240318 -# For apt to be noninteractive ENV DEBIAN_FRONTEND noninteractive ENV DEBCONF_NONINTERACTIVE_SEEN true -# These arguments are just for reuse and not really meant to be customized. 
-ARG APT_INSTALL="apt-get install --no-install-recommends -y" +RUN apt-get update && apt-get install -y \ + build-essential \ + ca-certificates \ + curl \ + gfortran \ + git \ + subversion \ + gnupg \ + libcurl4-openssl-dev \ + libfontconfig1-dev \ + libfreetype6-dev \ + libfribidi-dev \ + libgit2-dev \ + libharfbuzz-dev \ + libjpeg-dev \ + liblapack-dev \ + libopenblas-dev \ + libpng-dev \ + libpython3-dev \ + libssl-dev \ + libtiff5-dev \ + libxml2-dev \ + nodejs \ + npm \ + openjdk-17-jdk-headless \ + pandoc \ + pkg-config \ + python3.10 \ + python3-psutil \ + texlive-latex-base \ + texlive \ + texlive-fonts-extra \ + texinfo \ + texlive-latex-extra \ + qpdf \ + jq \ + r-base \ + ruby \ + ruby-dev \ + software-properties-common \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* -ARG PIP_PKGS="sphinx==4.5.0 mkdocs==1.1.2 numpy==1.20.3 pydata_sphinx_theme==0.13.3 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==3.1.2 twine==3.4.1 sphinx-plotly-directive==0.1.3 sphinx-copybutton==0.5.2 pandas==1.5.3 pyarrow==10.0.1 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17 grpcio==1.62.0 protobuf==4.21.6 grpcio-status==1.62.0 googleapis-common-protos==1.56.4" -ARG GEM_PKGS="bundler:2.3.8" -# Install extra needed repos and refresh. -# - CRAN repo -# - Ruby repo (for doc generation) -# -# This is all in a single "RUN" command so that if anything changes, "apt update" is run to fetch -# the most current package versions (instead of potentially using old versions cached by docker). -RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ - echo 'deb https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/' >> /etc/apt/sources.list && \ - gpg --keyserver hkps://keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9 && \ - gpg -a --export E084DAB9 | apt-key add - && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - apt-get clean && \ - apt-get update && \ - $APT_INSTALL software-properties-common && \ - apt-get update && \ - # Install openjdk 17. - $APT_INSTALL openjdk-17-jdk && \ - update-alternatives --set java $(ls /usr/lib/jvm/java-17-openjdk-*/bin/java) && \ - # Install build / source control tools - $APT_INSTALL curl wget git maven ivy subversion make gcc lsof libffi-dev \ - pandoc pandoc-citeproc libssl-dev libcurl4-openssl-dev libxml2-dev && \ - curl -sL https://deb.nodesource.com/setup_12.x | bash && \ - $APT_INSTALL nodejs && \ - # Install needed python packages. Use pip for installing packages (for consistency). - $APT_INSTALL python-is-python3 python3-pip python3-setuptools && \ - # qpdf is required for CRAN checks to pass. - $APT_INSTALL qpdf jq && \ - pip3 install $PIP_PKGS && \ - # Install R packages and dependencies used when building. - # R depends on pandoc*, libssl (which are installed above). 
- # Note that PySpark doc generation also needs pandoc due to nbsphinx - $APT_INSTALL r-base r-base-dev && \ - $APT_INSTALL libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev && \ - $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf texlive-latex-extra && \ - $APT_INSTALL libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev && \ - Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ - Rscript -e "devtools::install_github('jimhester/lintr')" && \ - Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \ - Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" && \ - # Install tools needed to build the documentation. - $APT_INSTALL ruby2.7 ruby2.7-dev && \ - gem install --no-document $GEM_PKGS +RUN echo 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' >> /etc/apt/sources.list +RUN gpg --keyserver hkps://keyserver.ubuntu.com --recv-key E298A3A825C0D65DFD57CBB651716619E084DAB9 +RUN gpg -a --export E084DAB9 | apt-key add - +RUN add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/' + +# See more in SPARK-39959, roxygen2 < 7.2.1 +RUN Rscript -e "install.packages(c('devtools', 'knitr', 'markdown', \ + 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', \ + 'ggplot2', 'mvtnorm', 'statmod', 'xml2'), repos='https://cloud.r-project.org/')" && \ + Rscript -e "devtools::install_version('roxygen2', version='7.2.0', repos='https://cloud.r-project.org')" && \ + Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" && \ + Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \ + Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" + +# See more in SPARK-39735 +ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library" + + +RUN add-apt-repository ppa:pypy/ppa +RUN mkdir -p /usr/local/pypy/pypy3.9 && \ + curl -sqL https://downloads.python.org/pypy/pypy3.9-v7.3.16-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.9 --strip-components=1 && \ + ln -sf /usr/local/pypy/pypy3.9/bin/pypy /usr/local/bin/pypy3.8 && \ + ln -sf /usr/local/pypy/pypy3.9/bin/pypy /usr/local/bin/pypy3 +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3 +RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.2' scipy coverage matplotlib lxml + + +ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.2 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2 twine==3.4.1" +# Python deps for Spark Connect +ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==4.25.1 googleapis-common-protos==1.56.4" + +# Install Python 3.10 packages +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 +RUN python3.10 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this +RUN python3.10 -m pip install --ignore-installed 'six==1.16.0' # Avoid `python3-six` installation +RUN python3.10 -m pip install $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ + python3.10 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.10 -m pip install deepspeed torcheval 
&& \ + python3.10 -m pip cache purge + +# Install Python 3.9 +RUN add-apt-repository ppa:deadsnakes/ppa +RUN apt-get update && apt-get install -y \ + python3.9 python3.9-distutils \ + && rm -rf /var/lib/apt/lists/* +RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.9 +RUN python3.9 -m pip install --ignore-installed blinker>=1.6.2 # mlflow needs this +RUN python3.9 -m pip install --force $BASIC_PIP_PKGS unittest-xml-reporting $CONNECT_PIP_PKGS && \ + python3.9 -m pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu && \ + python3.9 -m pip install torcheval && \ + python3.9 -m pip cache purge + +# Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 +# See 'ipython_genutils' in SPARK-38517 +# See 'docutils<0.18.0' in SPARK-39421 +RUN python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ +ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \ +'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \ +'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ +'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' +RUN python3.9 -m pip list + +RUN gem install --no-document "bundler:2.4.22" +RUN ln -s "$(which python3.9)" "/usr/local/bin/python" WORKDIR /opt/spark-rm/output diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 466e8d09d89ed..5478fbde929db 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -1,10 +1,10 @@ HikariCP/2.5.1//HikariCP-2.5.1.jar JLargeArrays/1.5//JLargeArrays-1.5.jar JTransforms/3.1//JTransforms-3.1.jar -RoaringBitmap/1.0.5//RoaringBitmap-1.0.5.jar +RoaringBitmap/1.1.0//RoaringBitmap-1.1.0.jar ST4/4.0.4//ST4-4.0.4.jar activation/1.1.1//activation-1.1.1.jar -aircompressor/0.26//aircompressor-0.26.jar +aircompressor/0.27//aircompressor-0.27.jar algebra_2.13/2.8.0//algebra_2.13-2.8.0.jar aliyun-java-sdk-core/4.5.10//aliyun-java-sdk-core-4.5.10.jar aliyun-java-sdk-kms/2.11.0//aliyun-java-sdk-kms-2.11.0.jar @@ -16,10 +16,11 @@ antlr4-runtime/4.13.1//antlr4-runtime-4.13.1.jar aopalliance-repackaged/3.0.3//aopalliance-repackaged-3.0.3.jar arpack/3.0.3//arpack-3.0.3.jar arpack_combined_all/0.1//arpack_combined_all-0.1.jar -arrow-format/15.0.2//arrow-format-15.0.2.jar -arrow-memory-core/15.0.2//arrow-memory-core-15.0.2.jar -arrow-memory-netty/15.0.2//arrow-memory-netty-15.0.2.jar -arrow-vector/15.0.2//arrow-vector-15.0.2.jar +arrow-format/16.1.0//arrow-format-16.1.0.jar +arrow-memory-core/16.1.0//arrow-memory-core-16.1.0.jar +arrow-memory-netty-buffer-patch/16.1.0//arrow-memory-netty-buffer-patch-16.1.0.jar +arrow-memory-netty/16.1.0//arrow-memory-netty-16.1.0.jar +arrow-vector/16.1.0//arrow-vector-16.1.0.jar audience-annotations/0.12.0//audience-annotations-0.12.0.jar avro-ipc/1.11.3//avro-ipc-1.11.3.jar avro-mapred/1.11.3//avro-mapred-1.11.3.jar @@ -28,19 +29,19 @@ azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar azure-storage/7.0.1//azure-storage-7.0.1.jar blas/3.0.3//blas-3.0.3.jar -bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar breeze-macros_2.13/2.1.0//breeze-macros_2.13-2.1.0.jar breeze_2.13/2.1.0//breeze_2.13-2.1.0.jar 
bundle/2.24.6//bundle-2.24.6.jar cats-kernel_2.13/2.8.0//cats-kernel_2.13-2.8.0.jar +checker-qual/3.42.0//checker-qual-3.42.0.jar chill-java/0.10.0//chill-java-0.10.0.jar chill_2.13/0.10.0//chill_2.13-0.10.0.jar -commons-cli/1.6.0//commons-cli-1.6.0.jar -commons-codec/1.16.1//commons-codec-1.16.1.jar +commons-cli/1.8.0//commons-cli-1.8.0.jar +commons-codec/1.17.0//commons-codec-1.17.0.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-collections4/4.4//commons-collections4-4.4.jar commons-compiler/3.1.9//commons-compiler-3.1.9.jar -commons-compress/1.26.1//commons-compress-1.26.1.jar +commons-compress/1.26.2//commons-compress-1.26.2.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-io/2.16.1//commons-io-2.16.1.jar @@ -48,22 +49,20 @@ commons-lang/2.6//commons-lang-2.6.jar commons-lang3/3.14.0//commons-lang3-3.14.0.jar commons-math3/3.6.1//commons-math3-3.6.1.jar commons-pool/1.5.4//commons-pool-1.5.4.jar -commons-text/1.11.0//commons-text-1.11.0.jar +commons-text/1.12.0//commons-text-1.12.0.jar compress-lzf/1.1.2//compress-lzf-1.1.2.jar -curator-client/5.6.0//curator-client-5.6.0.jar -curator-framework/5.6.0//curator-framework-5.6.0.jar -curator-recipes/5.6.0//curator-recipes-5.6.0.jar +curator-client/5.7.0//curator-client-5.7.0.jar +curator-framework/5.7.0//curator-framework-5.7.0.jar +curator-recipes/5.7.0//curator-recipes-5.7.0.jar datanucleus-api-jdo/4.2.4//datanucleus-api-jdo-4.2.4.jar datanucleus-core/4.1.17//datanucleus-core-4.1.17.jar datanucleus-rdbms/4.1.19//datanucleus-rdbms-4.1.19.jar -datasketches-java/5.0.1//datasketches-java-5.0.1.jar +datasketches-java/6.0.0//datasketches-java-6.0.0.jar datasketches-memory/2.2.0//datasketches-memory-2.2.0.jar derby/10.16.1.1//derby-10.16.1.1.jar derbyshared/10.16.1.1//derbyshared-10.16.1.1.jar derbytools/10.16.1.1//derbytools-10.16.1.1.jar dropwizard-metrics-hadoop-metrics2-reporter/0.1.2//dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar -eclipse-collections-api/11.1.0//eclipse-collections-api-11.1.0.jar -eclipse-collections/11.1.0//eclipse-collections-11.1.0.jar esdk-obs-java/3.20.4.2//esdk-obs-java-3.20.4.2.jar flatbuffers-java/23.5.26//flatbuffers-java-23.5.26.jar gcs-connector/hadoop3-2.2.21/shaded/gcs-connector-hadoop3-2.2.21-shaded.jar @@ -80,39 +79,39 @@ hadoop-client-runtime/3.4.0//hadoop-client-runtime-3.4.0.jar hadoop-cloud-storage/3.4.0//hadoop-cloud-storage-3.4.0.jar hadoop-huaweicloud/3.4.0//hadoop-huaweicloud-3.4.0.jar hadoop-shaded-guava/1.2.0//hadoop-shaded-guava-1.2.0.jar -hadoop-yarn-server-web-proxy/3.4.0//hadoop-yarn-server-web-proxy-3.4.0.jar -hive-beeline/2.3.9//hive-beeline-2.3.9.jar -hive-cli/2.3.9//hive-cli-2.3.9.jar -hive-common/2.3.9//hive-common-2.3.9.jar -hive-exec/2.3.9/core/hive-exec-2.3.9-core.jar -hive-jdbc/2.3.9//hive-jdbc-2.3.9.jar -hive-llap-common/2.3.9//hive-llap-common-2.3.9.jar -hive-metastore/2.3.9//hive-metastore-2.3.9.jar -hive-serde/2.3.9//hive-serde-2.3.9.jar +hive-beeline/2.3.10//hive-beeline-2.3.10.jar +hive-cli/2.3.10//hive-cli-2.3.10.jar +hive-common/2.3.10//hive-common-2.3.10.jar +hive-exec/2.3.10/core/hive-exec-2.3.10-core.jar +hive-jdbc/2.3.10//hive-jdbc-2.3.10.jar +hive-llap-common/2.3.10//hive-llap-common-2.3.10.jar +hive-metastore/2.3.10//hive-metastore-2.3.10.jar +hive-serde/2.3.10//hive-serde-2.3.10.jar hive-service-rpc/4.0.0//hive-service-rpc-4.0.0.jar -hive-shims-0.23/2.3.9//hive-shims-0.23-2.3.9.jar -hive-shims-common/2.3.9//hive-shims-common-2.3.9.jar -hive-shims-scheduler/2.3.9//hive-shims-scheduler-2.3.9.jar 
-hive-shims/2.3.9//hive-shims-2.3.9.jar +hive-shims-0.23/2.3.10//hive-shims-0.23-2.3.10.jar +hive-shims-common/2.3.10//hive-shims-common-2.3.10.jar +hive-shims-scheduler/2.3.10//hive-shims-scheduler-2.3.10.jar +hive-shims/2.3.10//hive-shims-2.3.10.jar hive-storage-api/2.8.1//hive-storage-api-2.8.1.jar hk2-api/3.0.3//hk2-api-3.0.3.jar hk2-locator/3.0.3//hk2-locator-3.0.3.jar hk2-utils/3.0.3//hk2-utils-3.0.3.jar httpclient/4.5.14//httpclient-4.5.14.jar httpcore/4.4.16//httpcore-4.4.16.jar -icu4j/72.1//icu4j-72.1.jar +icu4j/75.1//icu4j-75.1.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.2//ivy-2.5.2.jar -jackson-annotations/2.17.0//jackson-annotations-2.17.0.jar +jackson-annotations/2.17.1//jackson-annotations-2.17.1.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.17.0//jackson-core-2.17.0.jar -jackson-databind/2.17.0//jackson-databind-2.17.0.jar -jackson-dataformat-cbor/2.17.0//jackson-dataformat-cbor-2.17.0.jar -jackson-dataformat-yaml/2.17.0//jackson-dataformat-yaml-2.17.0.jar -jackson-datatype-jsr310/2.17.0//jackson-datatype-jsr310-2.17.0.jar +jackson-core/2.17.1//jackson-core-2.17.1.jar +jackson-databind/2.17.1//jackson-databind-2.17.1.jar +jackson-dataformat-cbor/2.17.1//jackson-dataformat-cbor-2.17.1.jar +jackson-dataformat-yaml/2.17.1//jackson-dataformat-yaml-2.17.1.jar +jackson-datatype-jdk8/2.17.0//jackson-datatype-jdk8-2.17.0.jar +jackson-datatype-jsr310/2.17.1//jackson-datatype-jsr310-2.17.1.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.13/2.17.0//jackson-module-scala_2.13-2.17.0.jar +jackson-module-scala_2.13/2.17.1//jackson-module-scala_2.13-2.17.1.jar jakarta.annotation-api/2.0.0//jakarta.annotation-api-2.0.0.jar jakarta.inject-api/2.0.1//jakarta.inject-api-2.0.1.jar jakarta.servlet-api/5.0.0//jakarta.servlet-api-5.0.0.jar @@ -138,11 +137,11 @@ jersey-container-servlet/3.0.12//jersey-container-servlet-3.0.12.jar jersey-hk2/3.0.12//jersey-hk2-3.0.12.jar jersey-server/3.0.12//jersey-server-3.0.12.jar jettison/1.5.4//jettison-1.5.4.jar -jetty-util-ajax/11.0.20//jetty-util-ajax-11.0.20.jar -jetty-util/11.0.20//jetty-util-11.0.20.jar +jetty-util-ajax/11.0.21//jetty-util-ajax-11.0.21.jar +jetty-util/11.0.21//jetty-util-11.0.21.jar jline/2.14.6//jline-2.14.6.jar -jline/3.24.1//jline-3.24.1.jar -jna/5.13.0//jna-5.13.0.jar +jline/3.25.1//jline-3.25.1.jar +jna/5.14.0//jna-5.14.0.jar joda-time/2.12.7//joda-time-2.12.7.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar @@ -156,35 +155,35 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/2.0.13//jul-to-slf4j-2.0.13.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client-api/6.12.0//kubernetes-client-api-6.12.0.jar -kubernetes-client/6.12.0//kubernetes-client-6.12.0.jar -kubernetes-httpclient-okhttp/6.12.0//kubernetes-httpclient-okhttp-6.12.0.jar -kubernetes-model-admissionregistration/6.12.0//kubernetes-model-admissionregistration-6.12.0.jar -kubernetes-model-apiextensions/6.12.0//kubernetes-model-apiextensions-6.12.0.jar -kubernetes-model-apps/6.12.0//kubernetes-model-apps-6.12.0.jar -kubernetes-model-autoscaling/6.12.0//kubernetes-model-autoscaling-6.12.0.jar -kubernetes-model-batch/6.12.0//kubernetes-model-batch-6.12.0.jar -kubernetes-model-certificates/6.12.0//kubernetes-model-certificates-6.12.0.jar -kubernetes-model-common/6.12.0//kubernetes-model-common-6.12.0.jar -kubernetes-model-coordination/6.12.0//kubernetes-model-coordination-6.12.0.jar 
-kubernetes-model-core/6.12.0//kubernetes-model-core-6.12.0.jar -kubernetes-model-discovery/6.12.0//kubernetes-model-discovery-6.12.0.jar -kubernetes-model-events/6.12.0//kubernetes-model-events-6.12.0.jar -kubernetes-model-extensions/6.12.0//kubernetes-model-extensions-6.12.0.jar -kubernetes-model-flowcontrol/6.12.0//kubernetes-model-flowcontrol-6.12.0.jar -kubernetes-model-gatewayapi/6.12.0//kubernetes-model-gatewayapi-6.12.0.jar -kubernetes-model-metrics/6.12.0//kubernetes-model-metrics-6.12.0.jar -kubernetes-model-networking/6.12.0//kubernetes-model-networking-6.12.0.jar -kubernetes-model-node/6.12.0//kubernetes-model-node-6.12.0.jar -kubernetes-model-policy/6.12.0//kubernetes-model-policy-6.12.0.jar -kubernetes-model-rbac/6.12.0//kubernetes-model-rbac-6.12.0.jar -kubernetes-model-resource/6.12.0//kubernetes-model-resource-6.12.0.jar -kubernetes-model-scheduling/6.12.0//kubernetes-model-scheduling-6.12.0.jar -kubernetes-model-storageclass/6.12.0//kubernetes-model-storageclass-6.12.0.jar +kubernetes-client-api/6.13.0//kubernetes-client-api-6.13.0.jar +kubernetes-client/6.13.0//kubernetes-client-6.13.0.jar +kubernetes-httpclient-okhttp/6.13.0//kubernetes-httpclient-okhttp-6.13.0.jar +kubernetes-model-admissionregistration/6.13.0//kubernetes-model-admissionregistration-6.13.0.jar +kubernetes-model-apiextensions/6.13.0//kubernetes-model-apiextensions-6.13.0.jar +kubernetes-model-apps/6.13.0//kubernetes-model-apps-6.13.0.jar +kubernetes-model-autoscaling/6.13.0//kubernetes-model-autoscaling-6.13.0.jar +kubernetes-model-batch/6.13.0//kubernetes-model-batch-6.13.0.jar +kubernetes-model-certificates/6.13.0//kubernetes-model-certificates-6.13.0.jar +kubernetes-model-common/6.13.0//kubernetes-model-common-6.13.0.jar +kubernetes-model-coordination/6.13.0//kubernetes-model-coordination-6.13.0.jar +kubernetes-model-core/6.13.0//kubernetes-model-core-6.13.0.jar +kubernetes-model-discovery/6.13.0//kubernetes-model-discovery-6.13.0.jar +kubernetes-model-events/6.13.0//kubernetes-model-events-6.13.0.jar +kubernetes-model-extensions/6.13.0//kubernetes-model-extensions-6.13.0.jar +kubernetes-model-flowcontrol/6.13.0//kubernetes-model-flowcontrol-6.13.0.jar +kubernetes-model-gatewayapi/6.13.0//kubernetes-model-gatewayapi-6.13.0.jar +kubernetes-model-metrics/6.13.0//kubernetes-model-metrics-6.13.0.jar +kubernetes-model-networking/6.13.0//kubernetes-model-networking-6.13.0.jar +kubernetes-model-node/6.13.0//kubernetes-model-node-6.13.0.jar +kubernetes-model-policy/6.13.0//kubernetes-model-policy-6.13.0.jar +kubernetes-model-rbac/6.13.0//kubernetes-model-rbac-6.13.0.jar +kubernetes-model-resource/6.13.0//kubernetes-model-resource-6.13.0.jar +kubernetes-model-scheduling/6.13.0//kubernetes-model-scheduling-6.13.0.jar +kubernetes-model-storageclass/6.13.0//kubernetes-model-storageclass-6.13.0.jar lapack/3.0.3//lapack-3.0.3.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar -libthrift/0.12.0//libthrift-0.12.0.jar +libthrift/0.16.0//libthrift-0.16.0.jar log4j-1.2-api/2.22.1//log4j-1.2-api-2.22.1.jar log4j-api/2.22.1//log4j-api-2.22.1.jar log4j-core/2.22.1//log4j-core-2.22.1.jar @@ -192,38 +191,37 @@ log4j-layout-template-json/2.22.1//log4j-layout-template-json-2.22.1.jar log4j-slf4j2-impl/2.22.1//log4j-slf4j2-impl-2.22.1.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar -metrics-core/4.2.25//metrics-core-4.2.25.jar -metrics-graphite/4.2.25//metrics-graphite-4.2.25.jar -metrics-jmx/4.2.25//metrics-jmx-4.2.25.jar 
-metrics-json/4.2.25//metrics-json-4.2.25.jar -metrics-jvm/4.2.25//metrics-jvm-4.2.25.jar +metrics-core/4.2.26//metrics-core-4.2.26.jar +metrics-graphite/4.2.26//metrics-graphite-4.2.26.jar +metrics-jmx/4.2.26//metrics-jmx-4.2.26.jar +metrics-json/4.2.26//metrics-json-4.2.26.jar +metrics-jvm/4.2.26//metrics-jvm-4.2.26.jar minlog/1.3.0//minlog-1.3.0.jar -netty-all/4.1.108.Final//netty-all-4.1.108.Final.jar -netty-buffer/4.1.108.Final//netty-buffer-4.1.108.Final.jar -netty-codec-http/4.1.108.Final//netty-codec-http-4.1.108.Final.jar -netty-codec-http2/4.1.108.Final//netty-codec-http2-4.1.108.Final.jar -netty-codec-socks/4.1.108.Final//netty-codec-socks-4.1.108.Final.jar -netty-codec/4.1.108.Final//netty-codec-4.1.108.Final.jar -netty-common/4.1.108.Final//netty-common-4.1.108.Final.jar -netty-handler-proxy/4.1.108.Final//netty-handler-proxy-4.1.108.Final.jar -netty-handler/4.1.108.Final//netty-handler-4.1.108.Final.jar -netty-resolver/4.1.108.Final//netty-resolver-4.1.108.Final.jar -netty-tcnative-boringssl-static/2.0.61.Final//netty-tcnative-boringssl-static-2.0.61.Final.jar +netty-all/4.1.110.Final//netty-all-4.1.110.Final.jar +netty-buffer/4.1.110.Final//netty-buffer-4.1.110.Final.jar +netty-codec-http/4.1.110.Final//netty-codec-http-4.1.110.Final.jar +netty-codec-http2/4.1.110.Final//netty-codec-http2-4.1.110.Final.jar +netty-codec-socks/4.1.110.Final//netty-codec-socks-4.1.110.Final.jar +netty-codec/4.1.110.Final//netty-codec-4.1.110.Final.jar +netty-common/4.1.110.Final//netty-common-4.1.110.Final.jar +netty-handler-proxy/4.1.110.Final//netty-handler-proxy-4.1.110.Final.jar +netty-handler/4.1.110.Final//netty-handler-4.1.110.Final.jar +netty-resolver/4.1.110.Final//netty-resolver-4.1.110.Final.jar netty-tcnative-boringssl-static/2.0.65.Final/linux-aarch_64/netty-tcnative-boringssl-static-2.0.65.Final-linux-aarch_64.jar netty-tcnative-boringssl-static/2.0.65.Final/linux-x86_64/netty-tcnative-boringssl-static-2.0.65.Final-linux-x86_64.jar netty-tcnative-boringssl-static/2.0.65.Final/osx-aarch_64/netty-tcnative-boringssl-static-2.0.65.Final-osx-aarch_64.jar netty-tcnative-boringssl-static/2.0.65.Final/osx-x86_64/netty-tcnative-boringssl-static-2.0.65.Final-osx-x86_64.jar netty-tcnative-boringssl-static/2.0.65.Final/windows-x86_64/netty-tcnative-boringssl-static-2.0.65.Final-windows-x86_64.jar netty-tcnative-classes/2.0.65.Final//netty-tcnative-classes-2.0.65.Final.jar -netty-transport-classes-epoll/4.1.108.Final//netty-transport-classes-epoll-4.1.108.Final.jar -netty-transport-classes-kqueue/4.1.108.Final//netty-transport-classes-kqueue-4.1.108.Final.jar -netty-transport-native-epoll/4.1.108.Final/linux-aarch_64/netty-transport-native-epoll-4.1.108.Final-linux-aarch_64.jar -netty-transport-native-epoll/4.1.108.Final/linux-riscv64/netty-transport-native-epoll-4.1.108.Final-linux-riscv64.jar -netty-transport-native-epoll/4.1.108.Final/linux-x86_64/netty-transport-native-epoll-4.1.108.Final-linux-x86_64.jar -netty-transport-native-kqueue/4.1.108.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.108.Final-osx-aarch_64.jar -netty-transport-native-kqueue/4.1.108.Final/osx-x86_64/netty-transport-native-kqueue-4.1.108.Final-osx-x86_64.jar -netty-transport-native-unix-common/4.1.108.Final//netty-transport-native-unix-common-4.1.108.Final.jar -netty-transport/4.1.108.Final//netty-transport-4.1.108.Final.jar +netty-transport-classes-epoll/4.1.110.Final//netty-transport-classes-epoll-4.1.110.Final.jar +netty-transport-classes-kqueue/4.1.110.Final//netty-transport-classes-kqueue-4.1.110.Final.jar 
+netty-transport-native-epoll/4.1.110.Final/linux-aarch_64/netty-transport-native-epoll-4.1.110.Final-linux-aarch_64.jar +netty-transport-native-epoll/4.1.110.Final/linux-riscv64/netty-transport-native-epoll-4.1.110.Final-linux-riscv64.jar +netty-transport-native-epoll/4.1.110.Final/linux-x86_64/netty-transport-native-epoll-4.1.110.Final-linux-x86_64.jar +netty-transport-native-kqueue/4.1.110.Final/osx-aarch_64/netty-transport-native-kqueue-4.1.110.Final-osx-aarch_64.jar +netty-transport-native-kqueue/4.1.110.Final/osx-x86_64/netty-transport-native-kqueue-4.1.110.Final-osx-x86_64.jar +netty-transport-native-unix-common/4.1.110.Final//netty-transport-native-unix-common-4.1.110.Final.jar +netty-transport/4.1.110.Final//netty-transport-4.1.110.Final.jar objenesis/3.3//objenesis-3.3.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.15.0//okio-1.15.0.jar @@ -231,30 +229,30 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/2.0.0/shaded-protobuf/orc-core-2.0.0-shaded-protobuf.jar +orc-core/2.0.1/shaded-protobuf/orc-core-2.0.1-shaded-protobuf.jar orc-format/1.0.0/shaded-protobuf/orc-format-1.0.0-shaded-protobuf.jar -orc-mapreduce/2.0.0/shaded-protobuf/orc-mapreduce-2.0.0-shaded-protobuf.jar -orc-shims/2.0.0//orc-shims-2.0.0.jar +orc-mapreduce/2.0.1/shaded-protobuf/orc-mapreduce-2.0.1-shaded-protobuf.jar +orc-shims/2.0.1//orc-shims-2.0.1.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar -parquet-column/1.13.1//parquet-column-1.13.1.jar -parquet-common/1.13.1//parquet-common-1.13.1.jar -parquet-encoding/1.13.1//parquet-encoding-1.13.1.jar -parquet-format-structures/1.13.1//parquet-format-structures-1.13.1.jar -parquet-hadoop/1.13.1//parquet-hadoop-1.13.1.jar -parquet-jackson/1.13.1//parquet-jackson-1.13.1.jar -pickle/1.3//pickle-1.3.jar +parquet-column/1.14.1//parquet-column-1.14.1.jar +parquet-common/1.14.1//parquet-common-1.14.1.jar +parquet-encoding/1.14.1//parquet-encoding-1.14.1.jar +parquet-format-structures/1.14.1//parquet-format-structures-1.14.1.jar +parquet-hadoop/1.14.1//parquet-hadoop-1.14.1.jar +parquet-jackson/1.14.1//parquet-jackson-1.14.1.jar +pickle/1.5//pickle-1.5.jar py4j/0.10.9.7//py4j-0.10.9.7.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar -rocksdbjni/8.11.3//rocksdbjni-8.11.3.jar +rocksdbjni/9.2.1//rocksdbjni-9.2.1.jar scala-collection-compat_2.13/2.7.0//scala-collection-compat_2.13-2.7.0.jar -scala-compiler/2.13.13//scala-compiler-2.13.13.jar -scala-library/2.13.13//scala-library-2.13.13.jar +scala-compiler/2.13.14//scala-compiler-2.13.14.jar +scala-library/2.13.14//scala-library-2.13.14.jar scala-parallel-collections_2.13/1.0.4//scala-parallel-collections_2.13-1.0.4.jar -scala-parser-combinators_2.13/2.3.0//scala-parser-combinators_2.13-2.3.0.jar -scala-reflect/2.13.13//scala-reflect-2.13.13.jar -scala-xml_2.13/2.2.0//scala-xml_2.13-2.2.0.jar +scala-parser-combinators_2.13/2.4.0//scala-parser-combinators_2.13-2.4.0.jar +scala-reflect/2.13.14//scala-reflect-2.13.14.jar +scala-xml_2.13/2.3.0//scala-xml_2.13-2.3.0.jar slf4j-api/2.0.13//slf4j-api-2.0.13.jar snakeyaml-engine/2.7//snakeyaml-engine-2.7.jar snakeyaml/2.2//snakeyaml-2.2.jar @@ -264,7 +262,7 @@ spire-platform_2.13/0.18.0//spire-platform_2.13-0.18.0.jar spire-util_2.13/0.18.0//spire-util_2.13-0.18.0.jar spire_2.13/0.18.0//spire_2.13-0.18.0.jar stax-api/1.0.1//stax-api-1.0.1.jar 
-stream/2.9.6//stream-2.9.6.jar +stream/2.9.8//stream-2.9.8.jar super-csv/2.2.0//super-csv-2.2.0.jar threeten-extra/1.7.1//threeten-extra-1.7.1.jar tink/1.13.0//tink-1.13.0.jar @@ -272,10 +270,10 @@ transaction-api/1.1//transaction-api-1.1.jar txw2/3.0.2//txw2-3.0.2.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar wildfly-openssl/1.1.3.Final//wildfly-openssl-1.1.3.Final.jar -xbean-asm9-shaded/4.24//xbean-asm9-shaded-4.24.jar +xbean-asm9-shaded/4.25//xbean-asm9-shaded-4.25.jar xmlschema-core/2.3.1//xmlschema-core-2.3.1.jar xz/1.9//xz-1.9.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.9.2//zookeeper-jute-3.9.2.jar zookeeper/3.9.2//zookeeper-3.9.2.jar -zstd-jni/1.5.6-2//zstd-jni-1.5.6-2.jar +zstd-jni/1.5.6-3//zstd-jni-1.5.6-3.jar diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index 0b0a478b4bf44..6ba9be87552ab 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -81,17 +81,17 @@ ENV R_LIBS_SITE "/usr/local/lib/R/site-library:${R_LIBS_SITE}:/usr/lib/R/library RUN add-apt-repository ppa:pypy/ppa -RUN mkdir -p /usr/local/pypy/pypy3.8 && \ - curl -sqL https://downloads.python.org/pypy/pypy3.8-v7.3.11-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.8 --strip-components=1 && \ - ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3.8 && \ - ln -sf /usr/local/pypy/pypy3.8/bin/pypy /usr/local/bin/pypy3 +RUN mkdir -p /usr/local/pypy/pypy3.9 && \ + curl -sqL https://downloads.python.org/pypy/pypy3.9-v7.3.16-linux64.tar.bz2 | tar xjf - -C /usr/local/pypy/pypy3.9 --strip-components=1 && \ + ln -sf /usr/local/pypy/pypy3.9/bin/pypy /usr/local/bin/pypy3.8 && \ + ln -sf /usr/local/pypy/pypy3.9/bin/pypy /usr/local/bin/pypy3 RUN curl -sS https://bootstrap.pypa.io/get-pip.py | pypy3 -RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas<=2.2.2' scipy coverage matplotlib lxml +RUN pypy3 -m pip install numpy 'six==1.16.0' 'pandas==2.2.2' scipy coverage matplotlib lxml -ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas<=2.2.2 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" +ARG BASIC_PIP_PKGS="numpy pyarrow>=15.0.0 six==1.16.0 pandas==2.2.2 scipy plotly>=4.8 mlflow>=2.8.1 coverage matplotlib openpyxl memory-profiler>=0.61.0 scikit-learn>=1.3.2" # Python deps for Spark Connect -ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==4.25.1 googleapis-common-protos==1.56.4" +ARG CONNECT_PIP_PKGS="grpcio==1.62.0 grpcio-status==1.62.0 protobuf==4.25.1 googleapis-common-protos==1.56.4 graphviz==0.20.3" # Install Python 3.10 packages RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 @@ -138,9 +138,6 @@ RUN python3.12 -m pip install $BASIC_PIP_PKGS $CONNECT_PIP_PKGS lxml && \ python3.12 -m pip cache purge # Remove unused installation packages to free up disk space -RUN apt-get remove --purge -y \ - '^aspnet.*' '^dotnet-.*' '^llvm-.*' 'php.*' '^mongodb-.*' \ - snapd google-chrome-stable microsoft-edge-stable firefox \ - azure-cli google-cloud-sdk mono-devel powershell libgl1-mesa-dri || true +RUN apt-get remove --purge -y 'gfortran-11' 'humanity-icon-theme' 'nodejs-doc' || true RUN apt-get autoremove --purge -y RUN apt-get clean diff --git a/dev/is-changed.py b/dev/is-changed.py index 85f0d3cda6df4..1962e244d5dd7 100755 --- a/dev/is-changed.py +++ b/dev/is-changed.py @@ -17,6 +17,8 @@ # limitations under the License. 
# +import warnings +import traceback import os import sys from argparse import ArgumentParser @@ -82,4 +84,8 @@ def main(): if __name__ == "__main__": - main() + try: + main() + except Exception: + warnings.warn(f"Ignored exception:\n\n{traceback.format_exc()}") + print("true") diff --git a/dev/java-file-header b/dev/java-file-header new file mode 100644 index 0000000000000..c6a5afeef509f --- /dev/null +++ b/dev/java-file-header @@ -0,0 +1,16 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ \ No newline at end of file diff --git a/dev/lint-java b/dev/lint-java index ac5a2c869404f..ff431301773f3 100755 --- a/dev/lint-java +++ b/dev/lint-java @@ -20,7 +20,7 @@ SCRIPT_DIR="$( cd "$( dirname "$0" )" && pwd )" SPARK_ROOT_DIR="$(dirname $SCRIPT_DIR)" -ERRORS=$($SCRIPT_DIR/../build/mvn -Pkinesis-asl -Pkubernetes -Pyarn -Phive -Phive-thriftserver checkstyle:check | grep ERROR) +ERRORS=$($SCRIPT_DIR/../build/mvn -Pkinesis-asl -Pspark-ganglia-lgpl -Pkubernetes -Pyarn -Phive -Phive-thriftserver checkstyle:check | grep ERROR) if test ! -z "$ERRORS"; then echo -e "Checkstyle checks failed at following occurrences:\n$ERRORS" diff --git a/dev/lint-python b/dev/lint-python index 8d587bd52aca7..b8703310bc4b6 100755 --- a/dev/lint-python +++ b/dev/lint-python @@ -84,7 +84,10 @@ function satisfies_min_version { local expected_version="$2" echo "$( "$PYTHON_EXECUTABLE" << EOM -from setuptools.extern.packaging import version +try: + from setuptools.extern.packaging import version +except ModuleNotFoundError: + from packaging import version print(version.parse('$provided_version') >= version.parse('$expected_version')) EOM )" @@ -122,6 +125,7 @@ function mypy_annotation_test { echo "starting mypy annotations test..." MYPY_REPORT=$( ($MYPY_BUILD \ + --python-executable $PYTHON_EXECUTABLE \ --namespace-packages \ --config-file python/mypy.ini \ --cache-dir /tmp/.mypy_cache/ \ @@ -181,6 +185,7 @@ function mypy_examples_test { echo "starting mypy examples test..." MYPY_REPORT=$( (MYPYPATH=python $MYPY_BUILD \ + --python-executable $PYTHON_EXECUTABLE \ --namespace-packages \ --config-file python/mypy.ini \ --exclude "mllib/*" \ diff --git a/dev/lint-scala b/dev/lint-scala index 6b3178312c106..03771954ff525 100755 --- a/dev/lint-scala +++ b/dev/lint-scala @@ -32,7 +32,7 @@ ERRORS=$(./build/mvn \ -pl connector/connect/common \ -pl connector/connect/server \ -pl connector/connect/client/jvm \ - 2>&1 | grep -e "^Requires formatting" \ + 2>&1 | grep -e "Unformatted files found" \ ) if test ! 
-z "$ERRORS"; then diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index c9893fd7e5a9d..4ebd3e4b951f5 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -118,9 +118,14 @@ def run_cmd(cmd): return subprocess.check_output(cmd.split(" ")).decode("utf-8") -def continue_maybe(prompt): +def continue_maybe(prompt, cherry=False): result = bold_input("%s (y/N): " % prompt) if result.lower() != "y": + if cherry: + try: + run_cmd("git cherry-pick --abort") + except Exception: + print_error("Unable to abort and get back to the state before cherry-pick") fail("Okay, exiting") @@ -234,9 +239,9 @@ def cherry_pick(pr_num, merge_hash, default_branch): run_cmd("git cherry-pick -sx %s" % merge_hash) except Exception as e: msg = "Error cherry-picking: %s\nWould you like to manually fix-up this merge?" % e - continue_maybe(msg) + continue_maybe(msg, True) msg = "Okay, please fix any conflicts and finish the cherry-pick. Finished?" - continue_maybe(msg) + continue_maybe(msg, True) continue_maybe( "Pick complete (local ref %s). Push to %s?" % (pick_branch_name, PUSH_REMOTE_NAME) @@ -257,16 +262,19 @@ def cherry_pick(pr_num, merge_hash, default_branch): def print_jira_issue_summary(issue): - summary = issue.fields.summary + summary = "Summary\t\t%s\n" % issue.fields.summary assignee = issue.fields.assignee if assignee is not None: assignee = assignee.displayName - status = issue.fields.status.name + assignee = "Assignee\t%s\n" % assignee + status = "Status\t\t%s\n" % issue.fields.status.name + url = "Url\t\t%s/%s\n" % (JIRA_BASE, issue.key) + target_versions = "Affected\t%s\n" % [x.name for x in issue.fields.versions] + fix_versions = "" + if len(issue.fields.fixVersions) > 0: + fix_versions = "Fixed\t\t%s\n" % [x.name for x in issue.fields.fixVersions] print("=== JIRA %s ===" % issue.key) - print( - "summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n" - % (summary, assignee, status, JIRA_BASE, issue.key) - ) + print("%s%s%s%s%s%s" % (summary, assignee, status, url, target_versions, fix_versions)) def get_jira_issue(prompt, default_jira_id=""): @@ -501,12 +509,19 @@ def standardize_jira_ref(text): >>> standardize_jira_ref( ... "[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.") '[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.' + >>> standardize_jira_ref( + ... 'Revert "[SPARK-48591][PYTHON] Simplify the if-else branches with F.lit"') + 'Revert "[SPARK-48591][PYTHON] Simplify the if-else branches with F.lit"' >>> standardize_jira_ref("Additional information for users building from source code") 'Additional information for users building from source code' """ jira_refs = [] components = [] + # If this is a Revert PR, no need to process any further + if text.startswith('Revert "') and text.endswith('"'): + return text + # If the string is compliant, no need to process any further if re.search(r"^\[SPARK-[0-9]{3,6}\](\[[A-Z0-9_\s,]+\] )+\S+", text): return text @@ -678,6 +693,14 @@ def main(): ) continue_maybe(msg) + if asf_jira is not None: + jira_ids = re.findall("SPARK-[0-9]{4,5}", title) + for jira_id in jira_ids: + try: + print_jira_issue_summary(asf_jira.issue(jira_id)) + except Exception: + print_error("Unable to fetch summary of %s" % jira_id) + print("\n=== Pull Request #%s ===" % pr_num) print("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (title, pr_repo_desc, target_ref, url)) continue_maybe("Proceed with merging pull request #%s?" 
% pr_num) diff --git a/dev/pyproject.toml b/dev/pyproject.toml index 4f462d14c7838..f19107b3782a6 100644 --- a/dev/pyproject.toml +++ b/dev/pyproject.toml @@ -29,6 +29,6 @@ testpaths = [ # GitHub workflow version and dev/reformat-python required-version = "23.9.1" line-length = 100 -target-version = ['py38'] +target-version = ['py39'] include = '\.pyi?$' extend-exclude = 'cloudpickle|error_classes.py' diff --git a/dev/requirements.txt b/dev/requirements.txt index d6530d8ce2821..88883a963950e 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -60,6 +60,9 @@ mypy-protobuf==3.3.0 googleapis-common-protos-stubs==2.2.0 grpc-stubs==1.24.11 +# Debug for Spark and Spark Connect +graphviz==0.20.3 + # TorchDistributor dependencies torch torchvision diff --git a/dev/run-pip-tests b/dev/run-pip-tests index f8a547b0c917c..91399ff1e25ea 100755 --- a/dev/run-pip-tests +++ b/dev/run-pip-tests @@ -87,6 +87,10 @@ for python in "${PYTHON_EXECS[@]}"; do VIRTUALENV_PATH="$VIRTUALENV_BASE"/$python rm -rf "$VIRTUALENV_PATH" if [ -n "$USE_CONDA" ]; then + if [ -f "$CONDA_PREFIX/etc/profile.d/conda.sh" ]; then + # See also https://github.com/conda/conda/issues/7980 + source "$CONDA_PREFIX/etc/profile.d/conda.sh" + fi conda create -y -p "$VIRTUALENV_PATH" python=$python numpy pandas pip setuptools source activate "$VIRTUALENV_PATH" || conda activate "$VIRTUALENV_PATH" else diff --git a/dev/sbt-checkstyle b/dev/sbt-checkstyle index 99a46a3a0e38b..f2d5a0fa304ac 100755 --- a/dev/sbt-checkstyle +++ b/dev/sbt-checkstyle @@ -17,7 +17,7 @@ # limitations under the License. # -SPARK_PROFILES=${1:-"-Pkinesis-asl -Pkubernetes -Pyarn -Phive -Phive-thriftserver"} +SPARK_PROFILES=${1:-"-Pkinesis-asl -Pspark-ganglia-lgpl -Pkubernetes -Pyarn -Phive -Phive-thriftserver -Pjvm-profiler"} # NOTE: echo "q" is needed because SBT prompts the user for input on encountering a build file # with failure (either resolution or compilation); the "q" makes SBT quit. diff --git a/dev/scalastyle b/dev/scalastyle index 12457af1ae7b3..9de1fd1c9d9d5 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -17,7 +17,7 @@ # limitations under the License. # -SPARK_PROFILES=${1:-"-Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive -Pvolcano"} +SPARK_PROFILES=${1:-"-Pkubernetes -Pyarn -Pspark-ganglia-lgpl -Pkinesis-asl -Phive-thriftserver -Phive -Pvolcano -Pjvm-profiler -Phadoop-cloud"} # NOTE: echo "q" is needed because SBT prompts the user for input on encountering a build file # with failure (either resolution or compilation); the "q" makes SBT quit. 
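As a rough local sanity check for the lint and style entry points touched above, they can be run directly from the repository root; a minimal sketch, assuming a full Spark checkout with `build/mvn` available (the optional first argument to the profile-driven scripts overrides the default profile list shown in the diff):

```sh
# Scala style checks with the new default profiles (now including -Pjvm-profiler and -Phadoop-cloud)
./dev/scalastyle

# Java checkstyle via SBT with the expanded profile set (now including -Pspark-ganglia-lgpl and -Pjvm-profiler)
./dev/sbt-checkstyle

# scalafmt check for the connect modules; failures are now detected via "Unformatted files found"
./dev/lint-scala

# The profile-driven scripts also accept an explicit profile list, e.g.:
./dev/scalastyle "-Pkubernetes -Pyarn -Phive -Phive-thriftserver"
```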
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 5e169eb119b45..44295e7e630e9 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -476,8 +476,9 @@ def __hash__(self): "pyspark.sql.session", "pyspark.sql.conf", "pyspark.sql.catalog", - "pyspark.sql.column", - "pyspark.sql.dataframe", + "pyspark.sql.classic.column", + "pyspark.sql.classic.dataframe", + "pyspark.sql.classic.window", "pyspark.sql.datasource", "pyspark.sql.group", "pyspark.sql.functions.builtin", @@ -488,7 +489,6 @@ def __hash__(self): "pyspark.sql.streaming.listener", "pyspark.sql.udf", "pyspark.sql.udtf", - "pyspark.sql.window", "pyspark.sql.avro.functions", "pyspark.sql.protobuf.functions", "pyspark.sql.pandas.conversion", @@ -1009,6 +1009,7 @@ def __hash__(self): # sql unittests "pyspark.sql.tests.connect.test_connect_plan", "pyspark.sql.tests.connect.test_connect_basic", + "pyspark.sql.tests.connect.test_connect_dataframe_property", "pyspark.sql.tests.connect.test_connect_error", "pyspark.sql.tests.connect.test_connect_function", "pyspark.sql.tests.connect.test_connect_collection", @@ -1050,6 +1051,7 @@ def __hash__(self): "pyspark.sql.tests.connect.test_parity_python_streaming_datasource", "pyspark.sql.tests.connect.test_utils", "pyspark.sql.tests.connect.client.test_artifact", + "pyspark.sql.tests.connect.client.test_artifact_localcluster", "pyspark.sql.tests.connect.client.test_client", "pyspark.sql.tests.connect.client.test_reattach", "pyspark.sql.tests.connect.streaming.test_parity_streaming", @@ -1062,6 +1064,7 @@ def __hash__(self): "pyspark.sql.tests.connect.test_parity_pandas_udf_window", "pyspark.sql.tests.connect.test_resources", "pyspark.sql.tests.connect.shell.test_progress", + "pyspark.sql.tests.connect.test_df_debug", ], excluded_python_implementations=[ "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and @@ -1102,6 +1105,8 @@ def __hash__(self): "python/pyspark/pandas", ], python_test_goals=[ + # unittests dedicated for Spark Connect + "pyspark.pandas.tests.connect.test_connect_plotting", # pandas-on-Spark unittests "pyspark.pandas.tests.connect.test_parity_categorical", "pyspark.pandas.tests.connect.test_parity_config", @@ -1171,6 +1176,9 @@ def __hash__(self): "pyspark.pandas.tests.connect.indexes.test_parity_reindex", "pyspark.pandas.tests.connect.indexes.test_parity_rename", "pyspark.pandas.tests.connect.indexes.test_parity_reset_index", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_at", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_between", "pyspark.pandas.tests.connect.computation.test_parity_any_all", "pyspark.pandas.tests.connect.computation.test_parity_apply_func", "pyspark.pandas.tests.connect.computation.test_parity_binary_ops", @@ -1183,6 +1191,12 @@ def __hash__(self): "pyspark.pandas.tests.connect.computation.test_parity_describe", "pyspark.pandas.tests.connect.computation.test_parity_eval", "pyspark.pandas.tests.connect.computation.test_parity_melt", + "pyspark.pandas.tests.connect.computation.test_parity_missing_data", + "pyspark.pandas.tests.connect.groupby.test_parity_stat", + "pyspark.pandas.tests.connect.groupby.test_parity_stat_adv", + "pyspark.pandas.tests.connect.groupby.test_parity_stat_ddof", + "pyspark.pandas.tests.connect.groupby.test_parity_stat_func", + "pyspark.pandas.tests.connect.groupby.test_parity_stat_prod", ], excluded_python_implementations=[ "PyPy" # Skip these tests under 
PyPy since they require numpy, pandas, and pyarrow and @@ -1248,6 +1262,18 @@ def __hash__(self): "pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_object", "pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_prefix", "pyspark.pandas.tests.connect.reshape.test_parity_merge_asof", + "pyspark.pandas.tests.connect.indexes.test_parity_append", + "pyspark.pandas.tests.connect.indexes.test_parity_intersection", + "pyspark.pandas.tests.connect.indexes.test_parity_monotonic", + "pyspark.pandas.tests.connect.indexes.test_parity_union", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_floor", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_iso", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_map", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_property", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_round", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_shift", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_transform", # fallback "pyspark.pandas.tests.connect.frame.test_parity_asfreq", "pyspark.pandas.tests.connect.frame.test_parity_asof", @@ -1273,7 +1299,6 @@ def __hash__(self): "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx", "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx_adv", "pyspark.pandas.tests.connect.computation.test_parity_stats", - "pyspark.pandas.tests.connect.computation.test_parity_missing_data", "pyspark.pandas.tests.connect.frame.test_parity_interpolate", "pyspark.pandas.tests.connect.frame.test_parity_interpolate_error", "pyspark.pandas.tests.connect.resample.test_parity_frame", @@ -1346,24 +1371,6 @@ def __hash__(self): "pyspark.pandas.tests.connect.io.test_parity_dataframe_conversion", "pyspark.pandas.tests.connect.io.test_parity_dataframe_spark_io", "pyspark.pandas.tests.connect.io.test_parity_series_conversion", - "pyspark.pandas.tests.connect.groupby.test_parity_stat", - "pyspark.pandas.tests.connect.groupby.test_parity_stat_adv", - "pyspark.pandas.tests.connect.groupby.test_parity_stat_ddof", - "pyspark.pandas.tests.connect.groupby.test_parity_stat_func", - "pyspark.pandas.tests.connect.groupby.test_parity_stat_prod", - "pyspark.pandas.tests.connect.indexes.test_parity_append", - "pyspark.pandas.tests.connect.indexes.test_parity_intersection", - "pyspark.pandas.tests.connect.indexes.test_parity_monotonic", - "pyspark.pandas.tests.connect.indexes.test_parity_union", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_at", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_between", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_floor", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_iso", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_map", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_property", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_round", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext_float", @@ -1386,9 +1393,7 @@ def __hash__(self): "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_diff_len", 
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_fillna", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_filter", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_shift", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_split_apply_combine", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_transform", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_adv", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_count", diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh index 175f59a700941..563a7e1acab4f 100755 --- a/dev/test-dependencies.sh +++ b/dev/test-dependencies.sh @@ -31,7 +31,7 @@ export LC_ALL=C # NOTE: These should match those in the release publishing script, and be kept in sync with # dev/create-release/release-build.sh HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pkubernetes -Pyarn -Phive \ - -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud" + -Pspark-ganglia-lgpl -Pkinesis-asl -Phadoop-cloud -Pjvm-profiler" MVN="build/mvn" HADOOP_HIVE_PROFILES=( hadoop-3-hive-2.3 @@ -49,7 +49,7 @@ OLD_VERSION=$($MVN -q \ --non-recursive \ org.codehaus.mojo:exec-maven-plugin:1.6.0:exec | grep -E '[0-9]+\.[0-9]+\.[0-9]+') # dependency:get for guava and jetty-io are workaround for SPARK-37302. -GUAVA_VERSION=$(build/mvn help:evaluate -Dexpression=guava.version -q -DforceStdout | grep -E "^[0-9.]+$") +GUAVA_VERSION=$(build/mvn help:evaluate -Dexpression=guava.version -q -DforceStdout | grep -E "^[0-9\.]+") build/mvn dependency:get -Dartifact=com.google.guava:guava:${GUAVA_VERSION} -q JETTY_VERSION=$(build/mvn help:evaluate -Dexpression=jetty.version -q -DforceStdout | grep -E "[0-9]+\.[0-9]+\.[0-9]+") build/mvn dependency:get -Dartifact=org.eclipse.jetty:jetty-io:${JETTY_VERSION} -q @@ -140,4 +140,8 @@ for HADOOP_HIVE_PROFILE in "${HADOOP_HIVE_PROFILES[@]}"; do fi done +if [[ -d "$FWDIR/dev/pr-deps" ]]; then + rm -rf "$FWDIR/dev/pr-deps" +fi + exit 0 diff --git a/docs/Gemfile.lock b/docs/Gemfile.lock index 4e38f18703f3c..e137f0f039b97 100644 --- a/docs/Gemfile.lock +++ b/docs/Gemfile.lock @@ -4,16 +4,16 @@ GEM addressable (2.8.6) public_suffix (>= 2.0.2, < 6.0) colorator (1.1.0) - concurrent-ruby (1.2.2) + concurrent-ruby (1.2.3) em-websocket (0.5.3) eventmachine (>= 0.12.9) http_parser.rb (~> 0) eventmachine (1.2.7) ffi (1.16.3) forwardable-extended (2.6.0) - google-protobuf (3.25.2) + google-protobuf (3.25.3) http_parser.rb (0.8.0) - i18n (1.14.1) + i18n (1.14.5) concurrent-ruby (~> 1.0) jekyll (4.3.3) addressable (~> 2.4) @@ -42,22 +42,22 @@ GEM kramdown-parser-gfm (1.1.0) kramdown (~> 2.0) liquid (4.0.4) - listen (3.8.0) + listen (3.9.0) rb-fsevent (~> 0.10, >= 0.10.3) rb-inotify (~> 0.9, >= 0.9.10) mercenary (0.4.0) pathutil (0.16.2) forwardable-extended (~> 2.6) - public_suffix (5.0.4) - rake (13.1.0) + public_suffix (5.0.5) + rake (13.2.1) rb-fsevent (0.11.2) rb-inotify (0.10.1) ffi (~> 1.0) rexml (3.2.6) rouge (3.30.0) safe_yaml (1.0.5) - sass-embedded (1.69.7) - google-protobuf (~> 3.25) + sass-embedded (1.63.6) + google-protobuf (~> 3.23) rake (>= 13.0.0) terminal-table (3.0.2) unicode-display_width (>= 1.1.1, < 3) diff --git a/docs/README.md b/docs/README.md index fac9010d86922..363f1c2076363 100644 --- a/docs/README.md +++ b/docs/README.md @@ -36,27 +36,25 @@ You need to have [Ruby 3][ruby] and [Python 3][python] installed. 
Make sure the [python]: https://www.python.org/downloads/ ```sh -$ gem install bundler +$ gem install bundler -v 2.4.22 ``` -After this all the required ruby dependencies can be installed from the `docs/` directory via the Bundler: +After this all the required Ruby dependencies can be installed from the `docs/` directory via Bundler: ```sh -$ cd docs +$ cd "$SPARK_HOME"/docs $ bundle install ``` -To generate the Python or R docs, you'll need to [install Pandoc](https://pandoc.org/installing.html). - -### SQL and Python API Documentation (Optional) - -To generate SQL and Python API docs, you'll need to install these libraries: +And the required Python dependencies can be installed using pip: -Run the following command from $SPARK_HOME: ```sh +$ cd "$SPARK_HOME" $ pip install --upgrade -r dev/requirements.txt ``` +To generate the Python or R API docs, you'll also need to [install Pandoc](https://pandoc.org/installing.html). + ### R API Documentation (Optional) If you'd like to generate R API documentation, install these libraries: @@ -121,6 +119,10 @@ The jekyll plugin also generates the PySpark docs using [Sphinx](http://sphinx-d using [roxygen2](https://cran.r-project.org/web/packages/roxygen2/index.html) and SQL docs using [MkDocs](https://www.mkdocs.org/). -NOTE: To skip the step of building and copying over the Scala, Java, Python, R and SQL API docs, run `SKIP_API=1 -bundle exec jekyll build`. In addition, `SKIP_SCALADOC=1`, `SKIP_PYTHONDOC=1`, `SKIP_RDOC=1` and `SKIP_SQLDOC=1` can be used -to skip a single step of the corresponding language. `SKIP_SCALADOC` indicates skipping both the Scala and Java docs. +To control what API docs get built, you can set any combination of the following shell variables before you run `bundle exec jekyll build`: +* `SKIP_API=1`: Skip building all the API docs. +* `SKIP_SCALADOC=1`: Skip the Scala and Java API docs. +* `SKIP_PYTHONDOC=1`: Skip the Python API docs. +* `SKIP_RDOC=1`: Skip the R API docs. +* `SKIP_SQLDOC=1`: Skip the SQL API docs. 
+ diff --git a/docs/_config.yml b/docs/_config.yml index 19183f85df239..e74eda0470417 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -22,7 +22,7 @@ include: SPARK_VERSION: 4.0.0-SNAPSHOT SPARK_VERSION_SHORT: 4.0.0 SCALA_BINARY_VERSION: "2.13" -SCALA_VERSION: "2.13.13" +SCALA_VERSION: "2.13.14" SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark # Before a new release, we should: diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index 04792ebf576fa..01c8a8076958f 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -63,6 +63,8 @@ url: sql-performance-tuning.html#optimizing-the-join-strategy - text: Adaptive Query Execution url: sql-performance-tuning.html#adaptive-query-execution + - text: Storage Partition Join + url: sql-performance-tuning.html#storage-partition-join - text: Distributed SQL Engine url: sql-distributed-sql-engine.html subitems: @@ -85,6 +87,8 @@ url: sql-ref-datetime-pattern.html - text: Number Pattern url: sql-ref-number-pattern.html + - text: Operators + url: sql-ref-operators.html - text: Functions url: sql-ref-functions.html - text: Identifiers @@ -106,48 +110,3 @@ url: sql-ref-syntax.html#auxiliary-statements - text: Error Conditions url: sql-error-conditions.html - subitems: - - text: SQLSTATE Codes - url: sql-error-conditions-sqlstates.html - - text: COLLECTION_SIZE_LIMIT_EXCEEDED error class - url: sql-error-conditions-collection-size-limit-exceeded-error-class.html - - text: CONNECT error class - url: sql-error-conditions-connect-error-class.html - - text: DATATYPE_MISMATCH error class - url: sql-error-conditions-datatype-mismatch-error-class.html - - text: INCOMPATIBLE_DATA_FOR_TABLE error class - url: sql-error-conditions-incompatible-data-for-table-error-class.html - - text: INCOMPLETE_TYPE_DEFINITION error class - url: sql-error-conditions-incomplete-type-definition-error-class.html - - text: INCONSISTENT_BEHAVIOR_CROSS_VERSION error class - url: sql-error-conditions-inconsistent-behavior-cross-version-error-class.html - - text: INVALID_FORMAT error class - url: sql-error-conditions-invalid-format-error-class.html - - text: INVALID_OPTIONS error class - url: sql-error-conditions-invalid-options-error-class.html - - text: INVALID_PARAMETER_VALUE error class - url: sql-error-conditions-invalid-parameter-value-error-class.html - - text: INVALID_SCHEMA error class - url: sql-error-conditions-invalid-schema-error-class.html - - text: INVALID_SUBQUERY_EXPRESSION error class - url: sql-error-conditions-invalid-subquery-expression-error-class.html - - text: NOT_NULL_CONSTRAINT_VIOLATION error class - url: sql-error-conditions-not-null-constraint-violation-error-class.html - - text: UNRESOLVED_COLUMN error class - url: sql-error-conditions-unresolved-column-error-class.html - - text: UNRESOLVED_FIELD error class - url: sql-error-conditions-unresolved-field-error-class.html - - text: UNRESOLVED_MAP_KEY error class - url: sql-error-conditions-unresolved-map-key-error-class.html - - text: UNSUPPORTED_DESERIALIZER error class - url: sql-error-conditions-unsupported-deserializer-error-class.html - - text: UNSUPPORTED_FEATURE error class - url: sql-error-conditions-unsupported-feature-error-class.html - - text: UNSUPPORTED_GENERATOR error class - url: sql-error-conditions-unsupported-generator-error-class.html - - text: UNSUPPORTED_SAVE_MODE error class - url: sql-error-conditions-unsupported-save-mode-error-class.html - - text: 
UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY error class - url: sql-error-conditions-unsupported-subquery-expression-category-error-class.html - - text: WRONG_NUM_ARGS error class - url: sql-error-conditions-wrong-num-args-error-class.html diff --git a/docs/_plugins/build_api_docs.rb b/docs/_plugins/build_api_docs.rb index 5b52f3799cc4a..8d3ef86ac3d66 100644 --- a/docs/_plugins/build_api_docs.rb +++ b/docs/_plugins/build_api_docs.rb @@ -188,6 +188,14 @@ def build_sql_docs cp_r("../sql/site/.", "api/sql") end +def build_error_docs + print_header "Building error docs." + system("python '#{SPARK_PROJECT_ROOT}/docs/util/build-error-docs.py'") \ + || raise("Error doc generation failed") +end + +build_error_docs + if not (ENV['SKIP_API'] == '1') if not (ENV['SKIP_SCALADOC'] == '1') build_scala_and_java_docs diff --git a/docs/building-spark.md b/docs/building-spark.md index d10dfc9434fec..9ac61bfb8a64a 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -27,7 +27,7 @@ license: | ## Apache Maven The Maven-based build is the build of reference for Apache Spark. -Building Spark using Maven requires Maven 3.9.6 and Java 17/21. +Building Spark using Maven requires Maven 3.9.8 and Java 17/21. Spark requires Scala 2.13; support for Scala 2.12 was removed in Spark 4.0.0. ### Setting up Maven's Memory Usage @@ -85,9 +85,9 @@ Example: To enable Hive integration for Spark SQL along with its JDBC server and CLI, add the `-Phive` and `-Phive-thriftserver` profiles to your existing build options. -By default Spark will build with Hive 2.3.9. +By default Spark will build with Hive 2.3.10. - # With Hive 2.3.9 support + # With Hive 2.3.10 support ./build/mvn -Pyarn -Phive -Phive-thriftserver -DskipTests clean package ## Packaging without Hadoop Dependencies for YARN @@ -117,6 +117,13 @@ where `spark-streaming_{{site.SCALA_BINARY_VERSION}}` is the `artifactId` as def ./build/mvn -Pconnect -DskipTests clean package +## Building with JVM Profile support + + ./build/mvn -Pjvm-profiler -DskipTests clean package + +**Note:** The `jvm-profiler` profile builds the assembly without including the dependency `ap-loader`, +you can download it manually from maven central repo and use it together with `spark-profiler_{{site.SCALA_BINARY_VERSION}}`. + ## Continuous Compilation We use the scala-maven-plugin which supports incremental and continuous compilation. E.g. diff --git a/docs/configuration.md b/docs/configuration.md index d5e2a569fdeaf..6833d4e54fd03 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -91,7 +91,7 @@ Then, you can supply configuration values at runtime: ```sh ./bin/spark-submit \ --name "My app" \ - --master local[4] \ + --master "local[4]" \ --conf spark.eventLog.enabled=false \ --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" \ myApp.jar @@ -103,20 +103,25 @@ such as `--master`, as shown above. `spark-submit` can accept any Spark property flag, but uses special flags for properties that play a part in launching the Spark application. Running `./bin/spark-submit --help` will show the entire list of these options. -`bin/spark-submit` will also read configuration options from `conf/spark-defaults.conf`, in which -each line consists of a key and a value separated by whitespace. For example: +When configurations are specified via the `--conf/-c` flags, `bin/spark-submit` will also read +configuration options from `conf/spark-defaults.conf`, in which each line consists of a key and +a value separated by whitespace. 
For example: spark.master spark://5.6.7.8:7077 spark.executor.memory 4g spark.eventLog.enabled true spark.serializer org.apache.spark.serializer.KryoSerializer +In addition, a property file with Spark configurations can be passed to `bin/spark-submit` via +`--properties-file` parameter. When this is set, Spark will no longer load configurations from +`conf/spark-defaults.conf` unless another parameter `--load-spark-defaults` is provided. + Any values specified as flags or in the properties file will be passed on to the application and merged with those specified through SparkConf. Properties set directly on the SparkConf -take highest precedence, then flags passed to `spark-submit` or `spark-shell`, then options -in the `spark-defaults.conf` file. A few configuration keys have been renamed since earlier -versions of Spark; in such cases, the older key names are still accepted, but take lower -precedence than any instance of the newer key. +take the highest precedence, then those through `--conf` flags or `--properties-file` passed to +`spark-submit` or `spark-shell`, then options in the `spark-defaults.conf` file. A few +configuration keys have been renamed since earlier versions of Spark; in such cases, the older +key names are still accepted, but take lower precedence than any instance of the newer key. Spark properties mainly can be divided into two kinds: one is related to deploy, like "spark.driver.memory", "spark.executor.instances", this kind of properties may not be affected when @@ -1028,11 +1033,19 @@ Apart from these, the following properties are also available, and may be useful spark.shuffle.unsafe.file.output.buffer 32k - The file system for this buffer size after each partition is written in unsafe shuffle writer. - In KiB unless otherwise specified. + Deprecated since Spark 4.0, please use spark.shuffle.localDisk.file.output.buffer. 2.3.0 + + spark.shuffle.localDisk.file.output.buffer + 32k + + The file system for this buffer size after each partition is written in all local disk shuffle writers. + In KiB unless otherwise specified. + + 4.0.0 + spark.shuffle.spill.diskWriteBufferSize 1024 * 1024 @@ -1795,6 +1808,15 @@ Apart from these, the following properties are also available, and may be useful 0.6.0 + + spark.checkpoint.dir + (none) + + Set the default directory for checkpointing. It can be overwritten by + SparkContext.setCheckpointDir. + + 4.0.0 + spark.checkpoint.compress false @@ -1881,6 +1903,14 @@ Apart from these, the following properties are also available, and may be useful 4.0.0 + + spark.io.compression.lzf.parallel.enabled + false + + When true, LZF compression will use multiple threads to compress data in parallel. + + 4.0.0 + spark.kryo.classesToRegister (none) @@ -3072,7 +3102,7 @@ Apart from these, the following properties are also available, and may be useful spark.stage.ignoreDecommissionFetchFailure - false + true Whether ignore stage fetch failure caused by executor decommission when count spark.stage.maxConsecutiveAttempts @@ -3387,7 +3417,7 @@ Spark subsystems. Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be set with initial values by the config file and command-line options with `--conf/-c` prefixed, or by setting `SparkConf` that are used to create `SparkSession`. 
-Also, they can be set and queried by SET commands and rest to their initial values by RESET command, +Also, they can be set and queried by SET commands and reset to their initial values by RESET command, or by `SparkSession.conf`'s setter and getter methods in runtime. {% include_api_gen generated-runtime-sql-config-table.html %} @@ -3670,14 +3700,36 @@ Note: When running Spark on YARN in `cluster` mode, environment variables need t # Configuring Logging Spark uses [log4j](http://logging.apache.org/log4j/) for logging. You can configure it by adding a -`log4j2.properties` file in the `conf` directory. One way to start is to copy the existing -`log4j2.properties.template` located there. +`log4j2.properties` file in the `conf` directory. One way to start is to copy the existing templates `log4j2.properties.template` or `log4j2.properties.pattern-layout-template` located there. + +## Structured Logging +Starting from version 4.0.0, `spark-submit` has adopted the [JSON Template Layout](https://logging.apache.org/log4j/2.x/manual/json-template-layout.html) for logging, which outputs logs in JSON format. This format facilitates querying logs using Spark SQL with the JSON data source. Additionally, the logs include all Mapped Diagnostic Context (MDC) information for search and debugging purposes. + +To configure the layout of structured logging, start with the `log4j2.properties.template` file. + +To query Spark logs using Spark SQL, you can use the following Python code snippet: + +```python +from pyspark.util import LogUtils + +logDf = spark.read.schema(LogUtils.LOG_SCHEMA).json("path/to/logs") +``` + +Or using the following Scala code snippet: +```scala +import org.apache.spark.util.LogUtils.LOG_SCHEMA + +val logDf = spark.read.schema(LOG_SCHEMA).json("path/to/logs") +``` + +## Plain Text Logging +If you prefer plain text logging, you have two options: +- Disable structured JSON logging by setting the Spark configuration `spark.log.structuredLogging.enabled` to `false`. +- Use a custom log4j configuration file. Rename `conf/log4j2.properties.pattern-layout-template` to `conf/log4j2.properties`. This reverts to the default configuration prior to Spark 4.0, which utilizes [PatternLayout](https://logging.apache.org/log4j/2.x/manual/layouts.html#PatternLayout) for logging all messages in plain text. -By default, Spark adds 1 record to the MDC (Mapped Diagnostic Context): `mdc.taskName`, which shows something -like `task 1.0 in stage 0.0`. You can add `%X{mdc.taskName}` to your patternLayout in -order to print it in the logs. +MDC information is not included by default when with plain text logging. In order to print it in the logs, you can update the patternLayout in the file. For example, you can add `%X{task_name}` to print the task name in the logs. Moreover, you can use `spark.sparkContext.setLocalProperty(s"mdc.$name", "value")` to add user specific data into MDC. -The key in MDC will be the string of "mdc.$name". +The key in MDC will be the string of `mdc.$name`. 
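For instance, a minimal PySpark sketch of attaching custom data to the MDC from the driver side might look like the following (the property name `mdc.appTag` and its value are purely illustrative placeholders, not part of any Spark API):

```python
# Minimal sketch: attach user-specific MDC data from the driver thread.
# Spark only prescribes the "mdc." prefix; the suffix "appTag" and the value
# below are illustrative assumptions.
from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("mdc-example").getOrCreate()
spark.sparkContext.setLocalProperty("mdc.appTag", "nightly-etl")

# Jobs triggered from this thread now carry the custom MDC entry in their logs.
spark.range(10).count()
```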
# Overriding configuration directory @@ -3728,7 +3780,7 @@ Also, you can modify or add configurations at runtime: {% highlight bash %} ./bin/spark-submit \ --name "My app" \ - --master local[4] \ + --master "local[4]" \ --conf spark.eventLog.enabled=false \ --conf "spark.executor.extraJavaOptions=-XX:+PrintGCDetails -XX:+PrintGCTimeStamps" \ --conf spark.hadoop.abc.def=xyz \ diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 5f3560883e593..26b0ff32cf5d9 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -42,9 +42,15 @@ license: | - Since Spark 4.0, Spark uses the external shuffle service for deleting shuffle blocks for deallocated executors when the shuffle is no longer needed. To restore the legacy behavior, you can set `spark.shuffle.service.removeShuffle` to `false`. -- Since Spark 4.0, the default log4j output has shifted from plain text to JSON lines to enhance analyzability. To revert to plain text output, you can either set `spark.log.structuredLogging.enabled` to `false`, or use a custom log4j configuration. +- Starting with Spark 4.0, the default logging format for `spark-submit` has changed from plain text to JSON lines to improve log analysis. If you prefer plain text logs, you have two options: + - Set the Spark configuration `spark.log.structuredLogging.enabled` to `false`. + - Use a custom log4j configuration file, such as renaming the template file `conf/log4j2.properties.pattern-layout-template` to `conf/log4j2.properties`. -- Since Spark 4.0, Spark performs speculative executions less agressively with `spark.speculation.multiplier=3` and `spark.speculation.quantile=0.9`. To restore the legacy behavior, you can set `spark.speculation.multiplier=1.5` and `spark.speculation.quantile=0.75`. +- Since Spark 4.0, the MDC (Mapped Diagnostic Context) key for Spark task names in Spark logs has been changed from `mdc.taskName` to `task_name`. To use the key `mdc.taskName`, you can set `spark.log.legacyTaskNameMdc.enabled` to `true`. + +- Since Spark 4.0, Spark performs speculative executions less aggressively with `spark.speculation.multiplier=3` and `spark.speculation.quantile=0.9`. To restore the legacy behavior, you can set `spark.speculation.multiplier=1.5` and `spark.speculation.quantile=0.75`. + +- Since Spark 4.0, `spark.shuffle.unsafe.file.output.buffer` is deprecated though still works. Use `spark.shuffle.localDisk.file.output.buffer` instead. ## Upgrading from Core 3.4 to 3.5 diff --git a/docs/css/custom.css b/docs/css/custom.css index 22175068023b7..9edb466606555 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -988,3 +988,26 @@ table.spark-config th:nth-child(4), table.spark-config td:nth-child(4) { width: 90px; } + +table#error-conditions { + table-layout: fixed; + + span.error-condition-name { + /* Any error names that wrap will have the wrapped lines indented + relative to the first line thanks to these three rules. + */ + padding-left: 2em; + text-indent: -2em; + display: block; + } + + th:nth-child(1), + td:nth-child(1) { + /* width: 85px; */ + width: 105px; + } + + td.error-sub-condition { + padding-left: 2.5em; + } +} diff --git a/docs/monitoring.md b/docs/monitoring.md index 5e11d5aef81eb..d04fb35cf7275 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -1301,6 +1301,17 @@ These metrics are exposed by Spark executors. 
- shuffleRemoteBytesReadToDisk.count - shuffleTotalBytesRead.count - shuffleWriteTime.count + - Metrics related to push-based shuffle: + - shuffleCorruptMergedBlockChunks + - shuffleMergedFetchFallbackCount + - shuffleMergedRemoteBlocksFetched + - shuffleMergedLocalBlocksFetched + - shuffleMergedRemoteChunksFetched + - shuffleMergedLocalChunksFetched + - shuffleMergedRemoteBytesRead + - shuffleMergedLocalBytesRead + - shuffleRemoteReqsDuration + - shuffleMergedRemoteReqsDuration - succeededTasks.count - threadpool.activeTasks - threadpool.completeTasks @@ -1424,12 +1435,14 @@ Note: applies to the shuffle service - blockTransferMessageRate (meter) - rate of block transfer messages, i.e. if batch fetches are enabled, this represents number of batches rather than number of blocks - blockTransferRateBytes (meter) -- blockTransferAvgTime_1min (gauge - 1-minute moving average) +- blockTransferAvgSize_1min (gauge - 1-minute moving average) - numActiveConnections.count - numRegisteredConnections.count - numCaughtExceptions.count -- openBlockRequestLatencyMillis (histogram) -- registerExecutorRequestLatencyMillis (histogram) +- openBlockRequestLatencyMillis (timer) +- registerExecutorRequestLatencyMillis (timer) +- fetchMergedBlocksMetaLatencyMillis (timer) +- finalizeShuffleMergeLatencyMillis (timer) - registeredExecutorsSize - shuffle-server.usedDirectMemory - shuffle-server.usedHeapMemory diff --git a/docs/quick-start.md b/docs/quick-start.md index 366970cf66c71..5a03af98cd832 100644 --- a/docs/quick-start.md +++ b/docs/quick-start.md @@ -286,7 +286,7 @@ We can run this application using the `bin/spark-submit` script: {% highlight bash %} # Use spark-submit to run your application $ YOUR_SPARK_HOME/bin/spark-submit \ - --master local[4] \ + --master "local[4]" \ SimpleApp.py ... Lines with a: 46, Lines with b: 23 @@ -371,7 +371,7 @@ $ sbt package # Use spark-submit to run your application $ YOUR_SPARK_HOME/bin/spark-submit \ --class "SimpleApp" \ - --master local[4] \ + --master "local[4]" \ target/scala-{{site.SCALA_BINARY_VERSION}}/simple-project_{{site.SCALA_BINARY_VERSION}}-1.0.jar ... Lines with a: 46, Lines with b: 23 @@ -452,7 +452,7 @@ $ mvn package # Use spark-submit to run your application $ YOUR_SPARK_HOME/bin/spark-submit \ --class "SimpleApp" \ - --master local[4] \ + --master "local[4]" \ target/simple-project-1.0.jar ... Lines with a: 46, Lines with b: 23 diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index f75bda0ffafb0..cbbce4c082060 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -214,13 +214,13 @@ can be passed to the `--repositories` argument. For example, to run `bin/pyspark` on exactly four cores, use: {% highlight bash %} -$ ./bin/pyspark --master local[4] +$ ./bin/pyspark --master "local[4]" {% endhighlight %} Or, to also add `code.py` to the search path (in order to later be able to `import code`), use: {% highlight bash %} -$ ./bin/pyspark --master local[4] --py-files code.py +$ ./bin/pyspark --master "local[4]" --py-files code.py {% endhighlight %} For a complete list of options, run `pyspark --help`. Behind the scenes, @@ -260,19 +260,19 @@ can be passed to the `--repositories` argument. 
For example, to run `bin/spark-s four cores, use: {% highlight bash %} -$ ./bin/spark-shell --master local[4] +$ ./bin/spark-shell --master "local[4]" {% endhighlight %} Or, to also add `code.jar` to its classpath, use: {% highlight bash %} -$ ./bin/spark-shell --master local[4] --jars code.jar +$ ./bin/spark-shell --master "local[4]" --jars code.jar {% endhighlight %} To include a dependency using Maven coordinates: {% highlight bash %} -$ ./bin/spark-shell --master local[4] --packages "org.example:example:0.1" +$ ./bin/spark-shell --master "local[4]" --packages "org.example:example:0.1" {% endhighlight %} For a complete list of options, run `spark-shell --help`. Behind the scenes, @@ -781,7 +781,7 @@ One of the harder things about Spark is understanding the scope and life cycle o #### Example -Consider the naive RDD element sum below, which may behave differently depending on whether execution is happening within the same JVM. A common example of this is when running Spark in `local` mode (`--master = local[n]`) versus deploying a Spark application to a cluster (e.g. via spark-submit to YARN): +Consider the naive RDD element sum below, which may behave differently depending on whether execution is happening within the same JVM. A common example of this is when running Spark in `local` mode (`--master = "local[n]"`) versus deploying a Spark application to a cluster (e.g. via spark-submit to YARN):
diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 778af5f0751a8..7619dd728a2e5 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -44,7 +44,7 @@ Cluster administrators should use [Pod Security Policies](https://kubernetes.io/ # Prerequisites -* A running Kubernetes cluster at version >= 1.26 with access configured to it using +* A running Kubernetes cluster at version >= 1.27 with access configured to it using [kubectl](https://kubernetes.io/docs/reference/kubectl/). If you do not already have a working Kubernetes cluster, you may set up a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). @@ -1939,10 +1939,10 @@ Install Apache YuniKorn: ```bash helm repo add yunikorn https://apache.github.io/yunikorn-release helm repo update -helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.5.0 --create-namespace --set embedAdmissionController=false +helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.5.1 --create-namespace --set embedAdmissionController=false ``` -The above steps will install YuniKorn v1.5.0 on an existing Kubernetes cluster. +The above steps will install YuniKorn v1.5.1 on an existing Kubernetes cluster. ##### Get started diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index aab8ee60a256c..700ddefabea47 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -33,6 +33,9 @@ Please see [Spark Security](security.html) and the specific security sections in # Launching Spark on YARN +Apache Hadoop does not support Java 17 as of 3.4.0, while Apache Spark requires at least Java 17 since 4.0.0, so a different JDK should be configured for Spark applications. +Please refer to [Configuring different JDKs for Spark Applications](#configuring-different-jdks-for-spark-applications) for details. + Ensure that `HADOOP_CONF_DIR` or `YARN_CONF_DIR` points to the directory which contains the (client side) configuration files for the Hadoop cluster. These configs are used to write to HDFS and connect to the YARN ResourceManager. The configuration contained in this directory will be distributed to the YARN cluster so that all @@ -1032,3 +1035,34 @@ and one should be configured with: spark.shuffle.service.name = spark_shuffle_y spark.shuffle.service.port = ``` + +# Configuring different JDKs for Spark Applications + +In some cases it may be desirable to run Spark applications with a JDK different from the one used by the YARN NodeManager; +this can be achieved by setting the `JAVA_HOME` environment variable for YARN containers and the `spark-submit` +process. + +Note that Spark assumes that all JVM processes running in one application use the same version of the JDK; otherwise, +you may encounter JDK serialization issues. + +To configure a Spark application to use a JDK which has been pre-installed on all nodes at `/opt/openjdk-17`: + + $ export JAVA_HOME=/opt/openjdk-17 + $ ./bin/spark-submit --class path.to.your.Class \ + --master yarn \ + --conf spark.yarn.appMasterEnv.JAVA_HOME=/opt/openjdk-17 \ + --conf spark.executorEnv.JAVA_HOME=/opt/openjdk-17 \ + [app options] + +Optionally, the user may want to avoid installing a different JDK on the YARN cluster nodes; in such a case, +it is also possible to distribute the JDK using YARN's Distributed Cache.
For example, to use Java 21 to run +a Spark application, prepare a JDK 21 tarball `openjdk-21.tar.gz` and untar it to `/opt` on the local node, +then submit a Spark application: + + $ export JAVA_HOME=/opt/openjdk-21 + $ ./bin/spark-submit --class path.to.your.Class \ + --master yarn \ + --archives path/to/openjdk-21.tar.gz \ + --conf spark.yarn.appMasterEnv.JAVA_HOME=./openjdk-21.tar.gz/openjdk-21 \ + --conf spark.executorEnv.JAVA_HOME=./openjdk-21.tar.gz/openjdk-21 \ + [app options] diff --git a/docs/security.md b/docs/security.md index 455935fcffca3..1b5dcb3836457 100644 --- a/docs/security.md +++ b/docs/security.md @@ -207,6 +207,15 @@ The following table describes the different options available for configuring th 2.2.0 + + spark.network.crypto.cipher + AES/CTR/NoPadding + + Cipher mode to use. Defaults to "AES/CTR/NoPadding" for backward compatibility, which is not authenticated. + It is recommended to use "AES/GCM/NoPadding", which is an authenticated encryption mode. + + 4.0.0 + spark.network.crypto.authEngineVersion 1 @@ -308,7 +317,7 @@ The following settings cover enabling encryption for data written to disk: ## Authentication and Authorization -Enabling authentication for the Web UIs is done using [javax servlet filters](https://docs.oracle.com/javaee/6/api/javax/servlet/Filter.html). +Enabling authentication for the Web UIs is done using [jakarta servlet filters](https://jakarta.ee/specifications/servlet/5.0/apidocs/jakarta/servlet/filter). You will need a filter that implements the authentication method you want to deploy. Spark does not provide any built-in authentication filters. diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 1eab3158e2e56..774c0bee31295 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -793,19 +793,11 @@ In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spa spark.deploy.recoveryDirectory "" The directory in which Spark will store recovery state, accessible from the Master's perspective. - Note that the directory should be clearly manualy if spark.deploy.recoveryMode, - spark.deploy.recoverySerializer, or spark.deploy.recoveryCompressionCodec is changed. + Note that the directory should be cleared manually if spark.deploy.recoveryMode + or spark.deploy.recoveryCompressionCodec is changed. 0.8.1 - - spark.deploy.recoverySerializer - JAVA - A serializer for writing/reading objects to/from persistence engines; JAVA (default) or KRYO. - Java serializer has been the default mode since Spark 0.8.1. - Kryo serializer is a new fast and compact mode from Spark 4.0.0. - 4.0.0 - spark.deploy.recoveryCompressionCodec (none) diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index d717899564299..3721f92d93266 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -225,6 +225,24 @@ write.stream( {% endhighlight %}
    +
    +{% highlight sql %} +CREATE TABLE t AS + SELECT NAMED_STRUCT('u', NAMED_STRUCT('member0', member0, 'member1', member1)) AS s + FROM VALUES (1, NULL), (NULL, 'a') tab(member0, member1); +DECLARE avro_schema STRING; +SET VARIABLE avro_schema = + '{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] }'; + +SELECT TO_AVRO(s, avro_schema) AS RESULT FROM t; + +SELECT FROM_AVRO(result, avro_schema, MAP()).u FROM ( + SELECT TO_AVRO(s, avro_schema) AS RESULT FROM t); + +DROP TEMPORARY VARIABLE avro_schema; +DROP TABLE t; +{% endhighlight %} +
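The same union round trip can also be sketched with the Python `to_avro`/`from_avro` functions. The snippet below is only illustrative: it assumes an active `SparkSession` named `spark` and the external `spark-avro` module on the classpath, and it mirrors the `VALUES` clause of the SQL example above.

```python
# Illustrative PySpark version of the SQL round trip above. The Avro union
# ["int", "string"] is represented on the Spark side as a struct with the
# fields member0 and member1.
from pyspark.sql.avro.functions import from_avro, to_avro
from pyspark.sql.functions import col, struct

avro_schema = (
    '{ "type": "record", "name": "struct", '
    '"fields": [{ "name": "u", "type": ["int", "string"] }] }'
)

df = spark.createDataFrame([(1, None), (None, "a")], "member0 int, member1 string")
s = struct(struct(col("member0"), col("member1")).alias("u"))

encoded = df.select(to_avro(s, avro_schema).alias("result"))
decoded = encoded.select(from_avro(col("result"), avro_schema).alias("s"))
decoded.select("s.u").show()
```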
    ## Data Source Option diff --git a/docs/sql-data-sources-hive-tables.md b/docs/sql-data-sources-hive-tables.md index b51cde53bd8fd..566dcb33a25d9 100644 --- a/docs/sql-data-sources-hive-tables.md +++ b/docs/sql-data-sources-hive-tables.md @@ -127,10 +127,10 @@ The following options can be used to configure the version of Hive that is used Property NameDefaultMeaningSince Version spark.sql.hive.metastore.version - 2.3.9 + 2.3.10 Version of the Hive metastore. Available - options are 2.0.0 through 2.3.9 and 3.0.0 through 3.1.3. + options are 2.0.0 through 2.3.10 and 3.0.0 through 3.1.3. 1.4.0 @@ -142,9 +142,9 @@ The following options can be used to configure the version of Hive that is used property can be one of four options:
    1. builtin
    2. - Use Hive 2.3.9, which is bundled with the Spark assembly when -Phive is + Use Hive 2.3.10, which is bundled with the Spark assembly when -Phive is enabled. When this option is chosen, spark.sql.hive.metastore.version must be - either 2.3.9 or not defined. + either 2.3.10 or not defined.
    3. maven
    4. Use Hive jars of specified version downloaded from Maven repositories. This configuration is not generally recommended for production deployments. diff --git a/docs/sql-data-sources-jdbc.md b/docs/sql-data-sources-jdbc.md index edd1a51f39322..9ffd96cd40ee5 100644 --- a/docs/sql-data-sources-jdbc.md +++ b/docs/sql-data-sources-jdbc.md @@ -845,7 +845,7 @@ as the activated JDBC Driver. Note that, different JDBC drivers, or different ve numeric, decimal DecimalType - Since PostgreSQL 15, 's' can be negative. If 's<0' it'll be adjusted to DecimalType(min(p-s, 38), 0); Otherwise, DecimalType(p, s), and if 'p>38', the fraction part will be truncated if exceeded. And if any value of this column have an actual precision greater 38 will fail with NUMERIC_VALUE_OUT_OF_RANGE.WITHOUT_SUGGESTION error +
      • Since PostgreSQL 15, 's' can be negative. If 's<0' it'll be adjusted to DecimalType(min(p-s, 38), 0); Otherwise, DecimalType(p, s)
• If 'p>38', the fraction part will be truncated if it exceeds the limit. And if any value of this column has an actual precision greater than 38, it will fail with the NUMERIC_VALUE_OUT_OF_RANGE.WITHOUT_SUGGESTION error.
• The special numeric values 'NaN', 'infinity' and '-infinity' are not supported
      character varying(n), varchar(n) @@ -1074,8 +1074,8 @@ the [PostgreSQL JDBC Driver](https://mvnrepository.com/artifact/org.postgresql/p TimestampType - timestamp - + timestamp with time zone + Before Spark 4.0, it was mapped as timestamp. Please refer to the migration guide for more information TimestampNTZType @@ -1335,3 +1335,873 @@ as the activated JDBC Driver. + +### Mapping Spark SQL Data Types to Oracle + +The below table describes the data type conversions from Spark SQL Data Types to Oracle data types, +when creating, altering, or writing data to an Oracle table using the built-in jdbc data source with +the Oracle JDBC as the activated JDBC Driver. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Spark SQL Data TypeOracle Data TypeRemarks
BooleanTypeNUMBER(1, 0)BooleanType maps to NUMBER(1, 0), as BOOLEAN was only introduced in Oracle Release 23c
      ByteTypeNUMBER(3)
      ShortTypeNUMBER(5)
      IntegerTypeNUMBER(10)
      LongTypeNUMBER(19)
      FloatTypeNUMBER(19, 4)
      DoubleTypeNUMBER(19, 4)
      DecimalType(p, s)NUMBER(p,s)
      DateTypeDATE
      TimestampTypeTIMESTAMP WITH LOCAL TIME ZONE
      TimestampNTZTypeTIMESTAMP
StringTypeVARCHAR2(255)For historical reasons, a string value has a maximum of 255 characters
      BinaryTypeBLOB
      CharType(n)CHAR(n)
      VarcharType(n)VARCHAR2(n)
      + +The Spark Catalyst data types below are not supported with suitable Oracle types. + +- DayTimeIntervalType +- YearMonthIntervalType +- CalendarIntervalType +- ArrayType +- MapType +- StructType +- UserDefinedType +- NullType +- ObjectType +- VariantType + +### Mapping Spark SQL Data Types from Microsoft SQL Server + +The below table describes the data type conversions from Microsoft SQL Server data types to Spark SQL Data Types, +when reading data from a Microsoft SQL Server table using the built-in jdbc data source with the mssql-jdbc +as the activated JDBC Driver. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      SQL Server Data TypeSpark SQL Data TypeRemarks
      bitBooleanType
      tinyintShortType
      smallintShortType
      intIntegerType
      bigintLongType
      float(p), realFloatType1 ≤ p ≤ 24
      float[(p)]DoubleType25 ≤ p ≤ 53
      double precisionDoubleType
      smallmoneyDecimalType(10, 4)
      moneyDecimalType(19, 4)
      decimal[(p[, s])], numeric[(p[, s])]DecimalType(p, s)
      dateDateType
      datetimeTimestampType(Default)preferTimestampNTZ=false or spark.sql.timestampType=TIMESTAMP_LTZ
      datetimeTimestampNTZTypepreferTimestampNTZ=true or spark.sql.timestampType=TIMESTAMP_NTZ
      datetime2 [ (fractional seconds precision) ]TimestampType(Default)preferTimestampNTZ=false or spark.sql.timestampType=TIMESTAMP_LTZ
      datetime2 [ (fractional seconds precision) ]TimestampNTZTypepreferTimestampNTZ=true or spark.sql.timestampType=TIMESTAMP_NTZ
      datetimeoffset [ (fractional seconds precision) ]TimestampType
      smalldatetimeTimestampType(Default)preferTimestampNTZ=false or spark.sql.timestampType=TIMESTAMP_LTZ
      smalldatetimeTimestampNTZTypepreferTimestampNTZ=true or spark.sql.timestampType=TIMESTAMP_NTZ
      time [ (fractional second scale) ]TimestampType(Default)preferTimestampNTZ=false or spark.sql.timestampType=TIMESTAMP_LTZ
      time [ (fractional second scale) ]TimestampNTZTypepreferTimestampNTZ=true or spark.sql.timestampType=TIMESTAMP_NTZ
      binary [ ( n ) ]BinaryType
      varbinary [ ( n | max ) ]BinaryType
      char [ ( n ) ]CharType(n)
      varchar [ ( n | max ) ]VarcharType(n)
      nchar [ ( n ) ]StringType
      nvarchar [ ( n | max ) ]StringType
      textStringType
      ntextStringType
      imageStringType
      geographyBinaryType
      geometryBinaryType
      rowversionBinaryType
      sql_variantUNRECOGNIZED_SQL_TYPE error raised
      + +### Mapping Spark SQL Data Types to Microsoft SQL Server + +The below table describes the data type conversions from Spark SQL Data Types to Microsoft SQL Server data types, +when creating, altering, or writing data to a Microsoft SQL Server table using the built-in jdbc data source with +the mssql-jdbc as the activated JDBC Driver. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Spark SQL Data TypeSQL Server Data TypeRemarks
      BooleanTypebit
ByteTypesmallintSupported since Spark 4.0.0; previous versions throw errors
      ShortTypesmallint
      IntegerTypeint
      LongTypebigint
      FloatTypereal
      DoubleTypedouble precision
      DecimalType(p, s)number(p,s)
      DateTypedate
      TimestampTypedatetime
      TimestampNTZTypedatetime
      StringTypenvarchar(max)
      BinaryTypevarbinary(max)
      CharType(n)char(n)
      VarcharType(n)varchar(n)
      + +The Spark Catalyst data types below are not supported with suitable SQL Server types. + +- DayTimeIntervalType +- YearMonthIntervalType +- CalendarIntervalType +- ArrayType +- MapType +- StructType +- UserDefinedType +- NullType +- ObjectType +- VariantType + +### Mapping Spark SQL Data Types from DB2 + +The below table describes the data type conversions from DB2 data types to Spark SQL Data Types, +when reading data from a DB2 table using the built-in jdbc data source with the [IBM Data Server Driver For JDBC and SQLJ](https://mvnrepository.com/artifact/com.ibm.db2/jcc) +as the activated JDBC Driver. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      DB2 Data TypeSpark SQL Data TypeRemarks
      BOOLEANBinaryType
      SMALLINTShortType
      INTEGERIntegerType
      BIGINTLongType
      REALFloatType
      DOUBLE, FLOATDoubleTypeFLOAT is double precision floating-point in db2
      DECIMAL, NUMERIC, DECFLOATDecimalType
      DATEDateType
      TIMESTAMP, TIMESTAMP WITHOUT TIME ZONETimestampType(Default)preferTimestampNTZ=false or spark.sql.timestampType=TIMESTAMP_LTZ
      TIMESTAMP, TIMESTAMP WITHOUT TIME ZONETimestampNTZTypepreferTimestampNTZ=true or spark.sql.timestampType=TIMESTAMP_NTZ
      TIMESTAMP WITH TIME ZONETimestampType
      TIMETimestampType(Default)preferTimestampNTZ=false or spark.sql.timestampType=TIMESTAMP_LTZ
      TIMETimestampNTZTypepreferTimestampNTZ=true or spark.sql.timestampType=TIMESTAMP_NTZ
      CHAR(n)CharType(n)
      VARCHAR(n)VarcharType(n)
      CHAR(n) FOR BIT DATABinaryType
      VARCHAR(n) FOR BIT DATABinaryType
      BINARY(n)BinaryType
      VARBINARY(n)BinaryType
      CLOB(n)StringType
      DBCLOB(n)StringType
      BLOB(n)BinaryType
      GRAPHIC(n)StringType
      VARGRAPHIC(n)StringType
      XMLStringType
      ROWIDStringType
      + +### Mapping Spark SQL Data Types to DB2 + +The below table describes the data type conversions from Spark SQL Data Types to DB2 data types, +when creating, altering, or writing data to a DB2 table using the built-in jdbc data source with +the [IBM Data Server Driver For JDBC and SQLJ](https://mvnrepository.com/artifact/com.ibm.db2/jcc) as the activated JDBC Driver. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Spark SQL Data TypeDB2 Data TypeRemarks
      BooleanTypeBOOLEAN
      ByteTypeSMALLINT
      ShortTypeSMALLINT
      IntegerTypeINTEGER
      LongTypeBIGINT
      FloatTypeREAL
      DoubleTypeDOUBLE PRECISION
      DecimalType(p, s)DECIMAL(p,s)The maximum value for 'p' is 31 in DB2, while it is 38 in Spark. It might fail when storing DecimalType(p>=32, s) to DB2
      DateTypeDATE
      TimestampTypeTIMESTAMP
      TimestampNTZTypeTIMESTAMP
      StringTypeCLOB
      BinaryTypeBLOB
      CharType(n)CHAR(n)The maximum value for 'n' is 255 in DB2, while it is unlimited in Spark.
      VarcharType(n)VARCHAR(n)The maximum value for 'n' is 255 in DB2, while it is unlimited in Spark.
      + +The Spark Catalyst data types below are not supported with suitable DB2 types. + +- DayTimeIntervalType +- YearMonthIntervalType +- CalendarIntervalType +- ArrayType +- MapType +- StructType +- UserDefinedType +- NullType +- ObjectType +- VariantType + +### Mapping Spark SQL Data Types from Teradata + +The below table describes the data type conversions from Teradata data types to Spark SQL Data Types, +when reading data from a Teradata table using the built-in jdbc data source with the [Teradata JDBC Driver](https://mvnrepository.com/artifact/com.teradata.jdbc/terajdbc) +as the activated JDBC Driver. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Teradata Data TypeSpark SQL Data TypeRemarks
      BYTEINTByteType
      SMALLINTShortType
      INTEGER, INTIntegerType
      BIGINTLongType
      REAL, DOUBLE PRECISION, FLOATDoubleType
      DECIMAL, NUMERIC, NUMBERDecimalType
      DATEDateType
      TIMESTAMP, TIMESTAMP WITH TIME ZONETimestampType(Default)preferTimestampNTZ=false or spark.sql.timestampType=TIMESTAMP_LTZ
      TIMESTAMP, TIMESTAMP WITH TIME ZONETimestampNTZTypepreferTimestampNTZ=true or spark.sql.timestampType=TIMESTAMP_NTZ
      TIME, TIME WITH TIME ZONETimestampType(Default)preferTimestampNTZ=false or spark.sql.timestampType=TIMESTAMP_LTZ
      TIME, TIME WITH TIME ZONETimestampNTZTypepreferTimestampNTZ=true or spark.sql.timestampType=TIMESTAMP_NTZ
      CHARACTER(n), CHAR(n), GRAPHIC(n)CharType(n)
      VARCHAR(n), VARGRAPHIC(n)VarcharType(n)
      BYTE(n), VARBYTE(n)BinaryType
      CLOBStringType
      BLOBBinaryType
      INTERVAL Data Types-The INTERVAL data types are unknown yet
      Period Data Types, ARRAY, UDT-Not Supported
      + +### Mapping Spark SQL Data Types to Teradata + +The below table describes the data type conversions from Spark SQL Data Types to Teradata data types, +when creating, altering, or writing data to a Teradata table using the built-in jdbc data source with +the [Teradata JDBC Driver](https://mvnrepository.com/artifact/com.teradata.jdbc/terajdbc) as the activated JDBC Driver. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
      Spark SQL Data TypeTeradata Data TypeRemarks
      BooleanTypeCHAR(1)
      ByteTypeBYTEINT
      ShortTypeSMALLINT
      IntegerTypeINTEGER
      LongTypeBIGINT
      FloatTypeREAL
      DoubleTypeDOUBLE PRECISION
      DecimalType(p, s)DECIMAL(p,s)
      DateTypeDATE
      TimestampTypeTIMESTAMP
      TimestampNTZTypeTIMESTAMP
      StringTypeVARCHAR(255)
      BinaryTypeBLOB
      CharType(n)CHAR(n)
      VarcharType(n)VARCHAR(n)
      + +The Spark Catalyst data types below are not supported with suitable Teradata types. + +- DayTimeIntervalType +- YearMonthIntervalType +- CalendarIntervalType +- ArrayType +- MapType +- StructType +- UserDefinedType +- NullType +- ObjectType +- VariantType diff --git a/docs/sql-data-sources-load-save-functions.md b/docs/sql-data-sources-load-save-functions.md index b42f6e84076d2..70105c22e583c 100644 --- a/docs/sql-data-sources-load-save-functions.md +++ b/docs/sql-data-sources-load-save-functions.md @@ -109,7 +109,7 @@ For example, you can control bloom filters and dictionary encodings for ORC data The following ORC example will create bloom filter and use dictionary encoding only for `favorite_color`. For Parquet, there exists `parquet.bloom.filter.enabled` and `parquet.enable.dictionary`, too. To find more detailed information about the extra ORC/Parquet options, -visit the official Apache [ORC](https://orc.apache.org/docs/spark-config.html) / [Parquet](https://github.com/apache/parquet-mr/tree/master/parquet-hadoop) websites. +visit the official Apache [ORC](https://orc.apache.org/docs/spark-config.html) / [Parquet](https://github.com/apache/parquet-java/tree/master/parquet-hadoop) websites. ORC data source: diff --git a/docs/sql-data-sources-parquet.md b/docs/sql-data-sources-parquet.md index f5c5ccd3b89a1..5a0ca595fabbc 100644 --- a/docs/sql-data-sources-parquet.md +++ b/docs/sql-data-sources-parquet.md @@ -350,7 +350,7 @@ Dataset df2 = spark.read().parquet("/path/to/table.parquet.encrypted"); #### KMS Client -The InMemoryKMS class is provided only for illustration and simple demonstration of Parquet encryption functionality. **It should not be used in a real deployment**. The master encryption keys must be kept and managed in a production-grade KMS system, deployed in user's organization. Rollout of Spark with Parquet encryption requires implementation of a client class for the KMS server. Parquet provides a plug-in [interface](https://github.com/apache/parquet-mr/blob/apache-parquet-1.13.1/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/KmsClient.java) for development of such classes, +The InMemoryKMS class is provided only for illustration and simple demonstration of Parquet encryption functionality. **It should not be used in a real deployment**. The master encryption keys must be kept and managed in a production-grade KMS system, deployed in user's organization. Rollout of Spark with Parquet encryption requires implementation of a client class for the KMS server. Parquet provides a plug-in [interface](https://github.com/apache/parquet-java/blob/apache-parquet-1.13.1/parquet-hadoop/src/main/java/org/apache/parquet/crypto/keytools/KmsClient.java) for development of such classes,
      {% highlight java %} @@ -371,9 +371,9 @@ public interface KmsClient {
      -An [example](https://github.com/apache/parquet-mr/blob/master/parquet-hadoop/src/test/java/org/apache/parquet/crypto/keytools/samples/VaultClient.java) of such class for an open source [KMS](https://www.vaultproject.io/api/secret/transit) can be found in the parquet-mr repository. The production KMS client should be designed in cooperation with organization's security administrators, and built by developers with an experience in access control management. Once such class is created, it can be passed to applications via the `parquet.encryption.kms.client.class` parameter and leveraged by general Spark users as shown in the encrypted dataframe write/read sample above. +An [example](https://github.com/apache/parquet-java/blob/master/parquet-hadoop/src/test/java/org/apache/parquet/crypto/keytools/samples/VaultClient.java) of such class for an open source [KMS](https://www.vaultproject.io/api/secret/transit) can be found in the parquet-java repository. The production KMS client should be designed in cooperation with organization's security administrators, and built by developers with an experience in access control management. Once such class is created, it can be passed to applications via the `parquet.encryption.kms.client.class` parameter and leveraged by general Spark users as shown in the encrypted dataframe write/read sample above. -Note: By default, Parquet implements a "double envelope encryption" mode, that minimizes the interaction of Spark executors with a KMS server. In this mode, the DEKs are encrypted with "key encryption keys" (KEKs, randomly generated by Parquet). The KEKs are encrypted with MEKs in KMS; the result and the KEK itself are cached in Spark executor memory. Users interested in regular envelope encryption, can switch to it by setting the `parquet.encryption.double.wrapping` parameter to `false`. For more details on Parquet encryption parameters, visit the parquet-hadoop configuration [page](https://github.com/apache/parquet-mr/blob/master/parquet-hadoop/README.md#class-propertiesdrivencryptofactory). +Note: By default, Parquet implements a "double envelope encryption" mode, that minimizes the interaction of Spark executors with a KMS server. In this mode, the DEKs are encrypted with "key encryption keys" (KEKs, randomly generated by Parquet). The KEKs are encrypted with MEKs in KMS; the result and the KEK itself are cached in Spark executor memory. Users interested in regular envelope encryption, can switch to it by setting the `parquet.encryption.double.wrapping` parameter to `false`. For more details on Parquet encryption parameters, visit the parquet-hadoop configuration [page](https://github.com/apache/parquet-java/blob/master/parquet-hadoop/README.md#class-propertiesdrivencryptofactory). ## Data Source Option diff --git a/docs/sql-error-conditions-as-of-join-error-class.md b/docs/sql-error-conditions-as-of-join-error-class.md deleted file mode 100644 index df122c22616e5..0000000000000 --- a/docs/sql-error-conditions-as-of-join-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: AS_OF_JOIN error class -displayTitle: AS_OF_JOIN error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42604](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid as-of join. - -This error class has the following derived error classes: - -## TOLERANCE_IS_NON_NEGATIVE - -The input argument `tolerance` must be non-negative. - -## TOLERANCE_IS_UNFOLDABLE - -The input argument `tolerance` must be a constant. - - diff --git a/docs/sql-error-conditions-cannot-create-data-source-table-error-class.md b/docs/sql-error-conditions-cannot-create-data-source-table-error-class.md deleted file mode 100644 index a5bc5c0dc094b..0000000000000 --- a/docs/sql-error-conditions-cannot-create-data-source-table-error-class.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -layout: global -title: CANNOT_CREATE_DATA_SOURCE_TABLE error class -displayTitle: CANNOT_CREATE_DATA_SOURCE_TABLE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42KDE](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to create data source table ``: - -This error class has the following derived error classes: - -## EXTERNAL_METADATA_UNSUPPORTED - -provider '``' does not support external metadata but a schema is provided. Please remove the schema when creating the table. - - diff --git a/docs/sql-error-conditions-cannot-load-state-store-error-class.md b/docs/sql-error-conditions-cannot-load-state-store-error-class.md deleted file mode 100644 index 1f44e5592eba8..0000000000000 --- a/docs/sql-error-conditions-cannot-load-state-store-error-class.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -layout: global -title: CANNOT_LOAD_STATE_STORE error class -displayTitle: CANNOT_LOAD_STATE_STORE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- See the License for the specific language governing permissions and - limitations under the License. ---- - - - -SQLSTATE: 58030 - -An error occurred during loading state. - -This error class has the following derived error classes: - -## CANNOT_READ_CHECKPOINT - -Cannot read RocksDB checkpoint metadata. Expected ``, but found ``. - -## CANNOT_READ_DELTA_FILE_KEY_SIZE - -Error reading delta file `` of ``: key size cannot be ``. - -## CANNOT_READ_DELTA_FILE_NOT_EXISTS - -Error reading delta file `` of ``: `` does not exist. - -## CANNOT_READ_SNAPSHOT_FILE_KEY_SIZE - -Error reading snapshot file `` of ``: key size cannot be ``. - -## CANNOT_READ_SNAPSHOT_FILE_VALUE_SIZE - -Error reading snapshot file `` of ``: value size cannot be ``. - -## CANNOT_READ_STREAMING_STATE_FILE - -Error reading streaming state file of `` does not exist. If the stream job is restarted with a new or updated state operation, please create a new checkpoint location or clear the existing checkpoint location. - -## UNCATEGORIZED - - - -## UNEXPECTED_FILE_SIZE - -Copied `` to ``, expected `` bytes, found `` bytes. - -## UNEXPECTED_VERSION - -Version cannot be `` because it is less than 0. - -## UNRELEASED_THREAD_ERROR - -``: RocksDB instance could not be acquired by `` for operationType=`` as it was not released by `` after `` ms. -Thread holding the lock has trace: `` - - diff --git a/docs/sql-error-conditions-cannot-update-field-error-class.md b/docs/sql-error-conditions-cannot-update-field-error-class.md deleted file mode 100644 index fe27ab90d149d..0000000000000 --- a/docs/sql-error-conditions-cannot-update-field-error-class.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -layout: global -title: CANNOT_UPDATE_FIELD error class -displayTitle: CANNOT_UPDATE_FIELD error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Cannot update `` field `` type: - -This error class has the following derived error classes: - -## ARRAY_TYPE - -Update the element by updating ``.element. - -## INTERVAL_TYPE - -Update an interval by updating its fields. - -## MAP_TYPE - -Update a map by updating ``.key or ``.value. - -## STRUCT_TYPE - -Update a struct by updating its fields. - -## USER_DEFINED_TYPE - -Update a UserDefinedType[``] by updating its fields. 
- - diff --git a/docs/sql-error-conditions-cannot-write-state-store-error-class.md b/docs/sql-error-conditions-cannot-write-state-store-error-class.md deleted file mode 100644 index 0bed5755bdb83..0000000000000 --- a/docs/sql-error-conditions-cannot-write-state-store-error-class.md +++ /dev/null @@ -1,37 +0,0 @@ ---- -layout: global -title: CANNOT_WRITE_STATE_STORE error class -displayTitle: CANNOT_WRITE_STATE_STORE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -SQLSTATE: 58030 - -Error writing state store files for provider ``. - -This error class has the following derived error classes: - -## CANNOT_COMMIT - -Cannot perform commit during state checkpoint. - - diff --git a/docs/sql-error-conditions-collection-size-limit-exceeded-error-class.md b/docs/sql-error-conditions-collection-size-limit-exceeded-error-class.md deleted file mode 100644 index 0d502245459e7..0000000000000 --- a/docs/sql-error-conditions-collection-size-limit-exceeded-error-class.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -layout: global -title: COLLECTION_SIZE_LIMIT_EXCEEDED error class -displayTitle: COLLECTION_SIZE_LIMIT_EXCEEDED error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 54000](sql-error-conditions-sqlstates.html#class-54-program-limit-exceeded) - -Can't create array with `` elements which exceeding the array size limit ``, - -This error class has the following derived error classes: - -## FUNCTION - -unsuccessful try to create arrays in the function ``. - -## INITIALIZE - -cannot initialize an array with specified parameters. - -## PARAMETER - -the value of parameter(s) `` in the function `` is invalid. 
- - diff --git a/docs/sql-error-conditions-complex-expression-unsupported-input-error-class.md b/docs/sql-error-conditions-complex-expression-unsupported-input-error-class.md deleted file mode 100644 index e73499ffabd51..0000000000000 --- a/docs/sql-error-conditions-complex-expression-unsupported-input-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: COMPLEX_EXPRESSION_UNSUPPORTED_INPUT error class -displayTitle: COMPLEX_EXPRESSION_UNSUPPORTED_INPUT error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot process input data types for the expression: ``. - -This error class has the following derived error classes: - -## MISMATCHED_TYPES - -All input types must be the same except nullable, containsNull, valueContainsNull flags, but found the input types ``. - -## NO_INPUTS - -The collection of input data types must not be empty. - - diff --git a/docs/sql-error-conditions-connect-error-class.md b/docs/sql-error-conditions-connect-error-class.md deleted file mode 100644 index c6d2057b09836..0000000000000 --- a/docs/sql-error-conditions-connect-error-class.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -layout: global -title: CONNECT error class -displayTitle: CONNECT error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -SQLSTATE: 56K00 - -Generic Spark Connect error. - -This error class has the following derived error classes: - -## INTERCEPTOR_CTOR_MISSING - -Cannot instantiate GRPC interceptor because `` is missing a default constructor without arguments. - -## INTERCEPTOR_RUNTIME_ERROR - -Error instantiating GRPC interceptor: `` - -## PLUGIN_CTOR_MISSING - -Cannot instantiate Spark Connect plugin because `` is missing a default constructor without arguments. - -## PLUGIN_RUNTIME_ERROR - -Error instantiating Spark Connect plugin: `` - -## SESSION_NOT_SAME - -Both Datasets must belong to the same SparkSession. 
- - diff --git a/docs/sql-error-conditions-create-view-column-arity-mismatch-error-class.md b/docs/sql-error-conditions-create-view-column-arity-mismatch-error-class.md deleted file mode 100644 index a11449954ee0a..0000000000000 --- a/docs/sql-error-conditions-create-view-column-arity-mismatch-error-class.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -layout: global -title: CREATE_VIEW_COLUMN_ARITY_MISMATCH error class -displayTitle: CREATE_VIEW_COLUMN_ARITY_MISMATCH error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 21S01](sql-error-conditions-sqlstates.html#class-21-cardinality-violation) - -Cannot create view ``, the reason is - -This error class has the following derived error classes: - -## NOT_ENOUGH_DATA_COLUMNS - -not enough data columns: -View columns: ``. -Data columns: ``. - -## TOO_MANY_DATA_COLUMNS - -too many data columns: -View columns: ``. -Data columns: ``. - - diff --git a/docs/sql-error-conditions-datatype-mismatch-error-class.md b/docs/sql-error-conditions-datatype-mismatch-error-class.md deleted file mode 100644 index 1d18836ac9e77..0000000000000 --- a/docs/sql-error-conditions-datatype-mismatch-error-class.md +++ /dev/null @@ -1,254 +0,0 @@ ---- -layout: global -title: DATATYPE_MISMATCH error class -displayTitle: DATATYPE_MISMATCH error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot resolve `` due to data type mismatch: - -This error class has the following derived error classes: - -## ARRAY_FUNCTION_DIFF_TYPES - -Input to `` should have been `` followed by a value with same element type, but it's [``, ``]. - -## BINARY_ARRAY_DIFF_TYPES - -Input to function `` should have been two `` with same element type, but it's [``, ``]. - -## BINARY_OP_DIFF_TYPES - -the left and right operands of the binary operator have incompatible types (`` and ``). - -## BINARY_OP_WRONG_TYPE - -the binary operator requires the input type ``, not ``. 
- -## BLOOM_FILTER_BINARY_OP_WRONG_TYPE - -The Bloom filter binary input to `` should be either a constant value or a scalar subquery expression, but it's ``. - -## BLOOM_FILTER_WRONG_TYPE - -Input to function `` should have been `` followed by value with ``, but it's [``]. - -## CANNOT_CONVERT_TO_JSON - -Unable to convert column `` of type `` to JSON. - -## CANNOT_DROP_ALL_FIELDS - -Cannot drop all fields in struct. - -## CAST_WITHOUT_SUGGESTION - -cannot cast `` to ``. - -## CAST_WITH_CONF_SUGGESTION - -cannot cast `` to `` with ANSI mode on. -If you have to cast `` to ``, you can set `` as ``. - -## CAST_WITH_FUNC_SUGGESTION - -cannot cast `` to ``. -To convert values from `` to ``, you can use the functions `` instead. - -## CREATE_MAP_KEY_DIFF_TYPES - -The given keys of function `` should all be the same type, but they are ``. - -## CREATE_MAP_VALUE_DIFF_TYPES - -The given values of function `` should all be the same type, but they are ``. - -## CREATE_NAMED_STRUCT_WITHOUT_FOLDABLE_STRING - -Only foldable `STRING` expressions are allowed to appear at odd position, but they are ``. - -## DATA_DIFF_TYPES - -Input to `` should all be the same type, but it's ``. - -## FILTER_NOT_BOOLEAN - -Filter expression `` of type `` is not a boolean. - -## HASH_MAP_TYPE - -Input to the function `` cannot contain elements of the "MAP" type. In Spark, same maps may have different hashcode, thus hash expressions are prohibited on "MAP" elements. To restore previous behavior set "spark.sql.legacy.allowHashOnMapType" to "true". - -## INPUT_SIZE_NOT_ONE - -Length of `` should be 1. - -## INVALID_ARG_VALUE - -The `` value must to be a `` literal of ``, but got ``. - -## INVALID_JSON_MAP_KEY_TYPE - -Input schema `` can only contain STRING as a key type for a MAP. - -## INVALID_JSON_SCHEMA - -Input schema `` must be a struct, an array or a map. - -## INVALID_MAP_KEY_TYPE - -The key of map cannot be/contain ``. - -## INVALID_ORDERING_TYPE - -The `` does not support ordering on type ``. - -## INVALID_ROW_LEVEL_OPERATION_ASSIGNMENTS - -`` - -## INVALID_XML_MAP_KEY_TYPE - -Input schema `` can only contain STRING as a key type for a MAP. - -## IN_SUBQUERY_DATA_TYPE_MISMATCH - -The data type of one or more elements in the left hand side of an IN subquery is not compatible with the data type of the output of the subquery. Mismatched columns: [``], left side: [``], right side: [``]. - -## IN_SUBQUERY_LENGTH_MISMATCH - -The number of columns in the left hand side of an IN subquery does not match the number of columns in the output of subquery. Left hand side columns(length: ``): [``], right hand side columns(length: ``): [``]. - -## MAP_CONCAT_DIFF_TYPES - -The `` should all be of type map, but it's ``. - -## MAP_FUNCTION_DIFF_TYPES - -Input to `` should have been `` followed by a value with same key type, but it's [``, ``]. - -## MAP_ZIP_WITH_DIFF_TYPES - -Input to the `` should have been two maps with compatible key types, but it's [``, ``]. - -## NON_FOLDABLE_INPUT - -the input `` should be a foldable `` expression; however, got ``. - -## NON_STRING_TYPE - -all arguments must be strings. - -## NULL_TYPE - -Null typed values cannot be used as arguments of ``. - -## PARAMETER_CONSTRAINT_VIOLATION - -The ``(``) must be `` the ``(``). - -## RANGE_FRAME_INVALID_TYPE - -The data type `` used in the order specification does not match the data type `` which is used in the range frame. 
- -## RANGE_FRAME_MULTI_ORDER - -A range window frame with value boundaries cannot be used in a window specification with multiple order by expressions: ``. - -## RANGE_FRAME_WITHOUT_ORDER - -A range window frame cannot be used in an unordered window specification. - -## SEQUENCE_WRONG_INPUT_TYPES - -`` uses the wrong parameter type. The parameter type must conform to: -1. The start and stop expressions must resolve to the same type. -2. If start and stop expressions resolve to the `` type, then the step expression must resolve to the `` type. -3. Otherwise, if start and stop expressions resolve to the `` type, then the step expression must resolve to the same type. - -## SPECIFIED_WINDOW_FRAME_DIFF_TYPES - -Window frame bounds `` and `` do not have the same type: `` <> ``. - -## SPECIFIED_WINDOW_FRAME_INVALID_BOUND - -Window frame upper bound `` does not follow the lower bound ``. - -## SPECIFIED_WINDOW_FRAME_UNACCEPTED_TYPE - -The data type of the `` bound `` does not match the expected data type ``. - -## SPECIFIED_WINDOW_FRAME_WITHOUT_FOLDABLE - -Window frame `` bound `` is not a literal. - -## SPECIFIED_WINDOW_FRAME_WRONG_COMPARISON - -The lower bound of a window frame must be `` to the upper bound. - -## STACK_COLUMN_DIFF_TYPES - -The data type of the column (``) do not have the same type: `` (``) <> `` (``). - -## TYPE_CHECK_FAILURE_WITH_HINT - -````. - -## UNEXPECTED_CLASS_TYPE - -class `` not found. - -## UNEXPECTED_INPUT_TYPE - -The `` parameter requires the `` type, however `` has the type ``. - -## UNEXPECTED_NULL - -The `` must not be null. - -## UNEXPECTED_RETURN_TYPE - -The `` requires return `` type, but the actual is `` type. - -## UNEXPECTED_STATIC_METHOD - -cannot find a static method `` that matches the argument types in ``. - -## UNSUPPORTED_INPUT_TYPE - -The input of `` can't be `` type data. - -## VALUE_OUT_OF_RANGE - -The `` must be between `` (current value = ``). - -## WRONG_NUM_ARG_TYPES - -The expression requires `` argument types but the actual number is ``. - -## WRONG_NUM_ENDPOINTS - -The number of endpoints must be >= 2 to construct intervals but the actual number is ``. - - diff --git a/docs/sql-error-conditions-duplicate-routine-parameter-assignment-error-class.md b/docs/sql-error-conditions-duplicate-routine-parameter-assignment-error-class.md deleted file mode 100644 index 288088e57e7c4..0000000000000 --- a/docs/sql-error-conditions-duplicate-routine-parameter-assignment-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT error class -displayTitle: DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---- - - - -[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Call to function `` is invalid because it includes multiple argument assignments to the same parameter name ``. - -This error class has the following derived error classes: - -## BOTH_POSITIONAL_AND_NAMED - -A positional argument and named argument both referred to the same parameter. Please remove the named argument referring to this parameter. - -## DOUBLE_NAMED_ARGUMENT_REFERENCE - -More than one named argument referred to the same parameter. Please assign a value only once. - - diff --git a/docs/sql-error-conditions-expect-table-not-view-error-class.md b/docs/sql-error-conditions-expect-table-not-view-error-class.md deleted file mode 100644 index 0ab99ce33fa83..0000000000000 --- a/docs/sql-error-conditions-expect-table-not-view-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: EXPECT_TABLE_NOT_VIEW error class -displayTitle: EXPECT_TABLE_NOT_VIEW error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -'``' expects a table but `` is a view. - -This error class has the following derived error classes: - -## NO_ALTERNATIVE - - - -## USE_ALTER_VIEW - -Please use ALTER VIEW instead. - - diff --git a/docs/sql-error-conditions-expect-view-not-table-error-class.md b/docs/sql-error-conditions-expect-view-not-table-error-class.md deleted file mode 100644 index 97a6f59e13f42..0000000000000 --- a/docs/sql-error-conditions-expect-view-not-table-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: EXPECT_VIEW_NOT_TABLE error class -displayTitle: EXPECT_VIEW_NOT_TABLE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The table `` does not support ``. 
- -This error class has the following derived error classes: - -## NO_ALTERNATIVE - - - -## USE_ALTER_TABLE - -Please use ALTER TABLE instead. - - diff --git a/docs/sql-error-conditions-failed-jdbc-error-class.md b/docs/sql-error-conditions-failed-jdbc-error-class.md deleted file mode 100644 index bc8464c188d7c..0000000000000 --- a/docs/sql-error-conditions-failed-jdbc-error-class.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -layout: global -title: FAILED_JDBC error class -displayTitle: FAILED_JDBC error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -SQLSTATE: HV000 - -Failed JDBC `` on the operation: - -This error class has the following derived error classes: - -## ALTER_TABLE - -Alter the table ``. - -## CREATE_INDEX - -Create the index `` in the `` table. - -## CREATE_NAMESPACE - -Create the namespace ``. - -## CREATE_NAMESPACE_COMMENT - -Create a comment on the namespace: ``. - -## CREATE_TABLE - -Create the table ``. - -## DROP_INDEX - -Drop the index `` in the `` table. - -## DROP_NAMESPACE - -Drop the namespace ``. - -## GET_TABLES - -Get tables from the namespace: ``. - -## LIST_NAMESPACES - -List namespaces. - -## NAMESPACE_EXISTS - -Check that the namespace `` exists. - -## REMOVE_NAMESPACE_COMMENT - -Remove a comment on the namespace: ``. - -## RENAME_TABLE - -Rename the table `` to ``. - -## TABLE_EXISTS - -Check that the table `` exists. - -## UNCLASSIFIED - -`` - - diff --git a/docs/sql-error-conditions-incompatible-data-for-table-error-class.md b/docs/sql-error-conditions-incompatible-data-for-table-error-class.md deleted file mode 100644 index 2f84dc90b6536..0000000000000 --- a/docs/sql-error-conditions-incompatible-data-for-table-error-class.md +++ /dev/null @@ -1,73 +0,0 @@ ---- -layout: global -title: INCOMPATIBLE_DATA_FOR_TABLE error class -displayTitle: INCOMPATIBLE_DATA_FOR_TABLE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---- - - - -SQLSTATE: KD000 - -Cannot write incompatible data for the table ``: - -This error class has the following derived error classes: - -## AMBIGUOUS_COLUMN_NAME - -Ambiguous column name in the input data ``. - -## CANNOT_FIND_DATA - -Cannot find data for the output column ``. - -## CANNOT_SAFELY_CAST - -Cannot safely cast `` `` to ``. - -## EXTRA_COLUMNS - -Cannot write extra columns ``. - -## EXTRA_STRUCT_FIELDS - -Cannot write extra fields `` to the struct ``. - -## NULLABLE_ARRAY_ELEMENTS - -Cannot write nullable elements to array of non-nulls: ``. - -## NULLABLE_COLUMN - -Cannot write nullable values to non-null column ``. - -## NULLABLE_MAP_VALUES - -Cannot write nullable values to map of non-nulls: ``. - -## STRUCT_MISSING_FIELDS - -Struct `` missing fields: ``. - -## UNEXPECTED_COLUMN_NAME - -Struct `` ``-th field name does not match (may be out of order): expected ``, found ``. - - diff --git a/docs/sql-error-conditions-incomplete-type-definition-error-class.md b/docs/sql-error-conditions-incomplete-type-definition-error-class.md deleted file mode 100644 index b84d4c37b7f03..0000000000000 --- a/docs/sql-error-conditions-incomplete-type-definition-error-class.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -layout: global -title: INCOMPLETE_TYPE_DEFINITION error class -displayTitle: INCOMPLETE_TYPE_DEFINITION error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42K01](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Incomplete complex type: - -This error class has the following derived error classes: - -## ARRAY - -The definition of "ARRAY" type is incomplete. You must provide an element type. For example: "ARRAY``". - -## MAP - -The definition of "MAP" type is incomplete. You must provide a key type and a value type. For example: "MAP". - -## STRUCT - -The definition of "STRUCT" type is incomplete. You must provide at least one field type. For example: "STRUCT". - - diff --git a/docs/sql-error-conditions-inconsistent-behavior-cross-version-error-class.md b/docs/sql-error-conditions-inconsistent-behavior-cross-version-error-class.md deleted file mode 100644 index 15027d5575c88..0000000000000 --- a/docs/sql-error-conditions-inconsistent-behavior-cross-version-error-class.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -layout: global -title: INCONSISTENT_BEHAVIOR_CROSS_VERSION error class -displayTitle: INCONSISTENT_BEHAVIOR_CROSS_VERSION error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. 
- The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42K0B](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -You may get a different result due to the upgrading to - -This error class has the following derived error classes: - -## DATETIME_PATTERN_RECOGNITION - -Spark >= 3.0: -Fail to recognize `` pattern in the DateTimeFormatter. -1) You can set `` to "LEGACY" to restore the behavior before Spark 3.0. -2) You can form a valid datetime pattern with the guide from '``/sql-ref-datetime-pattern.html'. - -## DATETIME_WEEK_BASED_PATTERN - -Spark >= 3.0: -All week-based patterns are unsupported since Spark 3.0, detected week-based character: ``. -Please use the SQL function EXTRACT instead. - -## PARSE_DATETIME_BY_NEW_PARSER - -Spark >= 3.0: -Fail to parse `` in the new parser. -You can set `` to "LEGACY" to restore the behavior before Spark 3.0, or set to "CORRECTED" and treat it as an invalid datetime string. - -## READ_ANCIENT_DATETIME - -Spark >= 3.0: reading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z from `` files can be ambiguous, as the files may be written by -Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar that is different from Spark 3.0+'s Proleptic Gregorian calendar. -See more details in SPARK-31404. -You can set the SQL config `` or the datasource option `
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 0A000 | feature not supported | INVALID_PANDAS_UDF_PLACEMENT, STAR_GROUP_BY_POS, UNSUPPORTED_ARROWTYPE, UNSUPPORTED_DATATYPE, UNSUPPORTED_DESERIALIZER, UNSUPPORTED_FEATURE, UNSUPPORTED_GENERATOR, UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY, UNSUPPORTED_TYPED_LITERAL |
-
-## Class `21`: cardinality violation
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 21000 | cardinality violation | SCALAR_SUBQUERY_TOO_MANY_ROWS |
-
-## Class `22`: data exception
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 22003 | numeric value out of range | ARITHMETIC_OVERFLOW, CAST_OVERFLOW, CAST_OVERFLOW_IN_TABLE_INSERT, DECIMAL_PRECISION_EXCEEDS_MAX_PRECISION, INVALID_INDEX_OF_ZERO, INCORRECT_RAMP_UP_RATE, INVALID_ARRAY_INDEX, INVALID_ARRAY_INDEX_IN_ELEMENT_AT, NUMERIC_OUT_OF_SUPPORTED_RANGE, NUMERIC_VALUE_OUT_OF_RANGE |
-| 22007 | invalid datetime format | CANNOT_PARSE_TIMESTAMP |
-| 22008 | datetime field overflow | DATETIME_OVERFLOW |
-| 2200E | null value in array target | NULL_MAP_KEY |
-| 22012 | division by zero | DIVIDE_BY_ZERO, INTERVAL_DIVIDED_BY_ZERO |
-| 22015 | interval field overflow | INTERVAL_ARITHMETIC_OVERFLOW |
-| 22018 | invalid character value for cast | CANNOT_PARSE_DECIMAL, CAST_INVALID_INPUT, CONVERSION_INVALID_INPUT |
-| 22023 | invalid parameter value | INVALID_FRACTION_OF_SECOND, INVALID_PARAMETER_VALUE, SECOND_FUNCTION_ARGUMENT_NOT_INTEGER |
-| 22032 | invalid JSON text | INVALID_JSON_ROOT_FIELD, INVALID_JSON_SCHEMA_MAP_TYPE |
-| 2203G | sql_json_item_cannot_be_cast_to_target_type | CANNOT_PARSE_JSON_FIELD |
-| 22546 | The value for a routine argument is not valid. | CANNOT_DECODE_URL |
-
-## Class `23`: integrity constraint violation
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 23505 | A violation of the constraint imposed by a unique index or a unique constraint occurred. | DUPLICATED_MAP_KEY, DUPLICATE_KEY |
-
-## Class `2B`: dependent privilege descriptors still exist
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 2BP01 | dependent_objects_still_exist | SCHEMA_NOT_EMPTY |
-
-## Class `38`: external routine exception
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 38000 | external routine exception | FAILED_FUNCTION_CALL |
-
-## Class `39`: external routine invocation exception
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 39000 | external routine invocation exception | FAILED_EXECUTE_UDF |
-
-## Class `42`: syntax error or access rule violation
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 42000 | syntax error or access rule violation | AMBIGUOUS_REFERENCE_TO_FIELDS, INVALID_COLUMN_OR_FIELD_DATA_TYPE, INVALID_EXTRACT_BASE_FIELD_TYPE, INVALID_EXTRACT_FIELD_TYPE, INVALID_FIELD_NAME, INVALID_SET_SYNTAX, INVALID_SQL_SYNTAX, NON_PARTITION_COLUMN, NOT_NULL_CONSTRAINT_VIOLATION, NULLABLE_COLUMN_OR_FIELD, NULLABLE_ROW_ID_ATTRIBUTES |
-| 42001 | Invalid encoder error | INVALID_EXPRESSION_ENCODER |
-| 42601 | A character, token, or clause is invalid or missing. | IDENTIFIER_TOO_MANY_NAME_PARTS, INVALID_EXTRACT_FIELD, INVALID_FORMAT, PARSE_SYNTAX_ERROR, UNCLOSED_BRACKETED_COMMENT |
-| 42602 | A character that is invalid in a name has been detected. | INVALID_IDENTIFIER, INVALID_PROPERTY_KEY, INVALID_PROPERTY_VALUE |
-| 42604 | An invalid numeric or string constant has been detected. | EMPTY_JSON_FIELD_VALUE, INVALID_TYPED_LITERAL |
-| 42605 | The number of arguments specified for a scalar function is invalid. | WRONG_NUM_ARGS |
-| 42607 | An operand of an aggregate function or CONCAT operator is invalid. | NESTED_AGGREGATE_FUNCTION |
-| 42613 | Clauses are mutually exclusive. | INCOMPATIBLE_JOIN_TYPES, INVALID_LATERAL_JOIN_TYPE, NON_LAST_MATCHED_CLAUSE_OMIT_CONDITION, NON_LAST_NOT_MATCHED_BY_SOURCE_CLAUSE_OMIT_CONDITION, NON_LAST_NOT_MATCHED_BY_TARGET_CLAUSE_OMIT_CONDITION |
-| 42614 | A duplicate keyword or clause is invalid. | REPEATED_CLAUSE |
-| 42617 | The statement string is blank or empty. | PARSE_EMPTY_STATEMENT |
-| 42702 | A column reference is ambiguous, because of duplicate names. | AMBIGUOUS_COLUMN_OR_FIELD, AMBIGUOUS_LATERAL_COLUMN_ALIAS |
-| 42703 | An undefined column or parameter name was detected. | COLUMN_NOT_FOUND, UNRESOLVED_COLUMN, UNRESOLVED_FIELD, UNRESOLVED_MAP_KEY, UNRESOLVED_USING_COLUMN_FOR_JOIN |
-| 42704 | An undefined object or constraint name was detected. | AMBIGUOUS_REFERENCE, DEFAULT_DATABASE_NOT_EXISTS, FIELD_NOT_FOUND, INDEX_NOT_FOUND, SCHEMA_NOT_FOUND, UNRECOGNIZED_SQL_TYPE |
-| 42710 | A duplicate object or constraint name was detected. | CREATE_TABLE_COLUMN_OPTION_DUPLICATE, INDEX_ALREADY_EXISTS, LOCATION_ALREADY_EXISTS |
-| 42711 | A duplicate column name was detected in the object definition or ALTER TABLE statement. | COLUMN_ALREADY_EXISTS |
-| 42723 | A routine with the same signature already exists in the schema, module, or compound block where it is defined. | ROUTINE_ALREADY_EXISTS |
-| 42803 | A column reference in the SELECT or HAVING clause is invalid, because it is not a grouping column; or a column reference in the GROUP BY clause is invalid. | GROUPING_COLUMN_MISMATCH, GROUPING_ID_COLUMN_MISMATCH, MISSING_AGGREGATION, MISSING_GROUP_BY, UNRESOLVED_ALL_IN_GROUP_BY |
-| 42805 | An integer in the ORDER BY clause does not identify a column of the result table. | GROUP_BY_POS_OUT_OF_RANGE, ORDER_BY_POS_OUT_OF_RANGE |
-| 42809 | The identified object is not the type of object to which the statement applies. | FORBIDDEN_OPERATION |
-| 42818 | The operands of an operator or function are not compatible or comparable. | INCOMPARABLE_PIVOT_COLUMN |
-| 42823 | Multiple columns are returned from a subquery that only allows one column. | INVALID_SUBQUERY_EXPRESSION |
-| 42825 | The rows of UNION, INTERSECT, EXCEPT, or VALUES do not have compatible columns. | CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE, INCOMPATIBLE_COLUMN_TYPE |
-| 42826 | The rows of UNION, INTERSECT, EXCEPT, or VALUES do not have the same number of columns. | NUM_COLUMNS_MISMATCH |
-| 42846 | Cast from source type to target type is not supported. | CANNOT_CAST_DATATYPE |
-| 42883 | No routine was found with a matching signature. | ROUTINE_NOT_FOUND, UNRESOLVED_ROUTINE |
-| 428C4 | The number of elements on each side of the predicate operator is not the same. | UNPIVOT_VALUE_SIZE_MISMATCH |
-| 428EK | The schema qualifier is not valid. | TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS |
-| 428FT | The partitioning clause specified on CREATE or ALTER is not valid. | PARTITIONS_ALREADY_EXIST, PARTITIONS_NOT_FOUND |
-| 42903 | Invalid use of an aggregate function or OLAP function. | GROUP_BY_AGGREGATE, GROUP_BY_POS_AGGREGATE, INVALID_WHERE_CONDITION |
-| 429BB | The data type of a column, parameter, or SQL variable is not supported. | CANNOT_RECOGNIZE_HIVE_TYPE |
-| 42K01 | data type not fully specified | DATATYPE_MISSING_SIZE, INCOMPLETE_TYPE_DEFINITION |
-| 42K02 | data source not found | DATA_SOURCE_NOT_FOUND |
-| 42K03 | File not found | PATH_NOT_FOUND, RENAME_SRC_PATH_NOT_FOUND |
-| 42K04 | Duplicate file | FAILED_RENAME_PATH, PATH_ALREADY_EXISTS |
-| 42K05 | Name is not valid | INVALID_EMPTY_LOCATION, REQUIRES_SINGLE_PART_NAMESPACE |
-| 42K06 | Invalid type for options | INVALID_OPTIONS |
-| 42K07 | Not a valid schema literal | INVALID_SCHEMA |
-| 42K08 | Not a constant | NON_LITERAL_PIVOT_VALUES |
-| 42K09 | Data type mismatch | DATATYPE_MISMATCH, PIVOT_VALUE_DATA_TYPE_MISMATCH, UNEXPECTED_INPUT_TYPE, UNPIVOT_VALUE_DATA_TYPE_MISMATCH |
-| 42K0A | Invalid UNPIVOT clause | UNPIVOT_REQUIRES_ATTRIBUTES, UNPIVOT_REQUIRES_VALUE_COLUMNS |
-| 42K0B | Legacy feature blocked | INCONSISTENT_BEHAVIOR_CROSS_VERSION |
-| 42KD9 | Cannot infer table schema. | UNABLE_TO_INFER_SCHEMA |
-| 42P01 | undefined_table | TABLE_OR_VIEW_NOT_FOUND, VIEW_NOT_FOUND |
-| 42P02 | undefined_parameter | UNBOUND_SQL_PARAMETER |
-| 42P06 | duplicate_schema | SCHEMA_ALREADY_EXISTS |
-| 42P07 | duplicate_table | TABLE_OR_VIEW_ALREADY_EXISTS, TEMP_TABLE_OR_VIEW_ALREADY_EXISTS, VIEW_ALREADY_EXISTS |
-| 42P20 | windowing_error | UNSUPPORTED_EXPR_FOR_WINDOW |
-
-## Class `46`: java ddl 1
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 46110 | unsupported feature | CANNOT_MODIFY_CONFIG |
-| 46121 | invalid column name | INVALID_COLUMN_NAME_AS_PATH |
-
-## Class `53`: insufficient resources
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 53200 | out_of_memory | UNABLE_TO_ACQUIRE_MEMORY |
-
-## Class `54`: program limit exceeded
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| 54000 | program limit exceeded | GROUPING_SIZE_LIMIT_EXCEEDED, TOO_MANY_ARRAY_ELEMENTS |
-
-## Class `HY`: CLI-specific condition
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| HY008 | operation canceled | OPERATION_CANCELED |
-
-## Class `XX`: internal error
-
-| SQLSTATE | Description | Issuing error classes |
-|----------|-------------|------------------------|
-| XX000 | internal error | INTERNAL_ERROR |
      diff --git a/docs/sql-error-conditions-stds-invalid-option-value-error-class.md b/docs/sql-error-conditions-stds-invalid-option-value-error-class.md deleted file mode 100644 index 7cc72417d752f..0000000000000 --- a/docs/sql-error-conditions-stds-invalid-option-value-error-class.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -layout: global -title: STDS_INVALID_OPTION_VALUE error class -displayTitle: STDS_INVALID_OPTION_VALUE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42616](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid value for source option '``': - -This error class has the following derived error classes: - -## IS_EMPTY - -cannot be empty. - -## IS_NEGATIVE - -cannot be negative. - -## WITH_MESSAGE - -`` - - diff --git a/docs/sql-error-conditions-invalid-partition-operation-error-class.md b/docs/sql-error-conditions-syntax-discontinued-error-class.md similarity index 71% rename from docs/sql-error-conditions-invalid-partition-operation-error-class.md rename to docs/sql-error-conditions-syntax-discontinued-error-class.md index 2f8a017803887..966e11004364e 100644 --- a/docs/sql-error-conditions-invalid-partition-operation-error-class.md +++ b/docs/sql-error-conditions-syntax-discontinued-error-class.md @@ -1,7 +1,7 @@ --- layout: global -title: INVALID_PARTITION_OPERATION error class -displayTitle: INVALID_PARTITION_OPERATION error class +title: SYNTAX_DISCONTINUED error class +displayTitle: SYNTAX_DISCONTINUED error class license: | Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with @@ -26,16 +26,14 @@ license: | [SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) -The partition command is invalid. +Support of the clause or keyword: `` has been discontinued in this context. This error class has the following derived error classes: -## PARTITION_MANAGEMENT_IS_UNSUPPORTED +## BANG_EQUALS_NOT -Table `` does not support partition management. - -## PARTITION_SCHEMA_IS_EMPTY - -Table `` is not partitioned. +The '!' keyword is supported as a prefix operator in a logical operation only. +Use the 'NOT' keyword instead for clauses such as `NOT LIKE`, `NOT IN`, `NOT BETWEEN`, etc. +To re-enable the '!' keyword, set "spark.sql.legacy.bangEqualsNot" to "true". 
diff --git a/docs/sql-error-conditions-unresolved-column-error-class.md b/docs/sql-error-conditions-unresolved-column-error-class.md deleted file mode 100644 index 89b1daf0128df..0000000000000 --- a/docs/sql-error-conditions-unresolved-column-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNRESOLVED_COLUMN error class -displayTitle: UNRESOLVED_COLUMN error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A column, variable, or function parameter with name `` cannot be resolved. - -This error class has the following derived error classes: - -## WITHOUT_SUGGESTION - - - -## WITH_SUGGESTION - -Did you mean one of the following? [``]. - - diff --git a/docs/sql-error-conditions-unresolved-field-error-class.md b/docs/sql-error-conditions-unresolved-field-error-class.md deleted file mode 100644 index 83f008139af43..0000000000000 --- a/docs/sql-error-conditions-unresolved-field-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNRESOLVED_FIELD error class -displayTitle: UNRESOLVED_FIELD error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A field with name `` cannot be resolved with the struct-type column ``. - -This error class has the following derived error classes: - -## WITHOUT_SUGGESTION - - - -## WITH_SUGGESTION - -Did you mean one of the following? [``]. 
- - diff --git a/docs/sql-error-conditions-unresolved-map-key-error-class.md b/docs/sql-error-conditions-unresolved-map-key-error-class.md deleted file mode 100644 index 9c0268240154a..0000000000000 --- a/docs/sql-error-conditions-unresolved-map-key-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNRESOLVED_MAP_KEY error class -displayTitle: UNRESOLVED_MAP_KEY error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot resolve column `` as a map key. If the key is a string literal, add the single quotes '' around it. - -This error class has the following derived error classes: - -## WITHOUT_SUGGESTION - - - -## WITH_SUGGESTION - -Otherwise did you mean one of the following column(s)? [``]. - - diff --git a/docs/sql-error-conditions-unsupported-add-file-error-class.md b/docs/sql-error-conditions-unsupported-add-file-error-class.md deleted file mode 100644 index 482d753fb53b0..0000000000000 --- a/docs/sql-error-conditions-unsupported-add-file-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_ADD_FILE error class -displayTitle: UNSUPPORTED_ADD_FILE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Don't support add file. - -This error class has the following derived error classes: - -## DIRECTORY - -The file `` is a directory, consider to set "spark.sql.legacy.addSingleFileInAddFile" to "false". - -## LOCAL_DIRECTORY - -The local directory `` is not supported in a non-local master mode. 
- - diff --git a/docs/sql-error-conditions-unsupported-default-value-error-class.md b/docs/sql-error-conditions-unsupported-default-value-error-class.md deleted file mode 100644 index c6ca78f606bd0..0000000000000 --- a/docs/sql-error-conditions-unsupported-default-value-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_DEFAULT_VALUE error class -displayTitle: UNSUPPORTED_DEFAULT_VALUE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -DEFAULT column values is not supported. - -This error class has the following derived error classes: - -## WITHOUT_SUGGESTION - - - -## WITH_SUGGESTION - -Enable it by setting "spark.sql.defaultColumn.enabled" to "true". - - diff --git a/docs/sql-error-conditions-unsupported-deserializer-error-class.md b/docs/sql-error-conditions-unsupported-deserializer-error-class.md deleted file mode 100644 index 11b58f9386d05..0000000000000 --- a/docs/sql-error-conditions-unsupported-deserializer-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_DESERIALIZER error class -displayTitle: UNSUPPORTED_DESERIALIZER error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The deserializer is not supported: - -This error class has the following derived error classes: - -## DATA_TYPE_MISMATCH - -need a(n) `` field but got ``. - -## FIELD_NUMBER_MISMATCH - -try to map `` to Tuple``, but failed as the number of fields does not line up. 
- - diff --git a/docs/sql-error-conditions-unsupported-feature-error-class.md b/docs/sql-error-conditions-unsupported-feature-error-class.md deleted file mode 100644 index f67d7caff63de..0000000000000 --- a/docs/sql-error-conditions-unsupported-feature-error-class.md +++ /dev/null @@ -1,229 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_FEATURE error class -displayTitle: UNSUPPORTED_FEATURE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The feature is not supported: - -This error class has the following derived error classes: - -## AES_MODE - -AES-`` with the padding `` by the `` function. - -## AES_MODE_AAD - -`` with AES-`` does not support additional authenticate data (AAD). - -## AES_MODE_IV - -`` with AES-`` does not support initialization vectors (IVs). - -## ANALYZE_UNCACHED_TEMP_VIEW - -The ANALYZE TABLE FOR COLUMNS command can operate on temporary views that have been cached already. Consider to cache the view ``. - -## ANALYZE_UNSUPPORTED_COLUMN_TYPE - -The ANALYZE TABLE FOR COLUMNS command does not support the type `` of the column `` in the table ``. - -## ANALYZE_VIEW - -The ANALYZE TABLE command does not support views. - -## CATALOG_OPERATION - -Catalog `` does not support ``. - -## COLLATION - -Collation is not yet supported. - -## COMBINATION_QUERY_RESULT_CLAUSES - -Combination of ORDER BY/SORT BY/DISTRIBUTE BY/CLUSTER BY. - -## COMMENT_NAMESPACE - -Attach a comment to the namespace ``. - -## DESC_TABLE_COLUMN_PARTITION - -DESC TABLE COLUMN for a specific partition. - -## DROP_DATABASE - -Drop the default database ``. - -## DROP_NAMESPACE - -Drop the namespace ``. - -## HIVE_TABLE_TYPE - -The `` is hive ``. - -## HIVE_WITH_ANSI_INTERVALS - -Hive table `` with ANSI intervals. - -## INSERT_PARTITION_SPEC_IF_NOT_EXISTS - -INSERT INTO `` with IF NOT EXISTS in the PARTITION spec. - -## LATERAL_COLUMN_ALIAS_IN_AGGREGATE_FUNC - -Referencing a lateral column alias `` in the aggregate function ``. - -## LATERAL_COLUMN_ALIAS_IN_AGGREGATE_WITH_WINDOW_AND_HAVING - -Referencing lateral column alias `` in the aggregate query both with window expressions and with having clause. Please rewrite the aggregate query by removing the having clause or removing lateral alias reference in the SELECT list. - -## LATERAL_COLUMN_ALIAS_IN_GROUP_BY - -Referencing a lateral column alias via GROUP BY alias/ALL is not supported yet. - -## LATERAL_COLUMN_ALIAS_IN_WINDOW - -Referencing a lateral column alias `` in window expression ``. - -## LATERAL_JOIN_USING - -JOIN USING with LATERAL correlation. - -## LITERAL_TYPE - -Literal for '``' of ``. - -## MULTIPLE_BUCKET_TRANSFORMS - -Multiple bucket TRANSFORMs. 
- -## MULTI_ACTION_ALTER - -The target JDBC server hosting table `` does not support ALTER TABLE with multiple actions. Split the ALTER TABLE up into individual actions to avoid this error. - -## ORC_TYPE_CAST - -Unable to convert `` of Orc to data type ``. - -## OVERWRITE_BY_SUBQUERY - -INSERT OVERWRITE with a subquery condition. - -## PANDAS_UDAF_IN_PIVOT - -Pandas user defined aggregate function in the PIVOT clause. - -## PARAMETER_MARKER_IN_UNEXPECTED_STATEMENT - -Parameter markers are not allowed in ``. - -## PARTITION_WITH_NESTED_COLUMN_IS_UNSUPPORTED - -Invalid partitioning: `` is missing or is in a map or array. - -## PIVOT_AFTER_GROUP_BY - -PIVOT clause following a GROUP BY clause. Consider pushing the GROUP BY into a subquery. - -## PIVOT_TYPE - -Pivoting by the value '``' of the column data type ``. - -## PURGE_PARTITION - -Partition purge. - -## PURGE_TABLE - -Purge table. - -## PYTHON_UDF_IN_ON_CLAUSE - -Python UDF in the ON clause of a `` JOIN. In case of an INNER JOIN consider rewriting to a CROSS JOIN with a WHERE clause. - -## REMOVE_NAMESPACE_COMMENT - -Remove a comment from the namespace ``. - -## REPLACE_NESTED_COLUMN - -The replace function does not support nested column ``. - -## SET_NAMESPACE_PROPERTY - -`` is a reserved namespace property, ``. - -## SET_OPERATION_ON_MAP_TYPE - -Cannot have MAP type columns in DataFrame which calls set operations (INTERSECT, EXCEPT, etc.), but the type of column `` is ``. - -## SET_PROPERTIES_AND_DBPROPERTIES - -set PROPERTIES and DBPROPERTIES at the same time. - -## SET_TABLE_PROPERTY - -`` is a reserved table property, ``. - -## SET_VARIABLE_USING_SET - -`` is a VARIABLE and cannot be updated using the SET statement. Use SET VARIABLE `` = ... instead. - -## STATE_STORE_MULTIPLE_COLUMN_FAMILIES - -Creating multiple column families with `` is not supported. - -## STATE_STORE_REMOVING_COLUMN_FAMILIES - -Removing column families with `` is not supported. - -## STATE_STORE_TTL - -State TTL with `` is not supported. Please use RocksDBStateStoreProvider. - -## TABLE_OPERATION - -Table `` does not support ``. Please check the current catalog and namespace to make sure the qualified table name is expected, and also check the catalog implementation which is configured by "spark.sql.catalog". - -## TIME_TRAVEL - -Time travel on the relation: ``. - -## TOO_MANY_TYPE_ARGUMENTS_FOR_UDF_CLASS - -UDF class with `` type arguments. - -## TRANSFORM_DISTINCT_ALL - -TRANSFORM with the DISTINCT/ALL clause. - -## TRANSFORM_NON_HIVE - -TRANSFORM with SERDE is only supported in hive mode. - - diff --git a/docs/sql-error-conditions-unsupported-generator-error-class.md b/docs/sql-error-conditions-unsupported-generator-error-class.md deleted file mode 100644 index 4e42d6b43bca4..0000000000000 --- a/docs/sql-error-conditions-unsupported-generator-error-class.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_GENERATOR error class -displayTitle: UNSUPPORTED_GENERATOR error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The generator is not supported: - -This error class has the following derived error classes: - -## MULTI_GENERATOR - -only one generator allowed per SELECT clause but found ``: ``. - -## NESTED_IN_EXPRESSIONS - -nested in expressions ``. - -## NOT_GENERATOR - -`` is expected to be a generator. However, its class is ``, which is not a generator. - -## OUTSIDE_SELECT - -outside the SELECT clause, found: ``. - - diff --git a/docs/sql-error-conditions-unsupported-insert-error-class.md b/docs/sql-error-conditions-unsupported-insert-error-class.md deleted file mode 100644 index 3f679589fd3af..0000000000000 --- a/docs/sql-error-conditions-unsupported-insert-error-class.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_INSERT error class -displayTitle: UNSUPPORTED_INSERT error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Can't insert into the target. - -This error class has the following derived error classes: - -## MULTI_PATH - -Can only write data to relations with a single path but given paths are ``. - -## NOT_ALLOWED - -The target relation `` does not allow insertion. - -## NOT_PARTITIONED - -The target relation `` is not partitioned. - -## RDD_BASED - -An RDD-based table is not allowed. - -## READ_FROM - -The target relation `` is also being read from. - - diff --git a/docs/sql-error-conditions-unsupported-merge-condition-error-class.md b/docs/sql-error-conditions-unsupported-merge-condition-error-class.md deleted file mode 100644 index 070782395d3bc..0000000000000 --- a/docs/sql-error-conditions-unsupported-merge-condition-error-class.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_MERGE_CONDITION error class -displayTitle: UNSUPPORTED_MERGE_CONDITION error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -MERGE operation contains unsupported `` condition. - -This error class has the following derived error classes: - -## AGGREGATE - -Aggregates are not allowed: ``. - -## NON_DETERMINISTIC - -Non-deterministic expressions are not allowed: ``. - -## SUBQUERY - -Subqueries are not allowed: ``. - - diff --git a/docs/sql-error-conditions-unsupported-overwrite-error-class.md b/docs/sql-error-conditions-unsupported-overwrite-error-class.md deleted file mode 100644 index dd7de62cd06d4..0000000000000 --- a/docs/sql-error-conditions-unsupported-overwrite-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_OVERWRITE error class -displayTitle: UNSUPPORTED_OVERWRITE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42902](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Can't overwrite the target that is also being read from. - -This error class has the following derived error classes: - -## PATH - -The target path is ``. - -## TABLE - -The target table is ``. - - diff --git a/docs/sql-error-conditions-unsupported-save-mode-error-class.md b/docs/sql-error-conditions-unsupported-save-mode-error-class.md deleted file mode 100644 index dbe210360fcb1..0000000000000 --- a/docs/sql-error-conditions-unsupported-save-mode-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_SAVE_MODE error class -displayTitle: UNSUPPORTED_SAVE_MODE error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The save mode `` is not supported for: - -This error class has the following derived error classes: - -## EXISTENT_PATH - -an existent path. - -## NON_EXISTENT_PATH - -a non-existent path. - - diff --git a/docs/sql-error-conditions-unsupported-subquery-expression-category-error-class.md b/docs/sql-error-conditions-unsupported-subquery-expression-category-error-class.md deleted file mode 100644 index 59a34d6a01695..0000000000000 --- a/docs/sql-error-conditions-unsupported-subquery-expression-category-error-class.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -layout: global -title: UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY error class -displayTitle: UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Unsupported subquery expression: - -This error class has the following derived error classes: - -## ACCESSING_OUTER_QUERY_COLUMN_IS_NOT_ALLOWED - -Accessing outer query column is not allowed in this location: -`` - -## AGGREGATE_FUNCTION_MIXED_OUTER_LOCAL_REFERENCES - -Found an aggregate function in a correlated predicate that has both outer and local references, which is not supported: ``. - -## CORRELATED_COLUMN_IS_NOT_ALLOWED_IN_PREDICATE - -Correlated column is not allowed in predicate: -`` - -## CORRELATED_COLUMN_NOT_FOUND - -A correlated outer name reference within a subquery expression body was not found in the enclosing query: ``. - -## CORRELATED_REFERENCE - -Expressions referencing the outer query are not supported outside of WHERE/HAVING clauses: ``. - -## HIGHER_ORDER_FUNCTION - -Subquery expressions are not supported within higher-order functions. Please remove all subquery expressions from higher-order functions and then try the query again. - -## LATERAL_JOIN_CONDITION_NON_DETERMINISTIC - -Lateral join condition cannot be non-deterministic: ``. - -## MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY - -Correlated scalar subqueries must be aggregated to return at most one row. - -## NON_CORRELATED_COLUMNS_IN_GROUP_BY - -A GROUP BY clause in a scalar correlated subquery cannot contain non-correlated columns: ``. - -## NON_DETERMINISTIC_LATERAL_SUBQUERIES - -Non-deterministic lateral subqueries are not supported when joining with outer relations that produce more than one row: -`` - -## UNSUPPORTED_CORRELATED_REFERENCE_DATA_TYPE - -Correlated column reference '``' cannot be `` type. 
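A sketch of the MUST_AGGREGATE_CORRELATED_SCALAR_SUBQUERY condition listed above; `orders` and `items` are hypothetical tables and an active `spark` session is assumed:

```scala
// A correlated scalar subquery that may return several rows per outer row is rejected:
// spark.sql("SELECT o.id, (SELECT i.amount FROM items i WHERE i.order_id = o.id) FROM orders o")
// Aggregating the subquery so it yields at most one row per outer row is accepted:
spark.sql(
  "SELECT o.id, (SELECT max(i.amount) FROM items i WHERE i.order_id = o.id) AS max_amount FROM orders o")
```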
- -## UNSUPPORTED_CORRELATED_SCALAR_SUBQUERY - -Correlated scalar subqueries can only be used in filters, aggregations, projections, and UPDATE/MERGE/DELETE commands: -`` - -## UNSUPPORTED_IN_EXISTS_SUBQUERY - -IN/EXISTS predicate subqueries can only be used in filters, joins, aggregations, window functions, projections, and UPDATE/MERGE/DELETE commands: -`` - -## UNSUPPORTED_TABLE_ARGUMENT - -Table arguments are used in a function where they are not supported: -`` - - diff --git a/docs/sql-error-conditions-wrong-num-args-error-class.md b/docs/sql-error-conditions-wrong-num-args-error-class.md deleted file mode 100644 index 652037bae6789..0000000000000 --- a/docs/sql-error-conditions-wrong-num-args-error-class.md +++ /dev/null @@ -1,41 +0,0 @@ ---- -layout: global -title: WRONG_NUM_ARGS error class -displayTitle: WRONG_NUM_ARGS error class -license: | - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---- - - - -[SQLSTATE: 42605](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The `` requires `` parameters but the actual number is ``. - -This error class has the following derived error classes: - -## WITHOUT_SUGGESTION - -Please, refer to '``/sql-ref-functions.html' for a fix. - -## WITH_SUGGESTION - -If you have to call this function with `` parameters, set the legacy configuration `` to ``. - - diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 36d7cf58f09f2..0c1953ea8f468 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -19,2879 +19,19 @@ license: | limitations under the License. --- - +{% comment %} +Don't discuss error classes (e.g. `42`) or sub-classes (e.g. `K01`) with users. It's not helpful. +Keep this documentation focused on error states (e.g. `58002`) and conditions (e.g. +`AMBIGUOUS_COLUMN_REFERENCE`), which is what users see and what they will typically be searching +for when they encounter an error. -This is a list of common, named error conditions returned by Spark SQL. +To update this information, edit `error-conditions.json`. The table below will be automatically +derived from that file via `docs/util/build-error-docs.py`. -Also see [SQLSTATE Codes](sql-error-conditions-sqlstates.html). +Also note that this is a Jekyll comment and not an HTML comment so that this comment does not show +up in the generated HTML to end users. :-) +{% endcomment %} -### AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION +This is a list of error states and conditions that may be returned by Spark SQL. -[SQLSTATE: 42845](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Non-deterministic expression `` should not appear in the arguments of an aggregate function. 
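A minimal sketch of AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION above, assuming an active `spark` session:

```scala
// A non-deterministic expression inside an aggregate's arguments is rejected:
// spark.sql("SELECT sum(rand(0) * value) FROM VALUES (1), (2) AS t(value)")
// Projecting the non-deterministic value first, then aggregating it, is accepted:
spark.sql("SELECT sum(r) FROM (SELECT rand(0) * value AS r FROM VALUES (1), (2) AS t(value))")
```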
- -### ALL_PARAMETERS_MUST_BE_NAMED - -SQLSTATE: 07001 - -Using name parameterized queries requires all parameters to be named. Parameters missing names: ``. - -### ALL_PARTITION_COLUMNS_NOT_ALLOWED - -SQLSTATE: KD005 - -Cannot use all columns for partition columns. - -### ALTER_TABLE_COLUMN_DESCRIPTOR_DUPLICATE - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -ALTER TABLE `` column `` specifies descriptor "``" more than once, which is invalid. - -### AMBIGUOUS_ALIAS_IN_NESTED_CTE - -[SQLSTATE: 42KD0](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Name `` is ambiguous in nested CTE. -Please set `` to "CORRECTED" so that name defined in inner CTE takes precedence. If set it to "LEGACY", outer CTE definitions will take precedence. -See '``/sql-migration-guide.html#query-engine'. - -### AMBIGUOUS_COLUMN_OR_FIELD - -[SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Column or field `` is ambiguous and has `` matches. - -### AMBIGUOUS_COLUMN_REFERENCE - -[SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Column `` is ambiguous. It's because you joined several DataFrame together, and some of these DataFrames are the same. -This column points to one of the DataFrames but Spark is unable to figure out which one. -Please alias the DataFrames with different names via `DataFrame.alias` before joining them, -and specify the column using qualified name, e.g. `df.alias("a").join(df.alias("b"), col("a.id") > col("b.id"))`. - -### AMBIGUOUS_LATERAL_COLUMN_ALIAS - -[SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Lateral column alias `` is ambiguous and has `` matches. - -### AMBIGUOUS_REFERENCE - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Reference `` is ambiguous, could be: ``. - -### AMBIGUOUS_REFERENCE_TO_FIELDS - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Ambiguous reference to the field ``. It appears `` times in the schema. - -### ARITHMETIC_OVERFLOW - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -``.`` If necessary set `` to "false" to bypass this error. - -### ASSIGNMENT_ARITY_MISMATCH - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The number of columns or variables assigned or aliased: `` does not match the number of source expressions: ``. - -### [AS_OF_JOIN](sql-error-conditions-as-of-join-error-class.html) - -[SQLSTATE: 42604](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid as-of join. - -For more details see [AS_OF_JOIN](sql-error-conditions-as-of-join-error-class.html) - -### AVRO_INCOMPATIBLE_READ_TYPE - -[SQLSTATE: 22KD3](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Cannot convert Avro `` to SQL `` because the original encoded data type is ``, however you're trying to read the field as ``, which would lead to an incorrect answer. -To allow reading this field, enable the SQL configuration: "spark.sql.legacy.avro.allowIncompatibleSchema". - -### BATCH_METADATA_NOT_FOUND - -[SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Unable to find batch ``. 
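The AMBIGUOUS_COLUMN_REFERENCE entry above suggests aliasing before a self-join; a small sketch of that fix, assuming an active `spark` session:

```scala
import org.apache.spark.sql.functions.col

val df = spark.range(5).toDF("id")
// Both join inputs come from the same DataFrame, so df("id") is ambiguous:
// df.join(df, df("id") > df("id"))
// Aliasing each side and using qualified names removes the ambiguity:
val joined = df.alias("a").join(df.alias("b"), col("a.id") > col("b.id"))
```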
- -### BINARY_ARITHMETIC_OVERFLOW - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -`` `` `` caused overflow. - -### CALL_ON_STREAMING_DATASET_UNSUPPORTED - -[SQLSTATE: 42KDE](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The method `` can not be called on streaming Dataset/DataFrame. - -### CANNOT_ALTER_PARTITION_COLUMN - -[SQLSTATE: 428FR](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -ALTER TABLE (ALTER|CHANGE) COLUMN is not supported for partition columns, but found the partition column `` in the table ``. - -### CANNOT_CAST_DATATYPE - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot cast `` to ``. - -### CANNOT_CONVERT_PROTOBUF_FIELD_TYPE_TO_SQL_TYPE - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot convert Protobuf `` to SQL `` because schema is incompatible (protobufType = ``, sqlType = ``). - -### CANNOT_CONVERT_PROTOBUF_MESSAGE_TYPE_TO_SQL_TYPE - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Unable to convert `` of Protobuf to SQL type ``. - -### CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot convert SQL `` to Protobuf `` because schema is incompatible (protobufType = ``, sqlType = ``). - -### CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot convert SQL `` to Protobuf `` because `` is not in defined values for enum: ``. - -### [CANNOT_CREATE_DATA_SOURCE_TABLE](sql-error-conditions-cannot-create-data-source-table-error-class.html) - -[SQLSTATE: 42KDE](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to create data source table ``: - -For more details see [CANNOT_CREATE_DATA_SOURCE_TABLE](sql-error-conditions-cannot-create-data-source-table-error-class.html) - -### CANNOT_DECODE_URL - -[SQLSTATE: 22546](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The provided URL cannot be decoded: ``. Please ensure that the URL is properly formatted and try again. - -### CANNOT_INVOKE_IN_TRANSFORMATIONS - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Dataset transformations and actions can only be invoked by the driver, not inside of other Dataset transformations; for example, dataset1.map(x => dataset2.values.count() * x) is invalid because the values transformation and count action cannot be performed inside of the dataset1.map transformation. For more information, see SPARK-28702. - -### CANNOT_LOAD_FUNCTION_CLASS - -[SQLSTATE: 46103](sql-error-conditions-sqlstates.html#class-46-java-ddl-1) - -Cannot load class `` when registering the function ``, please make sure it is on the classpath. - -### CANNOT_LOAD_PROTOBUF_CLASS - -[SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Could not load Protobuf class with name ``. ``. - -### [CANNOT_LOAD_STATE_STORE](sql-error-conditions-cannot-load-state-store-error-class.html) - -SQLSTATE: 58030 - -An error occurred during loading state. 
- -For more details see [CANNOT_LOAD_STATE_STORE](sql-error-conditions-cannot-load-state-store-error-class.html) - -### CANNOT_MERGE_INCOMPATIBLE_DATA_TYPE - -[SQLSTATE: 42825](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to merge incompatible data types `` and ``. Please check the data types of the columns being merged and ensure that they are compatible. If necessary, consider casting the columns to compatible data types before attempting the merge. - -### CANNOT_MERGE_SCHEMAS - -[SQLSTATE: 42KD9](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed merging schemas: -Initial schema: -`` -Schema that cannot be merged with the initial schema: -``. - -### CANNOT_MODIFY_CONFIG - -[SQLSTATE: 46110](sql-error-conditions-sqlstates.html#class-46-java-ddl-1) - -Cannot modify the value of the Spark config: ``. -See also '``/sql-migration-guide.html#ddl-statements'. - -### CANNOT_PARSE_DECIMAL - -[SQLSTATE: 22018](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Cannot parse decimal. Please ensure that the input is a valid number with optional decimal point or comma separators. - -### CANNOT_PARSE_INTERVAL - -[SQLSTATE: 22006](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Unable to parse ``. Please ensure that the value provided is in a valid format for defining an interval. You can reference the documentation for the correct format. If the issue persists, please double check that the input value is not null or empty and try again. - -### CANNOT_PARSE_JSON_FIELD - -[SQLSTATE: 2203G](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Cannot parse the field name `` and the value `` of the JSON token type `` to target Spark data type ``. - -### CANNOT_PARSE_PROTOBUF_DESCRIPTOR - -[SQLSTATE: 22018](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Error parsing descriptor bytes into Protobuf FileDescriptorSet. - -### CANNOT_PARSE_TIMESTAMP - -[SQLSTATE: 22007](sql-error-conditions-sqlstates.html#class-22-data-exception) - -``. If necessary set `` to "false" to bypass this error. - -### CANNOT_RECOGNIZE_HIVE_TYPE - -[SQLSTATE: 429BB](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot recognize hive type string: ``, column: ``. The specified data type for the field cannot be recognized by Spark SQL. Please check the data type of the specified field and ensure that it is a valid Spark SQL data type. Refer to the Spark SQL documentation for a list of valid data types and their format. If the data type is correct, please ensure that you are using a supported version of Spark SQL. - -### CANNOT_RENAME_ACROSS_SCHEMA - -[SQLSTATE: 0AKD0](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Renaming a `` across schemas is not allowed. - -### CANNOT_RESOLVE_DATAFRAME_COLUMN - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot resolve dataframe column ``. It's probably because of illegal references like `df1.select(df2.col("a"))`. - -### CANNOT_RESOLVE_STAR_EXPAND - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot resolve ``.* given input columns ``. Please check that the specified table or struct exists and is accessible in the input columns. - -### CANNOT_RESTORE_PERMISSIONS_FOR_PATH - -SQLSTATE: 58030 - -Failed to set permissions on created path `` back to ``. 
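A sketch of the illegal reference described under CANNOT_RESOLVE_DATAFRAME_COLUMN above, assuming an active `spark` session:

```scala
val df1 = spark.range(3).toDF("a")
val df2 = spark.range(3).toDF("a")
// A column bound to df2 cannot be resolved against a plan rooted at df1:
// df1.select(df2.col("a"))
// Reference columns through the DataFrame that owns them, or join the two frames first:
df1.select(df1.col("a"))
df1.join(df2, df1.col("a") === df2.col("a"))
```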
- -### CANNOT_SAVE_VARIANT - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Cannot save variant data type into external storage. - -### [CANNOT_UPDATE_FIELD](sql-error-conditions-cannot-update-field-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Cannot update `
      ` field `` type: - -For more details see [CANNOT_UPDATE_FIELD](sql-error-conditions-cannot-update-field-error-class.html) - -### CANNOT_UP_CAST_DATATYPE - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot up cast `` from `` to ``. -`
      ` - -### [CANNOT_WRITE_STATE_STORE](sql-error-conditions-cannot-write-state-store-error-class.html) - -SQLSTATE: 58030 - -Error writing state store files for provider ``. - -For more details see [CANNOT_WRITE_STATE_STORE](sql-error-conditions-cannot-write-state-store-error-class.html) - -### CAST_INVALID_INPUT - -[SQLSTATE: 22018](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The value `` of the type `` cannot be cast to `` because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set `` to "false" to bypass this error. - -### CAST_OVERFLOW - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The value `` of the type `` cannot be cast to `` due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set `` to "false" to bypass this error. - -### CAST_OVERFLOW_IN_TABLE_INSERT - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Fail to assign a value of `` type to the `` type column or variable `` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. - -### CATALOG_NOT_FOUND - -[SQLSTATE: 42P08](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The catalog `` not found. Consider to set the SQL config `` to a catalog plugin. - -### CHECKPOINT_RDD_BLOCK_ID_NOT_FOUND - -SQLSTATE: 56000 - -Checkpoint block `` not found! -Either the executor that originally checkpointed this partition is no longer alive, or the original RDD is unpersisted. -If this problem persists, you may consider using `rdd.checkpoint()` instead, which is slower than local checkpointing but more fault-tolerant. - -### CLASS_NOT_OVERRIDE_EXPECTED_METHOD - -[SQLSTATE: 38000](sql-error-conditions-sqlstates.html#class-38-external-routine-exception) - -`` must override either `` or ``. - -### CLASS_UNSUPPORTED_BY_MAP_OBJECTS - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -`MapObjects` does not support the class `` as resulting collection. - -### [CODEC_NOT_AVAILABLE](sql-error-conditions-codec-not-available-error-class.html) - -SQLSTATE: 56038 - -The codec `` is not available. - -For more details see [CODEC_NOT_AVAILABLE](sql-error-conditions-codec-not-available-error-class.html) - -### CODEC_SHORT_NAME_NOT_FOUND - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot find a short name for the codec ``. - -### COLLATION_INVALID_NAME - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The value `` does not represent a correct collation name. Suggested valid collation name: [``]. - -### [COLLATION_MISMATCH](sql-error-conditions-collation-mismatch-error-class.html) - -[SQLSTATE: 42P21](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Could not determine which collation to use for string functions and operators. 
- -For more details see [COLLATION_MISMATCH](sql-error-conditions-collation-mismatch-error-class.html) - -### [COLLECTION_SIZE_LIMIT_EXCEEDED](sql-error-conditions-collection-size-limit-exceeded-error-class.html) - -[SQLSTATE: 54000](sql-error-conditions-sqlstates.html#class-54-program-limit-exceeded) - -Can't create array with `` elements which exceeding the array size limit ``, - -For more details see [COLLECTION_SIZE_LIMIT_EXCEEDED](sql-error-conditions-collection-size-limit-exceeded-error-class.html) - -### COLUMN_ALIASES_NOT_ALLOWED - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Column aliases are not allowed in ``. - -### COLUMN_ALREADY_EXISTS - -[SQLSTATE: 42711](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The column `` already exists. Choose another name or rename the existing column. - -### COLUMN_NOT_DEFINED_IN_TABLE - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` column `` is not defined in table ``, defined table columns are: ``. - -### COLUMN_NOT_FOUND - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The column `` cannot be found. Verify the spelling and correctness of the column name according to the SQL config ``. - -### COMPARATOR_RETURNS_NULL - -[SQLSTATE: 22004](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The comparator has returned a NULL for a comparison between `` and ``. -It should return a positive integer for "greater than", 0 for "equal" and a negative integer for "less than". -To revert to deprecated behavior where NULL is treated as 0 (equal), you must set "spark.sql.legacy.allowNullComparisonResultInArraySort" to "true". - -### [COMPLEX_EXPRESSION_UNSUPPORTED_INPUT](sql-error-conditions-complex-expression-unsupported-input-error-class.html) - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot process input data types for the expression: ``. - -For more details see [COMPLEX_EXPRESSION_UNSUPPORTED_INPUT](sql-error-conditions-complex-expression-unsupported-input-error-class.html) - -### CONCURRENT_QUERY - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Another instance of this query was just started by a concurrent session. - -### CONCURRENT_STREAM_LOG_UPDATE - -SQLSTATE: 40000 - -Concurrent update to the log. Multiple streaming jobs detected for ``. -Please make sure only one streaming job runs on a specific checkpoint location at a time. - -### [CONNECT](sql-error-conditions-connect-error-class.html) - -SQLSTATE: 56K00 - -Generic Spark Connect error. - -For more details see [CONNECT](sql-error-conditions-connect-error-class.html) - -### CONVERSION_INVALID_INPUT - -[SQLSTATE: 22018](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The value `` (``) cannot be converted to `` because it is malformed. Correct the value as per the syntax, or change its format. Use `` to tolerate malformed input and return NULL instead. - -### CREATE_PERMANENT_VIEW_WITHOUT_ALIAS - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Not allowed to create the permanent view `` without explicitly assigning an alias for the expression ``. 
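A hedged sketch of the CREATE_PERMANENT_VIEW_WITHOUT_ALIAS entry above, assuming default settings, an active `spark` session, and an illustrative view name `v1`:

```scala
// An unaliased expression would rely on an auto-generated column name,
// which a permanent view rejects:
// spark.sql("CREATE VIEW v1 AS SELECT value + 1 FROM VALUES (1) AS t(value)")
// Giving the expression an explicit alias satisfies the check:
spark.sql("CREATE VIEW v1 AS SELECT value + 1 AS value_plus_one FROM VALUES (1) AS t(value)")
```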
- -### CREATE_TABLE_COLUMN_DESCRIPTOR_DUPLICATE - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -CREATE TABLE column `` specifies descriptor "``" more than once, which is invalid. - -### [CREATE_VIEW_COLUMN_ARITY_MISMATCH](sql-error-conditions-create-view-column-arity-mismatch-error-class.html) - -[SQLSTATE: 21S01](sql-error-conditions-sqlstates.html#class-21-cardinality-violation) - -Cannot create view ``, the reason is - -For more details see [CREATE_VIEW_COLUMN_ARITY_MISMATCH](sql-error-conditions-create-view-column-arity-mismatch-error-class.html) - -### [DATATYPE_MISMATCH](sql-error-conditions-datatype-mismatch-error-class.html) - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot resolve `` due to data type mismatch: - -For more details see [DATATYPE_MISMATCH](sql-error-conditions-datatype-mismatch-error-class.html) - -### DATATYPE_MISSING_SIZE - -[SQLSTATE: 42K01](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -DataType `` requires a length parameter, for example ``(10). Please specify the length. - -### DATA_SOURCE_ALREADY_EXISTS - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Data source '``' already exists. Please choose a different name for the new data source. - -### DATA_SOURCE_NOT_EXIST - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Data source '``' not found. Please make sure the data source is registered. - -### DATA_SOURCE_NOT_FOUND - -[SQLSTATE: 42K02](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to find the data source: ``. Make sure the provider name is correct and the package is properly registered and compatible with your Spark version. - -### DATA_SOURCE_TABLE_SCHEMA_MISMATCH - -[SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The schema of the data source table does not match the expected schema. If you are using the DataFrameReader.schema API or creating a table, avoid specifying the schema. -Data Source schema: `` -Expected schema: `` - -### DATETIME_OVERFLOW - -[SQLSTATE: 22008](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Datetime operation overflow: ``. - -### DECIMAL_PRECISION_EXCEEDS_MAX_PRECISION - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Decimal precision `` exceeds max precision ``. - -### DEFAULT_DATABASE_NOT_EXISTS - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Default database `` does not exist, please create it first or change default database to ````. - -### DEFAULT_PLACEMENT_INVALID - -[SQLSTATE: 42608](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A DEFAULT keyword in a MERGE, INSERT, UPDATE, or SET VARIABLE command could not be directly assigned to a target column because it was part of an expression. -For example: `UPDATE SET c1 = DEFAULT` is allowed, but `UPDATE T SET c1 = DEFAULT + 1` is not allowed. - -### DISTINCT_WINDOW_FUNCTION_UNSUPPORTED - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Distinct window functions are not supported: ``. - -### DIVIDE_BY_ZERO - -[SQLSTATE: 22012](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Division by zero. 
Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set `` to "false" to bypass this error. - -### DUPLICATED_FIELD_NAME_IN_ARROW_STRUCT - -[SQLSTATE: 42713](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Duplicated field names in Arrow Struct are not allowed, got ``. - -### DUPLICATED_MAP_KEY - -[SQLSTATE: 23505](sql-error-conditions-sqlstates.html#class-23-integrity-constraint-violation) - -Duplicate map key `` was found, please check the input data. -If you want to remove the duplicated keys, you can set `` to "LAST_WIN" so that the key inserted at last takes precedence. - -### DUPLICATED_METRICS_NAME - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The metric name is not unique: ``. The same name cannot be used for metrics with different results. -However multiple instances of metrics with with same result and name are allowed (e.g. self-joins). - -### DUPLICATE_ASSIGNMENTS - -[SQLSTATE: 42701](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The columns or variables `` appear more than once as assignment targets. - -### DUPLICATE_CLAUSES - -[SQLSTATE: 42614](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Found duplicate clauses: ``. Please, remove one of them. - -### DUPLICATE_KEY - -[SQLSTATE: 23505](sql-error-conditions-sqlstates.html#class-23-integrity-constraint-violation) - -Found duplicate keys ``. - -### [DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT](sql-error-conditions-duplicate-routine-parameter-assignment-error-class.html) - -[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Call to function `` is invalid because it includes multiple argument assignments to the same parameter name ``. - -For more details see [DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT](sql-error-conditions-duplicate-routine-parameter-assignment-error-class.html) - -### EMPTY_JSON_FIELD_VALUE - -[SQLSTATE: 42604](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to parse an empty string for data type ``. - -### ENCODER_NOT_FOUND - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Not found an encoder of the type `` to Spark SQL internal representation. -Consider to change the input type to one of supported at '``/sql-ref-datatypes.html'. - -### ERROR_READING_AVRO_UNKNOWN_FINGERPRINT - -SQLSTATE: KD00B - -Error reading avro data -- encountered an unknown fingerprint: ``, not sure what schema to use. -This could happen if you registered additional schemas after starting your spark context. - -### EVENT_TIME_IS_NOT_ON_TIMESTAMP_TYPE - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The event time `` has the invalid type ``, but expected "TIMESTAMP". - -### EXCEED_LIMIT_LENGTH - -[SQLSTATE: 54006](sql-error-conditions-sqlstates.html#class-54-program-limit-exceeded) - -Exceeds char/varchar type length limitation: ``. - -### EXCEPT_NESTED_COLUMN_INVALID_TYPE - -[SQLSTATE: 428H2](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -EXCEPT column `` was resolved and expected to be StructType, but found type ``. 
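A minimal sketch of the `try_divide` remedy mentioned under DIVIDE_BY_ZERO above, assuming ANSI mode is what raises the error:

```scala
// Under ANSI mode, 10 / 0 raises DIVIDE_BY_ZERO; try_divide returns NULL instead.
spark.sql("SELECT try_divide(10, 0) AS q").show()   // q is NULL
```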
- -### EXCEPT_OVERLAPPING_COLUMNS - -[SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Columns in an EXCEPT list must be distinct and non-overlapping, but got (``). - -### EXEC_IMMEDIATE_DUPLICATE_ARGUMENT_ALIASES - -[SQLSTATE: 42701](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The USING clause of this EXECUTE IMMEDIATE command contained multiple arguments with same alias (``), which is invalid; please update the command to specify unique aliases and then try it again. - -### EXPECT_PERMANENT_VIEW_NOT_TEMP - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -'``' expects a permanent view but `` is a temp view. - -### [EXPECT_TABLE_NOT_VIEW](sql-error-conditions-expect-table-not-view-error-class.html) - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -'``' expects a table but `` is a view. - -For more details see [EXPECT_TABLE_NOT_VIEW](sql-error-conditions-expect-table-not-view-error-class.html) - -### [EXPECT_VIEW_NOT_TABLE](sql-error-conditions-expect-view-not-table-error-class.html) - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The table `` does not support ``. - -For more details see [EXPECT_VIEW_NOT_TABLE](sql-error-conditions-expect-view-not-table-error-class.html) - -### EXPRESSION_DECODING_FAILED - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to decode a row to a value of the expressions: ``. - -### EXPRESSION_ENCODING_FAILED - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to encode a value of the expressions: `` to a row. - -### EXPRESSION_TYPE_IS_NOT_ORDERABLE - -[SQLSTATE: 42822](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Column expression `` cannot be sorted because its type `` is not orderable. - -### FAILED_EXECUTE_UDF - -[SQLSTATE: 39000](sql-error-conditions-sqlstates.html#class-39-external-routine-invocation-exception) - -User defined function (``: (``) => ``) failed due to: ``. - -### FAILED_FUNCTION_CALL - -[SQLSTATE: 38000](sql-error-conditions-sqlstates.html#class-38-external-routine-exception) - -Failed preparing of the function `` for call. Please, double check function's arguments. - -### [FAILED_JDBC](sql-error-conditions-failed-jdbc-error-class.html) - -SQLSTATE: HV000 - -Failed JDBC `` on the operation: - -For more details see [FAILED_JDBC](sql-error-conditions-failed-jdbc-error-class.html) - -### FAILED_PARSE_STRUCT_TYPE - -[SQLSTATE: 22018](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Failed parsing struct: ``. - -### [FAILED_READ_FILE](sql-error-conditions-failed-read-file-error-class.html) - -SQLSTATE: KD001 - -Encountered error while reading file ``. - -For more details see [FAILED_READ_FILE](sql-error-conditions-failed-read-file-error-class.html) - -### FAILED_REGISTER_CLASS_WITH_KRYO - -SQLSTATE: KD000 - -Failed to register classes with Kryo. - -### FAILED_RENAME_PATH - -[SQLSTATE: 42K04](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to rename `` to `` as destination already exists. - -### FAILED_RENAME_TEMP_FILE - -SQLSTATE: 58030 - -Failed to rename temp file `` to `` as FileSystem.rename returned false. 
- -### FAILED_ROW_TO_JSON - -[SQLSTATE: 2203G](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Failed to convert the row value `` of the class `` to the target SQL type `` in the JSON format. - -### FIELDS_ALREADY_EXISTS - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot `` column, because `` already exists in ``. - -### FIELD_NOT_FOUND - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -No such struct field `` in ``. - -### FORBIDDEN_OPERATION - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The operation `` is not allowed on the ``: ``. - -### FOREACH_BATCH_USER_FUNCTION_ERROR - -[SQLSTATE: 39000](sql-error-conditions-sqlstates.html#class-39-external-routine-invocation-exception) - -An error occurred in the user provided function in foreach batch sink. Reason: `` - -### FOUND_MULTIPLE_DATA_SOURCES - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Detected multiple data sources with the name '``'. Please check the data source isn't simultaneously registered and located in the classpath. - -### GENERATED_COLUMN_WITH_DEFAULT_VALUE - -[SQLSTATE: 42623](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A column cannot have both a default value and a generation expression but column `` has default value: (``) and generation expression: (``). - -### GET_TABLES_BY_TYPE_UNSUPPORTED_BY_HIVE_VERSION - -SQLSTATE: 56038 - -Hive 2.2 and lower versions don't support getTablesByType. Please use Hive 2.3 or higher version. - -### GRAPHITE_SINK_INVALID_PROTOCOL - -SQLSTATE: KD000 - -Invalid Graphite protocol: ``. - -### GRAPHITE_SINK_PROPERTY_MISSING - -SQLSTATE: KD000 - -Graphite sink requires '``' property. - -### GROUPING_COLUMN_MISMATCH - -[SQLSTATE: 42803](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Column of grouping (``) can't be found in grouping columns ``. - -### GROUPING_ID_COLUMN_MISMATCH - -[SQLSTATE: 42803](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Columns of grouping_id (``) does not match grouping columns (``). - -### GROUPING_SIZE_LIMIT_EXCEEDED - -[SQLSTATE: 54000](sql-error-conditions-sqlstates.html#class-54-program-limit-exceeded) - -Grouping sets size cannot be greater than ``. - -### GROUP_BY_AGGREGATE - -[SQLSTATE: 42903](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Aggregate functions are not allowed in GROUP BY, but found ``. - -### GROUP_BY_POS_AGGREGATE - -[SQLSTATE: 42903](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -GROUP BY `` refers to an expression `` that contains an aggregate function. Aggregate functions are not allowed in GROUP BY. - -### GROUP_BY_POS_OUT_OF_RANGE - -[SQLSTATE: 42805](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -GROUP BY position `` is not in select list (valid range is [1, ``]). - -### GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE - -[SQLSTATE: 42822](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The expression `` cannot be used as a grouping expression because its data type `` is not an orderable data type. 
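A sketch of GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE above: a MAP-typed expression cannot serve as a grouping key, but an extracted entry can (active `spark` session assumed):

```scala
// A MAP column is not an orderable grouping key:
// spark.sql("SELECT m, count(*) FROM VALUES (map('k', 1)) AS t(m) GROUP BY m")
// Grouping by an extracted map entry works:
spark.sql("SELECT m['k'] AS k, count(*) AS cnt FROM VALUES (map('k', 1)) AS t(m) GROUP BY m['k']")
```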
- -### HLL_INVALID_INPUT_SKETCH_BUFFER - -[SQLSTATE: 22546](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Invalid call to ``; only valid HLL sketch buffers are supported as inputs (such as those produced by the `hll_sketch_agg` function). - -### HLL_INVALID_LG_K - -[SQLSTATE: 22546](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Invalid call to ``; the `lgConfigK` value must be between `` and ``, inclusive: ``. - -### HLL_UNION_DIFFERENT_LG_K - -[SQLSTATE: 22000](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Sketches have different `lgConfigK` values: `` and ``. Set the `allowDifferentLgConfigK` parameter to true to call `` with different `lgConfigK` values. - -### IDENTIFIER_TOO_MANY_NAME_PARTS - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` is not a valid identifier as it has more than 2 name parts. - -### [ILLEGAL_STATE_STORE_VALUE](sql-error-conditions-illegal-state-store-value-error-class.html) - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Illegal value provided to the State Store - -For more details see [ILLEGAL_STATE_STORE_VALUE](sql-error-conditions-illegal-state-store-value-error-class.html) - -### INCOMPARABLE_PIVOT_COLUMN - -[SQLSTATE: 42818](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid pivot column ``. Pivot columns must be comparable. - -### INCOMPATIBLE_COLUMN_TYPE - -[SQLSTATE: 42825](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` can only be performed on tables with compatible column types. The `` column of the `` table is `` type which is not compatible with `` at the same column of the first table.``. - -### INCOMPATIBLE_DATASOURCE_REGISTER - -SQLSTATE: 56038 - -Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: `` - -### [INCOMPATIBLE_DATA_FOR_TABLE](sql-error-conditions-incompatible-data-for-table-error-class.html) - -SQLSTATE: KD000 - -Cannot write incompatible data for the table ``: - -For more details see [INCOMPATIBLE_DATA_FOR_TABLE](sql-error-conditions-incompatible-data-for-table-error-class.html) - -### INCOMPATIBLE_JOIN_TYPES - -[SQLSTATE: 42613](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The join types `` and `` are incompatible. - -### INCOMPATIBLE_VIEW_SCHEMA_CHANGE - -SQLSTATE: 51024 - -The SQL query of view `` has an incompatible schema change and column `` cannot be resolved. Expected `` columns named `` but got ``. -Please try to re-create the view by running: ``. 
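A small sketch of the HLL sketch functions referenced above; `hll_sketch_agg` produces the only buffers that `hll_sketch_estimate` accepts (anything else is the HLL_INVALID_INPUT_SKETCH_BUFFER case):

```scala
spark.sql("""
  SELECT hll_sketch_estimate(hll_sketch_agg(col)) AS approx_distinct
  FROM VALUES (1), (1), (2), (3) AS tab(col)
""").show()   // approx_distinct = 3
```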
- -### [INCOMPLETE_TYPE_DEFINITION](sql-error-conditions-incomplete-type-definition-error-class.html) - -[SQLSTATE: 42K01](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Incomplete complex type: - -For more details see [INCOMPLETE_TYPE_DEFINITION](sql-error-conditions-incomplete-type-definition-error-class.html) - -### [INCONSISTENT_BEHAVIOR_CROSS_VERSION](sql-error-conditions-inconsistent-behavior-cross-version-error-class.html) - -[SQLSTATE: 42K0B](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -You may get a different result due to the upgrading to - -For more details see [INCONSISTENT_BEHAVIOR_CROSS_VERSION](sql-error-conditions-inconsistent-behavior-cross-version-error-class.html) - -### INCORRECT_RAMP_UP_RATE - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Max offset with `` rowsPerSecond is ``, but 'rampUpTimeSeconds' is ``. - -### INDETERMINATE_COLLATION - -[SQLSTATE: 42P22](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Function called requires knowledge of the collation it should apply, but indeterminate collation was found. Use COLLATE function to set the collation explicitly. - -### INDEX_ALREADY_EXISTS - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create the index `` on table `` because it already exists. - -### INDEX_NOT_FOUND - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot find the index `` on table ``. - -### [INSERT_COLUMN_ARITY_MISMATCH](sql-error-conditions-insert-column-arity-mismatch-error-class.html) - -[SQLSTATE: 21S01](sql-error-conditions-sqlstates.html#class-21-cardinality-violation) - -Cannot write to ``, the reason is - -For more details see [INSERT_COLUMN_ARITY_MISMATCH](sql-error-conditions-insert-column-arity-mismatch-error-class.html) - -### INSERT_PARTITION_COLUMN_ARITY_MISMATCH - -[SQLSTATE: 21S01](sql-error-conditions-sqlstates.html#class-21-cardinality-violation) - -Cannot write to '``', ``: -Table columns: ``. -Partition columns with static values: ``. -Data columns: ``. 
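A sketch of INSERT_COLUMN_ARITY_MISMATCH above; `target` is a hypothetical table and an active `spark` session is assumed:

```scala
spark.sql("CREATE TABLE target(a INT, b INT, c INT) USING parquet")
// A two-value row cannot be written into a three-column table:
// spark.sql("INSERT INTO target VALUES (1, 2)")
// Matching the table's arity is accepted:
spark.sql("INSERT INTO target VALUES (1, 2, 3)")
```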
- -### [INSUFFICIENT_TABLE_PROPERTY](sql-error-conditions-insufficient-table-property-error-class.html) - -[SQLSTATE: XXKUC](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -Can't find table property: - -For more details see [INSUFFICIENT_TABLE_PROPERTY](sql-error-conditions-insufficient-table-property-error-class.html) - -### INTERNAL_ERROR - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` - -### INTERNAL_ERROR_BROADCAST - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` - -### INTERNAL_ERROR_EXECUTOR - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` - -### INTERNAL_ERROR_MEMORY - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` - -### [INTERNAL_ERROR_METADATA_CATALOG](sql-error-conditions-internal-error-metadata-catalog-error-class.html) - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -An object in the metadata catalog has been corrupted: - -For more details see [INTERNAL_ERROR_METADATA_CATALOG](sql-error-conditions-internal-error-metadata-catalog-error-class.html) - -### INTERNAL_ERROR_NETWORK - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` - -### INTERNAL_ERROR_SHUFFLE - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` - -### INTERNAL_ERROR_STORAGE - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` - -### INTERNAL_ERROR_TWS - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` - -### INTERVAL_ARITHMETIC_OVERFLOW - -[SQLSTATE: 22015](sql-error-conditions-sqlstates.html#class-22-data-exception) - -``.`` - -### INTERVAL_DIVIDED_BY_ZERO - -[SQLSTATE: 22012](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. - -### [INVALID_AGGREGATE_FILTER](sql-error-conditions-invalid-aggregate-filter-error-class.html) - -[SQLSTATE: 42903](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The FILTER expression `` in an aggregate function is invalid. - -For more details see [INVALID_AGGREGATE_FILTER](sql-error-conditions-invalid-aggregate-filter-error-class.html) - -### INVALID_ARRAY_INDEX - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The index `` is out of bounds. The array has `` elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead. If necessary set `` to "false" to bypass this error. - -### INVALID_ARRAY_INDEX_IN_ELEMENT_AT - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The index `` is out of bounds. The array has `` elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set `` to "false" to bypass this error. - -### INVALID_BITMAP_POSITION - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The 0-indexed bitmap position `` is out of bounds. The bitmap has `` bits (`` bytes). - -### [INVALID_BOUNDARY](sql-error-conditions-invalid-boundary-error-class.html) - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The boundary `` is invalid: ``. 
- -For more details see [INVALID_BOUNDARY](sql-error-conditions-invalid-boundary-error-class.html) - -### INVALID_BUCKET_COLUMN_DATA_TYPE - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot use `` for bucket column. Collated data types are not supported for bucketing. - -### INVALID_BUCKET_FILE - -SQLSTATE: 58030 - -Invalid bucket file: ``. - -### INVALID_BYTE_STRING - -[SQLSTATE: 22P03](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The expected format is ByteString, but was `` (``). - -### INVALID_COLUMN_NAME_AS_PATH - -[SQLSTATE: 46121](sql-error-conditions-sqlstates.html#class-46-java-ddl-1) - -The datasource `` cannot save the column `` because its name contains some characters that are not allowed in file paths. Please, use an alias to rename it. - -### INVALID_COLUMN_OR_FIELD_DATA_TYPE - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Column or field `` is of type `` while it's required to be ``. - -### [INVALID_CONF_VALUE](sql-error-conditions-invalid-conf-value-error-class.html) - -[SQLSTATE: 22022](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The value '``' in the config "``" is invalid. - -For more details see [INVALID_CONF_VALUE](sql-error-conditions-invalid-conf-value-error-class.html) - -### [INVALID_CURSOR](sql-error-conditions-invalid-cursor-error-class.html) - -[SQLSTATE: HY109](sql-error-conditions-sqlstates.html#class-HY-cli-specific-condition) - -The cursor is invalid. - -For more details see [INVALID_CURSOR](sql-error-conditions-invalid-cursor-error-class.html) - -### [INVALID_DATETIME_PATTERN](sql-error-conditions-invalid-datetime-pattern-error-class.html) - -[SQLSTATE: 22007](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Unrecognized datetime pattern: ``. - -For more details see [INVALID_DATETIME_PATTERN](sql-error-conditions-invalid-datetime-pattern-error-class.html) - -### [INVALID_DEFAULT_VALUE](sql-error-conditions-invalid-default-value-error-class.html) - -[SQLSTATE: 42623](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to execute `` command because the destination column or variable `` has a DEFAULT value ``, - -For more details see [INVALID_DEFAULT_VALUE](sql-error-conditions-invalid-default-value-error-class.html) - -### [INVALID_DELIMITER_VALUE](sql-error-conditions-invalid-delimiter-value-error-class.html) - -[SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid value for delimiter. - -For more details see [INVALID_DELIMITER_VALUE](sql-error-conditions-invalid-delimiter-value-error-class.html) - -### INVALID_DRIVER_MEMORY - -SQLSTATE: F0000 - -System memory `` must be at least ``. -Please increase heap size using the --driver-memory option or "``" in Spark configuration. - -### INVALID_EMPTY_LOCATION - -[SQLSTATE: 42K05](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The location name cannot be empty string, but ```` was given. - -### INVALID_ESC - -[SQLSTATE: 42604](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Found an invalid escape string: ``. The escape string must contain only one character. - -### INVALID_ESCAPE_CHAR - -[SQLSTATE: 42604](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`EscapeChar` should be a string literal of length one, but got ``. 
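The `try_*` and `get()` remedies suggested for the array-index and division-by-zero conditions above can be sketched in Spark SQL; the literals are made up and the exact behaviour of the failing forms depends on `spark.sql.ansi.enabled`:

```sql
-- Out-of-bounds access can raise INVALID_ARRAY_INDEX_IN_ELEMENT_AT under ANSI mode...
SELECT element_at(array(1, 2, 3), 5);
-- ...while the tolerant variants return NULL instead of failing.
SELECT try_element_at(array(1, 2, 3), 5);  -- NULL (1-based index)
SELECT get(array(1, 2, 3), 5);             -- NULL (0-based index)
-- Likewise, try_divide returns NULL instead of a divide-by-zero error;
-- the same applies when the dividend is an interval.
SELECT try_divide(10, 0);                  -- NULL
```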
- -### INVALID_EXECUTOR_MEMORY - -SQLSTATE: F0000 - -Executor memory `` must be at least ``. -Please increase executor memory using the --executor-memory option or "``" in Spark configuration. - -### INVALID_EXPRESSION_ENCODER - -[SQLSTATE: 42001](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Found an invalid expression encoder. Expects an instance of ExpressionEncoder but got ``. For more information consult '``/api/java/index.html?org/apache/spark/sql/Encoder.html'. - -### INVALID_EXTRACT_BASE_FIELD_TYPE - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Can't extract a value from ``. Need a complex type [STRUCT, ARRAY, MAP] but got ``. - -### INVALID_EXTRACT_FIELD - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot extract `` from ``. - -### INVALID_EXTRACT_FIELD_TYPE - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Field name should be a non-null string literal, but it's ``. - -### INVALID_FIELD_NAME - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Field name `` is invalid: `` is not a struct. - -### [INVALID_FORMAT](sql-error-conditions-invalid-format-error-class.html) - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The format is invalid: ``. - -For more details see [INVALID_FORMAT](sql-error-conditions-invalid-format-error-class.html) - -### INVALID_FRACTION_OF_SECOND - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The fraction of second must be zero. Valid range is [0, 60]. If necessary set `` to "false" to bypass this error. - -### [INVALID_HANDLE](sql-error-conditions-invalid-handle-error-class.html) - -[SQLSTATE: HY000](sql-error-conditions-sqlstates.html#class-HY-cli-specific-condition) - -The handle `` is invalid. - -For more details see [INVALID_HANDLE](sql-error-conditions-invalid-handle-error-class.html) - -### INVALID_IDENTIFIER - -[SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The unquoted identifier `` is invalid and must be back quoted as: ````. -Unquoted identifiers can only contain ASCII letters ('a' - 'z', 'A' - 'Z'), digits ('0' - '9'), and underbar ('_'). -Unquoted identifiers must also not start with a digit. -Different data sources and meta stores may impose additional restrictions on valid identifiers. - -### INVALID_INDEX_OF_ZERO - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The index 0 is invalid. An index shall be either `< 0` or `> 0` (the first element has index 1). - -### [INVALID_INLINE_TABLE](sql-error-conditions-invalid-inline-table-error-class.html) - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid inline table. - -For more details see [INVALID_INLINE_TABLE](sql-error-conditions-invalid-inline-table-error-class.html) - -### [INVALID_INTERVAL_FORMAT](sql-error-conditions-invalid-interval-format-error-class.html) - -[SQLSTATE: 22006](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Error parsing '``' to interval. Please ensure that the value provided is in a valid format for defining an interval. You can reference the documentation for the correct format.
- -For more details see [INVALID_INTERVAL_FORMAT](sql-error-conditions-invalid-interval-format-error-class.html) - -### [INVALID_INVERSE_DISTRIBUTION_FUNCTION](sql-error-conditions-invalid-inverse-distribution-function-error-class.html) - -[SQLSTATE: 42K0K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid inverse distribution function ``. - -For more details see [INVALID_INVERSE_DISTRIBUTION_FUNCTION](sql-error-conditions-invalid-inverse-distribution-function-error-class.html) - -### INVALID_JSON_DATA_TYPE - -[SQLSTATE: 2203G](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Failed to convert the JSON string '``' to a data type. Please enter a valid data type. - -### INVALID_JSON_ROOT_FIELD - -[SQLSTATE: 22032](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Cannot convert JSON root field to target Spark type. - -### INVALID_JSON_SCHEMA_MAP_TYPE - -[SQLSTATE: 22032](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Input schema `` can only contain STRING as a key type for a MAP. - -### INVALID_KRYO_SERIALIZER_BUFFER_SIZE - -SQLSTATE: F0000 - -The value of the config "``" must be less than 2048 MiB, but got `` MiB. - -### [INVALID_LAMBDA_FUNCTION_CALL](sql-error-conditions-invalid-lambda-function-call-error-class.html) - -[SQLSTATE: 42K0D](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid lambda function call. - -For more details see [INVALID_LAMBDA_FUNCTION_CALL](sql-error-conditions-invalid-lambda-function-call-error-class.html) - -### INVALID_LATERAL_JOIN_TYPE - -[SQLSTATE: 42613](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The `` JOIN with LATERAL correlation is not allowed because an OUTER subquery cannot correlate to its join partner. Remove the LATERAL correlation or use an INNER JOIN, or LEFT OUTER JOIN instead. - -### [INVALID_LIMIT_LIKE_EXPRESSION](sql-error-conditions-invalid-limit-like-expression-error-class.html) - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The limit like expression `` is invalid. - -For more details see [INVALID_LIMIT_LIKE_EXPRESSION](sql-error-conditions-invalid-limit-like-expression-error-class.html) - -### INVALID_NON_DETERMINISTIC_EXPRESSIONS - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The operator expects a deterministic expression, but the actual expression is ``. - -### INVALID_NUMERIC_LITERAL_RANGE - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Numeric literal `` is outside the valid range for `` with minimum value of `` and maximum value of ``. Please adjust the value accordingly. - -### [INVALID_OBSERVED_METRICS](sql-error-conditions-invalid-observed-metrics-error-class.html) - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid observed metrics. 
- -For more details see [INVALID_OBSERVED_METRICS](sql-error-conditions-invalid-observed-metrics-error-class.html) - -### [INVALID_OPTIONS](sql-error-conditions-invalid-options-error-class.html) - -[SQLSTATE: 42K06](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid options: - -For more details see [INVALID_OPTIONS](sql-error-conditions-invalid-options-error-class.html) - -### INVALID_PANDAS_UDF_PLACEMENT - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The group aggregate pandas UDF `` cannot be invoked together with other, non-pandas aggregate functions. - -### [INVALID_PARAMETER_VALUE](sql-error-conditions-invalid-parameter-value-error-class.html) - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The value of parameter(s) `` in `` is invalid: - -For more details see [INVALID_PARAMETER_VALUE](sql-error-conditions-invalid-parameter-value-error-class.html) - -### INVALID_PARTITION_COLUMN_DATA_TYPE - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Cannot use `` for partition column. - -### [INVALID_PARTITION_OPERATION](sql-error-conditions-invalid-partition-operation-error-class.html) - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The partition command is invalid. - -For more details see [INVALID_PARTITION_OPERATION](sql-error-conditions-invalid-partition-operation-error-class.html) - -### INVALID_PROPERTY_KEY - -[SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` is an invalid property key, please use quotes, e.g. SET ``=``. - -### INVALID_PROPERTY_VALUE - -[SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` is an invalid property value, please use quotes, e.g. SET ``=``. - -### INVALID_QUERY_MIXED_QUERY_PARAMETERS - -[SQLSTATE: 42613](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A parameterized query must use either positional or named parameters, but not both. - -### INVALID_SAVE_MODE - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The specified save mode `` is invalid. Valid save modes include "append", "overwrite", "ignore", "error", "errorifexists", and "default". - -### [INVALID_SCHEMA](sql-error-conditions-invalid-schema-error-class.html) - -[SQLSTATE: 42K07](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The input schema `` is not a valid schema string. - -For more details see [INVALID_SCHEMA](sql-error-conditions-invalid-schema-error-class.html) - -### INVALID_SCHEMA_OR_RELATION_NAME - -[SQLSTATE: 42602](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` is not a valid name for tables/schemas. Valid names contain only alphabetic characters, numbers, and _. - -### INVALID_SET_SYNTAX - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Expected format is 'SET', 'SET key', or 'SET key=value'. If you want to include special characters in the key, or include a semicolon in the value, please use backquotes, e.g., SET `key`=`value`. - -### INVALID_SQL_ARG - -[SQLSTATE: 42K08](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The argument `` of `sql()` is invalid.
Consider replacing it with either a SQL literal or collection constructor functions such as `map()`, `array()`, `struct()`. - -### [INVALID_SQL_SYNTAX](sql-error-conditions-invalid-sql-syntax-error-class.html) - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid SQL syntax: - -For more details see [INVALID_SQL_SYNTAX](sql-error-conditions-invalid-sql-syntax-error-class.html) - -### INVALID_STATEMENT_FOR_EXECUTE_INTO - -SQLSTATE: 07501 - -The INTO clause of EXECUTE IMMEDIATE is only valid for queries, but the given statement is not a query: ``. - -### INVALID_STATEMENT_OR_CLAUSE - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The statement or clause: `` is not valid. - -### [INVALID_SUBQUERY_EXPRESSION](sql-error-conditions-invalid-subquery-expression-error-class.html) - -[SQLSTATE: 42823](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid subquery: - -For more details see [INVALID_SUBQUERY_EXPRESSION](sql-error-conditions-invalid-subquery-expression-error-class.html) - -### INVALID_TEMP_OBJ_REFERENCE - -[SQLSTATE: 42K0F](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create the persistent object `` of the type `` because it references the temporary object `` of the type ``. Please make the temporary object `` persistent, or make the persistent object `` temporary. - -### INVALID_TIME_TRAVEL_SPEC - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot specify both version and timestamp when time travelling the table. - -### [INVALID_TIME_TRAVEL_TIMESTAMP_EXPR](sql-error-conditions-invalid-time-travel-timestamp-expr-error-class.html) - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The time travel timestamp expression `` is invalid. - -For more details see [INVALID_TIME_TRAVEL_TIMESTAMP_EXPR](sql-error-conditions-invalid-time-travel-timestamp-expr-error-class.html) - -### INVALID_TYPED_LITERAL - -[SQLSTATE: 42604](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The value of the typed literal `` is invalid: ``. - -### INVALID_UDF_IMPLEMENTATION - -[SQLSTATE: 38000](sql-error-conditions-sqlstates.html#class-38-external-routine-exception) - -Function `` does not implement a ScalarFunction or AggregateFunction. - -### INVALID_URL - -[SQLSTATE: 22P02](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The URL is invalid: ``. If necessary set `` to "false" to bypass this error. - -### INVALID_USAGE_OF_STAR_OR_REGEX - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid usage of `` in ``. - -### INVALID_VARIABLE_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Variable type must be string type, but got ``. - -### INVALID_VARIANT_CAST - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The variant value ```` cannot be cast into ````. Please use `try_variant_get` instead. - -### INVALID_VARIANT_GET_PATH - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The path ```` is not a valid variant extraction path in ````.
-A valid path should start with `$` and is followed by zero or more segments like `[123]`, `.name`, `['name']`, or `["name"]`. - -### INVALID_VIEW_TEXT - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -The view `` cannot be displayed due to invalid view text: ``. This may be caused by an unauthorized modification of the view or an incorrect query syntax. Please check your query syntax and verify that the view has not been tampered with. - -### INVALID_WHERE_CONDITION - -[SQLSTATE: 42903](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The WHERE condition `` contains invalid expressions: ``. -Rewrite the query to avoid window functions, aggregate functions, and generator functions in the WHERE clause. - -### INVALID_WINDOW_SPEC_FOR_AGGREGATION_FUNC - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot specify ORDER BY or a window frame for ``. - -### INVALID_WRITER_COMMIT_MESSAGE - -[SQLSTATE: 42KDE](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The data source writer has generated an invalid number of commit messages. Expected exactly one writer commit message from each task, but received ``. - -### [INVALID_WRITE_DISTRIBUTION](sql-error-conditions-invalid-write-distribution-error-class.html) - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The requested write distribution is invalid. - -For more details see [INVALID_WRITE_DISTRIBUTION](sql-error-conditions-invalid-write-distribution-error-class.html) - -### JOIN_CONDITION_IS_NOT_BOOLEAN_TYPE - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The join condition `` has the invalid type ``, expected "BOOLEAN". - -### KRYO_BUFFER_OVERFLOW - -[SQLSTATE: 54006](sql-error-conditions-sqlstates.html#class-54-program-limit-exceeded) - -Kryo serialization failed: ``. To avoid this, increase "``" value. - -### LOAD_DATA_PATH_NOT_EXISTS - -[SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -LOAD DATA input path does not exist: ``. - -### LOCAL_MUST_WITH_SCHEMA_FILE - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -LOCAL must be used together with the schema of `file`, but got: ````. - -### LOCATION_ALREADY_EXISTS - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot name the managed table as ``, as its associated location `` already exists. Please pick a different table name, or remove the existing location first. - -### MALFORMED_CSV_RECORD - -SQLSTATE: KD000 - -Malformed CSV record: `` - -### MALFORMED_PROTOBUF_MESSAGE - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -Malformed Protobuf messages are detected in message deserialization. Parse Mode: ``. To process malformed protobuf message as null result, try setting the option 'mode' as 'PERMISSIVE'. - -### [MALFORMED_RECORD_IN_PARSING](sql-error-conditions-malformed-record-in-parsing-error-class.html) - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Malformed records are detected in record parsing: ``. -Parse Mode: ``. To process malformed records as null result, try setting the option 'mode' as 'PERMISSIVE'. 
- -For more details see [MALFORMED_RECORD_IN_PARSING](sql-error-conditions-malformed-record-in-parsing-error-class.html) - -### MALFORMED_VARIANT - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Variant binary is malformed. Please check the data source is valid. - -### MERGE_CARDINALITY_VIOLATION - -[SQLSTATE: 23K01](sql-error-conditions-sqlstates.html#class-23-integrity-constraint-violation) - -The ON search condition of the MERGE statement matched a single row from the target table with multiple rows of the source table. -This could result in the target row being operated on more than once with an update or delete operation and is not allowed. - -### MISSING_AGGREGATION - -[SQLSTATE: 42803](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The non-aggregating expression `` is based on columns which are not participating in the GROUP BY clause. -Add the columns or the expression to the GROUP BY, aggregate the expression, or use `` if you do not care which of the values within a group is returned. - -### [MISSING_ATTRIBUTES](sql-error-conditions-missing-attributes-error-class.html) - -[SQLSTATE: XX000](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -Resolved attribute(s) `` missing from `` in operator ``. - -For more details see [MISSING_ATTRIBUTES](sql-error-conditions-missing-attributes-error-class.html) - -### MISSING_GROUP_BY - -[SQLSTATE: 42803](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The query does not include a GROUP BY clause. Add GROUP BY or turn it into the window functions using OVER clauses. - -### MULTIPLE_TIME_TRAVEL_SPEC - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot specify time travel in both the time travel clause and options. - -### MULTIPLE_XML_DATA_SOURCE - -[SQLSTATE: 42710](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Detected multiple data sources with the name `` (``). Please specify the fully qualified class name or remove `` from the classpath. - -### MULTI_SOURCES_UNSUPPORTED_FOR_EXPRESSION - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The expression `` does not support more than one source. - -### MULTI_UDF_INTERFACE_ERROR - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Not allowed to implement multiple UDF interfaces, UDF class ``. - -### NAMED_PARAMETERS_NOT_SUPPORTED - -[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Named parameters are not supported for function ``; please retry the query with positional arguments to the function call instead. - -### NAMED_PARAMETER_SUPPORT_DISABLED - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Cannot call function `` because named argument references are not enabled here. -In this case, the named argument reference was ``. -Set "spark.sql.allowNamedFunctionArguments" to "true" to turn on feature. - -### NESTED_AGGREGATE_FUNCTION - -[SQLSTATE: 42607](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -It is not allowed to use an aggregate function in the argument of another aggregate function. Please use the inner aggregate function in a sub-query. 
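A minimal, hypothetical sketch of the MISSING_AGGREGATION condition above and two common fixes (the `emp` table and its columns are invented for illustration):

```sql
-- Selecting a non-grouped, non-aggregated column raises MISSING_AGGREGATION.
SELECT dept, name, count(*) FROM emp GROUP BY dept;
-- Fix 1: add the column to the GROUP BY clause.
SELECT dept, name, count(*) FROM emp GROUP BY dept, name;
-- Fix 2: aggregate it, e.g. with any_value(), if any value per group will do.
SELECT dept, any_value(name), count(*) FROM emp GROUP BY dept;
```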
- -### NESTED_EXECUTE_IMMEDIATE - -SQLSTATE: 07501 - -Nested EXECUTE IMMEDIATE commands are not allowed. Please ensure that the SQL query provided (``) does not contain another EXECUTE IMMEDIATE command. - -### NONEXISTENT_FIELD_NAME_IN_LIST - -SQLSTATE: HV091 - -Field(s) `` do(es) not exist. Available fields: `` - -### NON_FOLDABLE_ARGUMENT - -[SQLSTATE: 42K08](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The function `` requires the parameter `` to be a foldable expression of the type ``, but the actual argument is non-foldable. - -### NON_LAST_MATCHED_CLAUSE_OMIT_CONDITION - -[SQLSTATE: 42613](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -When there is more than one MATCHED clause in a MERGE statement, only the last MATCHED clause can omit the condition. - -### NON_LAST_NOT_MATCHED_BY_SOURCE_CLAUSE_OMIT_CONDITION - -[SQLSTATE: 42613](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -When there is more than one NOT MATCHED BY SOURCE clause in a MERGE statement, only the last NOT MATCHED BY SOURCE clause can omit the condition. - -### NON_LAST_NOT_MATCHED_BY_TARGET_CLAUSE_OMIT_CONDITION - -[SQLSTATE: 42613](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -When there is more than one NOT MATCHED [BY TARGET] clause in a MERGE statement, only the last NOT MATCHED [BY TARGET] clause can omit the condition. - -### NON_LITERAL_PIVOT_VALUES - -[SQLSTATE: 42K08](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Literal expressions are required for pivot values, found ``. - -### NON_PARTITION_COLUMN - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The PARTITION clause cannot contain the non-partition column: ``. - -### NON_TIME_WINDOW_NOT_SUPPORTED_IN_STREAMING - -[SQLSTATE: 42KDE](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Window function is not supported in `` (as column ``) on streaming DataFrames/Datasets. -Structured Streaming only supports time-window aggregation using the WINDOW function. (window specification: ``) - -### [NOT_ALLOWED_IN_FROM](sql-error-conditions-not-allowed-in-from-error-class.html) - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Not allowed in the FROM clause: - -For more details see [NOT_ALLOWED_IN_FROM](sql-error-conditions-not-allowed-in-from-error-class.html) - -### [NOT_A_CONSTANT_STRING](sql-error-conditions-not-a-constant-string-error-class.html) - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The expression `` used for the routine or clause `` must be a constant STRING which is NOT NULL. - -For more details see [NOT_A_CONSTANT_STRING](sql-error-conditions-not-a-constant-string-error-class.html) - -### NOT_A_PARTITIONED_TABLE - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Operation `` is not allowed for `` because it is not a partitioned table. - -### [NOT_NULL_CONSTRAINT_VIOLATION](sql-error-conditions-not-null-constraint-violation-error-class.html) - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Assigning a NULL is not allowed here.
- -For more details see [NOT_NULL_CONSTRAINT_VIOLATION](sql-error-conditions-not-null-constraint-violation-error-class.html) - -### NOT_SUPPORTED_CHANGE_COLUMN - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -ALTER TABLE ALTER/CHANGE COLUMN is not supported for changing `
      `'s column `` with type `` to `` with type ``. - -### NOT_SUPPORTED_COMMAND_FOR_V2_TABLE - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -`` is not supported for v2 tables. - -### NOT_SUPPORTED_COMMAND_WITHOUT_HIVE_SUPPORT - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -`` is not supported, if you want to enable it, please set "spark.sql.catalogImplementation" to "hive". - -### [NOT_SUPPORTED_IN_JDBC_CATALOG](sql-error-conditions-not-supported-in-jdbc-catalog-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Not supported command in JDBC catalog: - -For more details see [NOT_SUPPORTED_IN_JDBC_CATALOG](sql-error-conditions-not-supported-in-jdbc-catalog-error-class.html) - -### NOT_UNRESOLVED_ENCODER - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Unresolved encoder expected, but `` was found. - -### NO_DEFAULT_COLUMN_VALUE_AVAILABLE - -[SQLSTATE: 42608](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Can't determine the default value for `` since it is not nullable and it has no default value. - -### NO_HANDLER_FOR_UDAF - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -No handler for UDAF '``'. Use sparkSession.udf.register(...) instead. - -### NO_MERGE_ACTION_SPECIFIED - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -df.mergeInto needs to be followed by at least one of whenMatched/whenNotMatched/whenNotMatchedBySource. - -### NO_SQL_TYPE_IN_PROTOBUF_SCHEMA - -[SQLSTATE: 42S22](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot find `` in Protobuf schema. - -### NO_UDF_INTERFACE - -[SQLSTATE: 38000](sql-error-conditions-sqlstates.html#class-38-external-routine-exception) - -UDF class `` doesn't implement any UDF interface. - -### NULLABLE_COLUMN_OR_FIELD - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Column or field `` is nullable while it's required to be non-nullable. - -### NULLABLE_ROW_ID_ATTRIBUTES - -[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Row ID attributes cannot be nullable: ``. - -### NULL_MAP_KEY - -[SQLSTATE: 2200E](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Cannot use null as map key. - -### NULL_QUERY_STRING_EXECUTE_IMMEDIATE - -[SQLSTATE: 22004](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Execute immediate requires a non-null variable as the query string, but the provided variable `` is null. - -### NUMERIC_OUT_OF_SUPPORTED_RANGE - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The value `` cannot be interpreted as a numeric since it has more than 38 digits. 
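A short, assumed example of the NULL_MAP_KEY condition listed above (the key and value literals are invented):

```sql
-- Building a map with a NULL key raises NULL_MAP_KEY.
SELECT map(NULL, 1);
-- Map keys must be non-null; coalesce or filter the key first.
SELECT map(coalesce(NULL, 'unknown'), 1);
```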
- -### [NUMERIC_VALUE_OUT_OF_RANGE](sql-error-conditions-numeric-value-out-of-range-error-class.html) - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - - - -For more details see [NUMERIC_VALUE_OUT_OF_RANGE](sql-error-conditions-numeric-value-out-of-range-error-class.html) - -### NUM_COLUMNS_MISMATCH - -[SQLSTATE: 42826](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` can only be performed on inputs with the same number of columns, but the first input has `` columns and the `` input has `` columns. - -### NUM_TABLE_VALUE_ALIASES_MISMATCH - -[SQLSTATE: 42826](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Number of given aliases does not match number of output columns. -Function name: ``; number of aliases: ``; number of output columns: ``. - -### OPERATION_CANCELED - -[SQLSTATE: HY008](sql-error-conditions-sqlstates.html#class-HY-cli-specific-condition) - -Operation has been canceled. - -### ORDER_BY_POS_OUT_OF_RANGE - -[SQLSTATE: 42805](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -ORDER BY position `` is not in select list (valid range is [1, ``]). - -### PARSE_EMPTY_STATEMENT - -[SQLSTATE: 42617](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Syntax error, unexpected empty statement. - -### PARSE_SYNTAX_ERROR - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Syntax error at or near ````. - -### PARTITIONS_ALREADY_EXIST - -[SQLSTATE: 428FT](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot ADD or RENAME TO partition(s) `` in table `` because they already exist. -Choose a different name, drop the existing partition, or add the IF NOT EXISTS clause to tolerate a pre-existing partition. - -### PARTITIONS_NOT_FOUND - -[SQLSTATE: 428FT](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The partition(s) `` cannot be found in table ``. -Verify the partition specification and table name. -To tolerate the error on drop use ALTER TABLE … DROP IF EXISTS PARTITION. - -### PATH_ALREADY_EXISTS - -[SQLSTATE: 42K04](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Path `` already exists. Set mode as "overwrite" to overwrite the existing path. - -### PATH_NOT_FOUND - -[SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Path does not exist: ``. - -### PIVOT_VALUE_DATA_TYPE_MISMATCH - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid pivot value '``': value data type `` does not match pivot column data type ``. - -### PLAN_VALIDATION_FAILED_RULE_EXECUTOR - -[SQLSTATE: XXKD0](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -The input plan of `` is invalid: `` - -### PLAN_VALIDATION_FAILED_RULE_IN_BATCH - -[SQLSTATE: XXKD0](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -Rule `` in batch `` generated an invalid plan: `` - -### PROTOBUF_DEPENDENCY_NOT_FOUND - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Could not find dependency: ``. - -### PROTOBUF_DESCRIPTOR_FILE_NOT_FOUND - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Error reading Protobuf descriptor file at path: ``. 
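The IF NOT EXISTS remedy mentioned for PARTITIONS_ALREADY_EXIST above, sketched against a hypothetical partitioned `sales` table:

```sql
-- Adding a partition that already exists raises PARTITIONS_ALREADY_EXIST...
ALTER TABLE sales ADD PARTITION (dt = '2024-01-01');
-- ...while IF NOT EXISTS tolerates the pre-existing partition.
ALTER TABLE sales ADD IF NOT EXISTS PARTITION (dt = '2024-01-01');
```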
- -### PROTOBUF_FIELD_MISSING - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Searching for `` in Protobuf schema at `` gave `` matches. Candidates: ``. - -### PROTOBUF_FIELD_MISSING_IN_SQL_SCHEMA - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Found `` in the Protobuf schema, but there is no match in the SQL schema. - -### PROTOBUF_FIELD_TYPE_MISMATCH - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Type mismatch encountered for field: ``. - -### PROTOBUF_MESSAGE_NOT_FOUND - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Unable to locate Message `` in Descriptor. - -### PROTOBUF_TYPE_NOT_SUPPORT - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Protobuf type not yet supported: ``. - -### PYTHON_DATA_SOURCE_ERROR - -[SQLSTATE: 38000](sql-error-conditions-sqlstates.html#class-38-external-routine-exception) - -Failed to `` Python data source ``: `` - -### PYTHON_STREAMING_DATA_SOURCE_RUNTIME_ERROR - -[SQLSTATE: 38000](sql-error-conditions-sqlstates.html#class-38-external-routine-exception) - -Failed when the Python streaming data source performs ``: `` - -### RECURSIVE_PROTOBUF_SCHEMA - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Found a recursive reference in the Protobuf schema, which cannot be processed by Spark by default: ``. Try setting the option `recursive.fields.max.depth` to a value between 0 and 10. Going beyond 10 levels of recursion is not allowed. - -### RECURSIVE_VIEW - -[SQLSTATE: 42K0H](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Recursive view `` detected (cycle: ``). - -### REF_DEFAULT_VALUE_IS_NOT_ALLOWED_IN_PARTITION - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -References to DEFAULT column values are not allowed within the PARTITION clause. - -### RENAME_SRC_PATH_NOT_FOUND - -[SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to rename because `` was not found. - -### REPEATED_CLAUSE - -[SQLSTATE: 42614](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The `` clause may be used at most once per `` operation. - -### REQUIRED_PARAMETER_NOT_FOUND - -[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot invoke function `` because the parameter named `` is required, but the function call did not supply a value. Please update the function call to supply an argument value (either positionally at index `` or by name) and retry the query. - -### REQUIRES_SINGLE_PART_NAMESPACE - -[SQLSTATE: 42K05](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` requires a single-part namespace, but got ``. - -### ROUTINE_ALREADY_EXISTS - -[SQLSTATE: 42723](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create the function `` because it already exists. -Choose a different name, drop or replace the existing function, or add the IF NOT EXISTS clause to tolerate a pre-existing function.
- -### ROUTINE_NOT_FOUND - -[SQLSTATE: 42883](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The function `` cannot be found. Verify the spelling and correctness of the schema and catalog. -If you did not qualify the name with a schema and catalog, verify the current_schema() output, or qualify the name with the correct schema and catalog. -To tolerate the error on drop use DROP FUNCTION IF EXISTS. - -### ROW_SUBQUERY_TOO_MANY_ROWS - -[SQLSTATE: 21000](sql-error-conditions-sqlstates.html#class-21-cardinality-violation) - -More than one row returned by a subquery used as a row. - -### RULE_ID_NOT_FOUND - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Not found an id for the rule name "``". Please modify RuleIdCollection.scala if you are adding a new rule. - -### SCALAR_SUBQUERY_IS_IN_GROUP_BY_OR_AGGREGATE_FUNCTION - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The correlated scalar subquery '``' is neither present in GROUP BY, nor in an aggregate function. -Add it to GROUP BY using ordinal position or wrap it in `first()` (or `first_value`) if you don't care which value you get. - -### SCALAR_SUBQUERY_TOO_MANY_ROWS - -[SQLSTATE: 21000](sql-error-conditions-sqlstates.html#class-21-cardinality-violation) - -More than one row returned by a subquery used as an expression. - -### SCHEMA_ALREADY_EXISTS - -[SQLSTATE: 42P06](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create schema `` because it already exists. -Choose a different name, drop the existing schema, or add the IF NOT EXISTS clause to tolerate pre-existing schema. - -### SCHEMA_NOT_EMPTY - -[SQLSTATE: 2BP01](sql-error-conditions-sqlstates.html#class-2B-dependent-privilege-descriptors-still-exist) - -Cannot drop a schema `` because it contains objects. -Use DROP SCHEMA ... CASCADE to drop the schema and all its objects. - -### SCHEMA_NOT_FOUND - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The schema `` cannot be found. Verify the spelling and correctness of the schema and catalog. -If you did not qualify the name with a catalog, verify the current_schema() output, or qualify the name with the correct catalog. -To tolerate the error on drop use DROP SCHEMA IF EXISTS. - -### SECOND_FUNCTION_ARGUMENT_NOT_INTEGER - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The second argument of `` function needs to be an integer. - -### SEED_EXPRESSION_IS_UNFOLDABLE - -[SQLSTATE: 42K08](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The seed expression `` of the expression `` must be foldable. - -### SORT_BY_WITHOUT_BUCKETING - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -sortBy must be used together with bucketBy. - -### SPARK_JOB_CANCELLED - -[SQLSTATE: XXKDA](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -Job `` cancelled `` - -### SPECIFY_BUCKETING_IS_NOT_ALLOWED - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A CREATE TABLE without explicit column list cannot specify bucketing information. -Please use the form with explicit column list and specify bucketing information. -Alternatively, allow bucketing information to be inferred by omitting the clause. 
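The drop-side remedies suggested for ROUTINE_NOT_FOUND and SCHEMA_NOT_FOUND above, as a hedged sketch (the object names are invented):

```sql
-- Tolerate a missing function or schema on drop instead of raising
-- ROUTINE_NOT_FOUND / SCHEMA_NOT_FOUND.
DROP FUNCTION IF EXISTS my_udf;
DROP SCHEMA IF EXISTS staging CASCADE;
```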
- -### SPECIFY_CLUSTER_BY_WITH_BUCKETING_IS_NOT_ALLOWED - -[SQLSTATE: 42908](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot specify both CLUSTER BY and CLUSTERED BY INTO BUCKETS. - -### SPECIFY_CLUSTER_BY_WITH_PARTITIONED_BY_IS_NOT_ALLOWED - -[SQLSTATE: 42908](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot specify both CLUSTER BY and PARTITIONED BY. - -### SPECIFY_PARTITION_IS_NOT_ALLOWED - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A CREATE TABLE without explicit column list cannot specify PARTITIONED BY. -Please use the form with explicit column list and specify PARTITIONED BY. -Alternatively, allow partitioning to be inferred by omitting the PARTITION BY clause. - -### SQL_CONF_NOT_FOUND - -[SQLSTATE: 42K0I](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The SQL config `` cannot be found. Please verify that the config exists. - -### STAR_GROUP_BY_POS - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Star (*) is not allowed in a select list when GROUP BY an ordinal position is used. - -### STATEFUL_PROCESSOR_CANNOT_PERFORM_OPERATION_WITH_INVALID_HANDLE_STATE - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to perform stateful processor operation=`` with invalid handle state=``. - -### STATEFUL_PROCESSOR_CANNOT_PERFORM_OPERATION_WITH_INVALID_TIME_MODE - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to perform stateful processor operation=`` with invalid timeMode=`` - -### STATEFUL_PROCESSOR_CANNOT_REINITIALIZE_STATE_ON_KEY - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot re-initialize state on the same grouping key during initial state handling for stateful processor. Invalid grouping key=``. - -### STATEFUL_PROCESSOR_INCORRECT_TIME_MODE_TO_ASSIGN_TTL - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot use TTL for state=`` in timeMode=``, use TimeMode.ProcessingTime() instead. - -### STATEFUL_PROCESSOR_TTL_DURATION_MUST_BE_POSITIVE - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -TTL duration must be greater than zero for State store operation=`` on state=``. - -### STATE_STORE_CANNOT_CREATE_COLUMN_FAMILY_WITH_RESERVED_CHARS - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to create column family with unsupported starting character and name=``. - -### STATE_STORE_CANNOT_USE_COLUMN_FAMILY_WITH_INVALID_NAME - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to perform column family operation=`` with invalid name=``. Column family name cannot be empty or include leading/trailing spaces or use the reserved keyword=default - -### STATE_STORE_HANDLE_NOT_INITIALIZED - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The handle has not been initialized for this StatefulProcessor. -Please only use the StatefulProcessor within the transformWithState operator. 
- -### STATE_STORE_INCORRECT_NUM_ORDERING_COLS_FOR_RANGE_SCAN - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Incorrect number of ordering ordinals=`` for range scan encoder. The number of ordering ordinals cannot be zero or greater than the number of schema columns. - -### STATE_STORE_INCORRECT_NUM_PREFIX_COLS_FOR_PREFIX_SCAN - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Incorrect number of prefix columns=`` for prefix scan encoder. Prefix columns cannot be zero or greater than or equal to the number of schema columns. - -### STATE_STORE_NULL_TYPE_ORDERING_COLS_NOT_SUPPORTED - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Null type ordering column with name=`` at index=`` is not supported for range scan encoder. - -### STATE_STORE_UNSUPPORTED_OPERATION - -[SQLSTATE: XXKST](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -`` operation not supported with `` - -### STATE_STORE_UNSUPPORTED_OPERATION_BINARY_INEQUALITY - -[SQLSTATE: XXKST](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -Binary inequality column is not supported with state store. Provided schema: ``. - -### STATE_STORE_UNSUPPORTED_OPERATION_ON_MISSING_COLUMN_FAMILY - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -State store operation=`` not supported on missing column family=``. - -### STATE_STORE_VARIABLE_SIZE_ORDERING_COLS_NOT_SUPPORTED - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Variable size ordering column with name=`` at index=`` is not supported for range scan encoder. - -### STATIC_PARTITION_COLUMN_IN_INSERT_COLUMN_LIST - -[SQLSTATE: 42713](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Static partition column `` is also specified in the column list. - -### STDS_COMMITTED_BATCH_UNAVAILABLE - -SQLSTATE: KD006 - -No committed batch found, checkpoint location: ``. Ensure that the query has run and committed any microbatch before stopping. - -### STDS_CONFLICT_OPTIONS - -[SQLSTATE: 42613](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The options `` cannot be specified together. Please specify only one of them. - -### STDS_FAILED_TO_READ_STATE_SCHEMA - -[SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to read the state schema. Either the file does not exist, or the file is corrupted. Options: ``. -Rerun the streaming query to construct the state schema, and report to the corresponding communities or vendors if the error persists. - -### STDS_INTERNAL_ERROR - -[SQLSTATE: XXKST](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -Internal error: `` -Please, report this bug to the corresponding communities or vendors, and provide the full stack trace. - -### [STDS_INVALID_OPTION_VALUE](sql-error-conditions-stds-invalid-option-value-error-class.html) - -[SQLSTATE: 42616](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Invalid value for source option '``': - -For more details see [STDS_INVALID_OPTION_VALUE](sql-error-conditions-stds-invalid-option-value-error-class.html) - -### STDS_NO_PARTITION_DISCOVERED_IN_STATE_STORE - -SQLSTATE: KD006 - -The state does not have any partition.
Please double check that the query points to a valid state. Options: `` - -### STDS_OFFSET_LOG_UNAVAILABLE - -SQLSTATE: KD006 - -The offset log for `` does not exist, checkpoint location: ``. -Please specify a batch ID that is available for querying - you can list the available batch IDs by using the state metadata data source. - -### STDS_OFFSET_METADATA_LOG_UNAVAILABLE - -SQLSTATE: KD006 - -Metadata is not available for offset log for ``, checkpoint location: ``. -The checkpoint seems to have been run only with older Spark version(s). Run the streaming query with a recent Spark version, so that Spark constructs the state metadata. - -### STDS_REQUIRED_OPTION_UNSPECIFIED - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -'``' must be specified. - -### STREAMING_STATEFUL_OPERATOR_NOT_MATCH_IN_STATE_METADATA - -[SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The streaming stateful operator name does not match the operator in the state metadata. This is likely to happen when the user adds, removes, or changes the stateful operators of an existing streaming query. -Stateful operators in the metadata: [``]; Stateful operators in current batch: [``]. - -### STREAM_FAILED - -[SQLSTATE: XXKST](sql-error-conditions-sqlstates.html#class-XX-internal-error) - -Query [id = ``, runId = ``] terminated with exception: `` - -### SUM_OF_LIMIT_AND_OFFSET_EXCEEDS_MAX_INT - -[SQLSTATE: 22003](sql-error-conditions-sqlstates.html#class-22-data-exception) - -The sum of the LIMIT clause and the OFFSET clause must not be greater than the maximum 32-bit integer value (2,147,483,647), but found limit = ``, offset = ``. - -### TABLE_OR_VIEW_ALREADY_EXISTS - -[SQLSTATE: 42P07](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create table or view `` because it already exists. -Choose a different name, drop or replace the existing object, or add the IF NOT EXISTS clause to tolerate pre-existing objects. - -### TABLE_OR_VIEW_NOT_FOUND - -[SQLSTATE: 42P01](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The table or view `` cannot be found. Verify the spelling and correctness of the schema and catalog. -If you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog. -To tolerate the error on drop use DROP VIEW IF EXISTS or DROP TABLE IF EXISTS. - -### TABLE_VALUED_FUNCTION_FAILED_TO_ANALYZE_IN_PYTHON - -[SQLSTATE: 38000](sql-error-conditions-sqlstates.html#class-38-external-routine-exception) - -Failed to analyze the Python user-defined table function: `` - -### TABLE_VALUED_FUNCTION_REQUIRED_METADATA_INCOMPATIBLE_WITH_CALL - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Failed to evaluate the table function `` because its table metadata ``, but the function call ``. - -### TABLE_VALUED_FUNCTION_REQUIRED_METADATA_INVALID - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Failed to evaluate the table function `` because its table metadata was invalid; ``. - -### TABLE_VALUED_FUNCTION_TOO_MANY_TABLE_ARGUMENTS - -[SQLSTATE: 54023](sql-error-conditions-sqlstates.html#class-54-program-limit-exceeded) - -There are too many table arguments for the table-valued function. -It allows one table argument, but got: ``.
-If you want to allow it, please set "spark.sql.allowMultipleTableArguments.enabled" to "true" - -### TASK_WRITE_FAILED - -SQLSTATE: 58030 - -Task failed while writing rows to ``. - -### TEMP_TABLE_OR_VIEW_ALREADY_EXISTS - -[SQLSTATE: 42P07](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create the temporary view `` because it already exists. -Choose a different name, drop or replace the existing view, or add the IF NOT EXISTS clause to tolerate pre-existing views. - -### TEMP_VIEW_NAME_TOO_MANY_NAME_PARTS - -[SQLSTATE: 428EK](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -CREATE TEMPORARY VIEW or the corresponding Dataset APIs only accept single-part view names, but got: ``. - -### UDTF_ALIAS_NUMBER_MISMATCH - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The number of aliases supplied in the AS clause does not match the number of columns output by the UDTF. -Expected `` aliases, but got ``. -Please ensure that the number of aliases provided matches the number of columns output by the UDTF. - -### UDTF_INVALID_ALIAS_IN_REQUESTED_ORDERING_STRING_FROM_ANALYZE_METHOD - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to evaluate the user-defined table function because its 'analyze' method returned a requested OrderingColumn whose column name expression included an unnecessary alias ``; please remove this alias and then try the query again. - -### UDTF_INVALID_REQUESTED_SELECTED_EXPRESSION_FROM_ANALYZE_METHOD_REQUIRES_ALIAS - -[SQLSTATE: 42802](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Failed to evaluate the user-defined table function because its 'analyze' method returned a requested 'select' expression (``) that does not include a corresponding alias; please update the UDTF to specify an alias there and then try the query again. - -### UNABLE_TO_ACQUIRE_MEMORY - -[SQLSTATE: 53200](sql-error-conditions-sqlstates.html#class-53-insufficient-resources) - -Unable to acquire `` bytes of memory, got ``. - -### UNABLE_TO_CONVERT_TO_PROTOBUF_MESSAGE_TYPE - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Unable to convert SQL type `` to Protobuf type ``. - -### UNABLE_TO_FETCH_HIVE_TABLES - -SQLSTATE: 58030 - -Unable to fetch tables of Hive database: ``. - -### UNABLE_TO_INFER_SCHEMA - -[SQLSTATE: 42KD9](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Unable to infer schema for ``. It must be specified manually. - -### UNBOUND_SQL_PARAMETER - -[SQLSTATE: 42P02](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Found the unbound parameter: ``. Please, fix `args` and provide a mapping of the parameter to either a SQL literal or collection constructor functions such as `map()`, `array()`, `struct()`. - -### UNCLOSED_BRACKETED_COMMENT - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Found an unclosed bracketed comment. Please, append */ at the end of the comment. - -### UNEXPECTED_INPUT_TYPE - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Parameter `` of function `` requires the `` type, however `` has the type ``. 
- -### UNEXPECTED_POSITIONAL_ARGUMENT - -[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot invoke function `` because it contains positional argument(s) following the named argument assigned to ``; please rearrange them so the positional arguments come first and then retry the query again. - -### UNEXPECTED_SERIALIZER_FOR_CLASS - -[SQLSTATE: 42846](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The class `` has an unexpected expression serializer. Expects "STRUCT" or "IF" which returns "STRUCT" but found ``. - -### UNKNOWN_PROTOBUF_MESSAGE_TYPE - -[SQLSTATE: 42K0G](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Attempting to treat `` as a Message, but it was ``. - -### UNPIVOT_REQUIRES_ATTRIBUTES - -[SQLSTATE: 42K0A](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -UNPIVOT requires all given `` expressions to be columns when no `` expressions are given. These are not columns: [``]. - -### UNPIVOT_REQUIRES_VALUE_COLUMNS - -[SQLSTATE: 42K0A](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -At least one value column needs to be specified for UNPIVOT, all columns specified as ids. - -### UNPIVOT_VALUE_DATA_TYPE_MISMATCH - -[SQLSTATE: 42K09](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Unpivot value columns must share a least common type, some types do not: [``]. - -### UNPIVOT_VALUE_SIZE_MISMATCH - -[SQLSTATE: 428C4](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -All unpivot value columns must have the same size as there are value column names (``). - -### UNRECOGNIZED_PARAMETER_NAME - -[SQLSTATE: 4274K](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot invoke function `` because the function call included a named argument reference for the argument named ``, but this function does not include any signature containing an argument with this name. Did you mean one of the following? [``]. - -### UNRECOGNIZED_SQL_TYPE - -[SQLSTATE: 42704](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Unrecognized SQL type - name: ``, id: ``. - -### UNRESOLVABLE_TABLE_VALUED_FUNCTION - -[SQLSTATE: 42883](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Could not resolve `` to a table-valued function. -Please make sure that `` is defined as a table-valued function and that all required parameters are provided correctly. -If `` is not defined, please create the table-valued function before using it. -For more information about defining table-valued functions, please refer to the Apache Spark documentation. - -### UNRESOLVED_ALL_IN_GROUP_BY - -[SQLSTATE: 42803](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot infer grouping columns for GROUP BY ALL based on the select clause. Please explicitly specify the grouping columns. - -### [UNRESOLVED_COLUMN](sql-error-conditions-unresolved-column-error-class.html) - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A column, variable, or function parameter with name `` cannot be resolved. 
- -For more details see [UNRESOLVED_COLUMN](sql-error-conditions-unresolved-column-error-class.html) - -### [UNRESOLVED_FIELD](sql-error-conditions-unresolved-field-error-class.html) - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A field with name `` cannot be resolved with the struct-type column ``. - -For more details see [UNRESOLVED_FIELD](sql-error-conditions-unresolved-field-error-class.html) - -### [UNRESOLVED_MAP_KEY](sql-error-conditions-unresolved-map-key-error-class.html) - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot resolve column `` as a map key. If the key is a string literal, add the single quotes '' around it. - -For more details see [UNRESOLVED_MAP_KEY](sql-error-conditions-unresolved-map-key-error-class.html) - -### UNRESOLVED_ROUTINE - -[SQLSTATE: 42883](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot resolve function `` on search path ``. - -### UNRESOLVED_USING_COLUMN_FOR_JOIN - -[SQLSTATE: 42703](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -USING column `` cannot be resolved on the `` side of the join. The ``-side columns: [``]. - -### UNRESOLVED_VARIABLE - -[SQLSTATE: 42883](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot resolve variable `` on search path ``. - -### UNSET_NONEXISTENT_PROPERTIES - -[SQLSTATE: 42K0J](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Attempted to unset non-existent properties [``] in table `
      `. - -### [UNSUPPORTED_ADD_FILE](sql-error-conditions-unsupported-add-file-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Don't support add file. - -For more details see [UNSUPPORTED_ADD_FILE](sql-error-conditions-unsupported-add-file-error-class.html) - -### UNSUPPORTED_ARROWTYPE - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Unsupported arrow type ``. - -### [UNSUPPORTED_CALL](sql-error-conditions-unsupported-call-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Cannot call the method "``" of the class "``". - -For more details see [UNSUPPORTED_CALL](sql-error-conditions-unsupported-call-error-class.html) - -### UNSUPPORTED_CHAR_OR_VARCHAR_AS_STRING - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The char/varchar type can't be used in the table schema. -If you want Spark treat them as string type as same as Spark 3.0 and earlier, please set "spark.sql.legacy.charVarcharAsString" to "true". - -### [UNSUPPORTED_COLLATION](sql-error-conditions-unsupported-collation-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Collation `` is not supported for: - -For more details see [UNSUPPORTED_COLLATION](sql-error-conditions-unsupported-collation-error-class.html) - -### UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Unsupported data source type for direct query on files: `` - -### UNSUPPORTED_DATATYPE - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Unsupported data type ``. - -### UNSUPPORTED_DATA_SOURCE_SAVE_MODE - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The data source "``" cannot be written in the `` mode. Please use either the "Append" or "Overwrite" mode instead. - -### UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The `` datasource doesn't support the column `` of the type ``. - -### [UNSUPPORTED_DEFAULT_VALUE](sql-error-conditions-unsupported-default-value-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -DEFAULT column values is not supported. - -For more details see [UNSUPPORTED_DEFAULT_VALUE](sql-error-conditions-unsupported-default-value-error-class.html) - -### [UNSUPPORTED_DESERIALIZER](sql-error-conditions-unsupported-deserializer-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The deserializer is not supported: - -For more details see [UNSUPPORTED_DESERIALIZER](sql-error-conditions-unsupported-deserializer-error-class.html) - -### UNSUPPORTED_EXPRESSION_GENERATED_COLUMN - -[SQLSTATE: 42621](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create generated column `` with generation expression `` because ``. - -### UNSUPPORTED_EXPR_FOR_OPERATOR - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A query operator contains one or more unsupported expressions. -Consider to rewrite it to avoid window functions, aggregate functions, and generator functions in the WHERE clause. 
-Invalid expressions: [``] - -### UNSUPPORTED_EXPR_FOR_PARAMETER - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -A query parameter contains unsupported expression. -Parameters can either be variables or literals. -Invalid expression: [``] - -### UNSUPPORTED_EXPR_FOR_WINDOW - -[SQLSTATE: 42P20](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Expression `` not supported within a window function. - -### [UNSUPPORTED_FEATURE](sql-error-conditions-unsupported-feature-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The feature is not supported: - -For more details see [UNSUPPORTED_FEATURE](sql-error-conditions-unsupported-feature-error-class.html) - -### [UNSUPPORTED_GENERATOR](sql-error-conditions-unsupported-generator-error-class.html) - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The generator is not supported: - -For more details see [UNSUPPORTED_GENERATOR](sql-error-conditions-unsupported-generator-error-class.html) - -### UNSUPPORTED_GROUPING_EXPRESSION - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup. - -### [UNSUPPORTED_INSERT](sql-error-conditions-unsupported-insert-error-class.html) - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Can't insert into the target. - -For more details see [UNSUPPORTED_INSERT](sql-error-conditions-unsupported-insert-error-class.html) - -### [UNSUPPORTED_MERGE_CONDITION](sql-error-conditions-unsupported-merge-condition-error-class.html) - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -MERGE operation contains unsupported `` condition. - -For more details see [UNSUPPORTED_MERGE_CONDITION](sql-error-conditions-unsupported-merge-condition-error-class.html) - -### [UNSUPPORTED_OVERWRITE](sql-error-conditions-unsupported-overwrite-error-class.html) - -[SQLSTATE: 42902](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Can't overwrite the target that is also being read from. - -For more details see [UNSUPPORTED_OVERWRITE](sql-error-conditions-unsupported-overwrite-error-class.html) - -### [UNSUPPORTED_SAVE_MODE](sql-error-conditions-unsupported-save-mode-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -The save mode `` is not supported for: - -For more details see [UNSUPPORTED_SAVE_MODE](sql-error-conditions-unsupported-save-mode-error-class.html) - -### [UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY](sql-error-conditions-unsupported-subquery-expression-category-error-class.html) - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Unsupported subquery expression: - -For more details see [UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY](sql-error-conditions-unsupported-subquery-expression-category-error-class.html) - -### UNSUPPORTED_TYPED_LITERAL - -[SQLSTATE: 0A000](sql-error-conditions-sqlstates.html#class-0A-feature-not-supported) - -Literals of the type `` are not supported. Supported types are ``. 
- -### UNTYPED_SCALA_UDF - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -You're using untyped Scala UDF, which does not have the input type information. Spark may blindly pass null to the Scala closure with primitive-type argument, and the closure will see the default value of the Java type for the null argument, e.g. `udf((x: Int) => x, IntegerType)`, the result is 0 for null input. To get rid of this error, you could: -1. use typed Scala UDF APIs(without return type parameter), e.g. `udf((x: Int) => x)`. -2. use Java UDF APIs, e.g. `udf(new UDF1[String, Integer] { override def call(s: String): Integer = s.length() }, IntegerType)`, if input types are all non primitive. -3. set "spark.sql.legacy.allowUntypedScalaUDF" to "true" and use this API with caution. - -### USER_RAISED_EXCEPTION - -SQLSTATE: P0001 - -`` - -### USER_RAISED_EXCEPTION_PARAMETER_MISMATCH - -SQLSTATE: P0001 - -The `raise_error()` function was used to raise error class: `` which expects parameters: ``. -The provided parameters `` do not match the expected parameters. -Please make sure to provide all expected parameters. - -### USER_RAISED_EXCEPTION_UNKNOWN_ERROR_CLASS - -SQLSTATE: P0001 - -The `raise_error()` function was used to raise an unknown error class: `` - -### VARIABLE_ALREADY_EXISTS - -[SQLSTATE: 42723](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create the variable `` because it already exists. -Choose a different name, or drop or replace the existing variable. - -### VARIABLE_NOT_FOUND - -[SQLSTATE: 42883](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The variable `` cannot be found. Verify the spelling and correctness of the schema and catalog. -If you did not qualify the name with a schema and catalog, verify the current_schema() output, or qualify the name with the correct schema and catalog. -To tolerate the error on drop use DROP VARIABLE IF EXISTS. - -### VARIANT_CONSTRUCTOR_SIZE_LIMIT - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Cannot construct a Variant larger than 16 MiB. The maximum allowed size of a Variant value is 16 MiB. - -### VARIANT_DUPLICATE_KEY - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Failed to build variant because of a duplicate object key ````. - -### VARIANT_SIZE_LIMIT - -[SQLSTATE: 22023](sql-error-conditions-sqlstates.html#class-22-data-exception) - -Cannot build variant bigger than `` in ``. -Please avoid large input strings to this expression (for example, add function calls(s) to check the expression size and convert it to NULL first if it is too big). - -### VIEW_ALREADY_EXISTS - -[SQLSTATE: 42P07](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Cannot create view `` because it already exists. -Choose a different name, drop or replace the existing object, or add the IF NOT EXISTS clause to tolerate pre-existing objects. - -### VIEW_EXCEED_MAX_NESTED_DEPTH - -[SQLSTATE: 54K00](sql-error-conditions-sqlstates.html#class-54-program-limit-exceeded) - -The depth of view `` exceeds the maximum view resolution depth (``). -Analysis is aborted to avoid errors. If you want to work around this, please try to increase the value of "spark.sql.view.maxNestedViewDepth". - -### VIEW_NOT_FOUND - -[SQLSTATE: 42P01](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The view `` cannot be found. 
Verify the spelling and correctness of the schema and catalog. -If you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog. -To tolerate the error on drop use DROP VIEW IF EXISTS. - -### WINDOW_FUNCTION_AND_FRAME_MISMATCH - -[SQLSTATE: 42K0E](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` function can only be evaluated in an ordered row-based window frame with a single offset: ``. - -### WINDOW_FUNCTION_WITHOUT_OVER_CLAUSE - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -Window function `` requires an OVER clause. - -### WRITE_STREAM_NOT_ALLOWED - -[SQLSTATE: 42601](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`writeStream` can be called only on streaming Dataset/DataFrame. - -### WRONG_COMMAND_FOR_OBJECT_TYPE - -[SQLSTATE: 42809](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The operation `` requires a ``. But `` is a ``. Use `` instead. - -### [WRONG_NUM_ARGS](sql-error-conditions-wrong-num-args-error-class.html) - -[SQLSTATE: 42605](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -The `` requires `` parameters but the actual number is ``. - -For more details see [WRONG_NUM_ARGS](sql-error-conditions-wrong-num-args-error-class.html) - -### XML_ROW_TAG_MISSING - -[SQLSTATE: 42KDF](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) - -`` option is required for reading files in XML format. +{% include_api_gen _generated/error-conditions.html %} diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 3004008b8ec78..4707e491fa674 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -24,13 +24,15 @@ license: | ## Upgrading from Spark SQL 3.5 to 4.0 +- Since Spark 4.0, `spark.sql.ansi.enabled` is on by default. To restore the previous behavior, set `spark.sql.ansi.enabled` to `false` or `SPARK_ANSI_SQL_MODE` to `false`. +- Since Spark 4.0, `CREATE TABLE` syntax without `USING` and `STORED AS` will use the value of `spark.sql.sources.default` as the table provider instead of `Hive`. To restore the previous behavior, set `spark.sql.legacy.createHiveTableByDefault` to `true` or `SPARK_SQL_LEGACY_CREATE_HIVE_TABLE` to `true`. - Since Spark 4.0, the default behaviour when inserting elements in a map is changed to first normalize keys -0.0 to 0.0. The affected SQL functions are `create_map`, `map_from_arrays`, `map_from_entries`, and `map_concat`. To restore the previous behaviour, set `spark.sql.legacy.disableMapKeyNormalization` to `true`. - Since Spark 4.0, the default value of `spark.sql.maxSinglePartitionBytes` is changed from `Long.MaxValue` to `128m`. To restore the previous behavior, set `spark.sql.maxSinglePartitionBytes` to `9223372036854775807`(`Long.MaxValue`). - Since Spark 4.0, any read of SQL tables takes into consideration the SQL configs `spark.sql.files.ignoreCorruptFiles`/`spark.sql.files.ignoreMissingFiles` instead of the core config `spark.files.ignoreCorruptFiles`/`spark.files.ignoreMissingFiles`. - Since Spark 4.0, `spark.sql.hive.metastore` drops the support of Hive prior to 2.0.0 as they require JDK 8 that Spark does not support anymore. Users should migrate to higher versions. 
- Since Spark 4.0, `spark.sql.parquet.compression.codec` drops the support of codec name `lz4raw`, please use `lz4_raw` instead.
- Since Spark 4.0, when overflowing during casting timestamp to byte/short/int under non-ansi mode, Spark will return null instead a wrapping value.
-- Since Spark 4.0, the `encode()` and `decode()` functions support only the following charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'. To restore the previous behavior when the function accepts charsets of the current JDK used by Spark, set `spark.sql.legacy.javaCharsets` to `true`.
+- Since Spark 4.0, the `encode()` and `decode()` functions support only the following charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', 'UTF-32'. To restore the previous behavior when the function accepts charsets of the current JDK used by Spark, set `spark.sql.legacy.javaCharsets` to `true`.
- Since Spark 4.0, the legacy datetime rebasing SQL configs with the prefix `spark.sql.legacy` are removed. To restore the previous behavior, use the following configs:
  - `spark.sql.parquet.int96RebaseModeInWrite` instead of `spark.sql.legacy.parquet.int96RebaseModeInWrite`
  - `spark.sql.parquet.datetimeRebaseModeInWrite` instead of `spark.sql.legacy.parquet.datetimeRebaseModeInWrite`
@@ -39,15 +41,25 @@ license: |
  - `spark.sql.avro.datetimeRebaseModeInRead` instead of `spark.sql.legacy.avro.datetimeRebaseModeInRead`
- Since Spark 4.0, the default value of `spark.sql.orc.compression.codec` is changed from `snappy` to `zstd`. To restore the previous behavior, set `spark.sql.orc.compression.codec` to `snappy`.
- Since Spark 4.0, the SQL config `spark.sql.legacy.allowZeroIndexInFormatString` is deprecated. Consider to change `strfmt` of the `format_string` function to use 1-based indexes. The first argument must be referenced by "1$", the second by "2$", etc.
-- Since Spark 4.0, JDBC read option `preferTimestampNTZ=true` will not convert Postgres TIMESTAMP WITH TIME ZONE and TIME WITH TIME ZONE data types to TimestampNTZType, which is available in Spark 3.5.
-- Since Spark 4.0, JDBC read option `preferTimestampNTZ=true` will not convert MySQL TIMESTAMP to TimestampNTZType, which is available in Spark 3.5. MySQL DATETIME is not affected.
+- Since Spark 4.0, Postgres JDBC datasource will read TIMESTAMP WITH TIME ZONE as TimestampType regardless of the JDBC read option `preferTimestampNTZ`, while in 3.5 and previous it was read as TimestampNTZType when `preferTimestampNTZ=true`. To restore the previous behavior, set `spark.sql.legacy.postgres.datetimeMapping.enabled` to `true`.
+- Since Spark 4.0, Postgres JDBC datasource will write TimestampType as TIMESTAMP WITH TIME ZONE, while in 3.5 and previous, it wrote as TIMESTAMP a.k.a. TIMESTAMP WITHOUT TIME ZONE. To restore the previous behavior, set `spark.sql.legacy.postgres.datetimeMapping.enabled` to `true`.
+- Since Spark 4.0, MySQL JDBC datasource will read TIMESTAMP as TimestampType regardless of the JDBC read option `preferTimestampNTZ`, while in 3.5 and previous it was read as TimestampNTZType when `preferTimestampNTZ=true`. To restore the previous behavior, set `spark.sql.legacy.mysql.timestampNTZMapping.enabled` to `true`. MySQL DATETIME is not affected.
- Since Spark 4.0, MySQL JDBC datasource will read SMALLINT as ShortType, while in Spark 3.5 and previous, it was read as IntegerType. MEDIUMINT UNSIGNED is read as IntegerType, while in Spark 3.5 and previous, it was read as LongType. To restore the previous behavior, you can cast the column to the old type.
- Since Spark 4.0, MySQL JDBC datasource will read FLOAT as FloatType, while in Spark 3.5 and previous, it was read as DoubleType. To restore the previous behavior, you can cast the column to the old type.
- Since Spark 4.0, MySQL JDBC datasource will read BIT(n > 1) as BinaryType, while in Spark 3.5 and previous, read as LongType. To restore the previous behavior, set `spark.sql.legacy.mysql.bitArrayMapping.enabled` to `true`.
- Since Spark 4.0, MySQL JDBC datasource will write ShortType as SMALLINT, while in Spark 3.5 and previous, write as INTEGER. To restore the previous behavior, you can replace the column with IntegerType whenever before writing.
+- Since Spark 4.0, MySQL JDBC datasource will write TimestampNTZType as MySQL DATETIME because they both represent TIMESTAMP WITHOUT TIME ZONE, while in 3.5 and previous, it wrote as MySQL TIMESTAMP. To restore the previous behavior, set `spark.sql.legacy.mysql.timestampNTZMapping.enabled` to `true`.
- Since Spark 4.0, Oracle JDBC datasource will write TimestampType as TIMESTAMP WITH LOCAL TIME ZONE, while in Spark 3.5 and previous, write as TIMESTAMP. To restore the previous behavior, set `spark.sql.legacy.oracle.timestampMapping.enabled` to `true`.
+- Since Spark 4.0, MsSQL Server JDBC datasource will read TINYINT as ShortType, while in Spark 3.5 and previous, read as IntegerType. To restore the previous behavior, set `spark.sql.legacy.mssqlserver.numericMapping.enabled` to `true`.
+- Since Spark 4.0, MsSQL Server JDBC datasource will read DATETIMEOFFSET as TimestampType, while in Spark 3.5 and previous, read as StringType. To restore the previous behavior, set `spark.sql.legacy.mssqlserver.datetimeoffsetMapping.enabled` to `true`.
+- Since Spark 4.0, DB2 JDBC datasource will read SMALLINT as ShortType, while in Spark 3.5 and previous, it was read as IntegerType. To restore the previous behavior, set `spark.sql.legacy.db2.numericMapping.enabled` to `true`.
+- Since Spark 4.0, DB2 JDBC datasource will write BooleanType as BOOLEAN, while in Spark 3.5 and previous, write as CHAR(1). To restore the previous behavior, set `spark.sql.legacy.db2.booleanMapping.enabled` to `true`.
- Since Spark 4.0, The default value for `spark.sql.legacy.ctePrecedencePolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an error, inner CTE definitions take precedence over outer definitions.
- Since Spark 4.0, The default value for `spark.sql.legacy.timeParserPolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an `INCONSISTENT_BEHAVIOR_CROSS_VERSION` error, `CANNOT_PARSE_TIMESTAMP` will be raised if ANSI mode is enable. `NULL` will be returned if ANSI mode is disabled. See [Datetime Patterns for Formatting and Parsing](sql-ref-datetime-pattern.html).
+- Since Spark 4.0, a bug falsely allowing `!` instead of `NOT` when `!` is not a prefix operator has been fixed. Clauses such as `expr ! IN (...)`, `expr ! BETWEEN ...`, or `col ! NULL` now raise syntax errors. To restore the previous behavior, set `spark.sql.legacy.bangEqualsNot` to `true`.
+- Since Spark 4.0, by default views tolerate column type changes in the query and compensate with casts. To restore the previous behavior, allowing up-casts only, set `spark.sql.legacy.viewSchemaCompensation` to `false`.
+- Since Spark 4.0, views allow control over how they react to underlying query changes. By default views tolerate column type changes in the query and compensate with casts. To disable this feature, set `spark.sql.legacy.viewSchemaBindingMode` to `false`.
This also removes the clause from `DESCRIBE EXTENDED` and `SHOW CREATE TABLE`. +- Since Spark 4.0, The Storage-Partitioned Join feature flag `spark.sql.sources.v2.bucketing.pushPartValues.enabled` is set to `true`. To restore the previous behavior, set `spark.sql.sources.v2.bucketing.pushPartValues.enabled` to `false`. ## Upgrading from Spark SQL 3.5.1 to 3.5.2 @@ -85,6 +97,7 @@ license: | - Since Spark 3.4, `BinaryType` is not supported in CSV datasource. In Spark 3.3 or earlier, users can write binary columns in CSV datasource, but the output content in CSV files is `Object.toString()` which is meaningless; meanwhile, if users read CSV tables with binary columns, Spark will throw an `Unsupported type: binary` exception. - Since Spark 3.4, bloom filter joins are enabled by default. To restore the legacy behavior, set `spark.sql.optimizer.runtime.bloomFilter.enabled` to `false`. - Since Spark 3.4, when schema inference on external Parquet files, INT64 timestamps with annotation `isAdjustedToUTC=false` will be inferred as TimestampNTZ type instead of Timestamp type. To restore the legacy behavior, set `spark.sql.parquet.inferTimestampNTZ.enabled` to `false`. + - Since Spark 3.4, the behavior for `CREATE TABLE AS SELECT ...` is changed from OVERWRITE to APPEND when `spark.sql.legacy.allowNonEmptyLocationInCTAS` is set to `true`. Users are recommended to avoid CTAS with a non-empty table location. ## Upgrading from Spark SQL 3.2 to 3.3 @@ -1062,7 +1075,7 @@ Python UDF registration is unchanged. Spark SQL is designed to be compatible with the Hive Metastore, SerDes and UDFs. Currently, Hive SerDes and UDFs are based on built-in Hive, and Spark SQL can be connected to different versions of Hive Metastore -(from 0.12.0 to 2.3.9 and 3.0.0 to 3.1.3. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). +(from 2.0.0 to 2.3.10 and 3.0.0 to 3.1.3. Also see [Interacting with Different Versions of Hive Metastore](sql-data-sources-hive-tables.html#interacting-with-different-versions-of-hive-metastore)). #### Deploying in Existing Hive Warehouses {:.no_toc} diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index b443e3d9c5f59..12b79828e44cb 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -428,3 +428,122 @@ You can control the details of how AQE works by providing your own cost evaluato
      3.2.0
+
+## Storage Partition Join
+
+Storage Partition Join (SPJ) is an optimization technique in Spark SQL that makes use of the existing storage layout to avoid the shuffle phase.
+
+This is a generalization of the concept of Bucket Joins, which applies only to [bucketed](sql-data-sources-load-save-functions.html#bucketing-sorting-and-partitioning) tables, to tables partitioned by functions registered in FunctionCatalog. Storage Partition Join is currently supported for compatible V2 DataSources.
+
+The following SQL properties enable Storage Partition Join in different join queries with various optimizations.
+
+<table class="table">
+  <thead>
+    <tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr>
+  </thead>
+  <tr>
+    <td>spark.sql.sources.v2.bucketing.enabled</td>
+    <td>false</td>
+    <td>When true, try to eliminate shuffle by using the partitioning reported by a compatible V2 data source.</td>
+    <td>3.3.0</td>
+  </tr>
+  <tr>
+    <td>spark.sql.sources.v2.bucketing.pushPartValues.enabled</td>
+    <td>true</td>
+    <td>When enabled, try to eliminate shuffle if one side of the join has missing partition values from the other side. This config requires spark.sql.sources.v2.bucketing.enabled to be true.</td>
+    <td>3.4.0</td>
+  </tr>
+  <tr>
+    <td>spark.sql.requireAllClusterKeysForCoPartition</td>
+    <td>true</td>
+    <td>When true, require the join or MERGE keys to be the same and in the same order as the partition keys to eliminate shuffle. Hence, set this to false to eliminate shuffle when the join keys are not an exact, ordered match of the partition keys.</td>
+    <td>3.4.0</td>
+  </tr>
+  <tr>
+    <td>spark.sql.sources.v2.bucketing.partiallyClusteredDistribution.enabled</td>
+    <td>false</td>
+    <td>When true, and when the join is not a full outer join, enable skew optimizations to handle partitions with large amounts of data when avoiding shuffle. One side will be chosen as the big table based on table statistics, and the splits on this side will be partially-clustered. The splits of the other side will be grouped and replicated to match. This config requires both spark.sql.sources.v2.bucketing.enabled and spark.sql.sources.v2.bucketing.pushPartValues.enabled to be true.</td>
+    <td>3.4.0</td>
+  </tr>
+  <tr>
+    <td>spark.sql.sources.v2.bucketing.allowJoinKeysSubsetOfPartitionKeys.enabled</td>
+    <td>false</td>
+    <td>When enabled, try to avoid shuffle if the join or MERGE condition does not include all partition columns. This config requires both spark.sql.sources.v2.bucketing.enabled and spark.sql.sources.v2.bucketing.pushPartValues.enabled to be true, and spark.sql.requireAllClusterKeysForCoPartition to be false.</td>
+    <td>4.0.0</td>
+  </tr>
+  <tr>
+    <td>spark.sql.sources.v2.bucketing.allowCompatibleTransforms.enabled</td>
+    <td>false</td>
+    <td>When enabled, try to avoid shuffle if partition transforms are compatible but not identical. This config requires both spark.sql.sources.v2.bucketing.enabled and spark.sql.sources.v2.bucketing.pushPartValues.enabled to be true.</td>
+    <td>4.0.0</td>
+  </tr>
+  <tr>
+    <td>spark.sql.sources.v2.bucketing.shuffle.enabled</td>
+    <td>false</td>
+    <td>When enabled, try to avoid shuffle on one side of the join by recognizing the partitioning reported by a V2 data source on the other side.</td>
+    <td>4.0.0</td>
+  </tr>
+</table>
      + +If Storage Partition Join is performed, the query plan will not contain Exchange nodes prior to the join. + +The following example uses Iceberg ([https://iceberg.apache.org/docs/latest/spark-getting-started/](https://iceberg.apache.org/docs/latest/spark-getting-started/)), a Spark V2 DataSource that supports Storage Partition Join. +```sql +CREATE TABLE prod.db.target (id INT, salary INT, dep STRING) +USING iceberg +PARTITIONED BY (dep, bucket(8, id)) + +CREATE TABLE prod.db.source (id INT, salary INT, dep STRING) +USING iceberg +PARTITIONED BY (dep, bucket(8, id)) + +EXPLAIN SELECT * FROM target t INNER JOIN source s +ON t.dep = s.dep AND t.id = s.id + +-- Plan without Storage Partition Join +== Physical Plan == +* Project (12) ++- * SortMergeJoin Inner (11) + :- * Sort (5) + : +- Exchange (4) // DATA SHUFFLE + : +- * Filter (3) + : +- * ColumnarToRow (2) + : +- BatchScan (1) + +- * Sort (10) + +- Exchange (9) // DATA SHUFFLE + +- * Filter (8) + +- * ColumnarToRow (7) + +- BatchScan (6) + + +SET 'spark.sql.sources.v2.bucketing.enabled' 'true' +SET 'spark.sql.iceberg.planning.preserve-data-grouping' 'true' +SET 'spark.sql.sources.v2.bucketing.pushPartValues.enabled' 'true' +SET 'spark.sql.requireAllClusterKeysForCoPartition' 'false' +SET 'spark.sql.sources.v2.bucketing.partiallyClusteredDistribution.enabled' 'true' + +-- Plan with Storage Partition Join +== Physical Plan == +* Project (10) ++- * SortMergeJoin Inner (9) + :- * Sort (4) + : +- * Filter (3) + : +- * ColumnarToRow (2) + : +- BatchScan (1) + +- * Sort (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- BatchScan (5) +``` \ No newline at end of file diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index bf1819b9767b0..920b3392854c9 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -21,18 +21,18 @@ license: | In Spark SQL, there are two options to comply with the SQL standard: `spark.sql.ansi.enabled` and `spark.sql.storeAssignmentPolicy` (See a table below for details). -When `spark.sql.ansi.enabled` is set to `true`, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. For example, Spark will throw an exception at runtime instead of returning null results if the inputs to a SQL operator/function are invalid. Some ANSI dialect features may be not from the ANSI SQL standard directly, but their behaviors align with ANSI SQL's style. +By default, `spark.sql.ansi.enabled` is `true` and Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. For example, Spark will throw an exception at runtime instead of returning null results if the inputs to a SQL operator/function are invalid. Some ANSI dialect features may be not from the ANSI SQL standard directly, but their behaviors align with ANSI SQL's style. Moreover, Spark SQL has an independent option to control implicit casting behaviours when inserting rows in a table. The casting behaviours are defined as store assignment rules in the standard. -When `spark.sql.storeAssignmentPolicy` is set to `ANSI`, Spark SQL complies with the ANSI store assignment rules. This is a separate configuration because its default value is `ANSI`, while the configuration `spark.sql.ansi.enabled` is disabled by default. +By default, `spark.sql.storeAssignmentPolicy` is `ANSI` and Spark SQL complies with the ANSI store assignment rules. 
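A minimal sketch of the behavior change described above, using a plain SQL session (illustrative only; the exact error message text depends on the Spark build):

```sql
-- With the Spark 4.0 default (spark.sql.ansi.enabled=true), an overflowing
-- integer addition raises an arithmetic overflow error at runtime.
SELECT 2147483647 + 1;

-- Restoring the pre-4.0 behavior for the session: the same query silently
-- wraps around and returns -2147483648.
SET spark.sql.ansi.enabled=false;
SELECT 2147483647 + 1;
```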
- + + + + + + """ + .format( + anchor=anchor_name(condition_name), + sql_state=condition_details["sqlState"], + # This inserts soft break opportunities so that if a long name needs to be wrapped + # it will wrap in a visually pleasing manner. + # See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/wbr + condition_name=condition_name.replace("_", "_"), + message=condition_details["message"], + ) + ] + sub_condition_rows = [] + if "subClass" in condition_details: + for sub_condition_name in sorted(condition_details["subClass"]): + sub_condition_rows.append( + """ + + + + + + """ + .format( + anchor=anchor_name(condition_name, sub_condition_name), + # See comment above for explanation of ``. + sub_condition_name=sub_condition_name.replace("_", "_"), + message=condition_details["subClass"][sub_condition_name]["message"], + ) + ) + doc_rows = condition_row + sub_condition_rows + return [ + dedent(row).strip() + for row in doc_rows + ] + + +def generate_doc_table(error_conditions): + doc_rows = chain.from_iterable([ + generate_doc_rows(condition_name, condition_details) + for condition_name, condition_details + in sorted( + error_conditions.items(), + key=lambda x: (x[1]["sqlState"], x[0]), + ) + ]) + table_html = ( + """ +
      Property NameDefaultMeaningSince Version
      spark.sql.ansi.enabledfalsetrue When true, Spark tries to conform to the ANSI SQL specification:
      1. Spark SQL will throw runtime exceptions on invalid operations, including integer overflow @@ -67,10 +67,8 @@ The following subsections present behaviour changes in arithmetic operations, ty ### Arithmetic Operations -In Spark SQL, arithmetic operations performed on numeric types (with the exception of decimal) are not checked for overflows by default. -This means that in case an operation causes overflows, the result is the same with the corresponding operation in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number). -On the other hand, Spark SQL returns null for decimal overflows. -When `spark.sql.ansi.enabled` is set to `true` and an overflow occurs in numeric and interval arithmetic operations, it throws an arithmetic exception at runtime. +In Spark SQL, by default, Spark throws an arithmetic exception at runtime for both interval and numeric type overflows. +If `spark.sql.ansi.enabled` is `false`, then the decimal type will produce `null` values and other numeric types will behave in the same way as the corresponding operation in a Java/Scala program (e.g., if the sum of 2 integers is higher than the maximum value representable, the result is a negative number) which is the behavior of Spark 3 or older. ```sql -- `spark.sql.ansi.enabled=true` @@ -141,7 +139,7 @@ In the table above, all the `CAST`s with new syntax are marked as red ANSI Mode|Spark SQL
      Default Mode|SQL-2016| +|Keyword|Spark SQL
      ANSI Mode|Spark SQL
      NonANSI Mode|SQL-2016| |--|----------------------|-------------------------|--------| |ADD|non-reserved|non-reserved|non-reserved| |AFTER|non-reserved|non-reserved|non-reserved| @@ -415,9 +414,11 @@ Below is a list of all the keywords in Spark SQL. |ASC|non-reserved|non-reserved|non-reserved| |AT|non-reserved|non-reserved|reserved| |AUTHORIZATION|reserved|non-reserved|reserved| +|BEGIN|non-reserved|non-reserved|non-reserved| |BETWEEN|non-reserved|non-reserved|reserved| |BIGINT|non-reserved|non-reserved|reserved| |BINARY|non-reserved|non-reserved|reserved| +|BINDING|non-reserved|non-reserved|non-reserved| |BOOLEAN|non-reserved|non-reserved|reserved| |BOTH|reserved|non-reserved|reserved| |BUCKET|non-reserved|non-reserved|non-reserved| @@ -425,6 +426,7 @@ Below is a list of all the keywords in Spark SQL. |BY|non-reserved|non-reserved|reserved| |BYTE|non-reserved|non-reserved|non-reserved| |CACHE|non-reserved|non-reserved|non-reserved| +|CALLED|non-reserved|non-reserved|non-reserved| |CASCADE|non-reserved|non-reserved|non-reserved| |CASE|reserved|non-reserved|reserved| |CAST|reserved|non-reserved|reserved| @@ -447,9 +449,11 @@ Below is a list of all the keywords in Spark SQL. |COMMIT|non-reserved|non-reserved|reserved| |COMPACT|non-reserved|non-reserved|non-reserved| |COMPACTIONS|non-reserved|non-reserved|non-reserved| +|COMPENSATION|non-reserved|non-reserved|non-reserved| |COMPUTE|non-reserved|non-reserved|non-reserved| |CONCATENATE|non-reserved|non-reserved|non-reserved| |CONSTRAINT|reserved|non-reserved|reserved| +|CONTAINS|non-reserved|non-reserved|non-reserved| |COST|non-reserved|non-reserved|non-reserved| |CREATE|reserved|non-reserved|reserved| |CROSS|reserved|strict-non-reserved|reserved| @@ -476,10 +480,12 @@ Below is a list of all the keywords in Spark SQL. |DECLARE|non-reserved|non-reserved|non-reserved| |DEFAULT|non-reserved|non-reserved|non-reserved| |DEFINED|non-reserved|non-reserved|non-reserved| +|DEFINER|non-reserved|non-reserved|non-reserved| |DELETE|non-reserved|non-reserved|reserved| |DELIMITED|non-reserved|non-reserved|non-reserved| |DESC|non-reserved|non-reserved|non-reserved| |DESCRIBE|non-reserved|non-reserved|reserved| +|DETERMINISTIC|non-reserved|non-reserved|reserved| |DFS|non-reserved|non-reserved|non-reserved| |DIRECTORIES|non-reserved|non-reserved|non-reserved| |DIRECTORY|non-reserved|non-reserved|non-reserved| @@ -492,6 +498,7 @@ Below is a list of all the keywords in Spark SQL. |END|reserved|non-reserved|reserved| |ESCAPE|reserved|non-reserved|reserved| |ESCAPED|non-reserved|non-reserved|non-reserved| +|EVOLUTION|non-reserved|non-reserved|non-reserved| |EXCEPT|reserved|strict-non-reserved|reserved| |EXCHANGE|non-reserved|non-reserved|non-reserved| |EXCLUDE|non-reserved|non-reserved|non-reserved| @@ -537,6 +544,7 @@ Below is a list of all the keywords in Spark SQL. |INDEXES|non-reserved|non-reserved|non-reserved| |INNER|reserved|strict-non-reserved|reserved| |INPATH|non-reserved|non-reserved|non-reserved| +|INPUT|non-reserved|non-reserved|non-reserved| |INPUTFORMAT|non-reserved|non-reserved|non-reserved| |INSERT|non-reserved|non-reserved|reserved| |INT|non-reserved|non-reserved|reserved| @@ -544,10 +552,12 @@ Below is a list of all the keywords in Spark SQL. 
|INTERSECT|reserved|strict-non-reserved|reserved| |INTERVAL|non-reserved|non-reserved|reserved| |INTO|reserved|non-reserved|reserved| +|INVOKER|non-reserved|non-reserved|non-reserved| |IS|reserved|non-reserved|reserved| |ITEMS|non-reserved|non-reserved|non-reserved| |JOIN|reserved|strict-non-reserved|reserved| |KEYS|non-reserved|non-reserved|non-reserved| +|LANGUAGE|non-reserved|non-reserved|reserved| |LAST|non-reserved|non-reserved|non-reserved| |LATERAL|reserved|strict-non-reserved|reserved| |LAZY|non-reserved|non-reserved|non-reserved| @@ -576,6 +586,7 @@ Below is a list of all the keywords in Spark SQL. |MINUTE|non-reserved|non-reserved|non-reserved| |MINUTES|non-reserved|non-reserved|non-reserved| |MINUS|non-reserved|strict-non-reserved|non-reserved| +|MODIFIES|non-reserved|non-reserved|non-reserved| |MONTH|non-reserved|non-reserved|non-reserved| |MONTHS|non-reserved|non-reserved|non-reserved| |MSCK|non-reserved|non-reserved|non-reserved| @@ -609,8 +620,6 @@ Below is a list of all the keywords in Spark SQL. |PARTITIONED|non-reserved|non-reserved|non-reserved| |PARTITIONS|non-reserved|non-reserved|non-reserved| |PERCENT|non-reserved|non-reserved|non-reserved| -|PERCENTILE_CONT|reserved|non-reserved|non-reserved| -|PERCENTILE_DISC|reserved|non-reserved|non-reserved| |PIVOT|non-reserved|non-reserved|non-reserved| |PLACING|non-reserved|non-reserved|non-reserved| |POSITION|non-reserved|non-reserved|reserved| @@ -622,6 +631,7 @@ Below is a list of all the keywords in Spark SQL. |QUARTER|non-reserved|non-reserved|non-reserved| |QUERY|non-reserved|non-reserved|non-reserved| |RANGE|non-reserved|non-reserved|reserved| +|READS|non-reserved|non-reserved|non-reserved| |REAL|non-reserved|non-reserved|reserved| |RECORDREADER|non-reserved|non-reserved|non-reserved| |RECORDWRITER|non-reserved|non-reserved|non-reserved| @@ -637,6 +647,8 @@ Below is a list of all the keywords in Spark SQL. |RESET|non-reserved|non-reserved|non-reserved| |RESPECT|non-reserved|non-reserved|non-reserved| |RESTRICT|non-reserved|non-reserved|non-reserved| +|RETURN|non-reserved|non-reserved|reserved| +|RETURNS|non-reserved|non-reserved|reserved| |REVOKE|non-reserved|non-reserved|reserved| |RIGHT|reserved|strict-non-reserved|reserved| |RLIKE|non-reserved|non-reserved|non-reserved| @@ -650,6 +662,7 @@ Below is a list of all the keywords in Spark SQL. |SCHEMAS|non-reserved|non-reserved|non-reserved| |SECOND|non-reserved|non-reserved|non-reserved| |SECONDS|non-reserved|non-reserved|non-reserved| +|SECURITY|non-reserved|non-reserved|non-reserved| |SELECT|reserved|non-reserved|reserved| |SEMI|non-reserved|strict-non-reserved|non-reserved| |SEPARATED|non-reserved|non-reserved|non-reserved| @@ -667,6 +680,8 @@ Below is a list of all the keywords in Spark SQL. |SORT|non-reserved|non-reserved|non-reserved| |SORTED|non-reserved|non-reserved|non-reserved| |SOURCE|non-reserved|non-reserved|non-reserved| +|SPECIFIC|non-reserved|non-reserved|reserved| +|SQL|reserved|non-reserved|reserved| |START|non-reserved|non-reserved|reserved| |STATISTICS|non-reserved|non-reserved|non-reserved| |STORED|non-reserved|non-reserved|non-reserved| diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md index 8d75b4a175ab7..3a4530dcecaef 100644 --- a/docs/sql-ref-datatypes.md +++ b/docs/sql-ref-datatypes.md @@ -126,7 +126,9 @@ from pyspark.sql.types import * |**FloatType**|float
      **Note:** Numbers will be converted to 4-byte single-precision floating point numbers at runtime.|FloatType()| |**DoubleType**|float|DoubleType()| |**DecimalType**|decimal.Decimal|DecimalType()| -|**StringType**|string|StringType()| +|**StringType**|str|StringType()| +|**CharType(length)**|str|CharType(length)| +|**VarcharType(length)**|str|VarcharType(length)| |**BinaryType**|bytearray|BinaryType()| |**BooleanType**|bool|BooleanType()| |**TimestampType**|datetime.datetime|TimestampType()| @@ -157,6 +159,8 @@ You can access them by doing |**DoubleType**|Double|DoubleType| |**DecimalType**|java.math.BigDecimal|DecimalType| |**StringType**|String|StringType| +|**CharType(length)**|String|CharType(length)| +|**VarcharType(length)**|String|VarcharType(length)| |**BinaryType**|Array[Byte]|BinaryType| |**BooleanType**|Boolean|BooleanType| |**TimestampType**|java.time.Instant or java.sql.Timestamp|TimestampType| @@ -188,6 +192,8 @@ please use factory methods provided in |**DoubleType**|double or Double|DataTypes.DoubleType| |**DecimalType**|java.math.BigDecimal|DataTypes.createDecimalType()
      DataTypes.createDecimalType(*precision*, *scale*).| |**StringType**|String|DataTypes.StringType| +|**CharType(length)**|String|DataTypes.createCharType(length)| +|**VarcharType(length)**|String|DataTypes.createVarcharType(length)| |**BinaryType**|byte[]|DataTypes.BinaryType| |**BooleanType**|boolean or Boolean|DataTypes.BooleanType| |**TimestampType**|java.time.Instant or java.sql.Timestamp|DataTypes.TimestampType| @@ -242,6 +248,8 @@ The following table shows the type names as well as aliases used in Spark SQL pa |**TimestampType**|TIMESTAMP, TIMESTAMP_LTZ| |**TimestampNTZType**|TIMESTAMP_NTZ| |**StringType**|STRING| +|**CharType(length)**|CHAR(length)| +|**VarcharType(length)**|VARCHAR(length)| |**BinaryType**|BINARY| |**DecimalType**|DECIMAL, DEC, NUMERIC| |**YearMonthIntervalType**|INTERVAL YEAR, INTERVAL YEAR TO MONTH, INTERVAL MONTH| diff --git a/docs/sql-ref-identifier.md b/docs/sql-ref-identifier.md index e4d9727c09b7e..7aca08ea9fd8d 100644 --- a/docs/sql-ref-identifier.md +++ b/docs/sql-ref-identifier.md @@ -30,7 +30,7 @@ An identifier is a string used to identify a database object such as a table, vi ```sql { letter | digit | '_' } [ , ... ] ``` -**Note:** If `spark.sql.ansi.enabled` is set to true, ANSI SQL reserved keywords cannot be used as identifiers. For more details, please refer to [ANSI Compliance](sql-ref-ansi-compliance.html). +**Note:** If `spark.sql.ansi.enforceReservedKeywords` is set to true, ANSI SQL reserved keywords cannot be used as identifiers. For more details, please refer to [ANSI Compliance](sql-ref-ansi-compliance.html). #### Delimited Identifier diff --git a/docs/sql-ref-operators.md b/docs/sql-ref-operators.md new file mode 100644 index 0000000000000..102e45fba8d20 --- /dev/null +++ b/docs/sql-ref-operators.md @@ -0,0 +1,124 @@ +--- +layout: global +title: Operators +displayTitle: Operators +license: | + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--- + +An SQL operator is a symbol specifying an action that is performed on one or more expressions. Operators are represented by special characters or by keywords. + +### Operator Precedence + +When a complex expression has multiple operators, operator precedence determines the sequence of operations in the expression, +e.g. in expression `1 + 2 * 3`, `*` has higher precedence than `+`, so the expression is evaluated as `1 + (2 * 3) = 7`. +The order of execution can significantly affect the resulting value. + +Operators have the precedence levels shown in the following table. +An operator on higher precedence is evaluated before an operator on a lower level. +In the following table, the operators in descending order of precedence, a.k.a. 1 is the highest level. 
+Operators listed in the same table cell have the same precedence and are evaluated from left to right or right to left based on their associativity.
+
+<table class="table">
+  <thead>
+    <tr><th>Precedence</th><th>Operator</th><th>Operation</th><th>Associativity</th></tr>
+  </thead>
+  <tr><td>1</td><td>.<br>[]<br>::</td><td>member access<br>element access<br>cast</td><td>Left to right</td></tr>
+  <tr><td>2</td><td>+<br>-<br>~</td><td>unary plus<br>unary minus<br>bitwise NOT</td><td>Right to left</td></tr>
+  <tr><td>3</td><td>*<br>/<br>%<br>DIV</td><td>multiplication<br>division, modulo<br>integral division</td><td>Left to right</td></tr>
+  <tr><td>4</td><td>+<br>-<br>||</td><td>addition<br>subtraction<br>concatenation</td><td>Left to right</td></tr>
+  <tr><td>5</td><td>&lt;&lt;<br>&gt;&gt;<br>&gt;&gt;&gt;</td><td>bitwise shift left<br>bitwise shift right<br>bitwise shift right unsigned</td><td>Left to right</td></tr>
+  <tr><td>6</td><td>&amp;</td><td>bitwise AND</td><td>Left to right</td></tr>
+  <tr><td>7</td><td>^</td><td>bitwise XOR (exclusive or)</td><td>Left to right</td></tr>
+  <tr><td>8</td><td>|</td><td>bitwise OR (inclusive or)</td><td>Left to right</td></tr>
+  <tr><td>9</td><td>=, ==<br>&lt;&gt;, !=<br>&lt;, &lt;=<br>&gt;, &gt;=</td><td>comparison operators</td><td>Left to right</td></tr>
+  <tr><td>10</td><td>NOT, !<br>EXISTS</td><td>logical NOT<br>existence</td><td>Right to left</td></tr>
+  <tr><td>11</td><td>BETWEEN<br>IN<br>RLIKE, REGEXP<br>ILIKE<br>LIKE<br>IS [NULL, TRUE, FALSE]<br>IS DISTINCT FROM</td><td>other predicates</td><td>Left to right</td></tr>
+  <tr><td>12</td><td>AND</td><td>conjunction</td><td>Left to right</td></tr>
+  <tr><td>13</td><td>OR</td><td>disjunction</td><td>Left to right</td></tr>
+</table>
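A small worked example of these precedence levels (illustrative queries only):

```sql
SELECT 1 + 2 * 3;               -- 7: `*` (level 3) binds tighter than `+` (level 4)
SELECT (1 + 2) * 3;             -- 9: parentheses override operator precedence
SELECT true OR false AND false; -- true: AND (level 12) is evaluated before OR (level 13)
```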
      diff --git a/docs/sql-ref-syntax-ddl-alter-database.md b/docs/sql-ref-syntax-ddl-alter-database.md index 0ac0038236433..727fedb19e237 100644 --- a/docs/sql-ref-syntax-ddl-alter-database.md +++ b/docs/sql-ref-syntax-ddl-alter-database.md @@ -25,7 +25,7 @@ license: | `DATABASE`, `SCHEMA` and `NAMESPACE` are interchangeable and one can be used in place of the others. An error message is issued if the database is not found in the system. -### ALTER PROPERTIES +### SET PROPERTIES `ALTER DATABASE SET DBPROPERTIES` statement changes the properties associated with a database. The specified property values override any existing value with the same property name. This command is mostly used to record the metadata for a database and may be used for auditing purposes. @@ -43,7 +43,25 @@ ALTER { DATABASE | SCHEMA | NAMESPACE } database_name Specifies the name of the database to be altered. -### ALTER LOCATION +### UNSET PROPERTIES +`ALTER DATABASE UNSET DBPROPERTIES` statement unsets the properties associated with a database. +If the specified property key does not exist, the command will ignore it and finally succeed. +(available since Spark 4.0.0). + +#### Syntax + +```sql +ALTER { DATABASE | SCHEMA | NAMESPACE } database_name + UNSET { DBPROPERTIES | PROPERTIES } ( property_name [ , ... ] ) +``` + +#### Parameters + +* **database_name** + + Specifies the name of the database to be altered. + +### SET LOCATION `ALTER DATABASE SET LOCATION` statement changes the default parent-directory where new tables will be added for a database. Please note that it does not move the contents of the database's current directory to the newly specified location or change the locations associated with any tables/partitions under the specified database @@ -95,6 +113,24 @@ DESCRIBE DATABASE EXTENDED inventory; | Location|file:/temp/spark-warehouse/new_inventory.db| | Properties| ((Edit-date,01/01/2001), (Edited-by,John))| +-------------------------+-------------------------------------------+ + +-- Alters the database to unset the property `Edited-by` +ALTER DATABASE inventory UNSET DBPROPERTIES ('Edited-by'); + +-- Verify that the property `Edited-by` has been unset. +DESCRIBE DATABASE EXTENDED inventory; ++-------------------------+-------------------------------------------+ +|database_description_item| database_description_value| ++-------------------------+-------------------------------------------+ +| Database Name| inventory| +| Description| | +| Location|file:/temp/spark-warehouse/new_inventory.db| +| Properties| ((Edit-date,01/01/2001)) | ++-------------------------+-------------------------------------------+ + +-- Alters the database to unset a non-existent property `non-existent` +-- Note: The command will ignore 'non-existent' and finally succeed +ALTER DATABASE inventory UNSET DBPROPERTIES ('non-existent'); ``` ### Related Statements diff --git a/docs/sql-ref-syntax-ddl-alter-view.md b/docs/sql-ref-syntax-ddl-alter-view.md index d69f24677116d..ca272d27bc7e6 100644 --- a/docs/sql-ref-syntax-ddl-alter-view.md +++ b/docs/sql-ref-syntax-ddl-alter-view.md @@ -109,6 +109,32 @@ Note that `ALTER VIEW` statement does not support `SET SERDE` or `SET SERDEPROPE Specifies the definition of the view. Check [select_statement](sql-ref-syntax-qry-select.html) for details. +#### ALTER View WITH SCHEMA + +Changes the view's schema binding behavior. + +If the view is cached, the command clears cached data of the view and all its dependents that refer to it. 
View's cache will be lazily filled when the next time the view is accessed. The command leaves view's dependents as uncached. + +This statement is not supported for `TEMPORARY` views. + +#### Syntax +```sql +ALTER VIEW view_identifier WITH SCHEMA { BINDING | COMPENSATION | [ TYPE ] EVOLUTION } +``` + +#### Parameters +* **view_identifier** + + Specifies a view name, which may be optionally qualified with a database name. + + **Syntax:** `[ database_name. ] view_name` + +* **BINDING** - The view can tolerate only type changes in the underlying schema requiring safe up-casts. +* **COMPENSATION** - The view can tolerate type changes in the underlying schema requiring casts. Runtime casting errors may occur. +* **TYPE EVOLUTION** - The view will adapt to any type changes in the underlying schema. +* **EVOLUTION** - For views defined without a column lists any schema changes are adapted by the view, including, for queries with `SELECT *` dropped or added columns. + If the view is defined with a column list, the clause is interpreted as `TYPE EVOLUTION`. + ### Examples ```sql @@ -196,6 +222,24 @@ DESC TABLE EXTENDED tempdb1.v2; | View Text| select * from tempdb1.v1| | | View Original Text| select * from tempdb1.v1| | +----------------------------+---------------------------+-------+ + +CREATE OR REPLACE VIEW open_orders AS SELECT * FROM orders WHERE status = 'open'; +ALTER VIEW open_orders WITH SCHEMA EVOLUTION; +DESC TABLE EXTENDED open_orders; ++----------------------------+---------------------------+-------+ +| col_name| data_type|comment| ++----------------------------+---------------------------+-------+ +| order_no| int| null| +| order_date| date| null| +| | | | +|# Detailed Table Information| | | +| Database| mydb| | +| Table| open_orders| | +| Type| VIEW| | +| View Text| select * from orders| | +| View Original Text| select * from orders| | +| View Schema Mode | EVOLUTION| | ++----------------------------+---------------------------+-------+ ``` ### Related Statements diff --git a/docs/sql-ref-syntax-ddl-create-view.md b/docs/sql-ref-syntax-ddl-create-view.md index 1a9c1f62728e7..21174f12300e3 100644 --- a/docs/sql-ref-syntax-ddl-create-view.md +++ b/docs/sql-ref-syntax-ddl-create-view.md @@ -61,6 +61,17 @@ CREATE [ OR REPLACE ] [ [ GLOBAL ] TEMPORARY ] VIEW [ IF NOT EXISTS ] view_ident * `[ ( column_name [ COMMENT column_comment ], ... ) ]` to specify column-level comments. * `[ COMMENT view_comment ]` to specify view-level comments. * `[ TBLPROPERTIES ( property_name = property_value [ , ... ] ) ]` to add metadata key-value pairs. + * `[ WITH SCHEMA { BINDING | COMPENSATION | [ TYPE ] EVOLUTION } ]` to specify how the view reacts to schema changes + + This clause is not supported for `TEMPORARY` views. + + * **BINDING** - The view can tolerate only type changes in the underlying schema requiring safe up-casts. + * **COMPENSATION** - The view can tolerate type changes in the underlying schema requiring casts. Runtime casting errors may occur. + * **TYPE EVOLUTION** - The view will adapt to any type changes in the underlying schema. + * **EVOLUTION** - For views defined without a column lists any schema changes are adapted by the view, including, for queries with `SELECT *` dropped or added columns. + If the view is defined with a column list, the clause is interpreted as `TYPE EVOLUTION`. + + The default is `WITH SCHEMA COMPENSATION`. * **query** A [SELECT](sql-ref-syntax-qry-select.html) statement that constructs the view from base tables or other views. 
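As a rough sketch of how the schema modes above can be combined, assuming a hypothetical `orders` table with `order_no` and `order_date` columns (names chosen only for illustration):

```sql
-- Create a view that tolerates only safe up-casts in the underlying table.
CREATE OR REPLACE VIEW recent_orders
    WITH SCHEMA BINDING
    AS SELECT order_no, order_date FROM orders;

-- Later, relax the view so that other type changes in `orders` are compensated with casts.
ALTER VIEW recent_orders WITH SCHEMA COMPENSATION;
```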
@@ -80,6 +91,10 @@ CREATE GLOBAL TEMPORARY VIEW IF NOT EXISTS subscribed_movies AS SELECT mo.member_id, mb.full_name, mo.movie_title FROM movies AS mo INNER JOIN members AS mb ON mo.member_id = mb.id; + +-- Create a view filtering the `orders` table which will adjust to schema changes in `orders`. +CREATE OR REPLACE VIEW open_orders WITH SCHEMA EVOLUTION + AS SELECT * FROM orders WHERE status = 'open'; ``` ### Related Statements diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index f3a8a0a40694b..fabe7f17b78b3 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -574,6 +574,10 @@ Here are the details of all the sources in Spark.
      maxFileAge: Maximum age of a file that can be found in this directory, before it is ignored. For the first batch all files will be considered valid. If latestFirst is set to `true` and maxFilesPerTrigger or maxBytesPerTrigger is set, then this parameter will be ignored, because old files that are valid, and should be processed, may be ignored. The max age is specified with respect to the timestamp of the latest file, and not the timestamp of the current system.(default: 1 week)
+        maxCachedFiles: maximum number of files to cache to be processed in subsequent batches (default: 10000). If files are available in the cache, they will be read first, before listing new files from the input source.
+
+        discardCachedInputRatio: ratio of remaining cached files/bytes to maxFilesPerTrigger/maxBytesPerTrigger below which the cached input is discarded and a new listing from the input source is performed instead (default: 0.2). For example, if only 10 cached files remain for a batch but maxFilesPerTrigger is set to 100, the 10 cached files are discarded and a new listing is performed. Similarly, if 10 MB of cached files remain for a batch but maxBytesPerTrigger is set to 100 MB, the cached files are discarded.
+
      cleanSource: option to clean up completed files after processing.
      Available options are "archive", "delete", "off". If the option is not provided, the default value is "off".
      When "archive" is provided, additional option sourceArchiveDir must be provided as well. The value of "sourceArchiveDir" must not match with source pattern in depth (the number of directories from the root directory), where the depth is minimum of depth on both paths. This will ensure archived files are never included as new source files.
      diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index bf02ec137e200..071fbf5549398 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -91,7 +91,7 @@ run it with `--help`. Here are a few examples of common options: # Run application locally on 8 cores ./bin/spark-submit \ --class org.apache.spark.examples.SparkPi \ - --master local[8] \ + --master "local[8]" \ /path/to/examples.jar \ 100 @@ -178,8 +178,13 @@ The master URL passed to Spark can be in one of the following formats: # Loading Configuration from a File The `spark-submit` script can load default [Spark configuration values](configuration.html) from a -properties file and pass them on to your application. By default, it will read options -from `conf/spark-defaults.conf` in the `SPARK_HOME` directory. +properties file and pass them on to your application. The file can be specified via the `--properties-file` +parameter. When this is not specified, by default Spark will read options from `conf/spark-defaults.conf` +in the `SPARK_HOME` directory. + +An additional flag `--load-spark-defaults` can be used to tell Spark to load configurations from `conf/spark-defaults.conf` +even when a property file is provided via `--properties-file`. This is useful, for instance, when users +want to put system-wide default settings in the former while user/cluster specific settings in the latter. Loading default Spark configurations this way can obviate the need for certain flags to `spark-submit`. For instance, if the `spark.master` property is set, you can safely omit the diff --git a/docs/util/build-error-docs.py b/docs/util/build-error-docs.py new file mode 100644 index 0000000000000..df6b9e3c05270 --- /dev/null +++ b/docs/util/build-error-docs.py @@ -0,0 +1,152 @@ +""" +Generate a unified page of documentation for all error conditions. +""" +import json +import os +import re +from itertools import chain +from pathlib import Path +from textwrap import dedent + +# To avoid adding new direct dependencies, we import from within mkdocs. +# This is not ideal as unrelated updates to mkdocs may break this script. +from mkdocs.structure.pages import markdown + +THIS_DIR = Path(__file__).parent +SPARK_PROJECT_ROOT = THIS_DIR.parents[1] +DOCS_ROOT = SPARK_PROJECT_ROOT / "docs" +ERROR_CONDITIONS_PATH = ( + SPARK_PROJECT_ROOT / "common/utils/src/main/resources/error/error-conditions.json" +) + + +def assemble_message(message_parts): + message = " ".join(message_parts) + cleaned_message = re.sub(r"(<.*?>)", lambda x: f"`{x.group(1)}`", message) + return markdown.markdown(cleaned_message) + + +def load_error_conditions(path): + with open(path) as f: + raw_error_conditions = json.load(f) + error_conditions = dict() + for name, details in raw_error_conditions.items(): + if name.startswith("_LEGACY_ERROR") or name.startswith("INTERNAL_ERROR"): + continue + if "subClass" in details: + for sub_name in details["subClass"]: + details["subClass"][sub_name]["message"] = ( + assemble_message(details["subClass"][sub_name]["message"]) + ) + details["message"] = assemble_message(details["message"]) + error_conditions[name] = details + return error_conditions + + +def anchor_name(condition_name: str, sub_condition_name: str = None): + """ + URLs can, in practice, be up to 2,000 characters long without causing any issues. So we preserve + the condition name mostly as-is for use in the anchor, even when that name is very long. 
+ See: https://stackoverflow.com/a/417184 + """ + parts = [ + part for part in (condition_name, sub_condition_name) + if part + ] + anchor = "-".join(parts).lower().replace("_", "-") + return anchor + + +def generate_doc_rows(condition_name, condition_details): + condition_row = [ + """ +
        <tr id="{anchor}">
+            <td>{sql_state}</td>
+            <td>
+                <a href="#{anchor}">#</a> {condition_name}
+            </td>
+            <td>{message}</td>
+        </tr>
+        """
+        .format(
+            anchor=anchor_name(condition_name),
+            sql_state=condition_details.get("sqlState", ""),
+            condition_name=condition_name,
+            message=condition_details["message"],
+        )
+    ]
+    sub_condition_rows = [
+        """
+        <tr id="{anchor}">
+            <td></td>
+            <td>
+                <a href="#{anchor}">#</a> {sub_condition_name}
+            </td>
+            <td>{message}</td>
+        </tr>
+        """
+        .format(
+            anchor=anchor_name(condition_name, sub_condition_name),
+            sub_condition_name=sub_condition_name,
+            message=sub_condition_details["message"],
+        )
+        for sub_condition_name, sub_condition_details
+        in sorted(condition_details.get("subClass", {}).items())
+    ]
+    return [dedent(row).strip() for row in condition_row + sub_condition_rows]
+
+
+def generate_doc_table(error_conditions):
+    doc_rows = chain.from_iterable(
+        generate_doc_rows(condition_name, condition_details)
+        for condition_name, condition_details in sorted(error_conditions.items())
+    )
+    table_html = (
+        """
+        <table id="error-conditions">
+        <tr>
+            <th>Error State / SQLSTATE</th>
+            <th>Error Condition & Sub-Condition</th>
+            <th>Message</th>
+        </tr>
+        {rows}
+        </table>
      + """ + ) + # We dedent here rather than above so that the interpolated rows (which are not + # indented) don't prevent the dedent from working. + table_html = dedent(table_html).strip().format(rows="\n".join(list(doc_rows))) + return table_html + + +if __name__ == "__main__": + error_conditions = load_error_conditions(ERROR_CONDITIONS_PATH) + doc_table = generate_doc_table(error_conditions) + (DOCS_ROOT / "_generated").mkdir(exist_ok=True) + html_table_path = DOCS_ROOT / "_generated" / "error-conditions.html" + with open(html_table_path, "w") as f: + f.write(doc_table) + print("Generated:", os.path.relpath(html_table_path, start=SPARK_PROJECT_ROOT)) diff --git a/examples/src/main/python/sql/arrow.py b/examples/src/main/python/sql/arrow.py index 03daf18eadbf3..0200d094185d5 100644 --- a/examples/src/main/python/sql/arrow.py +++ b/examples/src/main/python/sql/arrow.py @@ -33,6 +33,25 @@ require_minimum_pyarrow_version() +def dataframe_to_from_arrow_table_example(spark: SparkSession) -> None: + import pyarrow as pa + import numpy as np + + # Create a PyArrow Table + table = pa.table([pa.array(np.random.rand(100)) for i in range(3)], names=["a", "b", "c"]) + + # Create a Spark DataFrame from the PyArrow Table + df = spark.createDataFrame(table) + + # Convert the Spark DataFrame to a PyArrow Table + result_table = df.select("*").toArrow() + + print(result_table.schema) + # a: double + # b: double + # c: double + + def dataframe_with_arrow_example(spark: SparkSession) -> None: import numpy as np import pandas as pd @@ -302,6 +321,8 @@ def arrow_slen(s): # type: ignore[no-untyped-def] .appName("Python Arrow-in-Spark example") \ .getOrCreate() + print("Running Arrow conversion example: DataFrame to Table") + dataframe_to_from_arrow_table_example(spark) print("Running Pandas to/from conversion example") dataframe_with_arrow_example(spark) print("Running pandas_udf example: Series to Frame") diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala index 193d6551a3666..045149922c8ed 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala @@ -22,7 +22,7 @@ import java.util.concurrent.TimeUnit import org.apache.spark.SparkContext import org.apache.spark.graphx.impl.{EdgePartitionBuilder, GraphImpl} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.TOTAL_TIME +import org.apache.spark.internal.LogKeys.TOTAL_TIME import org.apache.spark.storage.StorageLevel /** diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala index e1b9b2fe5ae64..1493d8114c699 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala @@ -21,7 +21,7 @@ import scala.reflect.ClassTag import org.apache.spark.graphx.util.PeriodicGraphCheckpointer import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.NUM_ITERATIONS +import org.apache.spark.internal.LogKeys.NUM_ITERATIONS import org.apache.spark.rdd.RDD import org.apache.spark.rdd.util.PeriodicRDDCheckpointer diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index aa5898fb585c9..4fe010bfce785 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ 
b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -23,7 +23,7 @@ import breeze.linalg.{Vector => BV} import org.apache.spark.graphx._ import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.NUM_ITERATIONS +import org.apache.spark.internal.LogKeys.NUM_ITERATIONS import org.apache.spark.ml.linalg.{Vector, Vectors} /** diff --git a/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/AbortableStreamBasedCheckpointFileManager.scala b/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/AbortableStreamBasedCheckpointFileManager.scala index 2afab01ec7b03..599361009fcc5 100644 --- a/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/AbortableStreamBasedCheckpointFileManager.scala +++ b/hadoop-cloud/src/main/scala/org/apache/spark/internal/io/cloud/AbortableStreamBasedCheckpointFileManager.scala @@ -24,7 +24,7 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.execution.streaming.AbstractFileContextBasedCheckpointFileManager import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream @@ -36,7 +36,7 @@ class AbortableStreamBasedCheckpointFileManager(path: Path, hadoopConf: Configur s" an fs (path: $path) with abortable stream support") } - logInfo(s"Writing atomically to $path based on abortable stream") + logInfo(log"Writing atomically to ${MDC(LogKeys.PATH, path)} based on abortable stream") class AbortableStreamBasedFSDataOutputStream( fsDataOutputStream: FSDataOutputStream, @@ -53,7 +53,8 @@ class AbortableStreamBasedCheckpointFileManager(path: Path, hadoopConf: Configur fsDataOutputStream.close() } catch { case NonFatal(e) => - logWarning(s"Error cancelling write to $path (stream: $fsDataOutputStream)", e) + logWarning(log"Error cancelling write to ${MDC(LogKeys.PATH, path)} " + + log"(stream: ${MDC(LogKeys.FS_DATA_OUTPUT_STREAM, fsDataOutputStream)})", e) } finally { terminated = true } @@ -71,7 +72,8 @@ class AbortableStreamBasedCheckpointFileManager(path: Path, hadoopConf: Configur fsDataOutputStream.close() } catch { case NonFatal(e) => - logWarning(s"Error closing $path (stream: $fsDataOutputStream)", e) + logWarning(log"Error closing ${MDC(LogKeys.PATH, path)} " + + log"(stream: ${MDC(LogKeys.FS_DATA_OUTPUT_STREAM, fsDataOutputStream)})", e) } finally { terminated = true } diff --git a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java index dc5840185d629..7b23db052e8b8 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java +++ b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java @@ -27,6 +27,7 @@ public class JavaModuleOptions { private static final String[] DEFAULT_MODULE_OPTIONS = { "-XX:+IgnoreUnrecognizedVMOptions", + "--add-modules=jdk.incubator.vector", "--add-opens=java.base/java.lang=ALL-UNNAMED", "--add-opens=java.base/java.lang.invoke=ALL-UNNAMED", "--add-opens=java.base/java.lang.reflect=ALL-UNNAMED", diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index df4fccd0f01e7..e4511421cd13c 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ 
b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -55,6 +55,7 @@ class SparkSubmitOptionParser { protected final String PACKAGES = "--packages"; protected final String PACKAGES_EXCLUDE = "--exclude-packages"; protected final String PROPERTIES_FILE = "--properties-file"; + protected final String LOAD_SPARK_DEFAULTS = "--load-spark-defaults"; protected final String PROXY_USER = "--proxy-user"; protected final String PY_FILES = "--py-files"; protected final String REPOSITORIES = "--repositories"; @@ -130,6 +131,7 @@ class SparkSubmitOptionParser { { USAGE_ERROR }, { VERBOSE, "-v" }, { VERSION }, + { LOAD_SPARK_DEFAULTS }, }; /** diff --git a/licenses-binary/LICENSE-check-qual.txt b/licenses-binary/LICENSE-check-qual.txt new file mode 100644 index 0000000000000..d542ab3ec3ed8 --- /dev/null +++ b/licenses-binary/LICENSE-check-qual.txt @@ -0,0 +1,413 @@ +The Checker Framework +Copyright 2004-present by the Checker Framework developers + + +Most of the Checker Framework is licensed under the GNU General Public +License, version 2 (GPL2), with the classpath exception. The text of this +license appears below. This is the same license used for OpenJDK. + +A few parts of the Checker Framework have more permissive licenses, notably +the parts that you might want to include with your own program. + + * The annotations and utility files are licensed under the MIT License. + (The text of this license also appears below.) This applies to + checker-qual*.jar and checker-util.jar and all the files that appear in + them, which is all files in checker-qual and checker-util directories. + It also applies to the cleanroom implementations of + third-party annotations (in checker/src/testannotations/, + framework/src/main/java/org/jmlspecs/, and + framework/src/main/java/com/google/). + +The Checker Framework includes annotations for some libraries. Those in +.astub files use the MIT License. Those in https://github.com/typetools/jdk +(which appears in the annotated-jdk directory of file checker.jar) use the +GPL2 license. + +Some external libraries that are included with the Checker Framework +distribution have different licenses. Here are some examples. + + * JavaParser is dual licensed under the LGPL or the Apache license -- you + may use it under whichever one you want. (The JavaParser source code + contains a file with the text of the GPL, but it is not clear why, since + JavaParser does not use the GPL.) See + https://github.com/typetools/stubparser . + + * Annotation Tools (https://github.com/typetools/annotation-tools) uses + the MIT license. + + * Libraries in plume-lib (https://github.com/plume-lib/) are licensed + under the MIT License. + +=========================================================================== + +The GNU General Public License (GPL) + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to share +and change it. By contrast, the GNU General Public License is intended to +guarantee your freedom to share and change free software--to make sure the +software is free for all its users. This General Public License applies to +most of the Free Software Foundation's software and to any other program whose +authors commit to using it. 
(Some other Free Software Foundation software is +covered by the GNU Library General Public License instead.) You can apply it to +your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom to +distribute copies of free software (and charge for this service if you wish), +that you receive source code or can get it if you want it, that you can change +the software or use pieces of it in new free programs; and that you know you +can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to deny +you these rights or to ask you to surrender the rights. These restrictions +translate to certain responsibilities for you if you distribute copies of the +software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or for +a fee, you must give the recipients all the rights that you have. You must +make sure that they, too, receive or can get the source code. And you must +show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If the +software is modified by someone else and passed on, we want its recipients to +know that what they have is not the original, so that any problems introduced +by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will +individually obtain patent licenses, in effect making the program proprietary. +To prevent this, we have made it clear that any patent must be licensed for +everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice +placed by the copyright holder saying it may be distributed under the terms of +this General Public License. The "Program", below, refers to any such program +or work, and a "work based on the Program" means either the Program or any +derivative work under copyright law: that is to say, a work containing the +Program or a portion of it, either verbatim or with modifications and/or +translated into another language. (Hereinafter, translation is included +without limitation in the term "modification".) Each licensee is addressed as +"you". + +Activities other than copying, distribution and modification are not covered by +this License; they are outside its scope. The act of running the Program is +not restricted, and the output from the Program is covered only if its contents +constitute a work based on the Program (independent of having been made by +running the Program). Whether that is true depends on what the Program does. + +1. 
You may copy and distribute verbatim copies of the Program's source code as +you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this License +and to the absence of any warranty; and give any other recipients of the +Program a copy of this License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may +at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, thus +forming a work based on the Program, and copy and distribute such modifications +or work under the terms of Section 1 above, provided that you also meet all of +these conditions: + + a) You must cause the modified files to carry prominent notices stating + that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in whole or + in part contains or is derived from the Program or any part thereof, to be + licensed as a whole at no charge to all third parties under the terms of + this License. + + c) If the modified program normally reads commands interactively when run, + you must cause it, when started running for such interactive use in the + most ordinary way, to print or display an announcement including an + appropriate copyright notice and a notice that there is no warranty (or + else, saying that you provide a warranty) and that users may redistribute + the program under these conditions, and telling the user how to view a copy + of this License. (Exception: if the Program itself is interactive but does + not normally print such an announcement, your work based on the Program is + not required to print an announcement.) + +These requirements apply to the modified work as a whole. If identifiable +sections of that work are not derived from the Program, and can be reasonably +considered independent and separate works in themselves, then this License, and +its terms, do not apply to those sections when you distribute them as separate +works. But when you distribute the same sections as part of a whole which is a +work based on the Program, the distribution of the whole must be on the terms +of this License, whose permissions for other licensees extend to the entire +whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your +rights to work written entirely by you; rather, the intent is to exercise the +right to control the distribution of derivative or collective works based on +the Program. + +In addition, mere aggregation of another work not based on the Program with the +Program (or with a work based on the Program) on a volume of a storage or +distribution medium does not bring the other work under the scope of this +License. + +3. 
You may copy and distribute the Program (or a work based on it, under +Section 2) in object code or executable form under the terms of Sections 1 and +2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable source + code, which must be distributed under the terms of Sections 1 and 2 above + on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three years, to + give any third party, for a charge no more than your cost of physically + performing source distribution, a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of Sections 1 + and 2 above on a medium customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer to + distribute corresponding source code. (This alternative is allowed only + for noncommercial distribution and only if you received the program in + object code or executable form with such an offer, in accord with + Subsection b above.) + +The source code for a work means the preferred form of the work for making +modifications to it. For an executable work, complete source code means all +the source code for all modules it contains, plus any associated interface +definition files, plus the scripts used to control compilation and installation +of the executable. However, as a special exception, the source code +distributed need not include anything that is normally distributed (in either +source or binary form) with the major components (compiler, kernel, and so on) +of the operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the source +code from the same place counts as distribution of the source code, even though +third parties are not compelled to copy the source along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as +expressly provided under this License. Any attempt otherwise to copy, modify, +sublicense or distribute the Program is void, and will automatically terminate +your rights under this License. However, parties who have received copies, or +rights, from you under this License will not have their licenses terminated so +long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. +However, nothing else grants you permission to modify or distribute the Program +or its derivative works. These actions are prohibited by law if you do not +accept this License. Therefore, by modifying or distributing the Program (or +any work based on the Program), you indicate your acceptance of this License to +do so, and all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), +the recipient automatically receives a license from the original licensor to +copy, distribute or modify the Program subject to these terms and conditions. +You may not impose any further restrictions on the recipients' exercise of the +rights granted herein. You are not responsible for enforcing compliance by +third parties to this License. + +7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), conditions +are imposed on you (whether by court order, agreement or otherwise) that +contradict the conditions of this License, they do not excuse you from the +conditions of this License. If you cannot distribute so as to satisfy +simultaneously your obligations under this License and any other pertinent +obligations, then as a consequence you may not distribute the Program at all. +For example, if a patent license would not permit royalty-free redistribution +of the Program by all those who receive copies directly or indirectly through +you, then the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply and +the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents or +other property right claims or to contest validity of any such claims; this +section has the sole purpose of protecting the integrity of the free software +distribution system, which is implemented by public license practices. Many +people have made generous contributions to the wide range of software +distributed through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing to +distribute software through any other system and a licensee cannot impose that +choice. + +This section is intended to make thoroughly clear what is believed to be a +consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain +countries either by patents or by copyrighted interfaces, the original +copyright holder who places the Program under this License may add an explicit +geographical distribution limitation excluding those countries, so that +distribution is permitted only in or among countries not thus excluded. In +such case, this License incorporates the limitation as if written in the body +of this License. + +9. The Free Software Foundation may publish revised and/or new versions of the +General Public License from time to time. Such new versions will be similar in +spirit to the present version, but may differ in detail to address new problems +or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any later +version", you have the option of following the terms and conditions either of +that version or of any later version published by the Free Software Foundation. +If the Program does not specify a version number of this License, you may +choose any version ever published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs +whose distribution conditions are different, write to the author to ask for +permission. For software which is copyrighted by the Free Software Foundation, +write to the Free Software Foundation; we sometimes make exceptions for this. +Our decision will be guided by the two goals of preserving the free status of +all derivatives of our free software and of promoting the sharing and reuse of +software generally. + +NO WARRANTY + +11. 
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR +THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE +STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE +PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, +YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL +ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE +PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR +INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA +BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER +OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest possible +use to the public, the best way to achieve this is to make it free software +which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach +them to the start of each source file to most effectively convey the exclusion +of warranty; and each file should have at least the "copyright" line and a +pointer to where the full notice is found. + + One line to give the program's name and a brief idea of what it does. + + Copyright (C) + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., 59 + Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when it +starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author Gnomovision comes + with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free + software, and you are welcome to redistribute it under certain conditions; + type 'show c' for details. + +The hypothetical commands 'show w' and 'show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than 'show w' and 'show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your school, +if any, to sign a "copyright disclaimer" for the program, if necessary. 
Here +is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + 'Gnomovision' (which makes passes at compilers) written by James Hacker. + + signature of Ty Coon, 1 April 1989 + + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General Public +License instead of this License. + + +"CLASSPATH" EXCEPTION TO THE GPL + +Certain source files distributed by Oracle America and/or its affiliates are +subject to the following clarification and special exception to the GPL, but +only where Oracle has expressly included in the particular source file's header +the words "Oracle designates this particular file as subject to the "Classpath" +exception as provided by Oracle in the LICENSE file that accompanied this code." + + Linking this library statically or dynamically with other modules is making + a combined work based on this library. Thus, the terms and conditions of + the GNU General Public License cover the whole combination. + + As a special exception, the copyright holders of this library give you + permission to link this library with independent modules to produce an + executable, regardless of the license terms of these independent modules, + and to copy and distribute the resulting executable under terms of your + choice, provided that you also meet, for each linked independent module, + the terms and conditions of the license of that module. An independent + module is a module which is not derived from or based on this library. If + you modify this library, you may extend this exception to your version of + the library, but you are not obligated to do so. If you do not wish to do + so, delete this exception statement from your version. + +=========================================================================== + +MIT License: + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +=========================================================================== \ No newline at end of file diff --git a/licenses-binary/LICENSE-icu4j.txt b/licenses-binary/LICENSE-icu4j.txt new file mode 100644 index 0000000000000..80b587723a67f --- /dev/null +++ b/licenses-binary/LICENSE-icu4j.txt @@ -0,0 +1,519 @@ +UNICODE, INC. 
LICENSE AGREEMENT - DATA FILES AND SOFTWARE + +See Terms of Use +for definitions of Unicode Inc.’s Data Files and Software. + +NOTICE TO USER: Carefully read the following legal agreement. +BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S +DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), +YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE +TERMS AND CONDITIONS OF THIS AGREEMENT. +IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE +THE DATA FILES OR SOFTWARE. + +COPYRIGHT AND PERMISSION NOTICE + +Copyright © 1991-2022 Unicode, Inc. All rights reserved. +Distributed under the Terms of Use in https://www.unicode.org/copyright.html. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Unicode data files and any associated documentation +(the "Data Files") or Unicode software and any associated documentation +(the "Software") to deal in the Data Files or Software +without restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, and/or sell copies of +the Data Files or Software, and to permit persons to whom the Data Files +or Software are furnished to do so, provided that either +(a) this copyright and permission notice appear with all copies +of the Data Files or Software, or +(b) this copyright and permission notice appear in associated +Documentation. + +THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT OF THIRD PARTY RIGHTS. +IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS +NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL +DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, +DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER +TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THE DATA FILES OR SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, +use or other dealings in these Data Files or Software without prior +written authorization of the copyright holder. + +---------------------------------------------------------------------- + +Third-Party Software Licenses + +This section contains third-party software notices and/or additional +terms for licensed third-party software components included within ICU +libraries. + +---------------------------------------------------------------------- + +ICU License - ICU 1.8.1 to ICU 57.1 + +COPYRIGHT AND PERMISSION NOTICE + +Copyright (c) 1995-2016 International Business Machines Corporation and others +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, provided that the above +copyright notice(s) and this permission notice appear in all copies of +the Software and that both the above copyright notice(s) and this +permission notice appear in supporting documentation. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY +SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER +RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF +CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +Except as contained in this notice, the name of a copyright holder +shall not be used in advertising or otherwise to promote the sale, use +or other dealings in this Software without prior written authorization +of the copyright holder. + +All trademarks and registered trademarks mentioned herein are the +property of their respective owners. + +---------------------------------------------------------------------- + +Chinese/Japanese Word Break Dictionary Data (cjdict.txt) + + # The Google Chrome software developed by Google is licensed under + # the BSD license. Other software included in this distribution is + # provided under other licenses, as set forth below. + # + # The BSD License + # http://opensource.org/licenses/bsd-license.php + # Copyright (C) 2006-2008, Google Inc. + # + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # Redistributions of source code must retain the above copyright notice, + # this list of conditions and the following disclaimer. + # Redistributions in binary form must reproduce the above + # copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided with + # the distribution. + # Neither the name of Google Inc. nor the names of its + # contributors may be used to endorse or promote products derived from + # this software without specific prior written permission. + # + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + # + # + # The word list in cjdict.txt are generated by combining three word lists + # listed below with further processing for compound word breaking. The + # frequency is generated with an iterative training against Google web + # corpora. + # + # * Libtabe (Chinese) + # - https://sourceforge.net/project/?group_id=1519 + # - Its license terms and conditions are shown below. + # + # * IPADIC (Japanese) + # - http://chasen.aist-nara.ac.jp/chasen/distribution.html + # - Its license terms and conditions are shown below. 
+ # + # ---------COPYING.libtabe ---- BEGIN-------------------- + # + # /* + # * Copyright (c) 1999 TaBE Project. + # * Copyright (c) 1999 Pai-Hsiang Hsiao. + # * All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the TaBE Project nor the names of its + # * contributors may be used to endorse or promote products derived + # * from this software without specific prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. + # */ + # + # /* + # * Copyright (c) 1999 Computer Systems and Communication Lab, + # * Institute of Information Science, Academia + # * Sinica. All rights reserved. + # * + # * Redistribution and use in source and binary forms, with or without + # * modification, are permitted provided that the following conditions + # * are met: + # * + # * . Redistributions of source code must retain the above copyright + # * notice, this list of conditions and the following disclaimer. + # * . Redistributions in binary form must reproduce the above copyright + # * notice, this list of conditions and the following disclaimer in + # * the documentation and/or other materials provided with the + # * distribution. + # * . Neither the name of the Computer Systems and Communication Lab + # * nor the names of its contributors may be used to endorse or + # * promote products derived from this software without specific + # * prior written permission. + # * + # * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + # * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + # * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ # */ + # + # Copyright 1996 Chih-Hao Tsai @ Beckman Institute, + # University of Illinois + # c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4 + # + # ---------------COPYING.libtabe-----END-------------------------------- + # + # + # ---------------COPYING.ipadic-----BEGIN------------------------------- + # + # Copyright 2000, 2001, 2002, 2003 Nara Institute of Science + # and Technology. All Rights Reserved. + # + # Use, reproduction, and distribution of this software is permitted. + # Any copy of this software, whether in its original form or modified, + # must include both the above copyright notice and the following + # paragraphs. + # + # Nara Institute of Science and Technology (NAIST), + # the copyright holders, disclaims all warranties with regard to this + # software, including all implied warranties of merchantability and + # fitness, in no event shall NAIST be liable for + # any special, indirect or consequential damages or any damages + # whatsoever resulting from loss of use, data or profits, whether in an + # action of contract, negligence or other tortuous action, arising out + # of or in connection with the use or performance of this software. + # + # A large portion of the dictionary entries + # originate from ICOT Free Software. The following conditions for ICOT + # Free Software applies to the current dictionary as well. + # + # Each User may also freely distribute the Program, whether in its + # original form or modified, to any third party or parties, PROVIDED + # that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear + # on, or be attached to, the Program, which is distributed substantially + # in the same form as set out herein and that such intended + # distribution, if actually made, will neither violate or otherwise + # contravene any of the laws and regulations of the countries having + # jurisdiction over the User or the intended distribution itself. + # + # NO WARRANTY + # + # The program was produced on an experimental basis in the course of the + # research and development conducted during the project and is provided + # to users as so produced on an experimental basis. Accordingly, the + # program is provided without any warranty whatsoever, whether express, + # implied, statutory or otherwise. The term "warranty" used herein + # includes, but is not limited to, any warranty of the quality, + # performance, merchantability and fitness for a particular purpose of + # the program and the nonexistence of any infringement or violation of + # any right of any third party. + # + # Each user of the program will agree and understand, and be deemed to + # have agreed and understood, that there is no warranty whatsoever for + # the program and, accordingly, the entire risk arising from or + # otherwise connected with the program is assumed by the user. 
+ # + # Therefore, neither ICOT, the copyright holder, or any other + # organization that participated in or was otherwise related to the + # development of the program and their respective officials, directors, + # officers and other employees shall be held liable for any and all + # damages, including, without limitation, general, special, incidental + # and consequential damages, arising out of or otherwise in connection + # with the use or inability to use the program or any product, material + # or result produced or otherwise obtained by using the program, + # regardless of whether they have been advised of, or otherwise had + # knowledge of, the possibility of such damages at any time during the + # project or thereafter. Each user will be deemed to have agreed to the + # foregoing by his or her commencement of use of the program. The term + # "use" as used herein includes, but is not limited to, the use, + # modification, copying and distribution of the program and the + # production of secondary products from the program. + # + # In the case where the program, whether in its original form or + # modified, was distributed or delivered to or received by a user from + # any person, organization or entity other than ICOT, unless it makes or + # grants independently of ICOT any specific warranty to the user in + # writing, such person, organization or entity, will also be exempted + # from and not be held liable to the user for any such damages as noted + # above as far as the program is concerned. + # + # ---------------COPYING.ipadic-----END---------------------------------- + +---------------------------------------------------------------------- + +Lao Word Break Dictionary Data (laodict.txt) + + # Copyright (C) 2016 and later: Unicode, Inc. and others. + # License & terms of use: http://www.unicode.org/copyright.html + # Copyright (c) 2015 International Business Machines Corporation + # and others. All Rights Reserved. + # + # Project: https://github.com/rober42539/lao-dictionary + # Dictionary: https://github.com/rober42539/lao-dictionary/laodict.txt + # License: https://github.com/rober42539/lao-dictionary/LICENSE.txt + # (copied below) + # + # This file is derived from the above dictionary version of Nov 22, 2020 + # ---------------------------------------------------------------------- + # Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell. + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions are met: + # + # Redistributions of source code must retain the above copyright notice, this + # list of conditions and the following disclaimer. Redistributions in binary + # form must reproduce the above copyright notice, this list of conditions and + # the following disclaimer in the documentation and/or other materials + # provided with the distribution. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, + # INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + # STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + # OF THE POSSIBILITY OF SUCH DAMAGE. + # -------------------------------------------------------------------------- + +---------------------------------------------------------------------- + +Burmese Word Break Dictionary Data (burmesedict.txt) + + # Copyright (c) 2014 International Business Machines Corporation + # and others. All Rights Reserved. + # + # This list is part of a project hosted at: + # github.com/kanyawtech/myanmar-karen-word-lists + # + # -------------------------------------------------------------------------- + # Copyright (c) 2013, LeRoy Benjamin Sharon + # All rights reserved. + # + # Redistribution and use in source and binary forms, with or without + # modification, are permitted provided that the following conditions + # are met: Redistributions of source code must retain the above + # copyright notice, this list of conditions and the following + # disclaimer. Redistributions in binary form must reproduce the + # above copyright notice, this list of conditions and the following + # disclaimer in the documentation and/or other materials provided + # with the distribution. + # + # Neither the name Myanmar Karen Word Lists, nor the names of its + # contributors may be used to endorse or promote products derived + # from this software without specific prior written permission. + # + # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF + # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + # SUCH DAMAGE. + # -------------------------------------------------------------------------- + +---------------------------------------------------------------------- + +Time Zone Database + + ICU uses the public domain data and code derived from Time Zone +Database for its time zone support. The ownership of the TZ database +is explained in BCP 175: Procedure for Maintaining the Time Zone +Database section 7. + + # 7. Database Ownership + # + # The TZ database itself is not an IETF Contribution or an IETF + # document. Rather it is a pre-existing and regularly updated work + # that is in the public domain, and is intended to remain in the + # public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do + # not apply to the TZ Database or contributions that individuals make + # to it. 
Should any claims be made and substantiated against the TZ + # Database, the organization that is providing the IANA + # Considerations defined in this RFC, under the memorandum of + # understanding with the IETF, currently ICANN, may act in accordance + # with all competent court orders. No ownership claims will be made + # by ICANN or the IETF Trust on the database or the code. Any person + # making a contribution to the database or code waives all rights to + # future claims in that contribution or in the TZ Database. + +---------------------------------------------------------------------- + +Google double-conversion + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------------------------- + +File: aclocal.m4 (only for ICU4C) +Section: pkg.m4 - Macros to locate and utilise pkg-config. + + +Copyright © 2004 Scott James Remnant . +Copyright © 2012-2015 Dan Nicholson + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA +02111-1307, USA. + +As a special exception to the GNU General Public License, if you +distribute this file as part of a program that contains a +configuration script generated by Autoconf, you may include it under +the same distribution terms that you use for the rest of that +program. + + +(The condition for the exception is fulfilled because +ICU4C includes a configuration script generated by Autoconf, +namely the `configure` script.) 
+ +---------------------------------------------------------------------- + +File: config.guess (only for ICU4C) + + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, see . + +As a special exception to the GNU General Public License, if you +distribute this file as part of a program that contains a +configuration script generated by Autoconf, you may include it under +the same distribution terms that you use for the rest of that +program. This Exception is an additional permission under section 7 +of the GNU General Public License, version 3 ("GPLv3"). + + +(The condition for the exception is fulfilled because +ICU4C includes a configuration script generated by Autoconf, +namely the `configure` script.) + +---------------------------------------------------------------------- + +File: install-sh (only for ICU4C) + + +Copyright 1991 by the Massachusetts Institute of Technology + +Permission to use, copy, modify, distribute, and sell this software and its +documentation for any purpose is hereby granted without fee, provided that +the above copyright notice appear in all copies and that both that +copyright notice and this permission notice appear in supporting +documentation, and that the name of M.I.T. not be used in advertising or +publicity pertaining to distribution of the software without specific, +written prior permission. M.I.T. makes no representations about the +suitability of this software for any purpose. It is provided "as is" +without express or implied warranty. diff --git a/licenses-binary/LICENSE-jakarta-servlet-api.txt b/licenses-binary/LICENSE-jakarta-servlet-api.txt new file mode 100644 index 0000000000000..e23ece2c85241 --- /dev/null +++ b/licenses-binary/LICENSE-jakarta-servlet-api.txt @@ -0,0 +1,277 @@ +Eclipse Public License - v 2.0 + + THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE + PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION + OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + + a) in the case of the initial Contributor, the initial content + Distributed under this Agreement, and + + b) in the case of each subsequent Contributor: + i) changes to the Program, and + ii) additions to the Program; + where such changes and/or additions to the Program originate from + and are Distributed by that particular Contributor. A Contribution + "originates" from a Contributor if it was added to the Program by + such Contributor itself or anyone acting on such Contributor's behalf. + Contributions do not include changes or additions to the Program that + are not Modified Works. + +"Contributor" means any person or entity that Distributes the Program. + +"Licensed Patents" mean patent claims licensable by a Contributor which +are necessarily infringed by the use or sale of its Contribution alone +or when combined with the Program. + +"Program" means the Contributions Distributed in accordance with this +Agreement. 
+ +"Recipient" means anyone who receives the Program under this Agreement +or any Secondary License (as applicable), including Contributors. + +"Derivative Works" shall mean any work, whether in Source Code or other +form, that is based on (or derived from) the Program and for which the +editorial revisions, annotations, elaborations, or other modifications +represent, as a whole, an original work of authorship. + +"Modified Works" shall mean any work in Source Code or other form that +results from an addition to, deletion from, or modification of the +contents of the Program, including, for purposes of clarity any new file +in Source Code form that contains any contents of the Program. Modified +Works shall not include works that contain only declarations, +interfaces, types, classes, structures, or files of the Program solely +in each case in order to link to, bind by name, or subclass the Program +or Modified Works thereof. + +"Distribute" means the acts of a) distributing or b) making available +in any manner that enables the transfer of a copy. + +"Source Code" means the form of a Program preferred for making +modifications, including but not limited to software source code, +documentation source, and configuration files. + +"Secondary License" means either the GNU General Public License, +Version 2.0, or any later versions of that license, including any +exceptions or additional permissions as identified by the initial +Contributor. + +2. GRANT OF RIGHTS + + a) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare Derivative Works of, publicly display, + publicly perform, Distribute and sublicense the Contribution of such + Contributor, if any, and such Derivative Works. + + b) Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, + if any, in Source Code or other form. This patent license shall + apply to the combination of the Contribution and the Program if, at + the time the Contribution is added by the Contributor, such addition + of the Contribution causes such combination to be covered by the + Licensed Patents. The patent license shall not apply to any other + combinations which include the Contribution. No hardware per se is + licensed hereunder. + + c) Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. + Each Contributor disclaims any liability to Recipient for claims + brought by any other entity based on infringement of intellectual + property rights or otherwise. As a condition to exercising the + rights and licenses granted hereunder, each Recipient hereby + assumes sole responsibility to secure any other intellectual + property rights needed, if any. For example, if a third party + patent license is required to allow Recipient to Distribute the + Program, it is Recipient's responsibility to acquire that license + before distributing the Program. + + d) Each Contributor represents that to its knowledge it has + sufficient copyright rights in its Contribution, if any, to grant + the copyright license set forth in this Agreement. 
+ + e) Notwithstanding the terms of any Secondary License, no + Contributor makes additional grants to any Recipient (other than + those set forth in this Agreement) as a result of such Recipient's + receipt of the Program under the terms of a Secondary License + (if permitted under the terms of Section 3). + +3. REQUIREMENTS + +3.1 If a Contributor Distributes the Program in any form, then: + + a) the Program must also be made available as Source Code, in + accordance with section 3.2, and the Contributor must accompany + the Program with a statement that the Source Code for the Program + is available under this Agreement, and informs Recipients how to + obtain it in a reasonable manner on or through a medium customarily + used for software exchange; and + + b) the Contributor may Distribute the Program under a license + different than this Agreement, provided that such license: + i) effectively disclaims on behalf of all other Contributors all + warranties and conditions, express and implied, including + warranties or conditions of title and non-infringement, and + implied warranties or conditions of merchantability and fitness + for a particular purpose; + + ii) effectively excludes on behalf of all other Contributors all + liability for damages, including direct, indirect, special, + incidental and consequential damages, such as lost profits; + + iii) does not attempt to limit or alter the recipients' rights + in the Source Code under section 3.2; and + + iv) requires any subsequent distribution of the Program by any + party to be under a license that satisfies the requirements + of this section 3. + +3.2 When the Program is Distributed as Source Code: + + a) it must be made available under this Agreement, or if the + Program (i) is combined with other material in a separate file or + files made available under a Secondary License, and (ii) the initial + Contributor attached to the Source Code the notice described in + Exhibit A of this Agreement, then the Program may be made available + under the terms of such Secondary Licenses, and + + b) a copy of this Agreement must be included with each copy of + the Program. + +3.3 Contributors may not remove or alter any copyright, patent, +trademark, attribution notices, disclaimers of warranty, or limitations +of liability ("notices") contained within the Program from any copy of +the Program which they Distribute, provided that Contributors may add +their own appropriate notices. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain responsibilities +with respect to end users, business partners and the like. While this +license is intended to facilitate the commercial use of the Program, +the Contributor who includes the Program in a commercial product +offering should do so in a manner which does not create potential +liability for other Contributors. Therefore, if a Contributor includes +the Program in a commercial product offering, such Contributor +("Commercial Contributor") hereby agrees to defend and indemnify every +other Contributor ("Indemnified Contributor") against any losses, +damages and costs (collectively "Losses") arising from claims, lawsuits +and other legal actions brought by a third party against the Indemnified +Contributor to the extent caused by the acts or omissions of such +Commercial Contributor in connection with its distribution of the Program +in a commercial product offering. 
The obligations in this section do not +apply to any claims or Losses relating to any actual or alleged +intellectual property infringement. In order to qualify, an Indemnified +Contributor must: a) promptly notify the Commercial Contributor in +writing of such claim, and b) allow the Commercial Contributor to control, +and cooperate with the Commercial Contributor in, the defense and any +related settlement negotiations. The Indemnified Contributor may +participate in any such claim at its own expense. + +For example, a Contributor might include the Program in a commercial +product offering, Product X. That Contributor is then a Commercial +Contributor. If that Commercial Contributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Contributor's responsibility +alone. Under this section, the Commercial Contributor would have to +defend claims against the other Contributors related to those performance +claims and warranties, and if a court requires any other Contributor to +pay any damages as a result, the Commercial Contributor must pay +those damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" +BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR +IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF +TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR +PURPOSE. Each Recipient is solely responsible for determining the +appropriateness of using and distributing the Program and assumes all +risks associated with its exercise of rights under this Agreement, +including but not limited to the risks and costs of program errors, +compliance with applicable laws, damage to or loss of data, programs +or equipment, and unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT +PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS +SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST +PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE +EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + +7. GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against any entity +(including a cross-claim or counterclaim in a lawsuit) alleging that the +Program itself (excluding combinations of the Program with other software +or hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. 
+ +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. If all Recipient's +rights under this Agreement terminate, Recipient agrees to cease use +and distribution of the Program as soon as reasonably practicable. +However, Recipient's obligations under this Agreement and any licenses +granted by Recipient relating to the Program shall continue and survive. + +Everyone is permitted to copy and distribute copies of this Agreement, +but in order to avoid inconsistency the Agreement is copyrighted and +may only be modified in the following manner. The Agreement Steward +reserves the right to publish new versions (including revisions) of +this Agreement from time to time. No one other than the Agreement +Steward has the right to modify this Agreement. The Eclipse Foundation +is the initial Agreement Steward. The Eclipse Foundation may assign the +responsibility to serve as the Agreement Steward to a suitable separate +entity. Each new version of the Agreement will be given a distinguishing +version number. The Program (including Contributions) may always be +Distributed subject to the version of the Agreement under which it was +received. In addition, after a new version of the Agreement is published, +Contributor may elect to Distribute the Program (including its +Contributions) under the new version. + +Except as expressly stated in Sections 2(a) and 2(b) above, Recipient +receives no rights or licenses to the intellectual property of any +Contributor under this Agreement, whether expressly, by implication, +estoppel or otherwise. All rights in the Program not expressly granted +under this Agreement are reserved. Nothing in this Agreement is intended +to be enforceable by any entity that is not a Contributor or Recipient. +No third-party beneficiary rights are created under this Agreement. + +Exhibit A - Form of Secondary Licenses Notice + +"This Source Code may also be made available under the following +Secondary Licenses when the conditions for such availability set forth +in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), +version(s), and exceptions or additional permissions here}." + + Simply including a copy of this Agreement, including this Exhibit A + is not sufficient to license the Source Code under Secondary Licenses. + + If it is not possible or desirable to put the notice in a particular + file, then You may include the notice in a location (such as a LICENSE + file in a relevant directory) where a recipient would be likely to + look for such a notice. + + You may add additional accurate notices of copyright ownership. \ No newline at end of file diff --git a/licenses-binary/LICENSE-jline3.txt b/licenses-binary/LICENSE-jline3.txt new file mode 100644 index 0000000000000..ed9503f23c239 --- /dev/null +++ b/licenses-binary/LICENSE-jline3.txt @@ -0,0 +1,34 @@ +Copyright (c) 2002-2023, the original author or authors. +All rights reserved. + +https://opensource.org/licenses/BSD-3-Clause + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the following +conditions are met: + +Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. 
+ +Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with +the distribution. + +Neither the name of JLine nor the names of its contributors +may be used to endorse or promote products derived from this +software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/licenses-binary/LICENSE-loose-version.txt b/licenses-binary/LICENSE-loose-version.txt new file mode 100644 index 0000000000000..c96142b38228f --- /dev/null +++ b/licenses-binary/LICENSE-loose-version.txt @@ -0,0 +1,279 @@ +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see https://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see https://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations, which became +Zope Corporation. In 2001, the Python Software Foundation (PSF, see +https://www.python.org/psf/) was formed, a non-profit organization +created specifically to own Python-related Intellectual Property. +Zope Corporation was a sponsoring member of the PSF. + +All Python releases are Open Source (see https://opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? (1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. 
According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +Python software and documentation are licensed under the +Python Software Foundation License Version 2. + +Starting with Python 3.8.6, examples, recipes, and other code in +the documentation are dual licensed under the PSF License Version 2 +and the Zero-Clause BSD license. + +Some software incorporated into Python is under different licenses. +The licenses are listed with code falling under that license. + + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. 
This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. 
Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. 
+ +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +ZERO-CLAUSE BSD LICENSE FOR CODE IN THE PYTHON DOCUMENTATION +---------------------------------------------------------------------- + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file diff --git a/licenses-binary/LICENSE-txw2.txt b/licenses-binary/LICENSE-txw2.txt new file mode 100644 index 0000000000000..da1c1cea70215 --- /dev/null +++ b/licenses-binary/LICENSE-txw2.txt @@ -0,0 +1,28 @@ +Copyright (c) 2018 Oracle and/or its affiliates. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the Eclipse Foundation, Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/licenses/LICENSE-loose-version.txt b/licenses/LICENSE-loose-version.txt new file mode 100644 index 0000000000000..c96142b38228f --- /dev/null +++ b/licenses/LICENSE-loose-version.txt @@ -0,0 +1,279 @@ +A. HISTORY OF THE SOFTWARE +========================== + +Python was created in the early 1990s by Guido van Rossum at Stichting +Mathematisch Centrum (CWI, see https://www.cwi.nl) in the Netherlands +as a successor of a language called ABC. Guido remains Python's +principal author, although it includes many contributions from others. + +In 1995, Guido continued his work on Python at the Corporation for +National Research Initiatives (CNRI, see https://www.cnri.reston.va.us) +in Reston, Virginia where he released several versions of the +software. + +In May 2000, Guido and the Python core development team moved to +BeOpen.com to form the BeOpen PythonLabs team. In October of the same +year, the PythonLabs team moved to Digital Creations, which became +Zope Corporation. In 2001, the Python Software Foundation (PSF, see +https://www.python.org/psf/) was formed, a non-profit organization +created specifically to own Python-related Intellectual Property. +Zope Corporation was a sponsoring member of the PSF. + +All Python releases are Open Source (see https://opensource.org for +the Open Source Definition). Historically, most, but not all, Python +releases have also been GPL-compatible; the table below summarizes +the various releases. + + Release Derived Year Owner GPL- + from compatible? (1) + + 0.9.0 thru 1.2 1991-1995 CWI yes + 1.3 thru 1.5.2 1.2 1995-1999 CNRI yes + 1.6 1.5.2 2000 CNRI no + 2.0 1.6 2000 BeOpen.com no + 1.6.1 1.6 2001 CNRI yes (2) + 2.1 2.0+1.6.1 2001 PSF no + 2.0.1 2.0+1.6.1 2001 PSF yes + 2.1.1 2.1+2.0.1 2001 PSF yes + 2.1.2 2.1.1 2002 PSF yes + 2.1.3 2.1.2 2002 PSF yes + 2.2 and above 2.1.1 2001-now PSF yes + +Footnotes: + +(1) GPL-compatible doesn't mean that we're distributing Python under + the GPL. All Python licenses, unlike the GPL, let you distribute + a modified version without making your changes open source. The + GPL-compatible licenses make it possible to combine Python with + other software that is released under the GPL; the others don't. + +(2) According to Richard Stallman, 1.6.1 is not GPL-compatible, + because its license has a choice of law clause. According to + CNRI, however, Stallman's lawyer has told CNRI's lawyer that 1.6.1 + is "not incompatible" with the GPL. + +Thanks to the many outside volunteers who have worked under Guido's +direction to make these releases possible. + + +B. TERMS AND CONDITIONS FOR ACCESSING OR OTHERWISE USING PYTHON +=============================================================== + +Python software and documentation are licensed under the +Python Software Foundation License Version 2. + +Starting with Python 3.8.6, examples, recipes, and other code in +the documentation are dual licensed under the PSF License Version 2 +and the Zero-Clause BSD license. + +Some software incorporated into Python is under different licenses. +The licenses are listed with code falling under that license. + + +PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 +-------------------------------------------- + +1. This LICENSE AGREEMENT is between the Python Software Foundation +("PSF"), and the Individual or Organization ("Licensee") accessing and +otherwise using this software ("Python") in source or binary form and +its associated documentation. + +2. 
Subject to the terms and conditions of this License Agreement, PSF hereby +grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, +analyze, test, perform and/or display publicly, prepare derivative works, +distribute, and otherwise use Python alone or in any derivative version, +provided, however, that PSF's License Agreement and PSF's notice of copyright, +i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023 Python Software Foundation; +All Rights Reserved" are retained in Python alone or in any derivative version +prepared by Licensee. + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python. + +4. PSF is making Python available to Licensee on an "AS IS" +basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. Nothing in this License Agreement shall be deemed to create any +relationship of agency, partnership, or joint venture between PSF and +Licensee. This License Agreement does not grant permission to use PSF +trademarks or trade name in a trademark sense to endorse or promote +products or services of Licensee, or any third party. + +8. By copying, installing or otherwise using Python, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +BEOPEN.COM LICENSE AGREEMENT FOR PYTHON 2.0 +------------------------------------------- + +BEOPEN PYTHON OPEN SOURCE LICENSE AGREEMENT VERSION 1 + +1. This LICENSE AGREEMENT is between BeOpen.com ("BeOpen"), having an +office at 160 Saratoga Avenue, Santa Clara, CA 95051, and the +Individual or Organization ("Licensee") accessing and otherwise using +this software in source or binary form and its associated +documentation ("the Software"). + +2. Subject to the terms and conditions of this BeOpen Python License +Agreement, BeOpen hereby grants Licensee a non-exclusive, +royalty-free, world-wide license to reproduce, analyze, test, perform +and/or display publicly, prepare derivative works, distribute, and +otherwise use the Software alone or in any derivative version, +provided, however, that the BeOpen Python License is retained in the +Software, alone or in any derivative version prepared by Licensee. + +3. BeOpen is making the Software available to Licensee on an "AS IS" +basis. BEOPEN MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, BEOPEN MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF THE SOFTWARE WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +4. 
BEOPEN SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF THE +SOFTWARE FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS +AS A RESULT OF USING, MODIFYING OR DISTRIBUTING THE SOFTWARE, OR ANY +DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +5. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +6. This License Agreement shall be governed by and interpreted in all +respects by the law of the State of California, excluding conflict of +law provisions. Nothing in this License Agreement shall be deemed to +create any relationship of agency, partnership, or joint venture +between BeOpen and Licensee. This License Agreement does not grant +permission to use BeOpen trademarks or trade names in a trademark +sense to endorse or promote products or services of Licensee, or any +third party. As an exception, the "BeOpen Python" logos available at +http://www.pythonlabs.com/logos.html may be used according to the +permissions granted on that web page. + +7. By copying, installing or otherwise using the software, Licensee +agrees to be bound by the terms and conditions of this License +Agreement. + + +CNRI LICENSE AGREEMENT FOR PYTHON 1.6.1 +--------------------------------------- + +1. This LICENSE AGREEMENT is between the Corporation for National +Research Initiatives, having an office at 1895 Preston White Drive, +Reston, VA 20191 ("CNRI"), and the Individual or Organization +("Licensee") accessing and otherwise using Python 1.6.1 software in +source or binary form and its associated documentation. + +2. Subject to the terms and conditions of this License Agreement, CNRI +hereby grants Licensee a nonexclusive, royalty-free, world-wide +license to reproduce, analyze, test, perform and/or display publicly, +prepare derivative works, distribute, and otherwise use Python 1.6.1 +alone or in any derivative version, provided, however, that CNRI's +License Agreement and CNRI's notice of copyright, i.e., "Copyright (c) +1995-2001 Corporation for National Research Initiatives; All Rights +Reserved" are retained in Python 1.6.1 alone or in any derivative +version prepared by Licensee. Alternately, in lieu of CNRI's License +Agreement, Licensee may substitute the following text (omitting the +quotes): "Python 1.6.1 is made available subject to the terms and +conditions in CNRI's License Agreement. This Agreement together with +Python 1.6.1 may be located on the internet using the following +unique, persistent identifier (known as a handle): 1895.22/1013. This +Agreement may also be obtained from a proxy server on the internet +using the following URL: http://hdl.handle.net/1895.22/1013". + +3. In the event Licensee prepares a derivative work that is based on +or incorporates Python 1.6.1 or any part thereof, and wants to make +the derivative work available to others as provided herein, then +Licensee hereby agrees to include in any such work a brief summary of +the changes made to Python 1.6.1. + +4. CNRI is making Python 1.6.1 available to Licensee on an "AS IS" +basis. CNRI MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR +IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, CNRI MAKES NO AND +DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS +FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON 1.6.1 WILL NOT +INFRINGE ANY THIRD PARTY RIGHTS. + +5. 
CNRI SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON +1.6.1 FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS +A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON 1.6.1, +OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. + +6. This License Agreement will automatically terminate upon a material +breach of its terms and conditions. + +7. This License Agreement shall be governed by the federal +intellectual property law of the United States, including without +limitation the federal copyright law, and, to the extent such +U.S. federal law does not apply, by the law of the Commonwealth of +Virginia, excluding Virginia's conflict of law provisions. +Notwithstanding the foregoing, with regard to derivative works based +on Python 1.6.1 that incorporate non-separable material that was +previously distributed under the GNU General Public License (GPL), the +law of the Commonwealth of Virginia shall govern this License +Agreement only as to issues arising under or with respect to +Paragraphs 4, 5, and 7 of this License Agreement. Nothing in this +License Agreement shall be deemed to create any relationship of +agency, partnership, or joint venture between CNRI and Licensee. This +License Agreement does not grant permission to use CNRI trademarks or +trade name in a trademark sense to endorse or promote products or +services of Licensee, or any third party. + +8. By clicking on the "ACCEPT" button where indicated, or by copying, +installing or otherwise using Python 1.6.1, Licensee agrees to be +bound by the terms and conditions of this License Agreement. + + ACCEPT + + +CWI LICENSE AGREEMENT FOR PYTHON 0.9.0 THROUGH 1.2 +-------------------------------------------------- + +Copyright (c) 1991 - 1995, Stichting Mathematisch Centrum Amsterdam, +The Netherlands. All rights reserved. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, +provided that the above copyright notice appear in all copies and that +both that copyright notice and this permission notice appear in +supporting documentation, and that the name of Stichting Mathematisch +Centrum or CWI not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior +permission. + +STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO +THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE +FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT +OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +ZERO-CLAUSE BSD LICENSE FOR CODE IN THE PYTHON DOCUMENTATION +---------------------------------------------------------------------- + +Permission to use, copy, modify, and/or distribute this software for any +purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, +INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR +OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR +PERFORMANCE OF THIS SOFTWARE. \ No newline at end of file diff --git a/mllib-local/benchmarks/BLASBenchmark-jdk21-results.txt b/mllib-local/benchmarks/BLASBenchmark-jdk21-results.txt index 98e8b9592a2f5..b599123685236 100644 --- a/mllib-local/benchmarks/BLASBenchmark-jdk21-results.txt +++ b/mllib-local/benchmarks/BLASBenchmark-jdk21-results.txt @@ -2,337 +2,337 @@ daxpy ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor daxpy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 136 141 3 733.9 1.4 1.0X -java 142 146 3 706.3 1.4 1.0X -native 136 141 2 734.3 1.4 1.0X +f2j 166 169 2 603.4 1.7 1.0X +java 155 157 1 646.0 1.5 1.1X +native 165 170 1 605.0 1.7 1.0X ================================================================================================ saxpy ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor saxpy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 79 82 2 1267.0 0.8 1.0X -java 89 92 1 1118.6 0.9 0.9X -native 80 83 2 1248.2 0.8 1.0X +f2j 90 92 1 1110.4 0.9 1.0X +java 78 80 1 1277.7 0.8 1.2X +native 91 93 1 1098.6 0.9 1.0X ================================================================================================ dcopy ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dcopy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 136 140 3 736.7 1.4 1.0X -java 120 135 12 833.0 1.2 1.1X -native 133 141 7 750.1 1.3 1.0X +f2j 163 168 3 614.6 1.6 1.0X +java 154 162 4 649.8 1.5 1.1X +native 158 162 2 632.7 1.6 1.0X ================================================================================================ scopy ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scopy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 69 77 6 1441.4 0.7 1.0X -java 67 69 1 1498.5 0.7 1.0X -native 67 74 6 1485.8 0.7 1.0X +f2j 78 86 6 1275.7 0.8 1.0X +java 72 81 6 1391.8 0.7 1.1X +native 77 86 6 1291.6 0.8 1.0X 
================================================================================================ ddot ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor ddot: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 95 95 0 1052.1 1.0 1.0X -java 47 49 1 2107.8 0.5 2.0X -native 95 95 0 1054.5 0.9 1.0X +f2j 95 95 0 1052.3 1.0 1.0X +java 51 54 1 1954.2 0.5 1.9X +native 95 95 0 1055.1 0.9 1.0X ================================================================================================ sdot ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sdot: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 1075.5 0.9 1.0X -java 37 38 1 2677.6 0.4 2.5X -native 93 93 0 1076.6 0.9 1.0X +f2j 93 93 0 1074.3 0.9 1.0X +java 26 27 0 3891.3 0.3 3.6X +native 93 93 0 1075.0 0.9 1.0X ================================================================================================ dnrm2 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dnrm2: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 143 144 1 698.1 1.4 1.0X -java 35 37 1 2822.8 0.4 4.0X -native 94 95 1 1065.1 0.9 1.5X +f2j 142 143 2 702.7 1.4 1.0X +java 36 37 0 2791.0 0.4 4.0X +native 94 95 1 1059.9 0.9 1.5X ================================================================================================ snrm2 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor snrm2: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 107 108 1 932.0 1.1 1.0X -java 31 32 0 3231.1 0.3 3.5X -native 91 93 2 1104.9 0.9 1.2X +f2j 123 124 1 813.3 1.2 1.0X +java 18 18 0 5598.9 0.2 6.9X +native 93 93 1 1074.8 0.9 1.3X ================================================================================================ dscal ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dscal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 127 130 2 789.7 1.3 1.0X 
-java 116 119 2 863.3 1.2 1.1X -native 122 126 4 819.4 1.2 1.0X +f2j 153 157 2 655.5 1.5 1.0X +java 135 139 2 740.6 1.4 1.1X +native 150 153 5 667.4 1.5 1.0X ================================================================================================ sscal ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sscal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 75 83 7 1328.3 0.8 1.0X -java 52 59 5 1911.0 0.5 1.4X -native 67 74 7 1502.7 0.7 1.1X +f2j 83 91 7 1204.0 0.8 1.0X +java 63 70 4 1593.5 0.6 1.3X +native 77 84 7 1304.7 0.8 1.1X ================================================================================================ dgemv[N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemv[N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 96 96 1 1041.2 1.0 1.0X -java 31 32 1 3241.1 0.3 3.1X -native 43 44 1 2303.4 0.4 2.2X +f2j 97 97 1 1031.7 1.0 1.0X +java 25 26 0 3958.2 0.3 3.8X +native 55 57 1 1814.1 0.6 1.8X ================================================================================================ dgemv[T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemv[T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 92 93 0 1085.6 0.9 1.0X -java 31 33 0 3187.8 0.3 2.9X -native 92 93 0 1086.1 0.9 1.0X +f2j 93 94 0 1070.0 0.9 1.0X +java 25 26 0 3995.9 0.3 3.7X +native 94 94 0 1068.7 0.9 1.0X ================================================================================================ sgemv[N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemv[N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 96 0 1074.4 0.9 1.0X -java 32 33 0 3131.9 0.3 2.9X -native 33 35 1 3028.8 0.3 2.8X +f2j 95 96 0 1049.7 1.0 1.0X +java 13 14 0 7739.8 0.1 7.4X +native 36 38 1 2745.4 0.4 2.6X ================================================================================================ sgemv[T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemv[T]: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 1079.9 0.9 1.0X -java 23 24 0 4315.5 0.2 4.0X -native 89 93 0 1121.6 0.9 1.0X +f2j 93 93 0 1078.6 0.9 1.0X +java 13 14 0 7912.9 0.1 7.3X +native 93 93 0 1079.6 0.9 1.0X ================================================================================================ dger ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dger: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 130 133 1 768.3 1.3 1.0X -java 139 144 9 718.8 1.4 0.9X -native 130 135 4 766.6 1.3 1.0X +f2j 158 161 1 633.4 1.6 1.0X +java 131 134 2 763.1 1.3 1.2X +native 158 162 2 634.7 1.6 1.0X ================================================================================================ dspmv[U] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dspmv[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 89 89 0 561.3 1.8 1.0X -java 49 50 1 1022.3 1.0 1.8X -native 46 47 0 1076.9 0.9 1.9X +f2j 89 90 0 560.1 1.8 1.0X +java 13 14 0 3730.5 0.3 6.7X +native 47 47 0 1063.5 0.9 1.9X ================================================================================================ dspr[U] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dspr[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 95 2 535.3 1.9 1.0X -java 83 91 8 601.4 1.7 1.1X -native 59 65 6 853.9 1.2 1.6X +f2j 93 100 8 537.7 1.9 1.0X +java 92 100 8 541.3 1.8 1.0X +native 73 80 8 683.4 1.5 1.3X ================================================================================================ dsyr[U] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dsyr[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 107 109 1 466.5 2.1 1.0X -java 107 109 1 468.7 2.1 1.0X -native 108 111 1 464.7 2.2 1.0X +f2j 148 151 1 337.0 3.0 1.0X +java 149 151 1 336.2 3.0 1.0X +native 128 132 1 389.5 2.6 1.2X ================================================================================================ dgemm[N,N] ================================================================================================ -OpenJDK 64-Bit Server VM 
21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemm[N,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 652 666 2 1534.0 0.7 1.0X -java 421 424 1 2372.7 0.4 1.5X -native 629 634 10 1589.5 0.6 1.0X +f2j 668 669 3 1497.4 0.7 1.0X +java 63 69 4 15802.2 0.1 10.6X +native 631 633 3 1584.1 0.6 1.1X ================================================================================================ dgemm[N,T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemm[N,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 663 668 2 1508.2 0.7 1.0X -java 418 424 2 2392.8 0.4 1.6X -native 374 376 1 2673.7 0.4 1.8X +f2j 661 663 1 1512.6 0.7 1.0X +java 64 68 4 15730.6 0.1 10.4X +native 374 376 1 2672.3 0.4 1.8X ================================================================================================ dgemm[T,N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemm[T,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 900 901 2 1111.7 0.9 1.0X -java 421 424 3 2377.0 0.4 2.1X -native 901 903 1 1109.6 0.9 1.0X +f2j 899 901 1 1111.9 0.9 1.0X +java 63 68 4 15890.7 0.1 14.3X +native 902 903 1 1108.7 0.9 1.0X ================================================================================================ dgemm[T,T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemm[T,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 945 948 6 1058.3 0.9 1.0X -java 421 424 1 2377.0 0.4 2.2X -native 907 914 2 1102.1 0.9 1.0X +f2j 940 943 4 1063.9 0.9 1.0X +java 63 68 5 15828.7 0.1 14.9X +native 914 916 1 1094.3 0.9 1.0X ================================================================================================ sgemm[N,N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemm[N,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 640 647 2 1562.1 0.6 1.0X -java 415 421 2 2411.2 0.4 1.5X -native 362 370 2 2758.9 0.4 1.8X +f2j 649 650 1 1541.4 0.6 1.0X +java 40 41 1 25057.3 0.0 16.3X 
+native 371 372 1 2696.7 0.4 1.7X ================================================================================================ sgemm[N,T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemm[N,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 641 649 2 1560.0 0.6 1.0X -java 414 422 2 2415.8 0.4 1.5X -native 371 372 1 2696.9 0.4 1.7X +f2j 650 651 1 1538.5 0.6 1.0X +java 40 41 1 24717.0 0.0 16.1X +native 371 372 1 2692.9 0.4 1.8X ================================================================================================ sgemm[T,N] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemm[T,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 898 899 2 1113.0 0.9 1.0X -java 418 421 1 2390.6 0.4 2.1X -native 913 918 1 1095.4 0.9 1.0X +f2j 900 901 0 1111.2 0.9 1.0X +java 40 42 1 25076.9 0.0 22.6X +native 917 920 2 1090.1 0.9 1.0X ================================================================================================ sgemm[T,T] ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemm[T,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 935 938 6 1070.0 0.9 1.0X -java 419 422 1 2387.2 0.4 2.2X -native 898 913 2 1113.1 0.9 1.0X +f2j 940 943 1 1063.6 0.9 1.0X +java 40 42 1 24825.6 0.0 23.3X +native 914 916 1 1094.0 0.9 1.0X diff --git a/mllib-local/benchmarks/BLASBenchmark-results.txt b/mllib-local/benchmarks/BLASBenchmark-results.txt index 8bafcdd8fe702..8fde701d5b2b9 100644 --- a/mllib-local/benchmarks/BLASBenchmark-results.txt +++ b/mllib-local/benchmarks/BLASBenchmark-results.txt @@ -2,337 +2,337 @@ daxpy ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor daxpy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 145 153 5 691.6 1.4 1.0X -java 154 158 2 647.5 1.5 0.9X -native 148 152 5 676.5 1.5 1.0X +f2j 152 157 2 656.7 1.5 1.0X +java 146 152 3 686.0 1.5 1.0X +native 154 160 2 647.8 1.5 1.0X ================================================================================================ saxpy ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 
17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor saxpy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 85 89 3 1177.3 0.8 1.0X -java 92 94 1 1084.0 0.9 0.9X -native 85 88 2 1173.3 0.9 1.0X +f2j 86 88 1 1167.6 0.9 1.0X +java 73 76 1 1367.1 0.7 1.2X +native 87 89 1 1150.2 0.9 1.0X ================================================================================================ dcopy ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dcopy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 141 147 5 707.3 1.4 1.0X -java 138 143 3 723.3 1.4 1.0X -native 143 150 4 697.8 1.4 1.0X +f2j 149 156 3 673.1 1.5 1.0X +java 148 154 3 676.5 1.5 1.0X +native 149 154 2 668.9 1.5 1.0X ================================================================================================ scopy ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scopy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 82 84 1 1221.3 0.8 1.0X -java 71 74 1 1402.9 0.7 1.1X -native 79 82 1 1259.0 0.8 1.0X +f2j 83 86 1 1199.9 0.8 1.0X +java 75 78 2 1337.1 0.7 1.1X +native 81 83 1 1240.3 0.8 1.0X ================================================================================================ ddot ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor ddot: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 95 96 1 1049.5 1.0 1.0X -java 47 50 2 2111.5 0.5 2.0X -native 95 95 2 1055.4 0.9 1.0X +f2j 95 96 0 1048.1 1.0 1.0X +java 45 48 2 2208.4 0.5 2.1X +native 95 96 1 1053.9 0.9 1.0X ================================================================================================ sdot ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sdot: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 1 1072.9 0.9 1.0X -java 36 37 0 2741.2 0.4 2.6X -native 93 93 1 1074.6 0.9 1.0X +f2j 93 93 0 1073.1 0.9 1.0X +java 22 23 1 4467.0 0.2 4.2X +native 93 93 0 1075.0 0.9 1.0X ================================================================================================ dnrm2 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dnrm2: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 143 144 3 699.2 1.4 1.0X -java 37 40 2 2696.5 0.4 3.9X -native 94 95 3 1060.8 0.9 1.5X +f2j 143 143 1 699.6 1.4 1.0X +java 32 33 0 3090.1 0.3 4.4X +native 94 95 1 1059.3 0.9 1.5X ================================================================================================ snrm2 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor snrm2: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 116 116 1 863.5 1.2 1.0X -java 32 32 0 3134.9 0.3 3.6X -native 93 93 1 1074.7 0.9 1.2X +f2j 116 116 1 862.5 1.2 1.0X +java 16 16 0 6189.1 0.2 7.2X +native 93 93 0 1074.4 0.9 1.2X ================================================================================================ dscal ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dscal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 137 141 2 732.4 1.4 1.0X -java 125 129 2 799.9 1.3 1.1X -native 135 138 2 743.4 1.3 1.0X +f2j 144 148 3 696.6 1.4 1.0X +java 132 136 2 757.4 1.3 1.1X +native 139 145 2 718.7 1.4 1.0X ================================================================================================ sscal ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sscal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 88 91 1 1131.6 0.9 1.0X -java 65 68 3 1537.5 0.7 1.4X -native 80 82 1 1255.9 0.8 1.1X +f2j 88 90 1 1130.9 0.9 1.0X +java 66 69 1 1506.1 0.7 1.3X +native 80 83 1 1244.9 0.8 1.1X ================================================================================================ dgemv[N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemv[N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 96 97 2 1036.4 1.0 1.0X -java 33 33 1 3047.4 0.3 2.9X -native 44 47 2 2250.1 0.4 2.2X +f2j 96 97 0 1038.0 
1.0 1.0X +java 23 24 0 4285.5 0.2 4.1X +native 46 48 1 2175.7 0.5 2.1X ================================================================================================ dgemv[T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemv[T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 94 94 1 1061.4 0.9 1.0X -java 32 33 0 3078.4 0.3 2.9X -native 93 94 1 1071.3 0.9 1.0X +f2j 94 95 1 1061.5 0.9 1.0X +java 23 24 0 4279.5 0.2 4.0X +native 93 94 0 1072.6 0.9 1.0X ================================================================================================ sgemv[N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemv[N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 95 95 1 1052.9 0.9 1.0X -java 38 39 0 2602.1 0.4 2.5X -native 34 36 1 2932.4 0.3 2.8X +f2j 95 95 0 1053.1 0.9 1.0X +java 12 13 0 8517.5 0.1 8.1X +native 34 36 1 2909.1 0.3 2.8X ================================================================================================ sgemv[T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemv[T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 93 93 0 1078.0 0.9 1.0X -java 28 28 0 3609.5 0.3 3.3X -native 93 93 0 1078.9 0.9 1.0X +f2j 93 93 0 1077.2 0.9 1.0X +java 12 12 0 8423.1 0.1 7.8X +native 93 93 0 1078.4 0.9 1.0X ================================================================================================ dger ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dger: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 142 146 2 705.7 1.4 1.0X -java 147 151 4 681.2 1.5 1.0X -native 139 144 5 721.4 1.4 1.0X +f2j 149 154 2 672.9 1.5 1.0X +java 130 134 2 768.8 1.3 1.1X +native 146 150 2 686.0 1.5 1.0X ================================================================================================ dspmv[U] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dspmv[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -f2j 100 100 1 501.6 2.0 1.0X -java 48 48 0 1038.9 1.0 2.1X -native 47 47 0 1065.8 0.9 2.1X +f2j 100 100 0 500.5 2.0 1.0X +java 12 13 0 4225.7 0.2 8.4X +native 47 47 0 1066.5 0.9 2.1X ================================================================================================ dspr[U] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dspr[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 95 98 2 526.0 1.9 1.0X -java 96 98 1 523.0 1.9 1.0X -native 71 76 2 701.6 1.4 1.3X +f2j 97 99 1 515.9 1.9 1.0X +java 97 99 1 517.1 1.9 1.0X +native 74 78 1 677.5 1.5 1.3X ================================================================================================ dsyr[U] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dsyr[U]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 118 122 2 424.8 2.4 1.0X -java 119 123 3 420.6 2.4 1.0X -native 117 122 3 426.4 2.3 1.0X +f2j 145 149 2 345.3 2.9 1.0X +java 146 150 2 342.4 2.9 1.0X +native 122 128 2 409.9 2.4 1.2X ================================================================================================ dgemm[N,N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemm[N,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 670 671 2 1492.7 0.7 1.0X -java 422 424 3 2370.0 0.4 1.6X -native 631 633 4 1586.0 0.6 1.1X +f2j 670 673 4 1491.8 0.7 1.0X +java 65 72 1 15466.7 0.1 10.4X +native 632 634 3 1583.1 0.6 1.1X ================================================================================================ dgemm[N,T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemm[N,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 672 677 5 1487.0 0.7 1.0X -java 422 424 3 2368.6 0.4 1.6X -native 375 377 3 2666.7 0.4 1.8X +f2j 673 675 2 1486.3 0.7 1.0X +java 72 73 2 13912.8 0.1 9.4X +native 376 377 1 2662.5 0.4 1.8X ================================================================================================ dgemm[T,N] ================================================================================================ -OpenJDK 64-Bit Server VM 
17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemm[T,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 923 925 6 1083.4 0.9 1.0X -java 421 423 4 2372.8 0.4 2.2X -native 902 906 8 1108.1 0.9 1.0X +f2j 923 925 1 1082.9 0.9 1.0X +java 64 72 1 15595.7 0.1 14.4X +native 902 904 1 1108.7 0.9 1.0X ================================================================================================ dgemm[T,T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dgemm[T,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 955 957 5 1047.1 1.0 1.0X -java 422 423 3 2370.8 0.4 2.3X -native 915 917 3 1092.7 0.9 1.0X +f2j 953 957 2 1049.0 1.0 1.0X +java 65 73 1 15430.1 0.1 14.7X +native 915 917 1 1092.5 0.9 1.0X ================================================================================================ sgemm[N,N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemm[N,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 649 651 2 1539.7 0.6 1.0X -java 420 421 3 2381.8 0.4 1.5X -native 371 372 1 2693.9 0.4 1.7X +f2j 649 651 2 1540.5 0.6 1.0X +java 41 42 1 24371.9 0.0 15.8X +native 371 373 5 2695.0 0.4 1.7X ================================================================================================ sgemm[N,T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemm[N,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 650 652 4 1538.8 0.6 1.0X -java 420 421 3 2381.1 0.4 1.5X -native 372 373 4 2689.5 0.4 1.7X +f2j 651 653 3 1535.9 0.7 1.0X +java 41 42 1 24106.8 0.0 15.7X +native 372 373 1 2688.0 0.4 1.8X ================================================================================================ sgemm[T,N] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemm[T,N]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 902 903 1 1108.5 0.9 1.0X -java 419 420 3 2386.9 0.4 2.2X -native 918 920 4 1089.6 0.9 1.0X +f2j 903 904 1 1107.6 0.9 1.0X +java 40 41 1 24712.8 0.0 22.3X 
+native 919 921 1 1088.1 0.9 1.0X ================================================================================================ sgemm[T,T] ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sgemm[T,T]: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -f2j 949 952 8 1053.6 0.9 1.0X -java 419 420 2 2384.2 0.4 2.3X -native 914 916 5 1094.3 0.9 1.0X +f2j 950 953 5 1052.9 0.9 1.0X +java 41 42 1 24365.2 0.0 23.1X +native 915 916 1 1093.0 0.9 1.0X diff --git a/mllib/benchmarks/UDTSerializationBenchmark-jdk21-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-jdk21-results.txt index c66e79500f4d3..af675fe609fe2 100644 --- a/mllib/benchmarks/UDTSerializationBenchmark-jdk21-results.txt +++ b/mllib/benchmarks/UDTSerializationBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ VectorUDT de/serialization ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor VectorUDT de/serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -serialize 102 106 3 0.0 101512.2 1.0X -deserialize 75 78 1 0.0 75331.6 1.3X +serialize 95 97 1 0.0 94974.9 1.0X +deserialize 67 69 1 0.0 66631.2 1.4X diff --git a/mllib/benchmarks/UDTSerializationBenchmark-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-results.txt index c54679dfa205f..d22630df30dd8 100644 --- a/mllib/benchmarks/UDTSerializationBenchmark-results.txt +++ b/mllib/benchmarks/UDTSerializationBenchmark-results.txt @@ -2,11 +2,11 @@ VectorUDT de/serialization ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor VectorUDT de/serialization: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -serialize 109 113 3 0.0 109208.6 1.0X -deserialize 77 80 1 0.0 77116.0 1.4X +serialize 90 99 3 0.0 89569.0 1.0X +deserialize 68 72 3 0.0 68026.4 1.3X diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala index 41f39461f71a6..83b77510602b2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala @@ -18,6 +18,7 @@ package org.apache.spark.ml import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.linalg.VectorUDT import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ @@ -192,8 +193,8 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, if ($(predictionCol).nonEmpty) { transformImpl(dataset) } else { - this.logWarning(s"$uid: Predictor.transform() does nothing" + - " because no output 
columns were set.") + logWarning(log"${MDC(LogKeys.UUID, uid)}: Predictor.transform() does nothing because " + + log"no output columns were set.") dataset.toDF() } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala index e12c68f31099e..7883a0dea54f1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala @@ -18,6 +18,7 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.{PredictionModel, Predictor, PredictorParams} import org.apache.spark.ml.linalg.{Vector, VectorUDT} import org.apache.spark.ml.param.ParamMap @@ -149,8 +150,8 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur } if (numColsOutput == 0) { - logWarning(s"$uid: ClassificationModel.transform() does nothing" + - " because no output columns were set.") + logWarning(log"${MDC(LogKeys.UUID, uid)}: ClassificationModel.transform() does nothing " + + log"because no output columns were set.") } outputData.toDF() } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 024693ba06f20..4bcc7877658d1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.annotation.Since import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{COUNT, RANGE} +import org.apache.spark.internal.LogKeys.{COUNT, RANGE} import org.apache.spark.ml.feature._ import org.apache.spark.ml.linalg._ import org.apache.spark.ml.optim.aggregator._ @@ -179,8 +179,8 @@ class LinearSVC @Since("2.2.0") ( maxBlockSizeInMB) if (dataset.storageLevel != StorageLevel.NONE) { - instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + - s"then cached during training. Be careful of double caching!") + instr.logWarning("Input instances will be standardized, blockified to blocks, and " + + "then cached during training. 
Be careful of double caching!") } val instances = dataset.select( diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 0d487377b9319..b3c48f13591fd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -27,8 +27,8 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.annotation.Since -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{COUNT, RANGE} +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{COUNT, RANGE} import org.apache.spark.ml.feature._ import org.apache.spark.ml.impl.Utils import org.apache.spark.ml.linalg._ @@ -503,8 +503,8 @@ class LogisticRegression @Since("1.2.0") ( tol, fitIntercept, maxBlockSizeInMB) if (dataset.storageLevel != StorageLevel.NONE) { - instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + - s"then cached during training. Be careful of double caching!") + instr.logWarning("Input instances will be standardized, blockified to blocks, and " + + "then cached during training. Be careful of double caching!") } val instances = dataset.select( @@ -569,8 +569,8 @@ class LogisticRegression @Since("1.2.0") ( val isConstantLabel = histogram.count(_ != 0.0) == 1 if ($(fitIntercept) && isConstantLabel && !usingBoundConstrainedOptimization) { - instr.logWarning(s"All labels are the same value and fitIntercept=true, so the " + - s"coefficients will be zeros. Training is not needed.") + instr.logWarning("All labels are the same value and fitIntercept=true, so the " + + "coefficients will be zeros. Training is not needed.") val constantLabelIndex = Vectors.dense(histogram).argmax val coefMatrix = new SparseMatrix(numCoefficientSets, numFeatures, new Array[Int](numCoefficientSets + 1), Array.emptyIntArray, Array.emptyDoubleArray, @@ -584,8 +584,8 @@ class LogisticRegression @Since("1.2.0") ( } if (!$(fitIntercept) && isConstantLabel) { - instr.logWarning(s"All labels belong to a single class and fitIntercept=false. It's a " + - s"dangerous ground, so the algorithm may not converge.") + instr.logWarning("All labels belong to a single class and fitIntercept=false. It's a " + + "dangerous ground, so the algorithm may not converge.") } val featuresMean = summarizer.mean.toArray @@ -847,9 +847,11 @@ class LogisticRegression @Since("1.2.0") ( (_initialModel.interceptVector.size == numCoefficientSets) && (_initialModel.getFitIntercept == $(fitIntercept)) if (!modelIsValid) { - instr.logWarning(s"Initial coefficients will be ignored! Its dimensions " + - s"(${providedCoefs.numRows}, ${providedCoefs.numCols}) did not match the " + - s"expected size ($numCoefficientSets, $numFeatures)") + instr.logWarning(log"Initial coefficients will be ignored! 
Its dimensions " + + log"(${MDC(LogKeys.NUM_ROWS, providedCoefs.numRows)}}, " + + log"${MDC(LogKeys.NUM_COLUMNS, providedCoefs.numCols)}) did not match the " + + log"expected size (${MDC(LogKeys.NUM_COEFFICIENTS, numCoefficientSets)}, " + + log"${MDC(LogKeys.NUM_FEATURES, numFeatures)})") } modelIsValid case None => false diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala index b70f3ddd4c14d..18643f74b700f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala @@ -30,6 +30,7 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml._ import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.{Vector, Vectors} @@ -180,8 +181,8 @@ final class OneVsRestModel private[ml] ( val outputSchema = transformSchema(dataset.schema, logging = true) if (getPredictionCol.isEmpty && getRawPredictionCol.isEmpty) { - logWarning(s"$uid: OneVsRestModel.transform() does nothing" + - " because no output columns were set.") + logWarning(log"${MDC(LogKeys.UUID, uid)}: OneVsRestModel.transform() does nothing " + + log"because no output columns were set.") return dataset.toDF() } @@ -400,7 +401,8 @@ final class OneVsRest @Since("1.4.0") ( getClassifier match { case _: HasWeightCol => true case c => - instr.logWarning(s"weightCol is ignored, as it is not supported by $c now.") + instr.logWarning(log"weightCol is ignored, as it is not supported by " + + log"${MDC(LogKeys.CLASSIFIER, c)} now.") false } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala index 460f2398a4628..61fab02cb4518 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala @@ -18,6 +18,7 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.linalg.{DenseVector, Vector, VectorUDT} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared._ @@ -154,8 +155,8 @@ abstract class ProbabilisticClassificationModel[ } if (numColsOutput == 0) { - this.logWarning(s"$uid: ProbabilisticClassificationModel.transform() does nothing" + - " because no output columns were set.") + this.logWarning(log"${MDC(LogKeys.UUID, uid)}: ProbabilisticClassificationModel.transform()" + + log" does nothing because no output columns were set.") } outputData.toDF() } diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 19ae8359b9a37..a68b2fc0dec83 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.annotation.Since import org.apache.spark.broadcast.Broadcast +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.impl.Utils.{unpackUpperTriangular, EPSILON} 
import org.apache.spark.ml.linalg._ @@ -142,8 +143,8 @@ class GaussianMixtureModel private[ml] ( } if (numColsOutput == 0) { - this.logWarning(s"$uid: GaussianMixtureModel.transform() does nothing" + - " because no output columns were set.") + this.logWarning(log"${MDC(LogKeys.UUID, uid)}: GaussianMixtureModel.transform() does " + + log"nothing because no output columns were set.") } outputData.toDF() } diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 519978a0733b9..04f76660aee6a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -22,6 +22,8 @@ import scala.collection.mutable import org.apache.hadoop.fs.Path import org.apache.spark.annotation.Since +import org.apache.spark.internal.LogKeys.{COST, INIT_MODE, NUM_ITERATIONS, TOTAL_TIME} +import org.apache.spark.internal.MDC import org.apache.spark.ml.{Estimator, Model, PipelineStage} import org.apache.spark.ml.feature.{Instance, InstanceBlock} import org.apache.spark.ml.linalg._ @@ -449,14 +451,15 @@ class KMeans @Since("1.5.0") ( private def trainWithBlock(dataset: Dataset[_], instr: Instrumentation) = { if (dataset.storageLevel != StorageLevel.NONE) { - instr.logWarning(s"Input vectors will be blockified to blocks, and " + - s"then cached during training. Be careful of double caching!") + instr.logWarning("Input vectors will be blockified to blocks, and " + + "then cached during training. Be careful of double caching!") } - val initStartTime = System.nanoTime + val initStartTime = System.currentTimeMillis val centers = initialize(dataset) - val initTimeInSeconds = (System.nanoTime - initStartTime) / 1e9 - instr.logInfo(f"Initialization with ${$(initMode)} took $initTimeInSeconds%.3f seconds.") + val initTimeMs = System.currentTimeMillis - initStartTime + instr.logInfo(log"Initialization with ${MDC(INIT_MODE, $(initMode))} took " + + log"${MDC(TOTAL_TIME, initTimeMs)} ms.") val numFeatures = centers.head.size instr.logNumFeatures(numFeatures) @@ -492,7 +495,7 @@ class KMeans @Since("1.5.0") ( val distanceFunction = getDistanceFunction val sc = dataset.sparkSession.sparkContext - val iterationStartTime = System.nanoTime + val iterationStartTime = System.currentTimeMillis var converged = false var cost = 0.0 var iteration = 0 @@ -549,15 +552,16 @@ class KMeans @Since("1.5.0") ( } blocks.unpersist() - val iterationTimeInSeconds = (System.nanoTime() - iterationStartTime) / 1e9 - instr.logInfo(f"Iterations took $iterationTimeInSeconds%.3f seconds.") + val iterationTimeMs = System.currentTimeMillis - iterationStartTime + instr.logInfo(log"Iterations took ${MDC(TOTAL_TIME, iterationTimeMs)} ms.") if (iteration == $(maxIter)) { - instr.logInfo(s"KMeans reached the max number of iterations: ${$(maxIter)}.") + instr.logInfo(log"KMeans reached the max number of iterations: " + + log"${MDC(NUM_ITERATIONS, $(maxIter))}.") } else { - instr.logInfo(s"KMeans converged in $iteration iterations.") + instr.logInfo(log"KMeans converged in ${MDC(NUM_ITERATIONS, iteration)} iterations.") } - instr.logInfo(s"The cost is $cost.") + instr.logInfo(log"The cost is ${MDC(COST, cost)}.") new MLlibKMeansModel(centers.map(OldVectors.fromML), $(distanceMeasure), cost, iteration) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 3727eb17dcd0a..c726aed14ee51 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg._ @@ -139,8 +140,9 @@ final class Binarizer @Since("1.4.0") (@Since("1.4.0") override val uid: String) }.apply(col(colName)) case _: VectorUDT if td < 0 => - this.logWarning(s"Binarization operations on sparse dataset with negative threshold " + - s"$td will build a dense output, so take care when applying to sparse input.") + logWarning(log"Binarization operations on sparse dataset with negative threshold " + + log"${MDC(LogKeys.THRESHOLD, td)} will build a dense output, so take care when " + + log"applying to sparse input.") udf { vector: Vector => val values = Array.fill(vector.size)(1.0) var nnz = vector.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 5862a60a407d4..93956fc1811ef 100755 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -20,6 +20,7 @@ package org.apache.spark.ml.feature import java.util.Locale import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasInputCol, HasInputCols, HasOutputCol, HasOutputCols} @@ -129,9 +130,9 @@ class StopWordsRemover @Since("1.5.0") (@Since("1.5.0") override val uid: String if (Locale.getAvailableLocales.contains(Locale.getDefault)) { Locale.getDefault } else { - logWarning(s"Default locale set was [${Locale.getDefault.toString}]; however, it was " + - "not found in available locales in JVM, falling back to en_US locale. Set param `locale` " + - "in order to respect another locale.") + logWarning(log"Default locale set was [${MDC(LogKeys.LOCALE, Locale.getDefault)}]; " + + log"however, it was not found in available locales in JVM, falling back to en_US locale. " + + log"Set param `locale` in order to respect another locale.") Locale.US } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index f52f56174ed23..60dc4d0240716 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -21,6 +21,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.{Estimator, Model, Transformer} import org.apache.spark.ml.attribute.{Attribute, NominalAttribute} import org.apache.spark.ml.param._ @@ -431,8 +432,8 @@ class StringIndexerModel ( val labels = labelsArray(i) if (!dataset.schema.fieldNames.contains(inputColName)) { - logWarning(s"Input column ${inputColName} does not exist during transformation. " + - "Skip StringIndexerModel for this column.") + logWarning(log"Input column ${MDC(LogKeys.COLUMN_NAME, inputColName)} does not exist " + + log"during transformation. 
Skip StringIndexerModel for this column.") outputColNames(i) = null } else { val filteredLabels = getHandleInvalid match { diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala index c2fe001d4048d..17adf61e46e9d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/IterativelyReweightedLeastSquares.scala @@ -17,10 +17,13 @@ package org.apache.spark.ml.optim +import org.apache.spark.internal.LogKeys.{NUM_ITERATIONS, RELATIVE_TOLERANCE} +import org.apache.spark.internal.MDC import org.apache.spark.ml.feature.{Instance, OffsetInstance} import org.apache.spark.ml.linalg._ import org.apache.spark.ml.util.OptionalInstrumentation import org.apache.spark.rdd.RDD +import org.apache.spark.util.MavenUtils.LogStringContext /** * Model fitted by [[IterativelyReweightedLeastSquares]]. @@ -101,14 +104,15 @@ private[ml] class IterativelyReweightedLeastSquares( if (maxTol < tol) { converged = true - instr.logInfo(s"IRLS converged in $iter iterations.") + instr.logInfo(log"IRLS converged in ${MDC(NUM_ITERATIONS, iter)} iterations.") } - instr.logInfo(s"Iteration $iter : relative tolerance = $maxTol") + instr.logInfo(log"Iteration ${MDC(NUM_ITERATIONS, iter)}: " + + log"relative tolerance = ${MDC(RELATIVE_TOLERANCE, maxTol)}") iter = iter + 1 if (iter == maxIter) { - instr.logInfo(s"IRLS reached the max number of iterations: $maxIter.") + instr.logInfo(log"IRLS reached the max number of iterations: ${MDC(NUM_ITERATIONS, iter)}.") } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala index 9acc20b8eb2e8..eff100cc3ae3a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala @@ -17,10 +17,13 @@ package org.apache.spark.ml.optim +import org.apache.spark.internal.LogKeys.COUNT +import org.apache.spark.internal.MDC import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg._ import org.apache.spark.ml.util.OptionalInstrumentation import org.apache.spark.rdd.RDD +import org.apache.spark.util.MavenUtils.LogStringContext /** * Model fitted by [[WeightedLeastSquares]]. 
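
Editor's note: the MLlib hunks above all perform the same migration, replacing plain s-interpolated log strings with Spark's structured log interpolator and MDC-tagged keys from org.apache.spark.internal.LogKeys. The following is a minimal sketch of that pattern, not part of the patch itself: the class LoggingPatternExample, its method warnMissingColumn, and the message text are hypothetical, and the file is placed under org.apache.spark.ml only because the internal Logging trait is package-private.

package org.apache.spark.ml

import org.apache.spark.internal.{Logging, LogKeys, MDC}

// Hypothetical transformer used only to illustrate the structured-logging
// pattern adopted in the hunks above. Mixing in the internal Logging trait
// provides both the logWarning overloads and the log"..." interpolator.
class LoggingPatternExample(uid: String) extends Logging {

  def warnMissingColumn(inputColName: String): Unit = {
    // Each interpolated value is wrapped in MDC(<LogKey>, value): the rendered
    // message stays human readable while the key/value pair is also emitted
    // as structured metadata for downstream log aggregation.
    logWarning(log"Input column ${MDC(LogKeys.COLUMN_NAME, inputColName)} does not " +
      log"exist during transformation. ${MDC(LogKeys.UUID, uid)} will skip it.")
  }
}

As in the hunks above, long messages are built by concatenating several log"..." fragments with +, so each MDC key stays attached to the fragment that mentions it.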
@@ -106,7 +109,7 @@ private[ml] class WeightedLeastSquares( val summary = instances.treeAggregate(new Aggregator)(_.add(_), _.merge(_), depth) summary.validate() - instr.logInfo(s"Number of instances: ${summary.count}.") + instr.logInfo(log"Number of instances: ${MDC(COUNT, summary.count)}.") val k = if (fitIntercept) summary.k + 1 else summary.k val numFeatures = summary.k val triK = summary.triK @@ -121,13 +124,13 @@ private[ml] class WeightedLeastSquares( if (rawBStd == 0) { if (fitIntercept || rawBBar == 0.0) { if (rawBBar == 0.0) { - instr.logWarning(s"Mean and standard deviation of the label are zero, so the " + - s"coefficients and the intercept will all be zero; as a result, training is not " + - s"needed.") + instr.logWarning("Mean and standard deviation of the label are zero, so the " + + "coefficients and the intercept will all be zero; as a result, training is not " + + "needed.") } else { - instr.logWarning(s"The standard deviation of the label is zero, so the coefficients " + - s"will be zeros and the intercept will be the mean of the label; as a result, " + - s"training is not needed.") + instr.logWarning("The standard deviation of the label is zero, so the coefficients " + + "will be zeros and the intercept will be the mean of the label; as a result, " + + "training is not needed.") } val coefficients = new DenseVector(Array.ofDim(numFeatures)) val intercept = rawBBar @@ -136,8 +139,8 @@ private[ml] class WeightedLeastSquares( } else { require(!(regParam > 0.0 && standardizeLabel), "The standard deviation of the label is " + "zero. Model cannot be regularized when labels are standardized.") - instr.logWarning(s"The standard deviation of the label is zero. Consider setting " + - s"fitIntercept=true.") + instr.logWarning("The standard deviation of the label is zero. 
Consider setting " + + "fitIntercept=true.") } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/RWrapperUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/r/RWrapperUtils.scala index 665e50af67d46..9e66647ef35fb 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/RWrapperUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/RWrapperUtils.scala @@ -17,7 +17,8 @@ package org.apache.spark.ml.r -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{FEATURE_COLUMN, LABEL_COLUMN, NEW_FEATURE_COLUMN_NAME, NEW_LABEL_COLUMN_NAME} import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute} import org.apache.spark.ml.feature.{RFormula, RFormulaModel} import org.apache.spark.ml.util.Identifiable @@ -37,15 +38,15 @@ private[r] object RWrapperUtils extends Logging { def checkDataColumns(rFormula: RFormula, data: Dataset[_]): Unit = { if (data.schema.fieldNames.contains(rFormula.getFeaturesCol)) { val newFeaturesName = s"${Identifiable.randomUID(rFormula.getFeaturesCol)}" - logInfo(s"data containing ${rFormula.getFeaturesCol} column, " + - s"using new name $newFeaturesName instead") + logInfo(log"data containing ${MDC(FEATURE_COLUMN, rFormula.getFeaturesCol)} column, " + + log"using new name ${MDC(NEW_FEATURE_COLUMN_NAME, newFeaturesName)} instead") rFormula.setFeaturesCol(newFeaturesName) } if (rFormula.getForceIndexLabel && data.schema.fieldNames.contains(rFormula.getLabelCol)) { val newLabelName = s"${Identifiable.randomUID(rFormula.getLabelCol)}" - logInfo(s"data containing ${rFormula.getLabelCol} column and we force to index label, " + - s"using new name $newLabelName instead") + logInfo(log"data containing ${MDC(LABEL_COLUMN, rFormula.getLabelCol)} column and we force " + + log"to index label, using new name ${MDC(NEW_LABEL_COLUMN_NAME, newLabelName)} instead") rFormula.setLabelCol(newLabelName) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 1e6be16ef62b7..50f94a5799444 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -33,7 +33,8 @@ import org.json4s.JsonDSL._ import org.apache.spark.{Partitioner, SparkException} import org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.linalg.BLAS import org.apache.spark.ml.param._ @@ -1027,7 +1028,7 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { checkpointFile.getFileSystem(sc.hadoopConfiguration).delete(checkpointFile, true) } catch { case e: IOException => - logWarning(s"Cannot delete checkpoint file $file:", e) + logWarning(log"Cannot delete checkpoint file ${MDC(PATH, file)}:", e) } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 57d20bcd6f49d..788ad65497dfc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -24,7 +24,7 @@ import breeze.optimize.{CachedDiffFunction, LBFGS => BreezeLBFGS} import org.apache.hadoop.fs.Path import 
org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.feature._ import org.apache.spark.ml.linalg._ @@ -206,8 +206,8 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S instr.logNamedValue("quantileProbabilities.size", $(quantileProbabilities).length) if (dataset.storageLevel != StorageLevel.NONE) { - instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + - s"then cached during training. Be careful of double caching!") + instr.logWarning("Input instances will be standardized, blockified to blocks, and " + + "then cached during training. Be careful of double caching!") } val validatedCensorCol = { @@ -441,8 +441,8 @@ class AFTSurvivalRegressionModel private[ml] ( if (predictionColNames.nonEmpty) { dataset.withColumns(predictionColNames, predictionColumns) } else { - this.logWarning(s"$uid: AFTSurvivalRegressionModel.transform() does nothing" + - " because no output columns were set.") + this.logWarning(log"${MDC(LogKeys.UUID, uid)}: AFTSurvivalRegressionModel.transform() " + + log"does nothing because no output columns were set.") dataset.toDF() } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 6c0089b689499..481e8c8357f16 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -22,6 +22,7 @@ import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.ParamMap @@ -238,8 +239,8 @@ class DecisionTreeRegressionModel private[ml] ( if (predictionColNames.nonEmpty) { dataset.withColumns(predictionColNames, predictionColumns) } else { - this.logWarning(s"$uid: DecisionTreeRegressionModel.transform() does nothing" + - " because no output columns were set.") + this.logWarning(log"${MDC(LogKeys.UUID, uid)}: DecisionTreeRegressionModel.transform() " + + log"does nothing because no output columns were set.") dataset.toDF() } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index 0c58cc2449b99..732bfcbd671ed 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -21,7 +21,7 @@ import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ import org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.ml.linalg.{BLAS, Vector} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree._ @@ -288,8 +288,8 @@ class GBTRegressionModel private[ml]( if (predictionColNames.nonEmpty) { dataset.withColumns(predictionColNames, predictionColumns) } else { - this.logWarning(s"$uid: GBTRegressionModel.transform() does nothing" + - " because no output columns were set.") + this.logWarning(log"${MDC(LogKeys.UUID, uid)}: GBTRegressionModel.transform() " + + log"does nothing because no output columns 
were set.") dataset.toDF() } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index aa39a3e177eeb..4ded2f8d7bf5c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.attribute._ import org.apache.spark.ml.feature.{Instance, OffsetInstance} @@ -1074,8 +1074,8 @@ class GeneralizedLinearRegressionModel private[ml] ( } if (numColsOutput == 0) { - this.logWarning(s"$uid: GeneralizedLinearRegressionModel.transform() does nothing" + - " because no output columns were set.") + this.logWarning(log"${MDC(LogKeys.UUID, uid)}: GeneralizedLinearRegressionModel.transform()" + + log" does nothing because no output columns were set.") } outputData.toDF() } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index d53b8b270f2d6..23e536ce45eb5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -333,8 +333,8 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String epsilon, maxBlockSizeInMB) if (dataset.storageLevel != StorageLevel.NONE) { - instr.logWarning(s"Input instances will be standardized, blockified to blocks, and " + - s"then cached during training. Be careful of double caching!") + instr.logWarning("Input instances will be standardized, blockified to blocks, and " + + "then cached during training. Be careful of double caching!") } // Extract the number of features before deciding optimization solver. @@ -377,7 +377,7 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String } else { require($(regParam) == 0.0, "The standard deviation of the label is zero. " + "Model cannot be regularized.") - instr.logWarning(s"The standard deviation of the label is zero. " + + instr.logWarning("The standard deviation of the label is zero. " + "Consider setting fitIntercept=true.") } } @@ -472,13 +472,13 @@ class LinearRegression @Since("1.3.0") (@Since("1.3.0") override val uid: String // Also, if rawYStd==0 and yMean==0, all the coefficients are zero regardless of // the fitIntercept. 
if (yMean == 0.0) { - instr.logWarning(s"Mean and standard deviation of the label are zero, so the " + - s"coefficients and the intercept will all be zero; as a result, training is not " + - s"needed.") + instr.logWarning("Mean and standard deviation of the label are zero, so the " + + "coefficients and the intercept will all be zero; as a result, training is not " + + "needed.") } else { - instr.logWarning(s"The standard deviation of the label is zero, so the coefficients " + - s"will be zeros and the intercept will be the mean of the label; as a result, " + - s"training is not needed.") + instr.logWarning("The standard deviation of the label is zero, so the coefficients " + + "will be zeros and the intercept will be the mean of the label; as a result, " + + "training is not needed.") } val coefficients = Vectors.sparse(numFeatures, Seq.empty) val intercept = yMean diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index f241ff3e41153..4135afb5ed0b2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -21,6 +21,7 @@ import org.json4s.{DefaultFormats, JObject} import org.json4s.JsonDSL._ import org.apache.spark.annotation.Since +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.param.ParamMap @@ -161,7 +162,7 @@ class RandomForestRegressor @Since("1.4.0") (@Since("1.4.0") override val uid: S trees.foreach(copyValues(_)) val numFeatures = trees.head.numFeatures - instr.logNamedValue(Instrumentation.loggerTags.numFeatures, numFeatures) + instr.logNumFeatures(numFeatures) new RandomForestRegressionModel(uid, trees, numFeatures) } @@ -254,8 +255,8 @@ class RandomForestRegressionModel private[ml] ( if (predictionColNames.nonEmpty) { dataset.withColumns(predictionColNames, predictionColumns) } else { - this.logWarning(s"$uid: RandomForestRegressionModel.transform() does nothing" + - " because no output columns were set.") + this.logWarning(log"${MDC(LogKeys.UUID, uid)}: RandomForestRegressionModel.transform() " + + log"does nothing because no output columns were set.") dataset.toDF() } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala index 4697bfbe4b092..7a27b32aa24c5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Summarizer.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggreg import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.functions.lit import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * A builder object that provides summary statistics about a given column. 
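The next Summarizer.scala hunk swaps a hand-written ObjectOutputStream/ObjectInputStream round-trip for the Utils.serialize and Utils.deserialize helpers in org.apache.spark.util.Utils. A minimal sketch (not part of the patch) of the plain JDK round-trip those removed lines performed; the Payload type and object name below are illustrative only:

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

    // Illustrative payload; case classes are Java-serializable by default.
    case class Payload(values: Array[Double])

    object SerializationRoundTripSketch {
      def main(args: Array[String]): Unit = {
        val original = Payload(Array(1.0, 2.0, 3.0))
        // Serialize: write the object graph into an in-memory byte array,
        // as the removed inline code did.
        val bos = new ByteArrayOutputStream()
        val oos = new ObjectOutputStream(bos)
        oos.writeObject(original)
        oos.close()
        val bytes = bos.toByteArray
        // Deserialize: read the bytes back and cast to the expected type.
        val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
        val restored = ois.readObject().asInstanceOf[Payload]
        println(restored.values.mkString(", "))
      }
    }

Centralizing this round-trip in the shared helpers keeps the buffer handling, and the remaining TODO about a ByteBuffer-based path, in one place.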
@@ -397,17 +398,12 @@ private[spark] object SummaryBuilderImpl extends Logging { override def serialize(state: SummarizerBuffer): Array[Byte] = { // TODO: Use ByteBuffer to optimize - val bos = new ByteArrayOutputStream() - val oos = new ObjectOutputStream(bos) - oos.writeObject(state) - bos.toByteArray + Utils.serialize(state) } override def deserialize(bytes: Array[Byte]): SummarizerBuffer = { // TODO: Use ByteBuffer to optimize - val bis = new ByteArrayInputStream(bytes) - val ois = new ObjectInputStream(bis) - ois.readObject().asInstanceOf[SummarizerBuffer] + Utils.deserialize(bytes) } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): MetricsAggregate = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala index a9c2941ef3a53..2f63f4ae073e5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/DecisionTreeMetadata.scala @@ -20,7 +20,7 @@ package org.apache.spark.ml.tree.impl import scala.collection.mutable import scala.util.Try -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.tree.TreeEnsembleParams import org.apache.spark.mllib.tree.configuration.Algo._ @@ -134,8 +134,10 @@ private[spark] object DecisionTreeMetadata extends Logging { val maxPossibleBins = math.min(strategy.maxBins, numExamples).toInt if (maxPossibleBins < strategy.maxBins) { - logWarning(s"DecisionTree reducing maxBins from ${strategy.maxBins} to $maxPossibleBins" + - s" (= number of training instances)") + logWarning(log"DecisionTree reducing maxBins from " + + log"${MDC(LogKeys.MAX_NUM_BINS, strategy.maxBins)} to " + + log"${MDC(LogKeys.MAX_NUM_POSSIBLE_BINS, maxPossibleBins)} " + + log"(= number of training instances)") } // We check the number of bins here against maxPossibleBins. 
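The DecisionTreeMetadata hunk above shows the logging migration applied throughout these MLlib files: s-interpolated messages become log-interpolated entries whose variables are wrapped in MDC with a LogKeys constant, so each value is also emitted as a structured field. A minimal sketch (not part of the patch) of that pattern, assuming the internal Logging trait, MDC, and LogKeys imports already used in these hunks; the object and method names below are illustrative only:

    import org.apache.spark.internal.{Logging, MDC}
    import org.apache.spark.internal.LogKeys.{NUM_CLASSES, NUM_FEATURES}

    object StructuredLoggingSketch extends Logging {
      def logMetadata(numFeatures: Int, numClasses: Int): Unit = {
        // Old style, removed by this patch: logInfo(s"numFeatures: $numFeatures")
        // New style: the log interpolator builds a message with context, and MDC
        // tags each value with a LogKeys entry for structured output.
        logInfo(log"numFeatures: ${MDC(NUM_FEATURES, numFeatures)}")
        logInfo(log"numClasses: ${MDC(NUM_CLASSES, numClasses)}")
      }
    }

Longer messages are built by concatenating log segments with +, as the RandomForest and RowMatrix hunks below do.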
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala index 8cf19f27cbbf9..5184109bd3a52 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/GradientBoostedTrees.scala @@ -18,7 +18,8 @@ package org.apache.spark.ml.tree.impl import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.TIMER import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg.Vector import org.apache.spark.ml.regression.DecisionTreeRegressionModel @@ -477,7 +478,7 @@ private[spark] object GradientBoostedTrees extends Logging { timer.stop("total") logInfo("Internal timing for DecisionTree:") - logInfo(s"$timer") + logInfo(log"${MDC(TIMER, timer)}") bcSplits.destroy() treePoints.unpersist() diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala index 440b6635a52db..452532df5a2b6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala @@ -21,7 +21,8 @@ import scala.collection.mutable import scala.util.Random import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{MAX_MEMORY_SIZE, MEMORY_SIZE, NUM_CLASSES, NUM_EXAMPLES, NUM_FEATURES, NUM_NODES, NUM_WEIGHTED_EXAMPLES, TIMER} import org.apache.spark.ml.classification.DecisionTreeClassificationModel import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.impl.Utils @@ -131,10 +132,11 @@ private[spark] object RandomForest extends Logging with Serializable { instrumentation.logNumExamples(metadata.numExamples) instrumentation.logSumOfWeights(metadata.weightedNumExamples) case None => - logInfo(s"numFeatures: ${metadata.numFeatures}") - logInfo(s"numClasses: ${metadata.numClasses}") - logInfo(s"numExamples: ${metadata.numExamples}") - logInfo(s"weightedNumExamples: ${metadata.weightedNumExamples}") + logInfo(log"numFeatures: ${MDC(NUM_FEATURES, metadata.numFeatures)}") + logInfo(log"numClasses: ${MDC(NUM_CLASSES, metadata.numClasses)}") + logInfo(log"numExamples: ${MDC(NUM_EXAMPLES, metadata.numExamples)}") + logInfo(log"weightedNumExamples: " + + log"${MDC(NUM_WEIGHTED_EXAMPLES, metadata.weightedNumExamples)}") } timer.start("init") @@ -217,7 +219,7 @@ private[spark] object RandomForest extends Logging with Serializable { timer.stop("total") logInfo("Internal timing for DecisionTree:") - logInfo(s"$timer") + logInfo(log"${MDC(TIMER, timer)}") if (strategy.useNodeIdCache) { // Delete any remaining checkpoints used for node Id cache. @@ -1286,9 +1288,10 @@ private[spark] object RandomForest extends Logging with Serializable { } if (memUsage > maxMemoryUsage) { // If maxMemoryUsage is 0, we should still allow splitting 1 node. - logWarning(s"Tree learning is using approximately $memUsage bytes per iteration, which" + - s" exceeds requested limit maxMemoryUsage=$maxMemoryUsage. 
This allows splitting" + - s" $numNodesInGroup nodes in this iteration.") + logWarning(log"Tree learning is using approximately ${MDC(MEMORY_SIZE, memUsage)} " + + log"bytes per iteration, which exceeds requested limit " + + log"maxMemoryUsage=${MDC(MAX_MEMORY_SIZE, maxMemoryUsage)}. This allows splitting " + + log"${MDC(NUM_NODES, numNodesInGroup)} nodes in this iteration.") } // Convert mutable maps to immutable ones. val nodesForGroup: Map[Int, Array[LearningNode]] = diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index 33b7963788fa5..867f35a5d2b80 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -27,7 +27,8 @@ import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CROSS_VALIDATION_METRIC, CROSS_VALIDATION_METRICS, ESTIMATOR_PARAM_MAP} import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.evaluation.Evaluator import org.apache.spark.ml.param.{IntParam, Param, ParamMap, ParamValidators} @@ -192,12 +193,13 @@ class CrossValidator @Since("1.2.0") (@Since("1.4.0") override val uid: String) foldMetrics }.transpose.map(_.sum / $(numFolds)) // Calculate average metric over all splits - instr.logInfo(s"Average cross-validation metrics: ${metrics.toImmutableArraySeq}") + instr.logInfo(log"Average cross-validation metrics: ${MDC( + CROSS_VALIDATION_METRICS, metrics.mkString("[", ", ", "]"))}") val (bestMetric, bestIndex) = if (eval.isLargerBetter) metrics.zipWithIndex.maxBy(_._1) else metrics.zipWithIndex.minBy(_._1) - instr.logInfo(s"Best set of parameters:\n${epm(bestIndex)}") - instr.logInfo(s"Best cross-validation metric: $bestMetric.") + instr.logInfo(log"Best set of parameters:\n${MDC(ESTIMATOR_PARAM_MAP, epm(bestIndex))}") + instr.logInfo(log"Best cross-validation metric: ${MDC(CROSS_VALIDATION_METRIC, bestMetric)}.") val bestModel = est.fit(dataset, epm(bestIndex)).asInstanceOf[Model[_]] copyValues(new CrossValidatorModel(uid, bestModel, metrics) .setSubModels(subModels).setParent(this)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala index 58487b6ccbb8e..8e33ae6aad28b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala @@ -28,7 +28,8 @@ import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats import org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{ESTIMATOR_PARAM_MAP, TRAIN_VALIDATION_SPLIT_METRIC, TRAIN_VALIDATION_SPLIT_METRICS} import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.evaluation.Evaluator import org.apache.spark.ml.param.{DoubleParam, ParamMap, ParamValidators} @@ -168,12 +169,14 @@ class TrainValidationSplit @Since("1.5.0") (@Since("1.5.0") override val uid: St trainingDataset.unpersist() validationDataset.unpersist() - instr.logInfo(s"Train validation split metrics: ${metrics.toImmutableArraySeq}") + instr.logInfo(log"Train validation split metrics: ${MDC( + 
TRAIN_VALIDATION_SPLIT_METRICS, metrics.mkString("[", ", ", "]"))}") val (bestMetric, bestIndex) = if (eval.isLargerBetter) metrics.zipWithIndex.maxBy(_._1) else metrics.zipWithIndex.minBy(_._1) - instr.logInfo(s"Best set of parameters:\n${epm(bestIndex)}") - instr.logInfo(s"Best train validation split metric: $bestMetric.") + instr.logInfo(log"Best set of parameters:\n${MDC(ESTIMATOR_PARAM_MAP, epm(bestIndex))}") + instr.logInfo(log"Best train validation split metric: " + + log"${MDC(TRAIN_VALIDATION_SPLIT_METRIC, bestMetric)}.") val bestModel = est.fit(dataset, epm(bestIndex)).asInstanceOf[Model[_]] copyValues(new TrainValidationSplitModel(uid, bestModel, metrics) .setSubModels(subModels).setParent(this)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/DatasetUtils.scala b/mllib/src/main/scala/org/apache/spark/ml/util/DatasetUtils.scala index b3cb9c7f2dd12..d5b6396e5ba80 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/DatasetUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/DatasetUtils.scala @@ -18,7 +18,8 @@ package org.apache.spark.ml.util import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, LABEL_COLUMN, NUM_CLASSES} import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.classification.ClassifierParams import org.apache.spark.ml.feature.Instance @@ -195,8 +196,9 @@ private[spark] object DatasetUtils extends Logging { s" to be inferred from values. To avoid this error for labels with > $maxNumClasses" + s" classes, specify numClasses explicitly in the metadata; this can be done by applying" + s" StringIndexer to the label column.") - logInfo(this.getClass.getCanonicalName + s" inferred $numClasses classes for" + - s" labelCol=$labelCol since numClasses was not specified in the column metadata.") + logInfo(log"${MDC(CLASS_NAME, this.getClass.getCanonicalName)} inferred ${MDC( + NUM_CLASSES, numClasses)} classes for labelCol=${MDC(LABEL_COLUMN, labelCol)}" + + log" since numClasses was not specified in the column metadata.") numClasses } } diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala index bfc6465c58bd1..9413605a31ced 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/Instrumentation.scala @@ -27,7 +27,8 @@ import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ -import org.apache.spark.internal.{LogEntry, Logging} +import org.apache.spark.internal.{LogEntry, Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, NUM_PARTITIONS, PIPELINE_STAGE_UID, STORAGE_LEVEL} import org.apache.spark.ml.{MLEvents, PipelineStage} import org.apache.spark.ml.param.{Param, Params} import org.apache.spark.rdd.RDD @@ -53,8 +54,8 @@ private[spark] class Instrumentation private () extends Logging with MLEvents { // estimator.getClass.getSimpleName can cause Malformed class name error, // call safer `Utils.getSimpleName` instead val className = Utils.getSimpleName(stage.getClass) - logInfo(s"Stage class: $className") - logInfo(s"Stage uid: ${stage.uid}") + logInfo(log"Stage class: ${MDC(CLASS_NAME, className)}") + logInfo(log"Stage uid: ${MDC(PIPELINE_STAGE_UID, stage.uid)}") } /** @@ -66,8 +67,8 @@ private[spark] class Instrumentation private () extends Logging with MLEvents { * Log some data about 
the dataset being fit. */ def logDataset(dataset: RDD[_]): Unit = { - logInfo(s"training: numPartitions=${dataset.partitions.length}" + - s" storageLevel=${dataset.getStorageLevel}") + logInfo(log"training: numPartitions=${MDC(NUM_PARTITIONS, dataset.partitions.length)}" + + log" storageLevel=${MDC(STORAGE_LEVEL, dataset.getStorageLevel)}") } /** @@ -253,6 +254,13 @@ private[spark] class OptionalInstrumentation private( } } + override def logInfo(logEntry: LogEntry): Unit = { + instrumentation match { + case Some(instr) => instr.logInfo(logEntry) + case None => super.logInfo(logEntry) + } + } + override def logWarning(msg: => String): Unit = { instrumentation match { case Some(instr) => instr.logWarning(msg) diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala index 2083a07e2cb5a..9b26d0a911aca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala @@ -32,7 +32,8 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.annotation.{Since, Unstable} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.ml._ import org.apache.spark.ml.classification.{OneVsRest, OneVsRestModel} import org.apache.spark.ml.feature.RFormulaModel @@ -674,7 +675,7 @@ private[ml] class FileSystemOverwrite extends Logging { val qualifiedOutputPath = outputPath.makeQualified(fs.getUri, fs.getWorkingDirectory) if (fs.exists(qualifiedOutputPath)) { if (shouldOverwrite) { - logInfo(s"Path $path already exists. It will be overwritten.") + logInfo(log"Path ${MDC(PATH, path)} already exists. It will be overwritten.") // TODO: Revert back to the original content if save is not successful. 
fs.delete(qualifiedOutputPath, true) } else { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala index 6e5c026cd0143..c826654f0893c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeans.scala @@ -24,7 +24,8 @@ import scala.collection.mutable import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLUSTER_LEVEL, COST, DIVISIBLE_CLUSTER_INDICES_SIZE, FEATURE_DIMENSION, MIN_POINT_PER_CLUSTER, NUM_POINT} import org.apache.spark.ml.util.Instrumentation import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.linalg.BLAS.axpy @@ -158,7 +159,7 @@ class BisectingKMeans private ( handlePersistence: Boolean, instr: Option[Instrumentation]): BisectingKMeansModel = { val d = instances.map(_._1.size).first() - logInfo(s"Feature dimension: $d.") + logInfo(log"Feature dimension: ${MDC(FEATURE_DIMENSION, d)}.") val dMeasure = DistanceMeasure.decodeFromString(this.distanceMeasure) val norms = instances.map(d => Vectors.norm(d._1, 2.0)) @@ -178,14 +179,15 @@ class BisectingKMeans private ( instr.foreach(_.logSumOfWeights(activeClusters.values.map(_.weightSum).sum)) val rootSummary = activeClusters(ROOT_INDEX) val n = rootSummary.size - logInfo(s"Number of points: $n.") - logInfo(s"Initial cost: ${rootSummary.cost}.") + logInfo(log"Number of points: ${MDC(NUM_POINT, n)}.") + logInfo(log"Initial cost: ${MDC(COST, rootSummary.cost)}.") val minSize = if (minDivisibleClusterSize >= 1.0) { math.ceil(minDivisibleClusterSize).toLong } else { math.ceil(minDivisibleClusterSize * n).toLong } - logInfo(s"The minimum number of points of a divisible cluster is $minSize.") + logInfo(log"The minimum number of points of a divisible cluster is " + + log"${MDC(MIN_POINT_PER_CLUSTER, minSize)}.") var inactiveClusters = mutable.Seq.empty[(Long, ClusterSummary)] val random = new Random(seed) var numLeafClustersNeeded = k - 1 @@ -206,7 +208,8 @@ class BisectingKMeans private ( } if (divisibleClusters.nonEmpty) { val divisibleIndices = divisibleClusters.keys.toSet - logInfo(s"Dividing ${divisibleIndices.size} clusters on level $level.") + logInfo(log"Dividing ${MDC(DIVISIBLE_CLUSTER_INDICES_SIZE, divisibleIndices.size)}" + + log" clusters on level ${MDC(CLUSTER_LEVEL, level)}.") var newClusterCenters = divisibleClusters.flatMap { case (index, summary) => val (left, right) = splitCenter(summary.center, random, dMeasure) Iterator((leftChildIndex(index), left), (rightChildIndex(index), right)) @@ -233,7 +236,8 @@ class BisectingKMeans private ( activeClusters = newClusters numLeafClustersNeeded -= divisibleClusters.size } else { - logInfo(s"None active and divisible clusters left on level $level. Stop iterations.") + logInfo(log"None active and divisible clusters left " + + log"on level ${MDC(CLUSTER_LEVEL, level)}. 
Stop iterations.") inactiveClusters ++= activeClusters activeClusters = Map.empty } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala index f6c73a88e3634..52fbc7a5a47f5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala @@ -21,7 +21,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.annotation.Since import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{COST, INIT_MODE, NUM_ITERATIONS, TOTAL_TIME} import org.apache.spark.ml.util.Instrumentation import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.linalg.BLAS.axpy @@ -259,7 +260,7 @@ class KMeans private ( val sc = data.sparkContext - val initStartTime = System.nanoTime() + val initStartTime = System.currentTimeMillis() val distanceMeasureInstance = DistanceMeasure.decodeFromString(this.distanceMeasure) @@ -274,14 +275,15 @@ class KMeans private ( } } val numFeatures = centers.head.vector.size - val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9 - logInfo(f"Initialization with $initializationMode took $initTimeInSeconds%.3f seconds.") + val initTimeMs = System.currentTimeMillis() - initStartTime + logInfo(log"Initialization with ${MDC(INIT_MODE, initializationMode)}" + + log" took ${MDC(TOTAL_TIME, initTimeMs)} ms.") var converged = false var cost = 0.0 var iteration = 0 - val iterationStartTime = System.nanoTime() + val iterationStartTime = System.currentTimeMillis() instr.foreach(_.logNumFeatures(numFeatures)) @@ -357,16 +359,17 @@ class KMeans private ( iteration += 1 } - val iterationTimeInSeconds = (System.nanoTime() - iterationStartTime) / 1e9 - logInfo(f"Iterations took $iterationTimeInSeconds%.3f seconds.") + val iterationTimeMs = System.currentTimeMillis() - iterationStartTime + logInfo(log"Iterations took ${MDC(TOTAL_TIME, iterationTimeMs)} ms") if (iteration == maxIterations) { - logInfo(s"KMeans reached the max number of iterations: $maxIterations.") + logInfo(log"KMeans reached the max number of" + + log" iterations: ${MDC(NUM_ITERATIONS, maxIterations)}.") } else { - logInfo(s"KMeans converged in $iteration iterations.") + logInfo(log"KMeans converged in ${MDC(NUM_ITERATIONS, iteration)} iterations.") } - logInfo(s"The cost is $cost.") + logInfo(log"The cost is ${MDC(COST, cost)}.") new KMeansModel(centers.map(_.vector), distanceMeasure, cost, iteration) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala index 9e2113f1c0fc9..ea83be1237298 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala @@ -19,7 +19,8 @@ package org.apache.spark.mllib.clustering import scala.util.Random -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{NUM_ITERATIONS, POINT_OF_CENTER} import org.apache.spark.mllib.linalg.BLAS.{axpy, scal} import org.apache.spark.mllib.linalg.Vectors @@ -58,8 +59,8 @@ private[mllib] object LocalKMeans extends Logging { j += 1 } if (j == 0) { - logWarning("kMeansPlusPlus initialization ran out of distinct points for centers." 
+ - s" Using duplicate point for center k = $i.") + logWarning(log"kMeansPlusPlus initialization ran out of distinct points for centers." + + log" Using duplicate point for center k = ${MDC(POINT_OF_CENTER, i)}.") centers(i) = points(0).toDense } else { centers(i) = points(j - 1).toDense @@ -112,9 +113,10 @@ private[mllib] object LocalKMeans extends Logging { } if (iteration == maxIterations) { - logInfo(s"Local KMeans++ reached the max number of iterations: $maxIterations.") + logInfo(log"Local KMeans++ reached the max number of " + + log"iterations: ${MDC(NUM_ITERATIONS, maxIterations)}.") } else { - logInfo(s"Local KMeans++ converged in $iteration iterations.") + logInfo(log"Local KMeans++ converged in ${MDC(NUM_ITERATIONS, iteration)} iterations.") } centers diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala index 12c7ae5066c82..9150bb305876b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala @@ -25,7 +25,8 @@ import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.graphx._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{DELTA, DIFF_DELTA, NORM, NUM_ITERATIONS} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.{Loader, MLUtils, Saveable} import org.apache.spark.rdd.RDD @@ -368,7 +369,7 @@ object PowerIterationClustering extends Logging { var diffDelta = Double.MaxValue var curG = g for (iter <- 0 until maxIterations if math.abs(diffDelta) > tol) { - val msgPrefix = s"Iteration $iter" + val msgPrefix = log"Iteration ${MDC(NUM_ITERATIONS, iter)}:" // multiply W by vt val v = curG.aggregateMessages[Double]( sendMsg = ctx => ctx.sendToSrc(ctx.attr * ctx.dstAttr), @@ -378,15 +379,15 @@ object PowerIterationClustering extends Logging { /* useEdge */ true)).cache() // normalize v val norm = v.values.map(math.abs).sum() - logInfo(s"$msgPrefix: norm(v) = $norm.") + logInfo(msgPrefix + log" norm(v) = ${MDC(NORM, norm)}.") val v1 = v.mapValues(x => x / norm) // compare difference val delta = curG.joinVertices(v1) { case (_, x, y) => math.abs(x - y) }.vertices.values.sum() - logInfo(s"$msgPrefix: delta = $delta.") + logInfo(msgPrefix + log" delta = ${MDC(DELTA, delta)}.") diffDelta = math.abs(delta - prevDelta) - logInfo(s"$msgPrefix: diff(delta) = $diffDelta.") + logInfo(msgPrefix + log" diff(delta) = ${MDC(DIFF_DELTA, diffDelta)}.") if (math.abs(diffDelta) < tol) { /** @@ -404,8 +405,8 @@ object PowerIterationClustering extends Logging { val rayleigh = xTAx / xTx if (math.abs(norm - math.abs(rayleigh)) > tol) { - logWarning(s"Power Iteration fail to converge. delta = ${delta}," + - s" difference delta = ${diffDelta} and norm = ${norm}") + logWarning(log"Power Iteration fail to converge. 
delta = ${MDC(DELTA, delta)}," + + log" difference delta = ${MDC(DIFF_DELTA, diffDelta)} and norm = ${MDC(NORM, norm)}") } } curG.vertices.unpersist() diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index ba14dc739a235..85a7350078101 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -21,7 +21,8 @@ import scala.reflect.ClassTag import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaSparkContext._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLUSTER_CENTROIDS, CLUSTER_LABEL, CLUSTER_WEIGHT, LARGEST_CLUSTER_INDEX, SMALLEST_CLUSTER_INDEX} import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.api.java.{JavaDStream, JavaPairDStream} @@ -129,14 +130,16 @@ class StreamingKMeansModel @Since("1.2.0") ( case _ => centroid.toArray.mkString("[", ",", "]") } - logInfo(s"Cluster $label updated with weight $updatedWeight and centroid: $display") + logInfo(log"Cluster ${MDC(CLUSTER_LABEL, label)} updated with weight " + + log"${MDC(CLUSTER_WEIGHT, updatedWeight)} and centroid: ${MDC(CLUSTER_CENTROIDS, display)}") } // Check whether the smallest cluster is dying. If so, split the largest cluster. val (maxWeight, largest) = clusterWeights.iterator.zipWithIndex.maxBy(_._1) val (minWeight, smallest) = clusterWeights.iterator.zipWithIndex.minBy(_._1) if (minWeight < 1e-8 * maxWeight) { - logInfo(s"Cluster $smallest is dying. Split the largest cluster $largest into two.") + logInfo(log"Cluster ${MDC(SMALLEST_CLUSTER_INDEX, smallest)} is dying. 
" + + log"Split the largest cluster ${MDC(LARGEST_CLUSTER_INDEX, largest)} into two.") val weight = (maxWeight + minWeight) / 2.0 clusterWeights(largest) = weight clusterWeights(smallest) = weight diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala index 869fe7155a268..a203d3fc73537 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/BinaryClassificationMetrics.scala @@ -18,7 +18,8 @@ package org.apache.spark.mllib.evaluation import org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{COUNT, NUM_BIN} import org.apache.spark.mllib.evaluation.binary._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row} @@ -201,7 +202,8 @@ class BinaryClassificationMetrics @Since("3.0.0") ( val grouping = countsSize / numBins if (grouping < 2) { // numBins was more than half of the size; no real point in down-sampling to bins - logInfo(s"Curve is too small ($countsSize) for $numBins bins to be useful") + logInfo(log"Curve is too small (${MDC(COUNT, countsSize)}) " + + log"for ${MDC(NUM_BIN, numBins)} bins to be useful") counts } else { counts.mapPartitions { iter => @@ -243,7 +245,7 @@ class BinaryClassificationMetrics @Since("3.0.0") ( val partitionwiseCumulativeCounts = agg.scanLeft(new BinaryLabelCounter())((agg, c) => agg.clone() += c) val totalCount = partitionwiseCumulativeCounts.last - logInfo(s"Total counts: $totalCount") + logInfo(log"Total counts: ${MDC(COUNT, totalCount)}") val cumulativeCounts = binnedCounts.mapPartitionsWithIndex( (index: Int, iter: Iterator[(Double, BinaryLabelCounter)]) => { val cumCount = partitionwiseCumulativeCounts(index) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 0dddbec8a7ed8..499dc09b86211 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -31,7 +31,8 @@ import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{ALPHA, COUNT, NUM_TRAIN_WORD, VOCAB_SIZE} import org.apache.spark.internal.config.Kryo.KRYO_SERIALIZER_MAX_BUFFER_SIZE import org.apache.spark.ml.linalg.BLAS import org.apache.spark.mllib.linalg.{Vector, Vectors} @@ -208,7 +209,8 @@ class Word2Vec extends Serializable with Logging { trainWordsCount += vocab(a).cn a += 1 } - logInfo(s"vocabSize = $vocabSize, trainWordsCount = $trainWordsCount") + logInfo(log"vocabSize = ${MDC(VOCAB_SIZE, vocabSize)}," + + log" trainWordsCount = ${MDC(NUM_TRAIN_WORD, trainWordsCount)}") } private def createExpTable(): Array[Float] = { @@ -379,8 +381,9 @@ class Word2Vec extends Serializable with Logging { (1 - (numPartitions * wordCount.toDouble + numWordsProcessedInPreviousIterations) / totalWordsCounts) if (alpha < learningRate * 0.0001) alpha = learningRate * 0.0001 - logInfo(s"wordCount = ${wordCount + numWordsProcessedInPreviousIterations}, " + - s"alpha = $alpha") + logInfo(log"wordCount =" + + 
log" ${MDC(COUNT, wordCount + numWordsProcessedInPreviousIterations)}," + + log" alpha = ${MDC(ALPHA, alpha)}") } wc += sentence.length var pos = 0 diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index 59d22f0eac991..3c648f34c6100 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -33,7 +33,8 @@ import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.JavaSparkContext.fakeClassTag -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{MIN_NUM_FREQUENT_PATTERN, NUM_FREQUENT_ITEMS, NUM_LOCAL_FREQUENT_PATTERN, NUM_PREFIXES, NUM_SEQUENCES} import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row, SparkSession} @@ -139,13 +140,13 @@ class PrefixSpan private ( } val totalCount = data.count() - logInfo(s"number of sequences: $totalCount") + logInfo(log"number of sequences: ${MDC(NUM_SEQUENCES, totalCount)}") val minCount = math.ceil(minSupport * totalCount).toLong - logInfo(s"minimum count for a frequent pattern: $minCount") + logInfo(log"minimum count for a frequent pattern: ${MDC(MIN_NUM_FREQUENT_PATTERN, minCount)}") // Find frequent items. val freqItems = findFrequentItems(data, minCount) - logInfo(s"number of frequent items: ${freqItems.length}") + logInfo(log"number of frequent items: ${MDC(NUM_FREQUENT_ITEMS, freqItems.length)}") // Keep only frequent items from input sequences and convert them to internal storage. val itemToInt = Utils.toMapWithIndex(freqItems) @@ -298,18 +299,20 @@ object PrefixSpan extends Logging { var largePrefixes = mutable.Map(emptyPrefix.id -> emptyPrefix) while (largePrefixes.nonEmpty) { val numLocalFreqPatterns = localFreqPatterns.length - logInfo(s"number of local frequent patterns: $numLocalFreqPatterns") + logInfo(log"number of local frequent patterns: " + + log"${MDC(NUM_LOCAL_FREQUENT_PATTERN, numLocalFreqPatterns)}") if (numLocalFreqPatterns > 1000000) { logWarning( - s""" - | Collected $numLocalFreqPatterns local frequent patterns. You may want to consider: + log""" + | Collected ${MDC(NUM_LOCAL_FREQUENT_PATTERN, numLocalFreqPatterns)} + | local frequent patterns. You may want to consider: | 1. increase minSupport, | 2. decrease maxPatternLength, | 3. increase maxLocalProjDBSize. """.stripMargin) } - logInfo(s"number of small prefixes: ${smallPrefixes.size}") - logInfo(s"number of large prefixes: ${largePrefixes.size}") + logInfo(log"number of small prefixes: ${MDC(NUM_PREFIXES, smallPrefixes.size)}") + logInfo(log"number of large prefixes: ${MDC(NUM_PREFIXES, largePrefixes.size)}") val largePrefixArray = largePrefixes.values.toArray val freqPrefixes = postfixes.flatMap { postfix => largePrefixArray.flatMap { prefix => @@ -339,7 +342,8 @@ object PrefixSpan extends Logging { var freqPatterns = sc.parallelize(localFreqPatterns.toSeq, 1) val numSmallPrefixes = smallPrefixes.size - logInfo(s"number of small prefixes for local processing: $numSmallPrefixes") + logInfo(log"number of small prefixes for local processing: " + + log"${MDC(NUM_PREFIXES, numSmallPrefixes)}") if (numSmallPrefixes > 0) { // Switch to local processing. 
val bcSmallPrefixes = sc.broadcast(smallPrefixes) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala index 4e9952e6d768f..3329682d3b550 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala @@ -23,7 +23,7 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, Matrix => BM} import org.apache.spark.{Partitioner, PartitionIdPassthrough, SparkException} import org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.mllib.linalg._ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel @@ -322,7 +322,10 @@ class BlockMatrix @Since("1.3.0") ( val m = numRows().toInt val n = numCols().toInt val mem = m * n / 125000 - if (mem > 500) logWarning(s"Storing this matrix will require $mem MiB of memory!") + if (mem > 500) { + logWarning(log"Storing this matrix will require ${MDC(LogKeys.MEMORY_SIZE, mem)} " + + log"MiB of memory!") + } val localBlocks = blocks.collect() val values = new Array[Double](m * n) localBlocks.foreach { case ((blockRowIndex, blockColIndex), submat) => diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 882872709ac35..63cd41439054e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -26,7 +26,7 @@ import breeze.linalg.{axpy => brzAxpy, inv, svd => brzSvd, DenseMatrix => BDM, D import breeze.numerics.{sqrt => brzSqrt} import org.apache.spark.annotation.Since -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.MAX_RESULT_SIZE import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.stat._ @@ -251,7 +251,8 @@ class RowMatrix @Since("1.0.0") ( } if (cols > 10000) { val memMB = (cols.toLong * cols) / 125000 - logWarning(s"$cols columns will require at least $memMB megabytes of memory!") + logWarning(log"${MDC(LogKeys.NUM_COLUMNS, cols)} columns will require at least " + + log"${MDC(LogKeys.MEMORY_SIZE, memMB)} megabytes of memory!") } } @@ -342,7 +343,8 @@ class RowMatrix @Since("1.0.0") ( val computeMode = mode match { case "auto" => if (k > 5000) { - logWarning(s"computing svd with k=$k and n=$n, please check necessity") + logWarning(log"computing svd with k=${MDC(LogKeys.NUM_LEADING_SINGULAR_VALUES, k)} and " + + log"n=${MDC(LogKeys.NUM_COLUMNS, n)}, please check necessity") } // TODO: The conditions below are not fully tested. @@ -395,7 +397,8 @@ class RowMatrix @Since("1.0.0") ( // criterion specified by tol after max number of iterations. // Thus use i < min(k, sigmas.length) instead of i < k. 
if (sigmas.length < k) { - logWarning(s"Requested $k singular values but only found ${sigmas.length} converged.") + logWarning(log"Requested ${MDC(LogKeys.NUM_LEADING_SINGULAR_VALUES, k)} singular " + + log"values but only found ${MDC(LogKeys.SIGMAS_LENGTH, sigmas.length)} converged.") } while (i < math.min(k, sigmas.length) && sigmas(i) >= threshold) { i += 1 @@ -403,7 +406,8 @@ class RowMatrix @Since("1.0.0") ( val sk = i if (sk < k) { - logWarning(s"Requested $k singular values but only found $sk nonzeros.") + logWarning(log"Requested ${MDC(LogKeys.NUM_LEADING_SINGULAR_VALUES, k)} singular " + + log"values but only found ${MDC(LogKeys.COUNT, sk)} nonzeros.") } // Warn at the end of the run as well, for increased visibility. @@ -625,9 +629,9 @@ class RowMatrix @Since("1.0.0") ( require(threshold >= 0, s"Threshold cannot be negative: $threshold") if (threshold > 1) { - logWarning(s"Threshold is greater than 1: $threshold " + - "Computation will be more efficient with promoted sparsity, " + - " however there is no correctness guarantee.") + logWarning(log"Threshold is greater than 1: ${MDC(LogKeys.THRESHOLD, threshold)} " + + log"Computation will be more efficient with promoted sparsity, " + + log"however there is no correctness guarantee.") } val gamma = if (threshold < 1e-6) { @@ -828,9 +832,9 @@ class RowMatrix @Since("1.0.0") ( val desiredTreeDepth = math.ceil(numerator / denominator) if (desiredTreeDepth > 4) { - logWarning( - s"Desired tree depth for treeAggregation is big ($desiredTreeDepth)." - + "Consider increasing driver max result size or reducing number of partitions") + logWarning(log"Desired tree depth for treeAggregation is big " + + log"(${MDC(LogKeys.DESIRED_TREE_DEPTH, desiredTreeDepth)}). " + + log"Consider increasing driver max result size or reducing number of partitions") } math.min(math.max(1, desiredTreeDepth), 10).toInt diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index d40e640a33d6f..a288d13e57f7b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuffer import breeze.linalg.{norm, DenseVector => BDV} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD @@ -203,8 +203,9 @@ object GradientDescent extends Logging { } if (numIterations * miniBatchFraction < 1.0) { - logWarning("Not all examples will be used if numIterations * miniBatchFraction < 1.0: " + - s"numIterations=$numIterations and miniBatchFraction=$miniBatchFraction") + logWarning(log"Not all examples will be used if numIterations * miniBatchFraction < 1.0: " + + log"numIterations=${MDC(LogKeys.NUM_ITERATIONS, numIterations)} and " + + log"miniBatchFraction=${MDC(LogKeys.MINI_BATCH_FRACTION, miniBatchFraction)}") } val stochasticLossHistory = new ArrayBuffer[Double](numIterations + 1) @@ -291,7 +292,9 @@ object GradientDescent extends Logging { } } } else { - logWarning(s"Iteration ($i/$numIterations). The size of sampled batch is zero") + logWarning(log"Iteration " + + log"(${MDC(LogKeys.INDEX, i)}/${MDC(LogKeys.NUM_ITERATIONS, numIterations)}). 
" + + log"The size of sampled batch is zero") } i += 1 } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index 9ffee8832db93..bc888aecec0ab 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -30,7 +30,7 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.api.java.{JavaPairRDD, JavaRDD} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.mllib.linalg.BLAS import org.apache.spark.mllib.rdd.MLPairRDDFunctions._ import org.apache.spark.mllib.util.{Loader, Saveable} @@ -66,11 +66,12 @@ class MatrixFactorizationModel @Since("0.8.0") ( require(features.first()._2.length == rank, s"$name feature dimension does not match the rank $rank.") if (features.partitioner.isEmpty) { - logWarning(s"$name factor does not have a partitioner. " - + "Prediction on individual records could be slow.") + logWarning(log"${MDC(LogKeys.FEATURE_NAME, name)} factor does not have a partitioner. " + + log"Prediction on individual records could be slow.") } if (features.getStorageLevel == StorageLevel.NONE) { - logWarning(s"$name factor is not cached. Prediction could be slow.") + logWarning(log"${MDC(LogKeys.FEATURE_NAME, name)} factor is not cached. " + + log"Prediction could be slow.") } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala index 94848cb5033a1..2fa1339bc72a9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala @@ -21,7 +21,8 @@ import scala.reflect.ClassTag import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaSparkContext.fakeClassTag -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{MODEL_WEIGHTS, TIME} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.streaming.api.java.{JavaDStream, JavaPairDStream} import org.apache.spark.streaming.dstream.DStream @@ -90,12 +91,12 @@ abstract class StreamingLinearAlgorithm[ data.foreachRDD { (rdd, time) => if (!rdd.isEmpty()) { model = Some(algorithm.run(rdd, model.get.weights)) - logInfo(s"Model updated at time ${time.toString}") + logInfo(log"Model updated at time ${MDC(TIME, time)}") val display = model.get.weights.size match { case x if x > 100 => model.get.weights.toArray.take(100).mkString("[", ",", "...") case _ => model.get.weights.toArray.mkString("[", ",", "]") } - logInfo(s"Current model: weights, ${display}") + logInfo(log"Current model: weights, ${MDC(MODEL_WEIGHTS, display)}") } } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala index 9aeab65e25de4..2059a9f785381 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala @@ -20,7 +20,7 @@ package org.apache.spark.mllib.stat.test import 
org.apache.commons.math3.distribution.ChiSquaredDistribution import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD @@ -221,8 +221,9 @@ private[spark] object ChiSqTest extends Logging { } val size = observed.size if (size > 1000) { - logWarning("Chi-squared approximation may not be accurate due to low expected frequencies " - + s" as a result of a large number of categories: $size.") + logWarning(log"Chi-squared approximation may not be accurate due to low expected " + + log"frequencies as a result of a large number of categories: " + + log"${MDC(LogKeys.NUM_CATEGORIES, size)}.") } val obsArr = observed.toArray val expArr = if (expected.size == 0) Array.tabulate(size)(_ => 1.0 / size) else expected.toArray diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index c282dc59fa8d3..2f65dea0c4a89 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -26,7 +26,7 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.tree.configuration.{Algo, FeatureType} import org.apache.spark.mllib.tree.configuration.Algo._ @@ -209,15 +209,19 @@ object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { .map(Utils.memoryStringToMb) .getOrElse(Utils.DEFAULT_DRIVER_MEM_MB) if (driverMemory <= memThreshold) { - logWarning(s"$thisClassName.save() was called, but it may fail because of too little" + - s" driver memory (${driverMemory}m)." + - s" If failure occurs, try setting driver-memory ${memThreshold}m (or larger).") + logWarning(log"${MDC(LogKeys.CLASS_NAME, thisClassName)}.save() was called, " + + log"but it may fail because of too little driver memory " + + log"(${MDC(LogKeys.DRIVER_MEMORY_SIZE, driverMemory)}m). If failure occurs, " + + log"try setting driver-memory ${MDC(LogKeys.MEMORY_THRESHOLD_SIZE, memThreshold)}m " + + log"(or larger).") } } else { if (sc.executorMemory <= memThreshold) { - logWarning(s"$thisClassName.save() was called, but it may fail because of too little" + - s" executor memory (${sc.executorMemory}m)." + - s" If failure occurs try setting executor-memory ${memThreshold}m (or larger).") + logWarning(log"${MDC(LogKeys.CLASS_NAME, thisClassName)}.save() was called, " + + log"but it may fail because of too little executor memory " + + log"(${MDC(LogKeys.EXECUTOR_MEMORY_SIZE, sc.executorMemory)}m). 
If failure occurs, " + + log"try setting executor-memory ${MDC(LogKeys.MEMORY_THRESHOLD_SIZE, memThreshold)}m " + + log"(or larger).") } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 579d6b77f62c3..aa2287f3af896 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -26,7 +26,7 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.ml.linalg.BLAS import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.regression.LabeledPoint @@ -407,15 +407,19 @@ private[tree] object TreeEnsembleModel extends Logging { .map(Utils.memoryStringToMb) .getOrElse(Utils.DEFAULT_DRIVER_MEM_MB) if (driverMemory <= memThreshold) { - logWarning(s"$className.save() was called, but it may fail because of too little" + - s" driver memory (${driverMemory}m)." + - s" If failure occurs, try setting driver-memory ${memThreshold}m (or larger).") + logWarning(log"${MDC(LogKeys.CLASS_NAME, className)}.save() was called, " + + log"but it may fail because of too little driver memory " + + log"(${MDC(LogKeys.DRIVER_MEMORY_SIZE, driverMemory)}m). If failure occurs, " + + log"try setting driver-memory ${MDC(LogKeys.MEMORY_THRESHOLD_SIZE, memThreshold)}m " + + log"(or larger).") } } else { if (sc.executorMemory <= memThreshold) { - logWarning(s"$className.save() was called, but it may fail because of too little" + - s" executor memory (${sc.executorMemory}m)." + - s" If failure occurs try setting executor-memory ${memThreshold}m (or larger).") + logWarning(log"${MDC(LogKeys.CLASS_NAME, className)}.save() was called, " + + log"but it may fail because of too little executor memory " + + log"(${MDC(LogKeys.EXECUTOR_MEMORY_SIZE, sc.executorMemory)}m). 
If failure occurs, " + + log"try setting executor-memory ${MDC(LogKeys.MEMORY_THRESHOLD_SIZE, memThreshold)}m " + + log"(or larger).") } } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala index d8c0f8711cabc..4857c9b00f421 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/DataValidators.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.util import org.apache.spark.annotation.Since import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{COUNT, RANGE} +import org.apache.spark.internal.LogKeys.{COUNT, RANGE} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index 10adf10690b77..e23423e4c004e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -23,7 +23,7 @@ import scala.reflect.ClassTag import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.annotation.Since import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.OPTIMIZER_CLASS_NAME +import org.apache.spark.internal.LogKeys.OPTIMIZER_CLASS_NAME import org.apache.spark.ml.linalg.{MatrixUDT => MLMatrixUDT, VectorUDT => MLVectorUDT} import org.apache.spark.ml.util.Instrumentation import org.apache.spark.mllib.linalg._ diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala index 0a9347b87977e..384fcf6ceb859 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/VectorIndexerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.ml.feature import org.apache.spark.SparkException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CATEGORICAL_FEATURES, MAX_CATEGORIES} +import org.apache.spark.internal.LogKeys.{CATEGORICAL_FEATURES, MAX_CATEGORIES} import org.apache.spark.ml.attribute._ import org.apache.spark.ml.linalg.{SparseVector, Vector, Vectors} import org.apache.spark.ml.param.ParamsSuite diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index ff85831a7a6b2..94abeaf0804ed 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -28,7 +28,8 @@ import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.TrueFileFilter import org.apache.spark._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{RMSE, TEST_SIZE, TRAINING_SIZE} import org.apache.spark.ml.linalg.{BLAS, Vectors} import org.apache.spark.ml.recommendation.ALS._ import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils} @@ -353,8 +354,8 @@ class ALSSuite extends MLTest with DefaultReadWriteTest with Logging { } } } - logInfo(s"Generated an explicit feedback dataset with ${training.size} ratings for training " + - s"and ${test.size} for test.") + logInfo(log"Generated an 
explicit feedback dataset with ${MDC(TRAINING_SIZE, training.size)} " + + log"ratings for training and ${MDC(TEST_SIZE, test.size)} for test.") (sc.parallelize(training.toSeq, 2), sc.parallelize(test.toSeq, 2)) } @@ -485,7 +486,7 @@ class ALSSuite extends MLTest with DefaultReadWriteTest with Logging { val mse = errorSquares.sum / errorSquares.length math.sqrt(mse) } - logInfo(s"Test RMSE is $rmse.") + logInfo(log"Test RMSE is ${MDC(RMSE, rmse)}.") assert(rmse < targetRMSE) } @@ -1246,8 +1247,8 @@ object ALSSuite extends Logging { } } } - logInfo(s"Generated an implicit feedback dataset with ${training.size} ratings for training " + - s"and ${test.size} for test.") + logInfo(log"Generated an implicit feedback dataset with ${MDC(TRAINING_SIZE, training.size)}" + + log" ratings for training and ${MDC(TEST_SIZE, test.size)} for test.") (sc.parallelize(training.toSeq, 2), sc.parallelize(test.toSeq, 2)) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index a25a19e2d354b..135d7e26c6d8c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -25,6 +25,8 @@ import breeze.linalg.{DenseMatrix => BDM} import org.json4s.jackson.JsonMethods.{parse => parseJson} import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} +import org.apache.spark.internal.LogKeys.MALFORMATTED_STRING +import org.apache.spark.internal.MDC import org.apache.spark.internal.config.Kryo._ import org.apache.spark.ml.{linalg => newlinalg} import org.apache.spark.mllib.util.TestingUtils._ @@ -226,7 +228,7 @@ class VectorsSuite extends SparkFunSuite { malformatted.foreach { s => intercept[SparkException] { Vectors.parse(s) - logInfo(s"Didn't detect malformatted string $s.") + logInfo(log"Didn't detect malformatted string ${MDC(MALFORMATTED_STRING, s)}.") } } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala index f5c6abfc66f27..e654eac83649c 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/GradientBoostedTreesSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.mllib.tree import org.apache.spark.SparkFunSuite import org.apache.spark.internal.{MDC, MessageWithContext} -import org.apache.spark.internal.LogKey.{LEARNING_RATE, NUM_ITERATIONS, SUBSAMPLING_RATE} +import org.apache.spark.internal.LogKeys.{LEARNING_RATE, NUM_ITERATIONS, SUBSAMPLING_RATE} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.configuration.{BoostingStrategy, Strategy} import org.apache.spark.mllib.tree.configuration.Algo._ diff --git a/pom.xml b/pom.xml index bf8d4f1b417d5..67ff14070b8bb 100644 --- a/pom.xml +++ b/pom.xml @@ -115,10 +115,10 @@ UTF-8 17 ${java.version} - 3.9.6 + 3.9.8 3.2.0 spark - 9.6 + 9.7 2.0.13 2.22.1 @@ -128,22 +128,19 @@ 3.11.4 ${hadoop.version} 3.9.2 - 5.6.0 + 5.7.0 org.apache.hive core - 2.3.9 - 2.3.9 - - 2.3 + 2.3.10 3.7.0 10.16.1.1 - 1.13.1 - 2.0.0 + 1.14.1 + 2.0.1 shaded-protobuf - 11.0.20 + 11.0.21 5.0.0 4.0.1 @@ -154,7 +151,7 @@ If you change codahale.metrics.version, you also need to change the link to metrics.dropwizard.io in docs/monitoring.md. 
--> - 4.2.25 + 4.2.26 1.11.3 1.12.0 @@ -172,10 +169,10 @@ 3.2.2 4.4 - 2.13.13 + 2.13.14 2.13 2.2.0 - 4.8.1 + 4.9.1 false 2.16.2 @@ -183,16 +180,16 @@ true true 1.9.13 - 2.17.0 - 2.17.0 + 2.17.1 + 2.17.1 2.3.1 3.0.2 1.1.10.5 3.0.3 - 1.16.1 - 1.26.1 + 1.17.0 + 1.26.2 2.16.1 - + 2.6 3.14.0 @@ -206,31 +203,34 @@ 3.5.2 3.0.0 2.2.11 - 0.12.0 + 0.16.0 4.13.1 1.1 - 4.17.0 - 4.17.0 + 4.21.0 + 4.21.0 3.1.0 1.1.0 - 1.6.0 + 1.8.0 1.78 1.13.0 - 5.0.1 - 4.1.108.Final + 6.0.0 + 4.1.110.Final 2.0.65.Final - 72.1 + 75.1 + 5.9.3 + 1.9.3 + 0.11.1 - 15.0.2 - 3.0.0-M1 + 16.1.0 + 3.0.0-M2 org.fusesource.leveldbjni - 6.12.0 + 6.13.0 ${java.home} @@ -294,6 +294,9 @@ 1.1.3 6.0.53 + + 3.0-9 + 128m yyyy-MM-dd HH:mm:ss z @@ -301,6 +304,7 @@ -XX:+IgnoreUnrecognizedVMOptions + --add-modules=jdk.incubator.vector --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED @@ -319,11 +323,11 @@ -Dio.netty.tryReflectionSetAccessible=true 2.7.12 - 8.3.0 + 8.4.0 42.7.3 11.5.9.0 - 9.4.1.jre8 - 23.3.0.23.09 + 12.6.2.jre11 + 23.4.0.24.05 @@ -411,17 +415,17 @@ org.scalatestplus - scalacheck-1-17_${scala.binary.version} + scalacheck-1-18_${scala.binary.version} test org.scalatestplus - mockito-5-10_${scala.binary.version} + mockito-5-12_${scala.binary.version} test org.scalatestplus - selenium-4-17_${scala.binary.version} + selenium-4-21_${scala.binary.version} test @@ -481,7 +485,7 @@ org.apache.xbean xbean-asm9-shaded - 4.24 + 4.25 @@ -826,7 +830,7 @@ org.roaringbitmap RoaringBitmap - 1.0.5 + 1.1.0 @@ -1016,11 +1020,6 @@ jackson-module-scala_${scala.binary.version} ${fasterxml.jackson.version} - - com.fasterxml.jackson.module - jackson-module-jaxb-annotations - ${fasterxml.jackson.version} - org.apache.ws.xmlschema xmlschema-core @@ -1079,15 +1078,6 @@ org.glassfish.jersey.test-framework.providers jersey-test-framework-provider-simple ${jersey.version} - - - - org.junit.jupiter - * - - test @@ -1130,7 +1120,7 @@ org.scala-lang.modules scala-xml_${scala.binary.version} - 2.2.0 + 2.3.0 org.scala-lang @@ -1156,7 +1146,7 @@ org.scala-lang.modules scala-parser-combinators_${scala.binary.version} - 2.3.0 + 2.4.0 jline @@ -1166,25 +1156,25 @@ org.scalatest scalatest_${scala.binary.version} - 3.2.18 + 3.2.19 test org.scalatestplus - scalacheck-1-17_${scala.binary.version} - 3.2.18.0 + scalacheck-1-18_${scala.binary.version} + 3.2.19.0 test org.scalatestplus - mockito-5-10_${scala.binary.version} - 3.2.18.0 + mockito-5-12_${scala.binary.version} + 3.2.19.0 test org.scalatestplus - selenium-4-17_${scala.binary.version} - 3.2.18.0 + selenium-4-21_${scala.binary.version} + 3.2.19.0 test @@ -1196,59 +1186,85 @@ org.mockito mockito-core - 5.10.0 + 5.12.0 test net.bytebuddy byte-buddy - 1.14.11 + 1.14.17 test net.bytebuddy byte-buddy-agent - 1.14.11 + 1.14.17 test org.jmock jmock-junit5 test - - - org.junit.jupiter - * - - - org.junit.platform - * - - 2.13.1 org.scalacheck scalacheck_${scala.binary.version} - 1.17.0 + 1.18.0 test org.junit.jupiter junit-jupiter - 5.9.3 + ${junit-jupiter.version} + test + + + org.junit.jupiter + junit-jupiter-api + ${junit-jupiter.version} + test + + + org.junit.jupiter + junit-jupiter-engine + ${junit-jupiter.version} + test + + + org.junit.jupiter + junit-jupiter-params + ${junit-jupiter.version} + test + + + org.junit.platform + junit-platform-commons + ${junit-platform.version} + test + + + org.junit.platform + junit-platform-engine + ${junit-platform.version} + test + + + org.junit.platform + junit-platform-launcher + 
${junit-platform.version} test net.aichler jupiter-interface - 0.11.1 + ${sbt-jupiter-interface.version} test com.github.docker-java docker-java - 3.3.5 + 3.3.6 test @@ -1268,7 +1284,7 @@ com.github.docker-java docker-java-transport-zerodep - 3.3.5 + 3.3.6 test @@ -1349,13 +1365,6 @@ org.apache.curator curator-test ${curator.version} - - - - org.junit.jupiter - junit-jupiter-api - - test @@ -1755,83 +1764,6 @@ ${yarn.version} test - - org.apache.hadoop - hadoop-yarn-server-web-proxy - ${yarn.version} - ${hadoop.deps.scope} - - - org.apache.hadoop - hadoop-yarn-server-common - - - org.apache.hadoop - hadoop-yarn-common - - - org.apache.hadoop - hadoop-yarn-api - - - org.bouncycastle - bcprov-jdk15on - - - org.bouncycastle - bcpkix-jdk15on - - - org.fusesource.leveldbjni - leveldbjni-all - - - asm - asm - - - org.ow2.asm - asm - - - org.jboss.netty - netty - - - javax.servlet - servlet-api - - - javax.servlet - javax.servlet-api - - - commons-logging - commons-logging - - - com.sun.jersey - * - - - com.sun.jersey.jersey-test-framework - * - - - com.sun.jersey.contribs - * - - - - com.zaxxer - HikariCP-java7 - - - com.microsoft.sqlserver - mssql-jdbc - - - org.apache.hadoop hadoop-yarn-client @@ -1902,6 +1834,10 @@ io.netty netty-transport-native-epoll + + io.netty + netty-tcnative-boringssl-static + com.github.spotbugs spotbugs-annotations @@ -2095,7 +2031,6 @@ commons-logging commons-logging - + org.eclipse.jetty.aggregate jetty-all - org.apache.logging.log4j * @@ -2122,10 +2056,9 @@ - org.apache.hive + ${hive.group} hive-storage-api - @@ -2244,7 +2177,6 @@ org.json json - ${hive.group} @@ -2259,7 +2191,6 @@ org.apache.calcite.avatica avatica - org.apache.logging.log4j * @@ -2277,10 +2208,9 @@ janino - org.pentaho - pentaho-aggdesigner-algorithm + net.hydromatic + aggdesigner-algorithm - @@ -2348,6 +2278,10 @@ org.codehaus.groovy groovy-all + + com.lmax + disruptor + @@ -2389,7 +2323,6 @@ org.slf4j slf4j-log4j12 - org.apache.hbase @@ -2399,7 +2332,10 @@ co.cask.tephra * - + + com.jolbox + bonecp + @@ -2457,12 +2393,14 @@ org.codehaus.groovy groovy-all - ${hive.group} hive-service-rpc - + org.apache.parquet parquet-hadoop-bundle @@ -2476,7 +2414,6 @@ tomcat jasper-runtime - @@ -2553,30 +2490,28 @@ org.codehaus.groovy groovy-all - org.apache.logging.log4j log4j-slf4j-impl - - org.apache.hive + ${hive.group} hive-llap-common - ${hive23.version} + ${hive.version} ${hive.deps.scope} - org.apache.hive + ${hive.group} hive-common - org.apache.hive + ${hive.group} hive-serde @@ -2587,21 +2522,21 @@ - org.apache.hive + ${hive.group} hive-llap-client - ${hive23.version} + ${hive.version} test - org.apache.hive + ${hive.group} hive-common - org.apache.hive + ${hive.group} hive-serde - org.apache.hive + ${hive.group} hive-llap-common @@ -2662,7 +2597,7 @@ hadoop-client-api - org.apache.hive + ${hive.group} hive-storage-api @@ -2670,7 +2605,7 @@ io.airlift aircompressor - 0.26 + 0.27 org.apache.orc @@ -2692,7 +2627,7 @@ orc-core - org.apache.hive + ${hive.group} hive-storage-api @@ -2788,6 +2723,10 @@ org.slf4j slf4j-api + + javax.annotation + javax.annotation-api + @@ -2877,7 +2816,7 @@ 2.9.1 - org.apache.hive + ${hive.group} hive-storage-api ${hive.storage.version} ${hive.storage.scope} @@ -2967,6 +2906,14 @@ ${java.version} test provided + + + org.jline.terminal.impl.ffm.* + @@ -2987,7 +2934,7 @@ org.codehaus.mojo extra-enforcer-rules - 1.7.0 + 1.8.0 @@ -3110,7 +3057,6 @@ maven-compiler-plugin 3.13.0 - ${java.version} true true @@ -3361,12 +3307,15 @@ org.apache.maven.plugins maven-install-plugin - 
3.1.1 + 3.1.2 org.apache.maven.plugins maven-deploy-plugin - 3.1.1 + 3.1.2 + + 3 + org.apache.maven.plugins @@ -3535,7 +3484,7 @@ --> com.puppycrawl.tools checkstyle - 10.14.0 + 10.17.0 @@ -3597,7 +3546,7 @@ org.antipathy mvn-scalafmt_${scala.binary.version} - 1.1.1684076452.9f83818 + 1.1.1713302731.c3d0074 ${scalafmt.validateOnly} ${scalafmt.skip} diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 0783b6a611b8f..c684e2e30f7f1 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -15,7 +15,8 @@ * limitations under the License. */ -import com.typesafe.tools.mima.core._ +import com.typesafe.tools.mima.core +import com.typesafe.tools.mima.core.* /** * Additional excludes for checking of Spark's binary compatibility. @@ -93,7 +94,9 @@ object MimaExcludes { ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.api.python.TestWritable"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.api.python.TestWritable$"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.api.python.WriteInputFormatTestDataGenerator"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.api.python.WriteInputFormatTestDataGenerator$") + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.api.python.WriteInputFormatTestDataGenerator$"), + // SPARK-47764: Cleanup shuffle dependencies based on ShuffleCleanupMode + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.shuffle.MigratableResolver.addShuffleToSkip") ) // Default exclude rules diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index bcaa51ec30ff6..d1b0ed953e30b 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -255,16 +255,18 @@ object SparkBuild extends PomBuild { } ) + val noLintOnCompile = sys.env.contains("NOLINT_ON_COMPILE") && + !sys.env.get("NOLINT_ON_COMPILE").contains("false") lazy val sharedSettings = sparkGenjavadocSettings ++ compilerWarningSettings ++ - (if (sys.env.contains("NOLINT_ON_COMPILE")) Nil else enableScalaStyle) ++ Seq( + (if (noLintOnCompile) Nil else enableScalaStyle) ++ Seq( (Compile / exportJars) := true, (Test / exportJars) := false, javaHome := sys.env.get("JAVA_HOME") .orElse(sys.props.get("java.home").map { p => new File(p).getParentFile().getAbsolutePath() }) .map(file), publishMavenStyle := true, - unidocGenjavadocVersion := "0.18", + unidocGenjavadocVersion := "0.19", // Override SBT's default resolvers: resolvers := Seq( @@ -294,13 +296,8 @@ object SparkBuild extends PomBuild { publishLocal := Seq((MavenCompile / publishLocal), (SbtCompile / publishLocal)).dependOn.value, javaOptions ++= { - val versionParts = System.getProperty("java.version").split("[+.\\-]+", 3) - val major = versionParts(0).toInt - if (major >= 21) { - Seq("--add-modules=jdk.incubator.vector", "-Dforeign.restricted=warn") - } else { - Seq("--add-modules=jdk.incubator.vector,jdk.incubator.foreign", "-Dforeign.restricted=warn") - } + // for `dev.ludovic.netlib.blas` which implements such hardware-accelerated BLAS operations + Seq("--add-modules=jdk.incubator.vector") }, (Compile / doc / javacOptions) ++= { @@ -952,7 +949,7 @@ object Unsafe { object DockerIntegrationTests { // This serves to override the override specified in DependencyOverrides: lazy val settings = Seq( - dependencyOverrides += "com.google.guava" % "guava" % "33.0.0-jre" + dependencyOverrides += "com.google.guava" % "guava" % "33.1.0-jre" ) } diff --git a/project/plugins.sbt b/project/plugins.sbt index deb06738c642b..98170afd84759 100644 --- 
a/project/plugins.sbt +++ b/project/plugins.sbt @@ -17,13 +17,12 @@ addSbtPlugin("software.purpledragon" % "sbt-checkstyle-plugin" % "4.0.1") -// sbt-checkstyle-plugin uses an old version of checkstyle. Match it to Maven's. // If you are changing the dependency setting for checkstyle plugin, // please check pom.xml in the root of the source tree too. -libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "10.14.0" +libraryDependencies += "com.puppycrawl.tools" % "checkstyle" % "10.17.0" -// checkstyle uses guava 31.0.1-jre. -libraryDependencies += "com.google.guava" % "guava" % "31.0.1-jre" +// checkstyle uses guava 33.1.0-jre. +libraryDependencies += "com.google.guava" % "guava" % "33.1.0-jre" addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.2.0") @@ -37,9 +36,9 @@ addSbtPlugin("com.github.sbt" % "sbt-unidoc" % "0.5.0") addSbtPlugin("io.spray" % "sbt-revolver" % "0.10.0") -libraryDependencies += "org.ow2.asm" % "asm" % "9.6" +libraryDependencies += "org.ow2.asm" % "asm" % "9.7" -libraryDependencies += "org.ow2.asm" % "asm-commons" % "9.6" +libraryDependencies += "org.ow2.asm" % "asm-commons" % "9.7" addSbtPlugin("com.simplytyped" % "sbt-antlr4" % "0.8.3") diff --git a/python/MANIFEST.in b/python/MANIFEST.in index 862d62b1d3b29..45c9dca8b474a 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -14,7 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -global-exclude *.py[cod] __pycache__ .DS_Store +# Reference: https://setuptools.pypa.io/en/latest/userguide/miscellaneous.html + +recursive-include pyspark *.pyi py.typed *.json recursive-include deps/jars *.jar graft deps/bin recursive-include deps/sbin spark-config.sh spark-daemon.sh start-history-server.sh stop-history-server.sh @@ -22,5 +24,8 @@ recursive-include deps/data *.data *.txt recursive-include deps/licenses *.txt recursive-include deps/examples *.py recursive-include lib *.zip -recursive-include pyspark *.pyi py.typed include README.md + +# Note that these commands are processed in the order they appear, so keep +# this exclude at the end. +global-exclude *.py[cod] __pycache__ .DS_Store diff --git a/python/docs/source/development/contributing.rst b/python/docs/source/development/contributing.rst index 94e485c706e39..d7e87c4de390e 100644 --- a/python/docs/source/development/contributing.rst +++ b/python/docs/source/development/contributing.rst @@ -129,7 +129,7 @@ If you are using Conda, the development environment can be set as follows. .. code-block:: bash - # Python 3.8+ is required + # Python 3.9+ is required conda create --name pyspark-dev-env python=3.9 conda activate pyspark-dev-env pip install --upgrade -r dev/requirements.txt @@ -145,7 +145,7 @@ Now, you can start developing and `running the tests `_. pip ~~~ -With Python 3.8+, pip can be used as below to install and set up the development environment. +With Python 3.9+, pip can be used as below to install and set up the development environment. .. code-block:: bash @@ -248,7 +248,7 @@ Usage 1. Check if an appropriate error class already exists in `Error classes in PySpark `_. If true, use the error class and skip to step 3. -2. Add a new class to `error_classes.py `_; keep in mind the invariants below. +2. Add a new class to `error-conditions.json `_; keep in mind the invariants below. 3. Check if the exception type already extends `PySparkException`. If true, skip to step 5. 4. Mix `PySparkException` into the exception. 
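As a rough sketch of the steps above (the condition name, argument names, and helper function are made up for illustration and are not taken from the real `error-conditions.json`; the snake_case keyword arguments ``error_class`` and ``message_parameters`` are assumed to match this branch's ``PySparkException`` signature and should be verified), a new entry plus the matching raise site could look something like this:

.. code-block:: python

    # Hypothetical entry added to python/pyspark/errors/error-conditions.json:
    #
    #   "EXAMPLE_ARGUMENT_NOT_POSITIVE": {
    #     "message": [
    #       "Argument `<arg_name>` must be positive, got <arg_value>."
    #     ]
    #   }

    from pyspark.errors import PySparkValueError

    def set_num_rows(num_rows: int) -> None:
        if num_rows <= 0:
            # PySparkValueError already mixes in PySparkException, so the error
            # condition and message parameters are resolved against the JSON file.
            raise PySparkValueError(
                error_class="EXAMPLE_ARGUMENT_NOT_POSITIVE",
                message_parameters={"arg_name": "num_rows", "arg_value": str(num_rows)},
            )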
@@ -266,7 +266,7 @@ Throw with arbitrary error message: **After** -`error_classes.py` +`error-conditions.json` .. code-block:: python diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 6a91a06a1c29d..6cc68cd46b117 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -30,7 +30,7 @@ and building from the source. Python Versions Supported ------------------------- -Python 3.8 and above. +Python 3.9 and above. Using PyPI @@ -53,6 +53,9 @@ If you want to install extra dependencies for a specific component, you can inst # Spark Connect pip install pyspark[connect] + +See :ref:`optional-dependencies` for more detail about extra dependencies. + For PySpark with/without a specific Hadoop version, you can install it by using ``PYSPARK_HADOOP_VERSION`` environment variables as below: .. code-block:: bash @@ -121,7 +124,7 @@ the same session as pyspark (you can install in several steps too). .. code-block:: bash - conda install -c conda-forge pyspark # can also add "python=3.8 some_package [etc.]" here + conda install -c conda-forge pyspark # can also add "python=3.9 some_package [etc.]" here Note that `PySpark for conda `_ is maintained separately by the community; while new versions generally get packaged quickly, the @@ -142,7 +145,7 @@ PySpark is included in the distributions available at the `Apache Spark website You can download a distribution you want from the site. After that, uncompress the tar file into the directory where you want to install Spark, for example, as below: -.. parsed-literal:: +.. code-block:: bash tar xzvf spark-\ |release|\-bin-hadoop3.tgz @@ -150,7 +153,7 @@ Ensure the ``SPARK_HOME`` environment variable points to the directory where the Update ``PYTHONPATH`` environment variable such that it can find the PySpark and Py4J under ``SPARK_HOME/python/lib``. One example of doing this is shown below: -.. parsed-literal:: +.. code-block:: bash cd spark-\ |release|\-bin-hadoop3 export SPARK_HOME=`pwd` @@ -165,16 +168,110 @@ To install PySpark from source, refer to |building_spark|_. Dependencies ------------ -========================== ========================= ====================================================================================== -Package Supported version Note -========================== ========================= ====================================================================================== -`py4j` >=0.10.9.7 Required -`pandas` >=1.4.4 Required for pandas API on Spark and Spark Connect; Optional for Spark SQL -`pyarrow` >=10.0.0 Required for pandas API on Spark and Spark Connect; Optional for Spark SQL -`numpy` >=1.21 Required for pandas API on Spark and MLLib DataFrame-based API; Optional for Spark SQL -`grpcio` >=1.62.0 Required for Spark Connect -`grpcio-status` >=1.62.0 Required for Spark Connect -`googleapis-common-protos` >=1.56.4 Required for Spark Connect -========================== ========================= ====================================================================================== + +Required dependencies +~~~~~~~~~~~~~~~~~~~~~ + +PySpark requires the following dependencies. 
+ +========================== ========================= ============================= +Package Supported version Note +========================== ========================= ============================= +`py4j` >=0.10.9.7 Required to interact with JVM +========================== ========================= ============================= + +Additional libraries that enhance functionality but are not included in the installation packages: + +- **memory-profiler**: Used for PySpark UDF memory profiling, ``spark.profile.show(...)`` and ``spark.sql.pyspark.udf.profiler``. Note that PySpark requires Java 17 or later with ``JAVA_HOME`` properly set and refer to |downloading|_. + + +.. _optional-dependencies: + +Optional dependencies +~~~~~~~~~~~~~~~~~~~~~ + +PySpark has several optional dependencies that enhance its functionality for specific modules. +These dependencies are only required for certain features and are not necessary for the basic functionality of PySpark. +If these optional dependencies are not installed, PySpark will function correctly for basic operations but will raise an ``ImportError`` +when you try to use features that require these dependencies. + +Spark Connect +^^^^^^^^^^^^^ + +Installable with ``pip install "pyspark[connect]"``. + +========================== ================= ========================== +Package Supported version Note +========================== ================= ========================== +`pandas` >=2.0.0 Required for Spark Connect +`pyarrow` >=10.0.0 Required for Spark Connect +`grpcio` >=1.62.0 Required for Spark Connect +`grpcio-status` >=1.62.0 Required for Spark Connect +`googleapis-common-protos` >=1.56.4 Required for Spark Connect +`graphviz` >=0.20 Optional for Spark Connect +========================== ================= ========================== + +Spark SQL +^^^^^^^^^ + +Installable with ``pip install "pyspark[sql]"``. + +========= ================= ====================== +Package Supported version Note +========= ================= ====================== +`pandas` >=2.0.0 Required for Spark SQL +`pyarrow` >=10.0.0 Required for Spark SQL +========= ================= ====================== + + +Pandas API on Spark +^^^^^^^^^^^^^^^^^^^ + +Installable with ``pip install "pyspark[pandas_on_spark]"``. + +========= ================= ================================ +Package Supported version Note +========= ================= ================================ +`pandas` >=2.0.0 Required for Pandas API on Spark +`pyarrow` >=10.0.0 Required for Pandas API on Spark +========= ================= ================================ + +Additional libraries that enhance functionality but are not included in the installation packages: + +- **mlflow**: Required for ``pyspark.pandas.mlflow``. +- **plotly**: Provide plotting for visualization. It is recommended using **plotly** over **matplotlib**. +- **matplotlib**: Provide plotting for visualization. The default is **plotly**. + + +MLlib DataFrame-based API +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Installable with ``pip install "pyspark[ml]"``. + +======= ================= ====================================== +Package Supported version Note +======= ================= ====================================== +`numpy` >=1.21 Required for MLlib DataFrame-based API +======= ================= ====================================== + +Additional libraries that enhance functionality but are not included in the installation packages: + +- **scipy**: Required for SciPy integration. 
+- **scikit-learn**: Required for implementing machine learning algorithms. +- **torch**: Required for machine learning model training. +- **torchvision**: Required for supporting image and video processing. +- **torcheval**: Required for facilitating model evaluation metrics. +- **deepspeed**: Required for providing high-performance model training optimizations. Installable on non-Darwin systems. + +MLlib +^^^^^ + +Installable with ``pip install "pyspark[mllib]"``. + +======= ================= ================== +Package Supported version Note +======= ================= ================== +`numpy` >=1.21 Required for MLlib +======= ================= ================== diff --git a/python/docs/source/migration_guide/pyspark_upgrade.rst b/python/docs/source/migration_guide/pyspark_upgrade.rst index 36c1eacaf2c7b..5292530420025 100644 --- a/python/docs/source/migration_guide/pyspark_upgrade.rst +++ b/python/docs/source/migration_guide/pyspark_upgrade.rst @@ -22,8 +22,8 @@ Upgrading PySpark Upgrading from PySpark 3.5 to 4.0 --------------------------------- -* In Spark 4.0, it is recommended to use Pandas version 2.0.0 or above with PySpark for optimal compatibility. -* In Spark 4.0, the minimum supported version for Pandas has been raised from 1.0.5 to 1.4.4 in PySpark. +* In Spark 4.0, Python 3.8 support was dropped in PySpark. +* In Spark 4.0, the minimum supported version for Pandas has been raised from 1.0.5 to 2.0.0 in PySpark. * In Spark 4.0, the minimum supported version for Numpy has been raised from 1.15 to 1.21 in PySpark. * In Spark 4.0, the minimum supported version for PyArrow has been raised from 4.0.0 to 10.0.0 in PySpark. * In Spark 4.0, ``Int64Index`` and ``Float64Index`` have been removed from pandas API on Spark, ``Index`` should be used directly. @@ -71,7 +71,9 @@ Upgrading from PySpark 3.5 to 4.0 * In Spark 4.0, when applying ``astype`` to a decimal type object, the existing missing value is changed to ``True`` instead of ``False`` from Pandas API on Spark. * In Spark 4.0, ``pyspark.testing.assertPandasOnSparkEqual`` has been removed from Pandas API on Spark, use ``pyspark.pandas.testing.assert_frame_equal`` instead. * In Spark 4.0, the aliases ``Y``, ``M``, ``H``, ``T``, ``S`` have been deprecated from Pandas API on Spark, use ``YE``, ``ME``, ``h``, ``min``, ``s`` instead respectively. - +* In Spark 4.0, the schema of a map column is inferred by merging the schemas of all pairs in the map. To restore the previous behavior where the schema is only inferred from the first non-null pair, you can set ``spark.sql.pyspark.legacy.inferMapTypeFromFirstPair.enabled`` to ``true``. +* In Spark 4.0, `compute.ops_on_diff_frames` is on by default. To restore the previous behavior, set `compute.ops_on_diff_frames` to `false`. +* In Spark 4.0, the data type `YearMonthIntervalType` in ``DataFrame.collect`` no longer returns the underlying integers. To restore the previous behavior, set ``PYSPARK_YM_INTERVAL_LEGACY`` environment variable to ``1``. 
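For the three opt-outs above, a minimal sketch of how they can be applied in a PySpark 4.0 session (only apply the ones your workload actually needs; the configuration, option, and environment variable names are the ones listed in the notes above):

.. code-block:: python

    import os

    import pyspark.pandas as ps
    from pyspark.sql import SparkSession

    # Environment variable controlling how YearMonthIntervalType values are
    # converted by DataFrame.collect.
    os.environ["PYSPARK_YM_INTERVAL_LEGACY"] = "1"

    spark = SparkSession.builder.getOrCreate()

    # Infer map-column schemas from the first non-null pair only, as before 4.0.
    spark.conf.set("spark.sql.pyspark.legacy.inferMapTypeFromFirstPair.enabled", "true")

    # Disallow operations between pandas-on-Spark objects backed by different
    # DataFrames, as before 4.0.
    ps.set_option("compute.ops_on_diff_frames", False)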
Upgrading from PySpark 3.3 to 3.4 diff --git a/python/docs/source/reference/pyspark.sql/dataframe.rst b/python/docs/source/reference/pyspark.sql/dataframe.rst index b69a2771b04fc..d0196baa7a05b 100644 --- a/python/docs/source/reference/pyspark.sql/dataframe.rst +++ b/python/docs/source/reference/pyspark.sql/dataframe.rst @@ -55,6 +55,7 @@ DataFrame DataFrame.dropna DataFrame.dtypes DataFrame.exceptAll + DataFrame.executionInfo DataFrame.explain DataFrame.fillna DataFrame.filter @@ -109,6 +110,7 @@ DataFrame DataFrame.tail DataFrame.take DataFrame.to + DataFrame.toArrow DataFrame.toDF DataFrame.toJSON DataFrame.toLocalIterator diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst index e9e2c44767ff1..e0895959e893b 100644 --- a/python/docs/source/reference/pyspark.sql/functions.rst +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -143,6 +143,7 @@ Mathematical Functions try_add try_divide try_multiply + try_remainder try_subtract unhex width_bucket @@ -280,6 +281,8 @@ Date and Timestamp Functions quarter second session_window + timestamp_add + timestamp_diff timestamp_micros timestamp_millis timestamp_seconds @@ -532,11 +535,24 @@ JSON Functions json_array_length json_object_keys json_tuple - parse_json schema_of_json to_json +VARIANT Functions +----------------- +.. autosummary:: + :toctree: api/ + + is_variant_null + parse_json + schema_of_variant + schema_of_variant_agg + try_variant_get + variant_get + try_parse_json + + XML Functions -------------- .. autosummary:: diff --git a/python/docs/source/reference/pyspark.sql/variant_val.rst b/python/docs/source/reference/pyspark.sql/variant_val.rst index a7f592c18e3a3..8630ae8aace14 100644 --- a/python/docs/source/reference/pyspark.sql/variant_val.rst +++ b/python/docs/source/reference/pyspark.sql/variant_val.rst @@ -25,3 +25,4 @@ VariantVal :toctree: api/ VariantVal.toPython + VariantVal.toJson diff --git a/python/docs/source/user_guide/pandas_on_spark/typehints.rst b/python/docs/source/user_guide/pandas_on_spark/typehints.rst index 1405baa39c16e..23126664d78a6 100644 --- a/python/docs/source/user_guide/pandas_on_spark/typehints.rst +++ b/python/docs/source/user_guide/pandas_on_spark/typehints.rst @@ -62,7 +62,7 @@ it as a Spark schema. As an example, you can specify the return type hint as bel Notice that the function ``pandas_div`` actually takes and outputs a pandas DataFrame instead of pandas-on-Spark :class:`DataFrame`. So, technically the correct types should be of pandas. -With Python 3.8+, you can specify the type hints by using pandas instances as follows: +With Python 3.9+, you can specify the type hints by using pandas instances as follows: .. code-block:: python diff --git a/python/docs/source/user_guide/sql/arrow_pandas.rst b/python/docs/source/user_guide/sql/arrow_pandas.rst index 039671608b6d9..fde40140110f9 100644 --- a/python/docs/source/user_guide/sql/arrow_pandas.rst +++ b/python/docs/source/user_guide/sql/arrow_pandas.rst @@ -39,6 +39,22 @@ is installed and available on all cluster nodes. You can install it using pip or conda from the conda-forge channel. See PyArrow `installation `_ for details. +Conversion to/from Arrow Table +------------------------------ + +From Spark 4.0, you can create a Spark DataFrame from a PyArrow Table with +:meth:`SparkSession.createDataFrame`, and you can convert a Spark DataFrame to a PyArrow Table +with :meth:`DataFrame.toArrow`. + +.. 
literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py + :language: python + :lines: 37-52 + :dedent: 4 + +Note that :meth:`DataFrame.toArrow` results in the collection of all records in the DataFrame to +the driver program and should be done on a small subset of the data. Not all Spark and Arrow data +types are currently supported and an error can be raised if a column has an unsupported type. + Enabling for Conversion to/from Pandas -------------------------------------- @@ -53,7 +69,7 @@ This can be controlled by ``spark.sql.execution.arrow.pyspark.fallback.enabled`` .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 37-52 + :lines: 56-71 :dedent: 4 Using the above optimizations with Arrow will produce the same results as when Arrow is not @@ -90,7 +106,7 @@ specify the type hints of ``pandas.Series`` and ``pandas.DataFrame`` as below: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 56-80 + :lines: 75-99 :dedent: 4 In the following sections, it describes the combinations of the supported type hints. For simplicity, @@ -113,7 +129,7 @@ The following example shows how to create this Pandas UDF that computes the prod .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 84-114 + :lines: 103-133 :dedent: 4 For detailed usage, please see :func:`pandas_udf`. @@ -152,7 +168,7 @@ The following example shows how to create this Pandas UDF: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 118-140 + :lines: 137-159 :dedent: 4 For detailed usage, please see :func:`pandas_udf`. @@ -174,7 +190,7 @@ The following example shows how to create this Pandas UDF: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 144-167 + :lines: 163-186 :dedent: 4 For detailed usage, please see :func:`pandas_udf`. @@ -205,7 +221,7 @@ and window operations: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 171-212 + :lines: 190-231 :dedent: 4 .. currentmodule:: pyspark.sql.functions @@ -270,7 +286,7 @@ in the group. .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 216-234 + :lines: 235-253 :dedent: 4 For detailed usage, please see please see :meth:`GroupedData.applyInPandas` @@ -288,7 +304,7 @@ The following example shows how to use :meth:`DataFrame.mapInPandas`: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 238-249 + :lines: 257-268 :dedent: 4 For detailed usage, please see :meth:`DataFrame.mapInPandas`. @@ -327,7 +343,7 @@ The following example shows how to use ``DataFrame.groupby().cogroup().applyInPa .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 253-275 + :lines: 272-294 :dedent: 4 @@ -339,9 +355,9 @@ Arrow Python UDFs Arrow Python UDFs are user defined functions that are executed row-by-row, utilizing Arrow for efficient batch data transfer and serialization. To define an Arrow Python UDF, you can use the :meth:`udf` decorator or wrap the function with the :meth:`udf` method, ensuring the ``useArrow`` parameter is set to True. Additionally, you can enable Arrow -optimization for Python UDFs throughout the entire SparkSession by setting the Spark configuration ``spark.sql -.execution.pythonUDF.arrow.enabled`` to true. 
It's important to note that the Spark configuration takes effect only -when ``useArrow`` is either not set or set to None. +optimization for Python UDFs throughout the entire SparkSession by setting the Spark configuration +``spark.sql.execution.pythonUDF.arrow.enabled`` to true. It's important to note that the Spark configuration takes +effect only when ``useArrow`` is either not set or set to None. The type hints for Arrow Python UDFs should be specified in the same way as for default, pickled Python UDFs. @@ -349,7 +365,7 @@ Here's an example that demonstrates the usage of both a default, pickled Python .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 279-297 + :lines: 298-316 :dedent: 4 Compared to the default, pickled Python UDFs, Arrow Python UDFs provide a more coherent type coercion mechanism. UDF @@ -400,11 +416,15 @@ and each column will be converted to the Spark session time zone then localized zone, which removes the time zone and displays values as local time. This will occur when calling :meth:`DataFrame.toPandas()` or ``pandas_udf`` with timestamp columns. -When timestamp data is transferred from Pandas to Spark, it will be converted to UTC microseconds. This -occurs when calling :meth:`SparkSession.createDataFrame` with a Pandas DataFrame or when returning a timestamp from a -``pandas_udf``. These conversions are done automatically to ensure Spark will have data in the -expected format, so it is not necessary to do any of these conversions yourself. Any nanosecond -values will be truncated. +When timestamp data is transferred from Spark to a PyArrow Table, it will remain in microsecond +resolution with the UTC time zone. This occurs when calling :meth:`DataFrame.toArrow()` with +timestamp columns. + +When timestamp data is transferred from Pandas or PyArrow to Spark, it will be converted to UTC +microseconds. This occurs when calling :meth:`SparkSession.createDataFrame` with a Pandas DataFrame +or PyArrow Table, or when returning a timestamp from a ``pandas_udf``. These conversions are done +automatically to ensure Spark will have data in the expected format, so it is not necessary to do +any of these conversions yourself. Any nanosecond values will be truncated. Note that a standard UDF (non-Pandas) will load timestamp data as Python datetime objects, which is different from a Pandas timestamp. It is recommended to use Pandas time series functionality when @@ -414,16 +434,19 @@ working with timestamps in ``pandas_udf``\s to get the best performance, see Recommended Pandas and PyArrow Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For usage with pyspark.sql, the minimum supported versions of Pandas is 1.4.4 and PyArrow is 10.0.0. +For usage with pyspark.sql, the minimum supported versions of Pandas is 2.0.0 and PyArrow is 10.0.0. Higher versions may be used, however, compatibility and data correctness can not be guaranteed and should be verified by the user. Setting Arrow ``self_destruct`` for memory savings ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Since Spark 3.2, the Spark configuration ``spark.sql.execution.arrow.pyspark.selfDestruct.enabled`` can be used to enable PyArrow's ``self_destruct`` feature, which can save memory when creating a Pandas DataFrame via ``toPandas`` by freeing Arrow-allocated memory while building the Pandas DataFrame. -This option is experimental, and some operations may fail on the resulting Pandas DataFrame due to immutable backing arrays. 
-Typically, you would see the error ``ValueError: buffer source array is read-only``. -Newer versions of Pandas may fix these errors by improving support for such cases. -You can work around this error by copying the column(s) beforehand. -Additionally, this conversion may be slower because it is single-threaded. +Since Spark 3.2, the Spark configuration ``spark.sql.execution.arrow.pyspark.selfDestruct.enabled`` +can be used to enable PyArrow's ``self_destruct`` feature, which can save memory when creating a +Pandas DataFrame via ``toPandas`` by freeing Arrow-allocated memory while building the Pandas +DataFrame. This option can also save memory when creating a PyArrow Table via ``toArrow``. +This option is experimental. When used with ``toPandas``, some operations may fail on the resulting +Pandas DataFrame due to immutable backing arrays. Typically, you would see the error +``ValueError: buffer source array is read-only``. Newer versions of Pandas may fix these errors by +improving support for such cases. You can work around this error by copying the column(s) +beforehand. Additionally, this conversion may be slower because it is single-threaded. diff --git a/python/docs/source/user_guide/sql/index.rst b/python/docs/source/user_guide/sql/index.rst index 118cf139d9b38..d1b67f7eeb909 100644 --- a/python/docs/source/user_guide/sql/index.rst +++ b/python/docs/source/user_guide/sql/index.rst @@ -25,5 +25,6 @@ Spark SQL arrow_pandas python_udtf + python_data_source type_conversions diff --git a/python/docs/source/user_guide/sql/python_data_source.rst b/python/docs/source/user_guide/sql/python_data_source.rst new file mode 100644 index 0000000000000..cdbc706993119 --- /dev/null +++ b/python/docs/source/user_guide/sql/python_data_source.rst @@ -0,0 +1,395 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + +====================== +Python Data Source API +====================== + +.. currentmodule:: pyspark.sql + +Overview +-------- +The Python Data Source API is a new feature introduced in Spark 4.0, enabling developers to read from custom data sources and write to custom data sinks in Python. +This guide provides a comprehensive overview of the API and instructions on how to create, use, and manage Python data sources. + + +Creating a Python Data Source +----------------------------- +To create a custom Python data source, you'll need to subclass the :class:`DataSource` base classes and implement the necessary methods for reading and writing data. + +This example demonstrates creating a simple data source to generate synthetic data using the `faker` library. Ensure the `faker` library is installed and accessible in your Python environment. + +**Define the Data Source** + +Start by creating a new subclass of :class:`DataSource` with the source name, schema. 
+ +In order to be used as a source or sink in a batch or streaming query, the corresponding methods of DataSource need to be implemented. + +The methods that need to be implemented for each capability are: + ++------------+----------------------+------------------+ +| | source | sink | ++============+======================+==================+ +| batch | reader() | writer() | ++------------+----------------------+------------------+ +| | streamReader() | | +| streaming | or | streamWriter() | +| | simpleStreamReader() | | ++------------+----------------------+------------------+ + +.. code-block:: python + + from pyspark.sql.datasource import DataSource, DataSourceReader + from pyspark.sql.types import StructType + + class FakeDataSource(DataSource): + """ + A fake data source for PySpark to generate synthetic data using the `faker` library. + Options: + - numRows: specify number of rows to generate. Default value is 3. + """ + + @classmethod + def name(cls): + return "fake" + + def schema(self): + return "name string, date string, zipcode string, state string" + + def reader(self, schema: StructType): + return FakeDataSourceReader(schema, self.options) + + def writer(self, schema: StructType, overwrite: bool): + return FakeDataSourceWriter(self.options) + + def streamReader(self, schema: StructType): + return FakeStreamReader(schema, self.options) + + # Please skip the implementation of this method if streamReader has been implemented. + def simpleStreamReader(self, schema: StructType): + return SimpleStreamReader() + + def streamWriter(self, schema: StructType, overwrite: bool): + return FakeStreamWriter(self.options) + +Implementing Batch Reader and Writer for Python Data Source +----------------------------------------------------------- +**Implement the Reader** + +Define the reader logic to generate synthetic data. Use the `faker` library to populate each field in the schema. + +.. code-block:: python + + class FakeDataSourceReader(DataSourceReader): + + def __init__(self, schema, options): + self.schema: StructType = schema + self.options = options + + def read(self, partition): + from faker import Faker + fake = Faker() + # Note: every value in this `self.options` dictionary is a string. + num_rows = int(self.options.get("numRows", 3)) + for _ in range(num_rows): + row = [] + for field in self.schema.fields: + value = getattr(fake, field.name)() + row.append(value) + yield tuple(row) + +**Implement the Writer** + +Create a fake data source writer that processes each partition of data, counts the rows, and either +prints the total count of rows after a successful write or the number of failed tasks if the writing process fails. + +..
code-block:: python + + from dataclasses import dataclass + from typing import Iterator, List + + from pyspark.sql.types import Row + from pyspark.sql.datasource import DataSource, DataSourceWriter, WriterCommitMessage + + @dataclass + class SimpleCommitMessage(WriterCommitMessage): + partition_id: int + count: int + + class FakeDataSourceWriter(DataSourceWriter): + + def write(self, rows: Iterator[Row]) -> SimpleCommitMessage: + from pyspark import TaskContext + + context = TaskContext.get() + partition_id = context.partitionId() + cnt = sum(1 for _ in rows) + return SimpleCommitMessage(partition_id=partition_id, count=cnt) + + def commit(self, messages: List[SimpleCommitMessage]) -> None: + total_count = sum(message.count for message in messages) + print(f"Total number of rows: {total_count}") + + def abort(self, messages: List[SimpleCommitMessage]) -> None: + failed_count = sum(message is None for message in messages) + print(f"Number of failed tasks: {failed_count}") + + +Implementing Streaming Reader and Writer for Python Data Source +--------------------------------------------------------------- +**Implement the Stream Reader** + +This is a dummy streaming data reader that generates 2 rows in every microbatch. The stream reader instance has an integer offset that increases by 2 in every microbatch. + +.. code-block:: python + + class RangePartition(InputPartition): + def __init__(self, start, end): + self.start = start + self.end = end + + class FakeStreamReader(DataSourceStreamReader): + def __init__(self, schema, options): + self.current = 0 + + def initialOffset(self) -> dict: + """ + Return the initial start offset of the reader. + """ + return {"offset": 0} + + def latestOffset(self) -> dict: + """ + Return the current latest offset that the next microbatch will read to. + """ + self.current += 2 + return {"offset": self.current} + + def partitions(self, start: dict, end: dict): + """ + Plans the partitioning of the current microbatch defined by the start and end offsets. + It needs to return a sequence of :class:`InputPartition` objects. + """ + return [RangePartition(start["offset"], end["offset"])] + + def commit(self, end: dict): + """ + This is invoked when the query has finished processing data before end offset, this can be used to clean up resources. + """ + pass + + def read(self, partition) -> Iterator[Tuple]: + """ + Takes a partition as an input and reads an iterator of tuples from the data source. + """ + start, end = partition.start, partition.end + for i in range(start, end): + yield (i, str(i)) + +**Implement the Simple Stream Reader** + +If the data source has low throughput and doesn't require partitioning, you can implement SimpleDataSourceStreamReader instead of DataSourceStreamReader. + +Either simpleStreamReader() or streamReader() must be implemented for a readable streaming data source; simpleStreamReader() will only be invoked when streamReader() is not implemented. + +This is the same dummy streaming reader that generates 2 rows every batch, implemented with the SimpleDataSourceStreamReader interface. + +.. code-block:: python + + class SimpleStreamReader(SimpleDataSourceStreamReader): + def initialOffset(self): + """ + Return the initial start offset of the reader. + """ + return {"offset": 0} + + def read(self, start: dict) -> (Iterator[Tuple], dict): + """ + Takes the start offset as an input and returns an iterator of tuples and the start offset of the next read.
+ """ + start_idx = start["offset"] + it = iter([(i,) for i in range(start_idx, start_idx + 2)]) + return (it, {"offset": start_idx + 2}) + + def readBetweenOffsets(self, start: dict, end: dict) -> Iterator[Tuple]: + """ + Takes start and end offset as input and read an iterator of data deterministically. + This is called whe query replay batches during restart or after failure. + """ + start_idx = start["offset"] + end_idx = end["offset"] + return iter([(i,) for i in range(start_idx, end_idx)]) + + def commit(self, end): + """ + This is invoked when the query has finished processing data before end offset, this can be used to clean up resource. + """ + pass + +**Implement the Stream Writer** + +This is a streaming data writer that write the metadata information of each microbatch to a local path. + +.. code-block:: python + + class SimpleCommitMessage(WriterCommitMessage): + partition_id: int + count: int + + class FakeStreamWriter(DataSourceStreamWriter): + def __init__(self, options): + self.options = options + self.path = self.options.get("path") + assert self.path is not None + + def write(self, iterator): + """ + Write the data and return the commit message of that partition + """ + from pyspark import TaskContext + context = TaskContext.get() + partition_id = context.partitionId() + cnt = 0 + for row in iterator: + cnt += 1 + return SimpleCommitMessage(partition_id=partition_id, count=cnt) + + def commit(self, messages, batchId) -> None: + """ + Receives a sequence of :class:`WriterCommitMessage` when all write tasks succeed and decides what to do with it. + In this FakeStreamWriter, we write the metadata of the microbatch(number of rows and partitions) into a json file inside commit(). + """ + status = dict(num_partitions=len(messages), rows=sum(m.count for m in messages)) + with open(os.path.join(self.path, f"{batchId}.json"), "a") as file: + file.write(json.dumps(status) + "\n") + + def abort(self, messages, batchId) -> None: + """ + Receives a sequence of :class:`WriterCommitMessage` from successful tasks when some tasks fail and decides what to do with it. + In this FakeStreamWriter, we write a failure message into a txt file inside abort(). + """ + with open(os.path.join(self.path, f"{batchId}.txt"), "w") as file: + file.write(f"failed in batch {batchId}") + +Serialization Requirement +------------------------- +User defined DataSource, DataSourceReader, DataSourceWriter, DataSourceStreamReader and DataSourceStreamWriter and their methods must be able to be serialized by pickle. + +For library that are used inside a method, it must be imported inside the method. For example, TaskContext must be imported inside the read() method in the code below. + +.. code-block:: python + + def read(self, partition): + from pyspark import TaskContext + context = TaskContext.get() + +Using a Python Data Source +-------------------------- +**Use a Python Data Source in Batch Query** + +After defining your data source, it must be registered before usage. + +.. code-block:: python + + spark.dataSource.register(FakeDataSource) + +**Read From a Python Data Source** + +Read from the fake datasource with the default schema and options: + +.. 
code-block:: python + + spark.read.format("fake").load().show() + + # +-----------+----------+-------+-------+ + # | name| date|zipcode| state| + # +-----------+----------+-------+-------+ + # |Carlos Cobb|2018-07-15| 73003|Indiana| + # | Eric Scott|1991-08-22| 10085| Idaho| + # | Amy Martin|1988-10-28| 68076| Oregon| + # +-----------+----------+-------+-------+ + +Read from the fake datasource with a custom schema: + +.. code-block:: python + + spark.read.format("fake").schema("name string, company string").load().show() + + # +---------------------+--------------+ + # |name |company | + # +---------------------+--------------+ + # |Tanner Brennan |Adams Group | + # |Leslie Maxwell |Santiago Group| + # |Mrs. Jacqueline Brown|Maynard Inc | + # +---------------------+--------------+ + +Read from the fake datasource with a different number of rows: + +.. code-block:: python + + spark.read.format("fake").option("numRows", 5).load().show() + + # +--------------+----------+-------+------------+ + # | name| date|zipcode| state| + # +--------------+----------+-------+------------+ + # | Pam Mitchell|1988-10-20| 23788| Tennessee| + # |Melissa Turner|1996-06-14| 30851| Nevada| + # | Brian Ramsey|2021-08-21| 55277| Washington| + # | Caitlin Reed|1983-06-22| 89813|Pennsylvania| + # | Douglas James|2007-01-18| 46226| Alabama| + # +--------------+----------+-------+------------+ + +**Write To a Python Data Source** + +To write data to a custom location, make sure that you specify the `mode()` clause. Supported modes are `append` and `overwrite`. + +.. code-block:: python + + df = spark.range(0, 10, 1, 5) + df.write.format("fake").mode("append").save() + + # You can check the Spark log (standard error) to see the output of the write operation. + # Total number of rows: 10 + +**Use a Python Data Source in Streaming Query** + +Once we register the Python data source, we can also use it in streaming queries as the source of readStream() or the sink of writeStream() by passing its short name or full name to format(). + +Start a query that reads from the fake Python data source and writes to the console: + +.. code-block:: python + + query = spark.readStream.format("fake").load().writeStream.format("console").start() + + # +---+ + # | id| + # +---+ + # | 0| + # | 1| + # +---+ + # +---+ + # | id| + # +---+ + # | 2| + # | 3| + # +---+ + +We can also use the same data source as both the streaming reader and writer: + +.. code-block:: python + + query = spark.readStream.format("fake").load().writeStream.format("fake").start("/output_path") diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py index 8eefc17db7002..5e94c2b653806 100755 --- a/python/packaging/classic/setup.py +++ b/python/packaging/classic/setup.py @@ -150,7 +150,7 @@ def _supports_symlinks(): # binary format protocol with the Java version, see ARROW_HOME/format/* for specifications. # Also don't forget to update python/docs/source/getting_started/install.rst, and # python/packaging/connect/setup.py -_minimum_pandas_version = "1.4.4" +_minimum_pandas_version = "2.0.0" _minimum_numpy_version = "1.21" _minimum_pyarrow_version = "10.0.0" _minimum_grpc_version = "1.62.0" @@ -204,8 +204,13 @@ def run(self): copyfile("pyspark/shell.py", "pyspark/python/pyspark/shell.py") if in_spark: + # !!HACK ALERT!! + # `setup.py` has to be located in the same directory as the package. + # Therefore, we copy the current file and place it in the `spark/python` directory. + # After that, we remove it at the end.
copyfile("packaging/classic/setup.py", "setup.py") copyfile("packaging/classic/setup.cfg", "setup.cfg") + # Construct the symlink farm - this is nein_sparkcessary since we can't refer to # the path above the package root and we need to copy the jars and scripts which # are up above the python root. @@ -270,12 +275,14 @@ def run(self): "pyspark.ml.deepspeed", "pyspark.sql", "pyspark.sql.avro", + "pyspark.sql.classic", "pyspark.sql.connect", "pyspark.sql.connect.avro", "pyspark.sql.connect.client", "pyspark.sql.connect.functions", "pyspark.sql.connect.proto", "pyspark.sql.connect.protobuf", + "pyspark.sql.connect.resource", "pyspark.sql.connect.shell", "pyspark.sql.connect.streaming", "pyspark.sql.connect.streaming.worker", @@ -357,11 +364,10 @@ def run(self): "numpy>=%s" % _minimum_numpy_version, ], }, - python_requires=">=3.8", + python_requires=">=3.9", classifiers=[ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/python/packaging/connect/setup.py b/python/packaging/connect/setup.py index fe1e7486faa9b..bc1d4fd2868de 100755 --- a/python/packaging/connect/setup.py +++ b/python/packaging/connect/setup.py @@ -25,7 +25,7 @@ import sys from setuptools import setup import os -from shutil import copyfile +from shutil import copyfile, move import glob from pathlib import Path @@ -70,6 +70,7 @@ test_packages = [ "pyspark.tests", # for Memory profiler parity tests "pyspark.testing", + "pyspark.resource.tests", "pyspark.sql.tests", "pyspark.sql.tests.connect", "pyspark.sql.tests.connect.streaming", @@ -108,6 +109,13 @@ try: if in_spark: + # !!HACK ALTERT!! + # 1. `setup.py` has to be located with the same directory with the package. + # Therefore, we copy the current file, and place it at `spark/python` directory. + # After that, we remove it in the end. + # 2. Here it renames `lib` to `lib.ack` so MANIFEST.in does not pick `py4j` up. + # We rename it back in the end. + move("lib", "lib.back") copyfile("packaging/connect/setup.py", "setup.py") copyfile("packaging/connect/setup.cfg", "setup.cfg") @@ -116,7 +124,7 @@ # binary format protocol with the Java version, see ARROW_HOME/format/* for specifications. # Also don't forget to update python/docs/source/getting_started/install.rst, and # python/packaging/classic/setup.py - _minimum_pandas_version = "1.4.4" + _minimum_pandas_version = "2.0.0" _minimum_numpy_version = "1.21" _minimum_pyarrow_version = "10.0.0" _minimum_grpc_version = "1.59.3" @@ -145,6 +153,7 @@ "pyspark.sql.connect.functions", "pyspark.sql.connect.proto", "pyspark.sql.connect.protobuf", + "pyspark.sql.connect.resource", "pyspark.sql.connect.shell", "pyspark.sql.connect.streaming", "pyspark.sql.connect.streaming.worker", @@ -178,6 +187,7 @@ author_email="dev@spark.apache.org", url="https://github.com/apache/spark/tree/master/python", packages=connect_packages + test_packages, + include_package_data=True, license="http://www.apache.org/licenses/LICENSE-2.0", # Don't forget to update python/docs/source/getting_started/install.rst # if you're updating the versions or dependencies. 
@@ -189,11 +199,10 @@ "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version, "numpy>=%s" % _minimum_numpy_version, ], - python_requires=">=3.8", + python_requires=">=3.9", classifiers=[ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: Apache Software License", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -205,5 +214,6 @@ ) finally: if in_spark: + move("lib.back", "lib") os.remove("setup.py") os.remove("setup.cfg") diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index 15c21df0c6bf4..49c594f8c7def 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -125,8 +125,10 @@ def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any: # for backward compatibility references. sys.modules["pyspark.context"] = context -# for back compatibility -from pyspark.sql import SQLContext, HiveContext, Row # noqa: F401 + # for back compatibility + from pyspark.sql import SQLContext, HiveContext # noqa: F401 + +from pyspark.sql import Row # noqa: F401 __all__ = [ "SparkConf", diff --git a/python/pyspark/daemon.py b/python/pyspark/daemon.py index b0e06d13beda7..a23af109ea6de 100644 --- a/python/pyspark/daemon.py +++ b/python/pyspark/daemon.py @@ -28,9 +28,9 @@ from socket import AF_INET, AF_INET6, SOCK_STREAM, SOMAXCONN from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN, SIGINT -from pyspark.serializers import read_long, write_int, write_with_length, UTF8Deserializer +from pyspark.serializers import read_int, write_int, write_with_length, UTF8Deserializer -if len(sys.argv) > 1: +if len(sys.argv) > 1 and sys.argv[1].startswith("pyspark"): import importlib worker_module = importlib.import_module(sys.argv[1]) @@ -139,7 +139,7 @@ def handle_sigterm(*args): if 0 in ready_fds: try: - worker_pid = read_long(stdin_bin) + worker_pid = read_int(stdin_bin) except EOFError: # Spark told us to exit by closing stdin shutdown(0) diff --git a/python/pyspark/errors/error-conditions.json b/python/pyspark/errors/error-conditions.json new file mode 100644 index 0000000000000..dd70e814b1ea8 --- /dev/null +++ b/python/pyspark/errors/error-conditions.json @@ -0,0 +1,1166 @@ +{ + "APPLICATION_NAME_NOT_SET": { + "message": [ + "An application name must be set in your configuration." + ] + }, + "ARGUMENT_REQUIRED": { + "message": [ + "Argument `` is required when ." + ] + }, + "ARROW_LEGACY_IPC_FORMAT": { + "message": [ + "Arrow legacy IPC format is not supported in PySpark, please unset ARROW_PRE_0_15_IPC_FORMAT." + ] + }, + "ATTRIBUTE_NOT_CALLABLE": { + "message": [ + "Attribute `` in provided object `` is not callable." + ] + }, + "ATTRIBUTE_NOT_SUPPORTED": { + "message": [ + "Attribute `` is not supported." + ] + }, + "AXIS_LENGTH_MISMATCH": { + "message": [ + "Length mismatch: Expected axis has element, new values have elements." + ] + }, + "BROADCAST_VARIABLE_NOT_LOADED": { + "message": [ + "Broadcast variable `` not loaded." + ] + }, + "CALL_BEFORE_INITIALIZE": { + "message": [ + "Not supported to call `` before initialize ." + ] + }, + "CANNOT_ACCEPT_OBJECT_IN_TYPE": { + "message": [ + "`` can not accept object `` in type ``." + ] + }, + "CANNOT_ACCESS_TO_DUNDER": { + "message": [ + "Dunder(double underscore) attribute is for internal use only." 
+ ] + }, + "CANNOT_APPLY_IN_FOR_COLUMN": { + "message": [ + "Cannot apply 'in' operator against a column: please use 'contains' in a string column or 'array_contains' function for an array column." + ] + }, + "CANNOT_BE_EMPTY": { + "message": [ + "At least one must be specified." + ] + }, + "CANNOT_BE_NONE": { + "message": [ + "Argument `` cannot be None." + ] + }, + "CANNOT_CONFIGURE_SPARK_CONNECT": { + "message": [ + "Spark Connect server cannot be configured: Existing [], New []." + ] + }, + "CANNOT_CONFIGURE_SPARK_CONNECT_MASTER": { + "message": [ + "Spark Connect server and Spark master cannot be configured together: Spark master [], Spark Connect []." + ] + }, + "CANNOT_CONVERT_COLUMN_INTO_BOOL": { + "message": [ + "Cannot convert column into bool: please use '&' for 'and', '|' for 'or', '~' for 'not' when building DataFrame boolean expressions." + ] + }, + "CANNOT_CONVERT_TYPE": { + "message": [ + "Cannot convert into ." + ] + }, + "CANNOT_DETERMINE_TYPE": { + "message": [ + "Some of types cannot be determined after inferring." + ] + }, + "CANNOT_GET_BATCH_ID": { + "message": [ + "Could not get batch id from ." + ] + }, + "CANNOT_INFER_ARRAY_TYPE": { + "message": [ + "Can not infer Array Type from a list with None as the first element." + ] + }, + "CANNOT_INFER_EMPTY_SCHEMA": { + "message": [ + "Can not infer schema from an empty dataset." + ] + }, + "CANNOT_INFER_SCHEMA_FOR_TYPE": { + "message": [ + "Can not infer schema for type: ``." + ] + }, + "CANNOT_INFER_TYPE_FOR_FIELD": { + "message": [ + "Unable to infer the type of the field ``." + ] + }, + "CANNOT_MERGE_TYPE": { + "message": [ + "Can not merge type `` and ``." + ] + }, + "CANNOT_OPEN_SOCKET": { + "message": [ + "Can not open socket: ." + ] + }, + "CANNOT_PARSE_DATATYPE": { + "message": [ + "Unable to parse datatype. ." + ] + }, + "CANNOT_PROVIDE_METADATA": { + "message": [ + "Metadata can only be provided for a single column." + ] + }, + "CANNOT_SET_TOGETHER": { + "message": [ + " should not be set together." + ] + }, + "CANNOT_SPECIFY_RETURN_TYPE_FOR_UDF": { + "message": [ + "returnType can not be specified when `` is a user-defined function, but got ." + ] + }, + "CANNOT_WITHOUT": { + "message": [ + "Cannot without ." + ] + }, + "CLASSIC_OPERATION_NOT_SUPPORTED_ON_DF": { + "message": [ + "Calling property or member '' is not supported in PySpark Classic, please use Spark Connect instead." + ] + }, + "COLLATION_INVALID_PROVIDER" : { + "message" : [ + "The value does not represent a correct collation provider. Supported providers are: []." + ] + }, + "COLUMN_IN_LIST": { + "message": [ + "`` does not allow a Column in a list." + ] + }, + "CONNECT_URL_ALREADY_DEFINED": { + "message": [ + "Only one Spark Connect client URL can be set; however, got a different URL [] from the existing []." + ] + }, + "CONNECT_URL_NOT_SET": { + "message": [ + "Cannot create a Spark Connect session because the Spark Connect remote URL has not been set. Please define the remote URL by setting either the 'spark.remote' option or the 'SPARK_REMOTE' environment variable." + ] + }, + "CONTEXT_ONLY_VALID_ON_DRIVER": { + "message": [ + "It appears that you are attempting to reference SparkContext from a broadcast variable, action, or transformation. SparkContext can only be used on the driver, not in code that it run on workers. For more information, see SPARK-5063." + ] + }, + "CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT": { + "message": [ + "Remote client cannot create a SparkContext. Create SparkSession instead." 
+ ] + }, + "DATA_SOURCE_CREATE_ERROR": { + "message": [ + "Failed to create python data source instance, error: ." + ] + }, + "DATA_SOURCE_INVALID_RETURN_TYPE": { + "message": [ + "Unsupported return type ('') from Python data source ''. Expected types: ." + ] + }, + "DATA_SOURCE_RETURN_SCHEMA_MISMATCH": { + "message": [ + "Return schema mismatch in the result from 'read' method. Expected: columns, Found: columns. Make sure the returned values match the required output schema." + ] + }, + "DATA_SOURCE_TYPE_MISMATCH": { + "message": [ + "Expected , but got ." + ] + }, + "DIFFERENT_PANDAS_DATAFRAME": { + "message": [ + "DataFrames are not almost equal:", + "Left:", + "", + "", + "Right:", + "", + "" + ] + }, + "DIFFERENT_PANDAS_INDEX": { + "message": [ + "Indices are not almost equal:", + "Left:", + "", + "", + "Right:", + "", + "" + ] + }, + "DIFFERENT_PANDAS_MULTIINDEX": { + "message": [ + "MultiIndices are not almost equal:", + "Left:", + "", + "", + "Right:", + "", + "" + ] + }, + "DIFFERENT_PANDAS_SERIES": { + "message": [ + "Series are not almost equal:", + "Left:", + "", + "", + "Right:", + "", + "" + ] + }, + "DIFFERENT_ROWS": { + "message": [ + "" + ] + }, + "DIFFERENT_SCHEMA": { + "message": [ + "Schemas do not match.", + "--- actual", + "+++ expected", + "" + ] + }, + "DISALLOWED_TYPE_FOR_CONTAINER": { + "message": [ + "Argument ``(type: ) should only contain a type in [], got " + ] + }, + "DUPLICATED_FIELD_NAME_IN_ARROW_STRUCT": { + "message": [ + "Duplicated field names in Arrow Struct are not allowed, got " + ] + }, + "ERROR_OCCURRED_WHILE_CALLING": { + "message": [ + "An error occurred while calling : ." + ] + }, + "FIELD_DATA_TYPE_UNACCEPTABLE": { + "message": [ + " can not accept object in type ." + ] + }, + "FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME": { + "message": [ + ": can not accept object in type ." + ] + }, + "FIELD_NOT_NULLABLE": { + "message": [ + "Field is not nullable, but got None." + ] + }, + "FIELD_NOT_NULLABLE_WITH_NAME": { + "message": [ + ": This field is not nullable, but got None." + ] + }, + "FIELD_STRUCT_LENGTH_MISMATCH": { + "message": [ + "Length of object () does not match with length of fields ()." + ] + }, + "FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME": { + "message": [ + ": Length of object () does not match with length of fields ()." + ] + }, + "FIELD_TYPE_MISMATCH": { + "message": [ + " is not an instance of type ." + ] + }, + "FIELD_TYPE_MISMATCH_WITH_NAME": { + "message": [ + ": is not an instance of type ." + ] + }, + "HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": { + "message": [ + "Function `` should return Column, got ." + ] + }, + "INCORRECT_CONF_FOR_PROFILE": { + "message": [ + "`spark.python.profile` or `spark.python.profile.memory` configuration", + " must be set to `true` to enable Python profile." + ] + }, + "INDEX_NOT_POSITIVE": { + "message": [ + "Index must be positive, got ''." + ] + }, + "INDEX_OUT_OF_RANGE": { + "message": [ + " index out of range, got ''." + ] + }, + "INVALID_ARROW_UDTF_RETURN_TYPE": { + "message": [ + "The return type of the arrow-optimized Python UDTF should be of type 'pandas.DataFrame', but the '' method returned a value of type with value: ." + ] + }, + "INVALID_BROADCAST_OPERATION": { + "message": [ + "Broadcast can only be in driver." + ] + }, + "INVALID_CALL_ON_UNRESOLVED_OBJECT": { + "message": [ + "Invalid call to `` on unresolved object." + ] + }, + "INVALID_CONNECT_URL": { + "message": [ + "Invalid URL for Spark Connect: " + ] + }, + "INVALID_INTERVAL_CASTING": { + "message": [ + "Interval to is invalid." 
+ ] + }, + "INVALID_ITEM_FOR_CONTAINER": { + "message": [ + "All items in `` should be in , got ." + ] + }, + "INVALID_JSON_DATA_TYPE_FOR_COLLATIONS" : { + "message" : [ + "Collations can only be applied to string types, but the JSON data type is ." + ] + }, + "INVALID_MULTIPLE_ARGUMENT_CONDITIONS": { + "message": [ + "[{arg_names}] cannot be ." + ] + }, + "INVALID_NDARRAY_DIMENSION": { + "message": [ + "NumPy array input should be of dimensions." + ] + }, + "INVALID_NUMBER_OF_DATAFRAMES_IN_GROUP": { + "message": [ + "Invalid number of dataframes in group ." + ] + }, + "INVALID_PANDAS_UDF": { + "message": [ + "Invalid function: " + ] + }, + "INVALID_PANDAS_UDF_TYPE": { + "message": [ + "`` should be one of the values from PandasUDFType, got " + ] + }, + "INVALID_RETURN_TYPE_FOR_ARROW_UDF": { + "message": [ + "Grouped and Cogrouped map Arrow UDF should return StructType for , got ." + ] + }, + "INVALID_RETURN_TYPE_FOR_PANDAS_UDF": { + "message": [ + "Pandas UDF should return StructType for , got ." + ] + }, + "INVALID_SESSION_UUID_ID": { + "message": [ + "Parameter value must be a valid UUID format: " + ] + }, + "INVALID_TIMEOUT_TIMESTAMP": { + "message": [ + "Timeout timestamp () cannot be earlier than the current watermark ()." + ] + }, + "INVALID_TYPE": { + "message": [ + "Argument `` should not be a ." + ] + }, + "INVALID_TYPENAME_CALL": { + "message": [ + "StructField does not have typeName. Use typeName on its type explicitly instead." + ] + }, + "INVALID_TYPE_DF_EQUALITY_ARG": { + "message": [ + "Expected type for `` but got type ." + ] + }, + "INVALID_UDF_EVAL_TYPE": { + "message": [ + "Eval type for UDF must be ." + ] + }, + "INVALID_UDTF_BOTH_RETURN_TYPE_AND_ANALYZE": { + "message": [ + "The UDTF '' is invalid. It has both its return type and an 'analyze' attribute. Please make it have one of either the return type or the 'analyze' static method in '' and try again." + ] + }, + "INVALID_UDTF_EVAL_TYPE": { + "message": [ + "The eval type for the UDTF '' is invalid. It must be one of ." + ] + }, + "INVALID_UDTF_HANDLER_TYPE": { + "message": [ + "The UDTF is invalid. The function handler must be a class, but got ''. Please provide a class as the function handler." + ] + }, + "INVALID_UDTF_NO_EVAL": { + "message": [ + "The UDTF '' is invalid. It does not implement the required 'eval' method. Please implement the 'eval' method in '' and try again." + ] + }, + "INVALID_UDTF_RETURN_TYPE": { + "message": [ + "The UDTF '' is invalid. It does not specify its return type or implement the required 'analyze' static method. Please specify the return type or implement the 'analyze' static method in '' and try again." + ] + }, + "INVALID_WHEN_USAGE": { + "message": [ + "when() can only be applied on a Column previously generated by when() function, and cannot be applied once otherwise() is applied." + ] + }, + "INVALID_WINDOW_BOUND_TYPE": { + "message": [ + "Invalid window bound type: ." + ] + }, + "JAVA_GATEWAY_EXITED": { + "message": [ + "Java gateway process exited before sending its port number." + ] + }, + "JVM_ATTRIBUTE_NOT_SUPPORTED": { + "message": [ + "Attribute `` is not supported in Spark Connect as it depends on the JVM. If you need to use this attribute, do not use Spark Connect when creating your session. Visit https://spark.apache.org/docs/latest/sql-getting-started.html#starting-point-sparksession for creating regular Spark Session in detail." + ] + }, + "KEY_NOT_EXISTS": { + "message": [ + "Key `` is not exists." 
+ ] + }, + "KEY_VALUE_PAIR_REQUIRED": { + "message": [ + "Key-value pair or a list of pairs is required." + ] + }, + "LENGTH_SHOULD_BE_THE_SAME": { + "message": [ + " and should be of the same length, got and ." + ] + }, + "MALFORMED_VARIANT" : { + "message" : [ + "Variant binary is malformed. Please check the data source is valid." + ] + }, + "MASTER_URL_NOT_SET": { + "message": [ + "A master URL must be set in your configuration." + ] + }, + "MISSING_LIBRARY_FOR_PROFILER": { + "message": [ + "Install the 'memory_profiler' library in the cluster to enable memory profiling." + ] + }, + "MISSING_VALID_PLAN": { + "message": [ + "Argument to does not contain a valid plan." + ] + }, + "MIXED_TYPE_REPLACEMENT": { + "message": [ + "Mixed type replacements are not supported." + ] + }, + "NEGATIVE_VALUE": { + "message": [ + "Value for `` must be greater than or equal to 0, got ''." + ] + }, + "NOT_BOOL": { + "message": [ + "Argument `` should be a bool, got ." + ] + }, + "NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_LIST_OR_STR_OR_TUPLE": { + "message": [ + "Argument `` should be a bool, dict, float, int, str or tuple, got ." + ] + }, + "NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_STR": { + "message": [ + "Argument `` should be a bool, dict, float, int or str, got ." + ] + }, + "NOT_BOOL_OR_FLOAT_OR_INT": { + "message": [ + "Argument `` should be a bool, float or int, got ." + ] + }, + "NOT_BOOL_OR_FLOAT_OR_INT_OR_LIST_OR_NONE_OR_STR_OR_TUPLE": { + "message": [ + "Argument `` should be a bool, float, int, list, None, str or tuple, got ." + ] + }, + "NOT_BOOL_OR_FLOAT_OR_INT_OR_STR": { + "message": [ + "Argument `` should be a bool, float, int or str, got ." + ] + }, + "NOT_BOOL_OR_LIST": { + "message": [ + "Argument `` should be a bool or list, got ." + ] + }, + "NOT_BOOL_OR_STR": { + "message": [ + "Argument `` should be a bool or str, got ." + ] + }, + "NOT_CALLABLE": { + "message": [ + "Argument `` should be a callable, got ." + ] + }, + "NOT_COLUMN": { + "message": [ + "Argument `` should be a Column, got ." + ] + }, + "NOT_COLUMN_OR_DATATYPE_OR_STR": { + "message": [ + "Argument `` should be a Column, str or DataType, but got ." + ] + }, + "NOT_COLUMN_OR_FLOAT_OR_INT_OR_LIST_OR_STR": { + "message": [ + "Argument `` should be a Column, float, integer, list or string, got ." + ] + }, + "NOT_COLUMN_OR_INT": { + "message": [ + "Argument `` should be a Column or int, got ." + ] + }, + "NOT_COLUMN_OR_INT_OR_LIST_OR_STR_OR_TUPLE": { + "message": [ + "Argument `` should be a Column, int, list, str or tuple, got ." + ] + }, + "NOT_COLUMN_OR_INT_OR_STR": { + "message": [ + "Argument `` should be a Column, int or str, got ." + ] + }, + "NOT_COLUMN_OR_LIST_OR_STR": { + "message": [ + "Argument `` should be a Column, list or str, got ." + ] + }, + "NOT_COLUMN_OR_STR": { + "message": [ + "Argument `` should be a Column or str, got ." + ] + }, + "NOT_COLUMN_OR_STR_OR_STRUCT": { + "message": [ + "Argument `` should be a StructType, Column or str, got ." + ] + }, + "NOT_DATAFRAME": { + "message": [ + "Argument `` should be a DataFrame, got ." + ] + }, + "NOT_DATATYPE_OR_STR": { + "message": [ + "Argument `` should be a DataType or str, got ." + ] + }, + "NOT_DICT": { + "message": [ + "Argument `` should be a dict, got ." + ] + }, + "NOT_EXPRESSION": { + "message": [ + "Argument `` should be an Expression, got ." + ] + }, + "NOT_FLOAT_OR_INT": { + "message": [ + "Argument `` should be a float or int, got ." + ] + }, + "NOT_FLOAT_OR_INT_OR_LIST_OR_STR": { + "message": [ + "Argument `` should be a float, int, list or str, got ." 
+ ] + }, + "NOT_IMPLEMENTED": { + "message": [ + " is not implemented." + ] + }, + "NOT_INT": { + "message": [ + "Argument `` should be an int, got ." + ] + }, + "NOT_INT_OR_SLICE_OR_STR": { + "message": [ + "Argument `` should be an int, slice or str, got ." + ] + }, + "NOT_IN_BARRIER_STAGE": { + "message": [ + "It is not in a barrier stage." + ] + }, + "NOT_ITERABLE": { + "message": [ + " is not iterable." + ] + }, + "NOT_LIST": { + "message": [ + "Argument `` should be a list, got ." + ] + }, + "NOT_LIST_OF_COLUMN": { + "message": [ + "Argument `` should be a list[Column]." + ] + }, + "NOT_LIST_OF_COLUMN_OR_STR": { + "message": [ + "Argument `` should be a list[Column]." + ] + }, + "NOT_LIST_OF_FLOAT_OR_INT": { + "message": [ + "Argument `` should be a list[float, int], got ." + ] + }, + "NOT_LIST_OF_STR": { + "message": [ + "Argument `` should be a list[str], got ." + ] + }, + "NOT_LIST_OR_NONE_OR_STRUCT": { + "message": [ + "Argument `` should be a list, None or StructType, got ." + ] + }, + "NOT_LIST_OR_STR_OR_TUPLE": { + "message": [ + "Argument `` should be a list, str or tuple, got ." + ] + }, + "NOT_LIST_OR_TUPLE": { + "message": [ + "Argument `` should be a list or tuple, got ." + ] + }, + "NOT_NUMERIC_COLUMNS": { + "message": [ + "Numeric aggregation function can only be applied on numeric columns, got ." + ] + }, + "NOT_OBSERVATION_OR_STR": { + "message": [ + "Argument `` should be an Observation or str, got ." + ] + }, + "NOT_SAME_TYPE": { + "message": [ + "Argument `` and `` should be the same type, got and ." + ] + }, + "NOT_STR": { + "message": [ + "Argument `` should be a str, got ." + ] + }, + "NOT_STRUCT": { + "message": [ + "Argument `` should be a struct type, got ." + ] + }, + "NOT_STR_OR_LIST_OF_RDD": { + "message": [ + "Argument `` should be a str or list[RDD], got ." + ] + }, + "NOT_STR_OR_STRUCT": { + "message": [ + "Argument `` should be a str or struct type, got ." + ] + }, + "NOT_WINDOWSPEC": { + "message": [ + "Argument `` should be a WindowSpec, got ." + ] + }, + "NO_ACTIVE_EXCEPTION": { + "message": [ + "No active exception." + ] + }, + "NO_ACTIVE_OR_DEFAULT_SESSION": { + "message": [ + "No active or default Spark session found. Please create a new Spark session before running the code." + ] + }, + "NO_ACTIVE_SESSION": { + "message": [ + "No active Spark session found. Please create a new Spark session before running the code." + ] + }, + "NO_OBSERVE_BEFORE_GET": { + "message": [ + "Should observe by calling `DataFrame.observe` before `get`." + ] + }, + "NO_SCHEMA_AND_DRIVER_DEFAULT_SCHEME": { + "message": [ + "Only allows to be a path without scheme, and Spark Driver should use the default scheme to determine the destination file system." + ] + }, + "ONLY_ALLOWED_FOR_SINGLE_COLUMN": { + "message": [ + "Argument `` can only be provided for a single column." + ] + }, + "ONLY_ALLOW_SINGLE_TRIGGER": { + "message": [ + "Only a single trigger is allowed." + ] + }, + "ONLY_SUPPORTED_WITH_SPARK_CONNECT": { + "message": [ + " is only supported with Spark Connect; however, the current Spark session does not use Spark Connect." + ] + }, + "PACKAGE_NOT_INSTALLED": { + "message": [ + " >= must be installed; however, it was not found." + ] + }, + "PIPE_FUNCTION_EXITED": { + "message": [ + "Pipe function `` exited with error code ." + ] + }, + "PYTHON_HASH_SEED_NOT_SET": { + "message": [ + "Randomness of hash of string should be disabled via PYTHONHASHSEED." 
+ ] + }, + "PYTHON_STREAMING_DATA_SOURCE_RUNTIME_ERROR": { + "message": [ + "Failed when running Python streaming data source: " + ] + }, + "PYTHON_VERSION_MISMATCH": { + "message": [ + "Python in worker has different version: than that in driver: , PySpark cannot run with different minor versions.", + "Please check environment variables PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON are correctly set." + ] + }, + "RDD_TRANSFORM_ONLY_VALID_ON_DRIVER": { + "message": [ + "It appears that you are attempting to broadcast an RDD or reference an RDD from an ", + "action or transformation. RDD transformations and actions can only be invoked by the ", + "driver, not inside of other transformations; for example, ", + "rdd1.map(lambda x: rdd2.values.count() * x) is invalid because the values ", + "transformation and count action cannot be performed inside of the rdd1.map ", + "transformation. For more information, see SPARK-5063." + ] + }, + "READ_ONLY": { + "message": [ + " is read-only." + ] + }, + "RESPONSE_ALREADY_RECEIVED": { + "message": [ + "OPERATION_NOT_FOUND on the server but responses were already received from it." + ] + }, + "RESULT_COLUMNS_MISMATCH_FOR_ARROW_UDF": { + "message": [ + "Column names of the returned pyarrow.Table do not match specified schema." + ] + }, + "RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF": { + "message": [ + "Column names of the returned pandas.DataFrame do not match specified schema." + ] + }, + "RESULT_LENGTH_MISMATCH_FOR_PANDAS_UDF": { + "message": [ + "Number of columns of the returned pandas.DataFrame doesn't match specified schema. Expected: Actual: " + ] + }, + "RESULT_LENGTH_MISMATCH_FOR_SCALAR_ITER_PANDAS_UDF": { + "message": [ + "The length of output in Scalar iterator pandas UDF should be the same with the input's; however, the length of output was and the length of input was ." + ] + }, + "RESULT_TYPE_MISMATCH_FOR_ARROW_UDF": { + "message": [ + "Columns do not match in their data type: ." + ] + }, + "RETRIES_EXCEEDED": { + "message": [ + "The maximum number of retries has been exceeded." + ] + }, + "REUSE_OBSERVATION": { + "message": [ + "An Observation can be used with a DataFrame only once." + ] + }, + "SCHEMA_MISMATCH_FOR_PANDAS_UDF": { + "message": [ + "Result vector from pandas_udf was not the required length: expected , got ." + ] + }, + "SESSION_ALREADY_EXIST": { + "message": [ + "Cannot start a remote Spark session because there is a regular Spark session already running." + ] + }, + "SESSION_NEED_CONN_STR_OR_BUILDER": { + "message": [ + "Needs either connection string or channelBuilder (mutually exclusive) to create a new SparkSession." + ] + }, + "SESSION_NOT_SAME": { + "message": [ + "Both Datasets must belong to the same SparkSession." + ] + }, + "SESSION_OR_CONTEXT_EXISTS": { + "message": [ + "There should not be an existing Spark Session or Spark Context." + ] + }, + "SESSION_OR_CONTEXT_NOT_EXISTS": { + "message": [ + "SparkContext or SparkSession should be created first." + ] + }, + "SLICE_WITH_STEP": { + "message": [ + "Slice with step is not supported." + ] + }, + "STATE_NOT_EXISTS": { + "message": [ + "State is either not defined or has already been removed." + ] + }, + "STOP_ITERATION_OCCURRED": { + "message": [ + "Caught StopIteration thrown from user's code; failing the task: " + ] + }, + "STOP_ITERATION_OCCURRED_FROM_SCALAR_ITER_PANDAS_UDF": { + "message": [ + "pandas iterator UDF should exhaust the input iterator." + ] + }, + "STREAMING_CONNECT_SERIALIZATION_ERROR": { + "message": [ + "Cannot serialize the function ``. 
If you accessed the Spark session, or a DataFrame defined outside of the function, or any object that contains a Spark session, please be aware that they are not allowed in Spark Connect. For `foreachBatch`, please access the Spark session using `df.sparkSession`, where `df` is the first parameter in your `foreachBatch` function. For `StreamingQueryListener`, please access the Spark session using `self.spark`. For details please check out the PySpark doc for `foreachBatch` and `StreamingQueryListener`." + ] + }, + "TEST_CLASS_NOT_COMPILED": { + "message": [ + " doesn't exist. Spark sql test classes are not compiled." + ] + }, + "TOO_MANY_VALUES": { + "message": [ + "Expected values for ``, got ." + ] + }, + "TYPE_HINT_SHOULD_BE_SPECIFIED": { + "message": [ + "Type hints for should be specified; however, got ." + ] + }, + "UDF_RETURN_TYPE": { + "message": [ + "Return type of the user-defined function should be , but is ." + ] + }, + "UDTF_ARROW_TYPE_CAST_ERROR": { + "message": [ + "Cannot convert the output value of the column '' with type '' to the specified return type of the column: ''. Please check if the data types match and try again." + ] + }, + "UDTF_CONSTRUCTOR_INVALID_IMPLEMENTS_ANALYZE_METHOD": { + "message": [ + "Failed to evaluate the user-defined table function '' because its constructor is invalid: the function implements the 'analyze' method, but its constructor has more than two arguments (including the 'self' reference). Please update the table function so that its constructor accepts exactly one 'self' argument, or one 'self' argument plus another argument for the result of the 'analyze' method, and try the query again." + ] + }, + "UDTF_CONSTRUCTOR_INVALID_NO_ANALYZE_METHOD": { + "message": [ + "Failed to evaluate the user-defined table function '' because its constructor is invalid: the function does not implement the 'analyze' method, and its constructor has more than one argument (including the 'self' reference). Please update the table function so that its constructor accepts exactly one 'self' argument, and try the query again." + ] + }, + "UDTF_EVAL_METHOD_ARGUMENTS_DO_NOT_MATCH_SIGNATURE": { + "message": [ + "Failed to evaluate the user-defined table function '' because the function arguments did not match the expected signature of the 'eval' method (). Please update the query so that this table function call provides arguments matching the expected signature, or else update the table function so that its 'eval' method accepts the provided arguments, and then try the query again." + ] + }, + "UDTF_EXEC_ERROR": { + "message": [ + "User defined table function encountered an error in the '' method: " + ] + }, + "UDTF_INVALID_OUTPUT_ROW_TYPE": { + "message": [ + "The type of an individual output row in the '' method of the UDTF is invalid. Each row should be a tuple, list, or dict, but got ''. Please make sure that the output rows are of the correct type." + ] + }, + "UDTF_RETURN_NOT_ITERABLE": { + "message": [ + "The return value of the '' method of the UDTF is invalid. It should be an iterable (e.g., generator or list), but got ''. Please make sure that the UDTF returns one of these types." + ] + }, + "UDTF_RETURN_SCHEMA_MISMATCH": { + "message": [ + "The number of columns in the result does not match the specified schema. Expected column count: , Actual column count: . Please make sure the values returned by the '' method have the same number of columns as specified in the output schema." 
+ ] + }, + "UDTF_RETURN_TYPE_MISMATCH": { + "message": [ + "Mismatch in return type for the UDTF ''. Expected a 'StructType', but got ''. Please ensure the return type is a correctly formatted StructType." + ] + }, + "UDTF_SERIALIZATION_ERROR": { + "message": [ + "Cannot serialize the UDTF '': " + ] + }, + "UNEXPECTED_RESPONSE_FROM_SERVER": { + "message": [ + "Unexpected response from iterator server." + ] + }, + "UNEXPECTED_TUPLE_WITH_STRUCT": { + "message": [ + "Unexpected tuple with StructType." + ] + }, + "UNKNOWN_EXPLAIN_MODE": { + "message": [ + "Unknown explain mode: ''. Accepted explain modes are 'simple', 'extended', 'codegen', 'cost', 'formatted'." + ] + }, + "UNKNOWN_INTERRUPT_TYPE": { + "message": [ + "Unknown interrupt type: ''. Accepted interrupt types are 'all'." + ] + }, + "UNKNOWN_RESPONSE": { + "message": [ + "Unknown response: ." + ] + }, + "UNKNOWN_VALUE_FOR": { + "message": [ + "Unknown value for ``." + ] + }, + "UNSUPPORTED_DATA_TYPE": { + "message": [ + "Unsupported DataType ``." + ] + }, + "UNSUPPORTED_DATA_TYPE_FOR_ARROW": { + "message": [ + "Single data type is not supported with Arrow." + ] + }, + "UNSUPPORTED_DATA_TYPE_FOR_ARROW_CONVERSION": { + "message": [ + " is not supported in conversion to Arrow." + ] + }, + "UNSUPPORTED_DATA_TYPE_FOR_ARROW_VERSION": { + "message": [ + " is only supported with pyarrow 2.0.0 and above." + ] + }, + "UNSUPPORTED_JOIN_TYPE": { + "message": [ + "Unsupported join type: . Supported join types include: 'inner', 'outer', 'full', 'fullouter', 'full_outer', 'leftouter', 'left', 'left_outer', 'rightouter', 'right', 'right_outer', 'leftsemi', 'left_semi', 'semi', 'leftanti', 'left_anti', 'anti', 'cross'." + ] + }, + "UNSUPPORTED_LITERAL": { + "message": [ + "Unsupported Literal ''." + ] + }, + "UNSUPPORTED_LOCAL_CONNECTION_STRING": { + "message": [ + "Creating new SparkSessions with `local` connection string is not supported." + ] + }, + "UNSUPPORTED_NUMPY_ARRAY_SCALAR": { + "message": [ + "The type of array scalar '' is not supported." + ] + }, + "UNSUPPORTED_OPERATION": { + "message": [ + " is not supported." + ] + }, + "UNSUPPORTED_PACKAGE_VERSION": { + "message": [ + " >= must be installed; however, your version is ." + ] + }, + "UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION": { + "message": [ + "Function `` should use only POSITIONAL or POSITIONAL OR KEYWORD arguments." + ] + }, + "UNSUPPORTED_SIGNATURE": { + "message": [ + "Unsupported signature: ." + ] + }, + "UNSUPPORTED_WITH_ARROW_OPTIMIZATION": { + "message": [ + " is not supported with Arrow optimization enabled in Python UDFs. Disable 'spark.sql.execution.pythonUDF.arrow.enabled' to workaround." + ] + }, + "VALUE_ALLOWED": { + "message": [ + "Value for `` does not allow ." + ] + }, + "VALUE_NOT_ACCESSIBLE": { + "message": [ + "Value `` cannot be accessed inside tasks." + ] + }, + "VALUE_NOT_ALLOWED": { + "message": [ + "Value for `` has to be amongst the following values: ." + ] + }, + "VALUE_NOT_ANY_OR_ALL": { + "message": [ + "Value for `` must be 'any' or 'all', got ''." + ] + }, + "VALUE_NOT_BETWEEN": { + "message": [ + "Value for `` must be between and ." + ] + }, + "VALUE_NOT_NON_EMPTY_STR": { + "message": [ + "Value for `` must be a non-empty string, got ''." + ] + }, + "VALUE_NOT_PEARSON": { + "message": [ + "Value for `` only supports the 'pearson', got ''." + ] + }, + "VALUE_NOT_PLAIN_COLUMN_REFERENCE": { + "message": [ + "Value `` in `` should be a plain column reference such as `df.col` or `col('column')`." 
+ ] + }, + "VALUE_NOT_POSITIVE": { + "message": [ + "Value for `` must be positive, got ''." + ] + }, + "VALUE_NOT_TRUE": { + "message": [ + "Value for `` must be True, got ''." + ] + }, + "VALUE_OUT_OF_BOUNDS": { + "message": [ + "Value for `` must be between and (inclusive), got " + ] + }, + "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": { + "message": [ + "Function `` should take between 1 and 3 arguments, but the provided function takes ." + ] + }, + "WRONG_NUM_COLUMNS": { + "message": [ + "Function `` should take at least columns." + ] + }, + "ZERO_INDEX": { + "message": [ + "Index must be non-zero." + ] + } +} diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py index 6b7f19b449185..30869a3fbb2d2 100644 --- a/python/pyspark/errors/error_classes.py +++ b/python/pyspark/errors/error_classes.py @@ -15,1160 +15,17 @@ # limitations under the License. # -# NOTE: Automatically sort this file via -# - cd $SPARK_HOME -# - bin/pyspark -# - from pyspark.errors.exceptions import _write_self; _write_self() import json +import importlib.resources - -ERROR_CLASSES_JSON = ''' -{ - "APPLICATION_NAME_NOT_SET": { - "message": [ - "An application name must be set in your configuration." - ] - }, - "ARGUMENT_REQUIRED": { - "message": [ - "Argument `` is required when ." - ] - }, - "ARROW_LEGACY_IPC_FORMAT": { - "message": [ - "Arrow legacy IPC format is not supported in PySpark, please unset ARROW_PRE_0_15_IPC_FORMAT." - ] - }, - "ATTRIBUTE_NOT_CALLABLE": { - "message": [ - "Attribute `` in provided object `` is not callable." - ] - }, - "ATTRIBUTE_NOT_SUPPORTED": { - "message": [ - "Attribute `` is not supported." - ] - }, - "AXIS_LENGTH_MISMATCH": { - "message": [ - "Length mismatch: Expected axis has element, new values have elements." - ] - }, - "BROADCAST_VARIABLE_NOT_LOADED": { - "message": [ - "Broadcast variable `` not loaded." - ] - }, - "CALL_BEFORE_INITIALIZE": { - "message": [ - "Not supported to call `` before initialize ." - ] - }, - "CANNOT_ACCEPT_OBJECT_IN_TYPE": { - "message": [ - "`` can not accept object `` in type ``." - ] - }, - "CANNOT_ACCESS_TO_DUNDER": { - "message": [ - "Dunder(double underscore) attribute is for internal use only." - ] - }, - "CANNOT_APPLY_IN_FOR_COLUMN": { - "message": [ - "Cannot apply 'in' operator against a column: please use 'contains' in a string column or 'array_contains' function for an array column." - ] - }, - "CANNOT_BE_EMPTY": { - "message": [ - "At least one must be specified." - ] - }, - "CANNOT_BE_NONE": { - "message": [ - "Argument `` cannot be None." - ] - }, - "CANNOT_CONFIGURE_SPARK_CONNECT": { - "message": [ - "Spark Connect server cannot be configured: Existing [], New []." - ] - }, - "CANNOT_CONFIGURE_SPARK_CONNECT_MASTER": { - "message": [ - "Spark Connect server and Spark master cannot be configured together: Spark master [], Spark Connect []." - ] - }, - "CANNOT_CONVERT_COLUMN_INTO_BOOL": { - "message": [ - "Cannot convert column into bool: please use '&' for 'and', '|' for 'or', '~' for 'not' when building DataFrame boolean expressions." - ] - }, - "CANNOT_CONVERT_TYPE": { - "message": [ - "Cannot convert into ." - ] - }, - "CANNOT_DETERMINE_TYPE": { - "message": [ - "Some of types cannot be determined after inferring." - ] - }, - "CANNOT_GET_BATCH_ID": { - "message": [ - "Could not get batch id from ." - ] - }, - "CANNOT_INFER_ARRAY_TYPE": { - "message": [ - "Can not infer Array Type from a list with None as the first element." 
- ] - }, - "CANNOT_INFER_EMPTY_SCHEMA": { - "message": [ - "Can not infer schema from an empty dataset." - ] - }, - "CANNOT_INFER_SCHEMA_FOR_TYPE": { - "message": [ - "Can not infer schema for type: ``." - ] - }, - "CANNOT_INFER_TYPE_FOR_FIELD": { - "message": [ - "Unable to infer the type of the field ``." - ] - }, - "CANNOT_MERGE_TYPE": { - "message": [ - "Can not merge type `` and ``." - ] - }, - "CANNOT_OPEN_SOCKET": { - "message": [ - "Can not open socket: ." - ] - }, - "CANNOT_PARSE_DATATYPE": { - "message": [ - "Unable to parse datatype. ." - ] - }, - "CANNOT_PROVIDE_METADATA": { - "message": [ - "Metadata can only be provided for a single column." - ] - }, - "CANNOT_SET_TOGETHER": { - "message": [ - " should not be set together." - ] - }, - "CANNOT_SPECIFY_RETURN_TYPE_FOR_UDF": { - "message": [ - "returnType can not be specified when `` is a user-defined function, but got ." - ] - }, - "CANNOT_WITHOUT": { - "message": [ - "Cannot without ." - ] - }, - "COLUMN_IN_LIST": { - "message": [ - "`` does not allow a Column in a list." - ] - }, - "CONNECT_URL_ALREADY_DEFINED": { - "message": [ - "Only one Spark Connect client URL can be set; however, got a different URL [] from the existing []." - ] - }, - "CONNECT_URL_NOT_SET": { - "message": [ - "Cannot create a Spark Connect session because the Spark Connect remote URL has not been set. Please define the remote URL by setting either the 'spark.remote' option or the 'SPARK_REMOTE' environment variable." - ] - }, - "CONTEXT_ONLY_VALID_ON_DRIVER": { - "message": [ - "It appears that you are attempting to reference SparkContext from a broadcast variable, action, or transformation. SparkContext can only be used on the driver, not in code that it run on workers. For more information, see SPARK-5063." - ] - }, - "CONTEXT_UNAVAILABLE_FOR_REMOTE_CLIENT": { - "message": [ - "Remote client cannot create a SparkContext. Create SparkSession instead." - ] - }, - "DATA_SOURCE_CREATE_ERROR": { - "message": [ - "Failed to create python data source instance, error: ." - ] - }, - "DATA_SOURCE_INVALID_RETURN_TYPE": { - "message": [ - "Unsupported return type ('') from Python data source ''. Expected types: ." - ] - }, - "DATA_SOURCE_RETURN_SCHEMA_MISMATCH": { - "message": [ - "Return schema mismatch in the result from 'read' method. Expected: columns, Found: columns. Make sure the returned values match the required output schema." - ] - }, - "DATA_SOURCE_TYPE_MISMATCH": { - "message": [ - "Expected , but got ." 
- ] - }, - "DIFFERENT_PANDAS_DATAFRAME": { - "message": [ - "DataFrames are not almost equal:", - "Left:", - "", - "", - "Right:", - "", - "" - ] - }, - "DIFFERENT_PANDAS_INDEX": { - "message": [ - "Indices are not almost equal:", - "Left:", - "", - "", - "Right:", - "", - "" - ] - }, - "DIFFERENT_PANDAS_MULTIINDEX": { - "message": [ - "MultiIndices are not almost equal:", - "Left:", - "", - "", - "Right:", - "", - "" - ] - }, - "DIFFERENT_PANDAS_SERIES": { - "message": [ - "Series are not almost equal:", - "Left:", - "", - "", - "Right:", - "", - "" - ] - }, - "DIFFERENT_ROWS": { - "message": [ - "" - ] - }, - "DIFFERENT_SCHEMA": { - "message": [ - "Schemas do not match.", - "--- actual", - "+++ expected", - "" - ] - }, - "DISALLOWED_TYPE_FOR_CONTAINER": { - "message": [ - "Argument ``(type: ) should only contain a type in [], got " - ] - }, - "DUPLICATED_FIELD_NAME_IN_ARROW_STRUCT": { - "message": [ - "Duplicated field names in Arrow Struct are not allowed, got " - ] - }, - "ERROR_OCCURRED_WHILE_CALLING": { - "message": [ - "An error occurred while calling : ." - ] - }, - "FIELD_DATA_TYPE_UNACCEPTABLE": { - "message": [ - " can not accept object in type ." - ] - }, - "FIELD_DATA_TYPE_UNACCEPTABLE_WITH_NAME": { - "message": [ - ": can not accept object in type ." - ] - }, - "FIELD_NOT_NULLABLE": { - "message": [ - "Field is not nullable, but got None." - ] - }, - "FIELD_NOT_NULLABLE_WITH_NAME": { - "message": [ - ": This field is not nullable, but got None." - ] - }, - "FIELD_STRUCT_LENGTH_MISMATCH": { - "message": [ - "Length of object () does not match with length of fields ()." - ] - }, - "FIELD_STRUCT_LENGTH_MISMATCH_WITH_NAME": { - "message": [ - ": Length of object () does not match with length of fields ()." - ] - }, - "FIELD_TYPE_MISMATCH": { - "message": [ - " is not an instance of type ." - ] - }, - "FIELD_TYPE_MISMATCH_WITH_NAME": { - "message": [ - ": is not an instance of type ." - ] - }, - "HIGHER_ORDER_FUNCTION_SHOULD_RETURN_COLUMN": { - "message": [ - "Function `` should return Column, got ." - ] - }, - "INCORRECT_CONF_FOR_PROFILE": { - "message": [ - "`spark.python.profile` or `spark.python.profile.memory` configuration", - " must be set to `true` to enable Python profile." - ] - }, - "INDEX_NOT_POSITIVE": { - "message": [ - "Index must be positive, got ''." - ] - }, - "INDEX_OUT_OF_RANGE": { - "message": [ - " index out of range, got ''." - ] - }, - "INVALID_ARROW_UDTF_RETURN_TYPE": { - "message": [ - "The return type of the arrow-optimized Python UDTF should be of type 'pandas.DataFrame', but the '' method returned a value of type with value: ." - ] - }, - "INVALID_BROADCAST_OPERATION": { - "message": [ - "Broadcast can only be in driver." - ] - }, - "INVALID_CALL_ON_UNRESOLVED_OBJECT": { - "message": [ - "Invalid call to `` on unresolved object." - ] - }, - "INVALID_CONNECT_URL": { - "message": [ - "Invalid URL for Spark Connect: " - ] - }, - "INVALID_INTERVAL_CASTING": { - "message": [ - "Interval to is invalid." - ] - }, - "INVALID_ITEM_FOR_CONTAINER": { - "message": [ - "All items in `` should be in , got ." - ] - }, - "INVALID_MULTIPLE_ARGUMENT_CONDITIONS": { - "message": [ - "[{arg_names}] cannot be ." - ] - }, - "INVALID_NDARRAY_DIMENSION": { - "message": [ - "NumPy array input should be of dimensions." - ] - }, - "INVALID_NUMBER_OF_DATAFRAMES_IN_GROUP": { - "message": [ - "Invalid number of dataframes in group ." 
- ] - }, - "INVALID_PANDAS_UDF": { - "message": [ - "Invalid function: " - ] - }, - "INVALID_PANDAS_UDF_TYPE": { - "message": [ - "`` should be one of the values from PandasUDFType, got " - ] - }, - "INVALID_RETURN_TYPE_FOR_ARROW_UDF": { - "message": [ - "Grouped and Cogrouped map Arrow UDF should return StructType for , got ." - ] - }, - "INVALID_RETURN_TYPE_FOR_PANDAS_UDF": { - "message": [ - "Pandas UDF should return StructType for , got ." - ] - }, - "INVALID_SESSION_UUID_ID": { - "message": [ - "Parameter value must be a valid UUID format: " - ] - }, - "INVALID_TIMEOUT_TIMESTAMP": { - "message": [ - "Timeout timestamp () cannot be earlier than the current watermark ()." - ] - }, - "INVALID_TYPE": { - "message": [ - "Argument `` should not be a ." - ] - }, - "INVALID_TYPENAME_CALL": { - "message": [ - "StructField does not have typeName. Use typeName on its type explicitly instead." - ] - }, - "INVALID_TYPE_DF_EQUALITY_ARG": { - "message": [ - "Expected type for `` but got type ." - ] - }, - "INVALID_UDF_EVAL_TYPE": { - "message": [ - "Eval type for UDF must be ." - ] - }, - "INVALID_UDTF_BOTH_RETURN_TYPE_AND_ANALYZE": { - "message": [ - "The UDTF '' is invalid. It has both its return type and an 'analyze' attribute. Please make it have one of either the return type or the 'analyze' static method in '' and try again." - ] - }, - "INVALID_UDTF_EVAL_TYPE": { - "message": [ - "The eval type for the UDTF '' is invalid. It must be one of ." - ] - }, - "INVALID_UDTF_HANDLER_TYPE": { - "message": [ - "The UDTF is invalid. The function handler must be a class, but got ''. Please provide a class as the function handler." - ] - }, - "INVALID_UDTF_NO_EVAL": { - "message": [ - "The UDTF '' is invalid. It does not implement the required 'eval' method. Please implement the 'eval' method in '' and try again." - ] - }, - "INVALID_UDTF_RETURN_TYPE": { - "message": [ - "The UDTF '' is invalid. It does not specify its return type or implement the required 'analyze' static method. Please specify the return type or implement the 'analyze' static method in '' and try again." - ] - }, - "INVALID_WHEN_USAGE": { - "message": [ - "when() can only be applied on a Column previously generated by when() function, and cannot be applied once otherwise() is applied." - ] - }, - "INVALID_WINDOW_BOUND_TYPE": { - "message": [ - "Invalid window bound type: ." - ] - }, - "JAVA_GATEWAY_EXITED": { - "message": [ - "Java gateway process exited before sending its port number." - ] - }, - "JVM_ATTRIBUTE_NOT_SUPPORTED": { - "message": [ - "Attribute `` is not supported in Spark Connect as it depends on the JVM. If you need to use this attribute, do not use Spark Connect when creating your session. Visit https://spark.apache.org/docs/latest/sql-getting-started.html#starting-point-sparksession for creating regular Spark Session in detail." - ] - }, - "KEY_NOT_EXISTS": { - "message": [ - "Key `` is not exists." - ] - }, - "KEY_VALUE_PAIR_REQUIRED": { - "message": [ - "Key-value pair or a list of pairs is required." - ] - }, - "LENGTH_SHOULD_BE_THE_SAME": { - "message": [ - " and should be of the same length, got and ." - ] - }, - "MASTER_URL_NOT_SET": { - "message": [ - "A master URL must be set in your configuration." - ] - }, - "MISSING_LIBRARY_FOR_PROFILER": { - "message": [ - "Install the 'memory_profiler' library in the cluster to enable memory profiling." - ] - }, - "MISSING_VALID_PLAN": { - "message": [ - "Argument to does not contain a valid plan." 
- ] - }, - "MIXED_TYPE_REPLACEMENT": { - "message": [ - "Mixed type replacements are not supported." - ] - }, - "NEGATIVE_VALUE": { - "message": [ - "Value for `` must be greater than or equal to 0, got ''." - ] - }, - "NOT_BOOL": { - "message": [ - "Argument `` should be a bool, got ." - ] - }, - "NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_LIST_OR_STR_OR_TUPLE": { - "message": [ - "Argument `` should be a bool, dict, float, int, str or tuple, got ." - ] - }, - "NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_STR": { - "message": [ - "Argument `` should be a bool, dict, float, int or str, got ." - ] - }, - "NOT_BOOL_OR_FLOAT_OR_INT": { - "message": [ - "Argument `` should be a bool, float or int, got ." - ] - }, - "NOT_BOOL_OR_FLOAT_OR_INT_OR_LIST_OR_NONE_OR_STR_OR_TUPLE": { - "message": [ - "Argument `` should be a bool, float, int, list, None, str or tuple, got ." - ] - }, - "NOT_BOOL_OR_FLOAT_OR_INT_OR_STR": { - "message": [ - "Argument `` should be a bool, float, int or str, got ." - ] - }, - "NOT_BOOL_OR_LIST": { - "message": [ - "Argument `` should be a bool or list, got ." - ] - }, - "NOT_BOOL_OR_STR": { - "message": [ - "Argument `` should be a bool or str, got ." - ] - }, - "NOT_CALLABLE": { - "message": [ - "Argument `` should be a callable, got ." - ] - }, - "NOT_COLUMN": { - "message": [ - "Argument `` should be a Column, got ." - ] - }, - "NOT_COLUMN_OR_DATATYPE_OR_STR": { - "message": [ - "Argument `` should be a Column, str or DataType, but got ." - ] - }, - "NOT_COLUMN_OR_FLOAT_OR_INT_OR_LIST_OR_STR": { - "message": [ - "Argument `` should be a Column, float, integer, list or string, got ." - ] - }, - "NOT_COLUMN_OR_INT": { - "message": [ - "Argument `` should be a Column or int, got ." - ] - }, - "NOT_COLUMN_OR_INT_OR_LIST_OR_STR_OR_TUPLE": { - "message": [ - "Argument `` should be a Column, int, list, str or tuple, got ." - ] - }, - "NOT_COLUMN_OR_INT_OR_STR": { - "message": [ - "Argument `` should be a Column, int or str, got ." - ] - }, - "NOT_COLUMN_OR_LIST_OR_STR": { - "message": [ - "Argument `` should be a Column, list or str, got ." - ] - }, - "NOT_COLUMN_OR_STR": { - "message": [ - "Argument `` should be a Column or str, got ." - ] - }, - "NOT_COLUMN_OR_STR_OR_STRUCT": { - "message": [ - "Argument `` should be a StructType, Column or str, got ." - ] - }, - "NOT_DATAFRAME": { - "message": [ - "Argument `` should be a DataFrame, got ." - ] - }, - "NOT_DATATYPE_OR_STR": { - "message": [ - "Argument `` should be a DataType or str, got ." - ] - }, - "NOT_DICT": { - "message": [ - "Argument `` should be a dict, got ." - ] - }, - "NOT_EXPRESSION": { - "message": [ - "Argument `` should be an Expression, got ." - ] - }, - "NOT_FLOAT_OR_INT": { - "message": [ - "Argument `` should be a float or int, got ." - ] - }, - "NOT_FLOAT_OR_INT_OR_LIST_OR_STR": { - "message": [ - "Argument `` should be a float, int, list or str, got ." - ] - }, - "NOT_IMPLEMENTED": { - "message": [ - " is not implemented." - ] - }, - "NOT_INT": { - "message": [ - "Argument `` should be an int, got ." - ] - }, - "NOT_INT_OR_SLICE_OR_STR": { - "message": [ - "Argument `` should be an int, slice or str, got ." - ] - }, - "NOT_IN_BARRIER_STAGE": { - "message": [ - "It is not in a barrier stage." - ] - }, - "NOT_ITERABLE": { - "message": [ - " is not iterable." - ] - }, - "NOT_LIST": { - "message": [ - "Argument `` should be a list, got ." - ] - }, - "NOT_LIST_OF_COLUMN": { - "message": [ - "Argument `` should be a list[Column]." 
- ] - }, - "NOT_LIST_OF_COLUMN_OR_STR": { - "message": [ - "Argument `` should be a list[Column]." - ] - }, - "NOT_LIST_OF_FLOAT_OR_INT": { - "message": [ - "Argument `` should be a list[float, int], got ." - ] - }, - "NOT_LIST_OF_STR": { - "message": [ - "Argument `` should be a list[str], got ." - ] - }, - "NOT_LIST_OR_NONE_OR_STRUCT": { - "message": [ - "Argument `` should be a list, None or StructType, got ." - ] - }, - "NOT_LIST_OR_STR_OR_TUPLE": { - "message": [ - "Argument `` should be a list, str or tuple, got ." - ] - }, - "NOT_LIST_OR_TUPLE": { - "message": [ - "Argument `` should be a list or tuple, got ." - ] - }, - "NOT_NUMERIC_COLUMNS": { - "message": [ - "Numeric aggregation function can only be applied on numeric columns, got ." - ] - }, - "NOT_OBSERVATION_OR_STR": { - "message": [ - "Argument `` should be an Observation or str, got ." - ] - }, - "NOT_SAME_TYPE": { - "message": [ - "Argument `` and `` should be the same type, got and ." - ] - }, - "NOT_STR": { - "message": [ - "Argument `` should be a str, got ." - ] - }, - "NOT_STRUCT": { - "message": [ - "Argument `` should be a struct type, got ." - ] - }, - "NOT_STR_OR_LIST_OF_RDD": { - "message": [ - "Argument `` should be a str or list[RDD], got ." - ] - }, - "NOT_STR_OR_STRUCT": { - "message": [ - "Argument `` should be a str or struct type, got ." - ] - }, - "NOT_WINDOWSPEC": { - "message": [ - "Argument `` should be a WindowSpec, got ." - ] - }, - "NO_ACTIVE_EXCEPTION": { - "message": [ - "No active exception." - ] - }, - "NO_ACTIVE_OR_DEFAULT_SESSION": { - "message": [ - "No active or default Spark session found. Please create a new Spark session before running the code." - ] - }, - "NO_ACTIVE_SESSION": { - "message": [ - "No active Spark session found. Please create a new Spark session before running the code." - ] - }, - "NO_OBSERVE_BEFORE_GET": { - "message": [ - "Should observe by calling `DataFrame.observe` before `get`." - ] - }, - "NO_SCHEMA_AND_DRIVER_DEFAULT_SCHEME": { - "message": [ - "Only allows to be a path without scheme, and Spark Driver should use the default scheme to determine the destination file system." - ] - }, - "ONLY_ALLOWED_FOR_SINGLE_COLUMN": { - "message": [ - "Argument `` can only be provided for a single column." - ] - }, - "ONLY_ALLOW_SINGLE_TRIGGER": { - "message": [ - "Only a single trigger is allowed." - ] - }, - "ONLY_SUPPORTED_WITH_SPARK_CONNECT": { - "message": [ - " is only supported with Spark Connect; however, the current Spark session does not use Spark Connect." - ] - }, - "PACKAGE_NOT_INSTALLED": { - "message": [ - " >= must be installed; however, it was not found." - ] - }, - "PIPE_FUNCTION_EXITED": { - "message": [ - "Pipe function `` exited with error code ." - ] - }, - "PYTHON_HASH_SEED_NOT_SET": { - "message": [ - "Randomness of hash of string should be disabled via PYTHONHASHSEED." - ] - }, - "PYTHON_STREAMING_DATA_SOURCE_RUNTIME_ERROR": { - "message": [ - "Failed when running Python streaming data source: " - ] - }, - "PYTHON_VERSION_MISMATCH": { - "message": [ - "Python in worker has different version: than that in driver: , PySpark cannot run with different minor versions.", - "Please check environment variables PYSPARK_PYTHON and PYSPARK_DRIVER_PYTHON are correctly set." - ] - }, - "RDD_TRANSFORM_ONLY_VALID_ON_DRIVER": { - "message": [ - "It appears that you are attempting to broadcast an RDD or reference an RDD from an ", - "action or transformation. 
RDD transformations and actions can only be invoked by the ", - "driver, not inside of other transformations; for example, ", - "rdd1.map(lambda x: rdd2.values.count() * x) is invalid because the values ", - "transformation and count action cannot be performed inside of the rdd1.map ", - "transformation. For more information, see SPARK-5063." - ] - }, - "READ_ONLY": { - "message": [ - " is read-only." - ] - }, - "RESPONSE_ALREADY_RECEIVED": { - "message": [ - "OPERATION_NOT_FOUND on the server but responses were already received from it." - ] - }, - "RESULT_COLUMNS_MISMATCH_FOR_ARROW_UDF": { - "message": [ - "Column names of the returned pyarrow.Table do not match specified schema." - ] - }, - "RESULT_COLUMNS_MISMATCH_FOR_PANDAS_UDF": { - "message": [ - "Column names of the returned pandas.DataFrame do not match specified schema." - ] - }, - "RESULT_LENGTH_MISMATCH_FOR_PANDAS_UDF": { - "message": [ - "Number of columns of the returned pandas.DataFrame doesn't match specified schema. Expected: Actual: " - ] - }, - "RESULT_LENGTH_MISMATCH_FOR_SCALAR_ITER_PANDAS_UDF": { - "message": [ - "The length of output in Scalar iterator pandas UDF should be the same with the input's; however, the length of output was and the length of input was ." - ] - }, - "RESULT_TYPE_MISMATCH_FOR_ARROW_UDF": { - "message": [ - "Columns do not match in their data type: ." - ] - }, - "RETRIES_EXCEEDED": { - "message": [ - "The maximum number of retries has been exceeded." - ] - }, - "REUSE_OBSERVATION": { - "message": [ - "An Observation can be used with a DataFrame only once." - ] - }, - "SCHEMA_MISMATCH_FOR_PANDAS_UDF": { - "message": [ - "Result vector from pandas_udf was not the required length: expected , got ." - ] - }, - "SESSION_ALREADY_EXIST": { - "message": [ - "Cannot start a remote Spark session because there is a regular Spark session already running." - ] - }, - "SESSION_NEED_CONN_STR_OR_BUILDER": { - "message": [ - "Needs either connection string or channelBuilder (mutually exclusive) to create a new SparkSession." - ] - }, - "SESSION_NOT_SAME": { - "message": [ - "Both Datasets must belong to the same SparkSession." - ] - }, - "SESSION_OR_CONTEXT_EXISTS": { - "message": [ - "There should not be an existing Spark Session or Spark Context." - ] - }, - "SESSION_OR_CONTEXT_NOT_EXISTS": { - "message": [ - "SparkContext or SparkSession should be created first." - ] - }, - "SLICE_WITH_STEP": { - "message": [ - "Slice with step is not supported." - ] - }, - "STATE_NOT_EXISTS": { - "message": [ - "State is either not defined or has already been removed." - ] - }, - "STOP_ITERATION_OCCURRED": { - "message": [ - "Caught StopIteration thrown from user's code; failing the task: " - ] - }, - "STOP_ITERATION_OCCURRED_FROM_SCALAR_ITER_PANDAS_UDF": { - "message": [ - "pandas iterator UDF should exhaust the input iterator." - ] - }, - "STREAMING_CONNECT_SERIALIZATION_ERROR": { - "message": [ - "Cannot serialize the function ``. If you accessed the Spark session, or a DataFrame defined outside of the function, or any object that contains a Spark session, please be aware that they are not allowed in Spark Connect. For `foreachBatch`, please access the Spark session using `df.sparkSession`, where `df` is the first parameter in your `foreachBatch` function. For `StreamingQueryListener`, please access the Spark session using `self.spark`. For details please check out the PySpark doc for `foreachBatch` and `StreamingQueryListener`." - ] - }, - "TEST_CLASS_NOT_COMPILED": { - "message": [ - " doesn't exist. 
Spark sql test classes are not compiled." - ] - }, - "TOO_MANY_VALUES": { - "message": [ - "Expected values for ``, got ." - ] - }, - "TYPE_HINT_SHOULD_BE_SPECIFIED": { - "message": [ - "Type hints for should be specified; however, got ." - ] - }, - "UDF_RETURN_TYPE": { - "message": [ - "Return type of the user-defined function should be , but is ." - ] - }, - "UDTF_ARROW_TYPE_CAST_ERROR": { - "message": [ - "Cannot convert the output value of the column '' with type '' to the specified return type of the column: ''. Please check if the data types match and try again." - ] - }, - "UDTF_CONSTRUCTOR_INVALID_IMPLEMENTS_ANALYZE_METHOD": { - "message": [ - "Failed to evaluate the user-defined table function '' because its constructor is invalid: the function implements the 'analyze' method, but its constructor has more than two arguments (including the 'self' reference). Please update the table function so that its constructor accepts exactly one 'self' argument, or one 'self' argument plus another argument for the result of the 'analyze' method, and try the query again." - ] - }, - "UDTF_CONSTRUCTOR_INVALID_NO_ANALYZE_METHOD": { - "message": [ - "Failed to evaluate the user-defined table function '' because its constructor is invalid: the function does not implement the 'analyze' method, and its constructor has more than one argument (including the 'self' reference). Please update the table function so that its constructor accepts exactly one 'self' argument, and try the query again." - ] - }, - "UDTF_EVAL_METHOD_ARGUMENTS_DO_NOT_MATCH_SIGNATURE": { - "message": [ - "Failed to evaluate the user-defined table function '' because the function arguments did not match the expected signature of the 'eval' method (). Please update the query so that this table function call provides arguments matching the expected signature, or else update the table function so that its 'eval' method accepts the provided arguments, and then try the query again." - ] - }, - "UDTF_EXEC_ERROR": { - "message": [ - "User defined table function encountered an error in the '' method: " - ] - }, - "UDTF_INVALID_OUTPUT_ROW_TYPE": { - "message": [ - "The type of an individual output row in the '' method of the UDTF is invalid. Each row should be a tuple, list, or dict, but got ''. Please make sure that the output rows are of the correct type." - ] - }, - "UDTF_RETURN_NOT_ITERABLE": { - "message": [ - "The return value of the '' method of the UDTF is invalid. It should be an iterable (e.g., generator or list), but got ''. Please make sure that the UDTF returns one of these types." - ] - }, - "UDTF_RETURN_SCHEMA_MISMATCH": { - "message": [ - "The number of columns in the result does not match the specified schema. Expected column count: , Actual column count: . Please make sure the values returned by the '' method have the same number of columns as specified in the output schema." - ] - }, - "UDTF_RETURN_TYPE_MISMATCH": { - "message": [ - "Mismatch in return type for the UDTF ''. Expected a 'StructType', but got ''. Please ensure the return type is a correctly formatted StructType." - ] - }, - "UDTF_SERIALIZATION_ERROR": { - "message": [ - "Cannot serialize the UDTF '': " - ] - }, - "UNEXPECTED_RESPONSE_FROM_SERVER": { - "message": [ - "Unexpected response from iterator server." - ] - }, - "UNEXPECTED_TUPLE_WITH_STRUCT": { - "message": [ - "Unexpected tuple with StructType." - ] - }, - "UNKNOWN_EXPLAIN_MODE": { - "message": [ - "Unknown explain mode: ''. 
Accepted explain modes are 'simple', 'extended', 'codegen', 'cost', 'formatted'." - ] - }, - "UNKNOWN_INTERRUPT_TYPE": { - "message": [ - "Unknown interrupt type: ''. Accepted interrupt types are 'all'." - ] - }, - "UNKNOWN_RESPONSE": { - "message": [ - "Unknown response: ." - ] - }, - "UNKNOWN_VALUE_FOR": { - "message": [ - "Unknown value for ``." - ] - }, - "UNSUPPORTED_DATA_TYPE": { - "message": [ - "Unsupported DataType ``." - ] - }, - "UNSUPPORTED_DATA_TYPE_FOR_ARROW": { - "message": [ - "Single data type is not supported with Arrow." - ] - }, - "UNSUPPORTED_DATA_TYPE_FOR_ARROW_CONVERSION": { - "message": [ - " is not supported in conversion to Arrow." - ] - }, - "UNSUPPORTED_DATA_TYPE_FOR_ARROW_VERSION": { - "message": [ - " is only supported with pyarrow 2.0.0 and above." - ] - }, - "UNSUPPORTED_JOIN_TYPE": { - "message": [ - "Unsupported join type: . Supported join types include: 'inner', 'outer', 'full', 'fullouter', 'full_outer', 'leftouter', 'left', 'left_outer', 'rightouter', 'right', 'right_outer', 'leftsemi', 'left_semi', 'semi', 'leftanti', 'left_anti', 'anti', 'cross'." - ] - }, - "UNSUPPORTED_LITERAL": { - "message": [ - "Unsupported Literal ''." - ] - }, - "UNSUPPORTED_LOCAL_CONNECTION_STRING": { - "message": [ - "Creating new SparkSessions with `local` connection string is not supported." - ] - }, - "UNSUPPORTED_NUMPY_ARRAY_SCALAR": { - "message": [ - "The type of array scalar '' is not supported." - ] - }, - "UNSUPPORTED_OPERATION": { - "message": [ - " is not supported." - ] - }, - "UNSUPPORTED_PACKAGE_VERSION": { - "message": [ - " >= must be installed; however, your version is ." - ] - }, - "UNSUPPORTED_PARAM_TYPE_FOR_HIGHER_ORDER_FUNCTION": { - "message": [ - "Function `` should use only POSITIONAL or POSITIONAL OR KEYWORD arguments." - ] - }, - "UNSUPPORTED_SIGNATURE": { - "message": [ - "Unsupported signature: ." - ] - }, - "UNSUPPORTED_WITH_ARROW_OPTIMIZATION": { - "message": [ - " is not supported with Arrow optimization enabled in Python UDFs. Disable 'spark.sql.execution.pythonUDF.arrow.enabled' to workaround." - ] - }, - "VALUE_ALLOWED": { - "message": [ - "Value for `` does not allow ." - ] - }, - "VALUE_NOT_ACCESSIBLE": { - "message": [ - "Value `` cannot be accessed inside tasks." - ] - }, - "VALUE_NOT_ALLOWED": { - "message": [ - "Value for `` has to be amongst the following values: ." - ] - }, - "VALUE_NOT_ANY_OR_ALL": { - "message": [ - "Value for `` must be 'any' or 'all', got ''." - ] - }, - "VALUE_NOT_BETWEEN": { - "message": [ - "Value for `` must be between and ." - ] - }, - "VALUE_NOT_NON_EMPTY_STR": { - "message": [ - "Value for `` must be a non-empty string, got ''." - ] - }, - "VALUE_NOT_PEARSON": { - "message": [ - "Value for `` only supports the 'pearson', got ''." - ] - }, - "VALUE_NOT_PLAIN_COLUMN_REFERENCE": { - "message": [ - "Value `` in `` should be a plain column reference such as `df.col` or `col('column')`." - ] - }, - "VALUE_NOT_POSITIVE": { - "message": [ - "Value for `` must be positive, got ''." - ] - }, - "VALUE_NOT_TRUE": { - "message": [ - "Value for `` must be True, got ''." - ] - }, - "VALUE_OUT_OF_BOUNDS": { - "message": [ - "Value for `` must be between and (inclusive), got " - ] - }, - "WRONG_NUM_ARGS_FOR_HIGHER_ORDER_FUNCTION": { - "message": [ - "Function `` should take between 1 and 3 arguments, but the provided function takes ." - ] - }, - "WRONG_NUM_COLUMNS": { - "message": [ - "Function `` should take at least columns." - ] - }, - "ZERO_INDEX": { - "message": [ - "Index must be non-zero." 
-    ]
-  }
-}
-'''
-
+# Note: Though we call them "error classes" here, the proper name is "error conditions",
+# hence why the name of the JSON file is different.
+# For more information, please see: https://issues.apache.org/jira/browse/SPARK-46810
+# This discrepancy will be resolved as part of: https://issues.apache.org/jira/browse/SPARK-47429
+ERROR_CLASSES_JSON = (
+    importlib.resources
+    .files("pyspark.errors")
+    .joinpath("error-conditions.json")
+    .read_text()
+)
 ERROR_CLASSES_MAP = json.loads(ERROR_CLASSES_JSON)
diff --git a/python/pyspark/errors/exceptions/__init__.py b/python/pyspark/errors/exceptions/__init__.py
index 4fd16c6a2e1ad..c66f35958f8dd 100644
--- a/python/pyspark/errors/exceptions/__init__.py
+++ b/python/pyspark/errors/exceptions/__init__.py
@@ -18,39 +18,15 @@
 def _write_self() -> None:
     import json
+    from pathlib import Path
     from pyspark.errors import error_classes
-    with open("python/pyspark/errors/error_classes.py", "w") as f:
-        error_class_py_file = """#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
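The hunk above swaps the generated Python literal for a packaged JSON file read through importlib.resources. A minimal sketch of that loading pattern, outside the patch and with placeholder package and file names ("mypkg.errors", "conditions.json"):

    import importlib.resources
    import json

    def load_conditions() -> dict:
        # Resolve the file relative to the installed package so the lookup also
        # works from a wheel or zipped install, then parse the text as JSON.
        text = (
            importlib.resources.files("mypkg.errors")   # placeholder package
            .joinpath("conditions.json")                 # placeholder resource
            .read_text()
        )
        return json.loads(text)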
-# - -# NOTE: Automatically sort this file via -# - cd $SPARK_HOME -# - bin/pyspark -# - from pyspark.errors.exceptions import _write_self; _write_self() -import json - - -ERROR_CLASSES_JSON = ''' -%s -''' + ERRORS_DIR = Path(__file__).parents[1] -ERROR_CLASSES_MAP = json.loads(ERROR_CLASSES_JSON) -""" % json.dumps( - error_classes.ERROR_CLASSES_MAP, sort_keys=True, indent=2 + with open(ERRORS_DIR / "error-conditions.json", "w") as f: + json.dump( + error_classes.ERROR_CLASSES_MAP, + f, + sort_keys=True, + indent=2, ) - f.write(error_class_py_file) diff --git a/python/pyspark/errors/exceptions/captured.py b/python/pyspark/errors/exceptions/captured.py index 2a30eba3fb22f..b5bb742161c06 100644 --- a/python/pyspark/errors/exceptions/captured.py +++ b/python/pyspark/errors/exceptions/captured.py @@ -166,7 +166,14 @@ def getQueryContext(self) -> List[BaseQueryContext]: if self._origin is not None and is_instance_of( gw, self._origin, "org.apache.spark.SparkThrowable" ): - return [QueryContext(q) for q in self._origin.getQueryContext()] + contexts: List[BaseQueryContext] = [] + for q in self._origin.getQueryContext(): + if q.contextType().toString() == "SQL": + contexts.append(SQLQueryContext(q)) + else: + contexts.append(DataFrameQueryContext(q)) + + return contexts else: return [] @@ -379,17 +386,12 @@ class UnknownException(CapturedException, BaseUnknownException): """ -class QueryContext(BaseQueryContext): +class SQLQueryContext(BaseQueryContext): def __init__(self, q: "JavaObject"): self._q = q def contextType(self) -> QueryContextType: - context_type = self._q.contextType().toString() - assert context_type in ("SQL", "DataFrame") - if context_type == "DataFrame": - return QueryContextType.DataFrame - else: - return QueryContextType.SQL + return QueryContextType.SQL def objectType(self) -> str: return str(self._q.objectType()) @@ -409,13 +411,34 @@ def fragment(self) -> str: def callSite(self) -> str: return str(self._q.callSite()) - def pysparkFragment(self) -> Optional[str]: # type: ignore[return] - if self.contextType() == QueryContextType.DataFrame: - return str(self._q.pysparkFragment()) + def summary(self) -> str: + return str(self._q.summary()) + + +class DataFrameQueryContext(BaseQueryContext): + def __init__(self, q: "JavaObject"): + self._q = q + + def contextType(self) -> QueryContextType: + return QueryContextType.DataFrame + + def objectType(self) -> str: + return str(self._q.objectType()) + + def objectName(self) -> str: + return str(self._q.objectName()) - def pysparkCallSite(self) -> Optional[str]: # type: ignore[return] - if self.contextType() == QueryContextType.DataFrame: - return str(self._q.pysparkCallSite()) + def startIndex(self) -> int: + return int(self._q.startIndex()) + + def stopIndex(self) -> int: + return int(self._q.stopIndex()) + + def fragment(self) -> str: + return str(self._q.fragment()) + + def callSite(self) -> str: + return str(self._q.callSite()) def summary(self) -> str: return str(self._q.summary()) diff --git a/python/pyspark/errors/exceptions/connect.py b/python/pyspark/errors/exceptions/connect.py index 0cffe72687539..8a95358f26975 100644 --- a/python/pyspark/errors/exceptions/connect.py +++ b/python/pyspark/errors/exceptions/connect.py @@ -91,7 +91,10 @@ def convert_exception( ) query_contexts = [] for query_context in resp.errors[resp.root_error_idx].spark_throwable.query_contexts: - query_contexts.append(QueryContext(query_context)) + if query_context.context_type == pb2.FetchErrorDetailsResponse.QueryContext.SQL: + 
query_contexts.append(SQLQueryContext(query_context)) + else: + query_contexts.append(DataFrameQueryContext(query_context)) if "org.apache.spark.sql.catalyst.parser.ParseException" in classes: return ParseException( @@ -430,17 +433,12 @@ class SparkNoSuchElementException(SparkConnectGrpcException, BaseNoSuchElementEx """ -class QueryContext(BaseQueryContext): +class SQLQueryContext(BaseQueryContext): def __init__(self, q: pb2.FetchErrorDetailsResponse.QueryContext): self._q = q def contextType(self) -> QueryContextType: - context_type = self._q.context_type - - if int(context_type) == QueryContextType.DataFrame.value: - return QueryContextType.DataFrame - else: - return QueryContextType.SQL + return QueryContextType.SQL def objectType(self) -> str: return str(self._q.object_type) @@ -457,6 +455,75 @@ def stopIndex(self) -> int: def fragment(self) -> str: return str(self._q.fragment) + def callSite(self) -> str: + raise UnsupportedOperationException( + "", + error_class="UNSUPPORTED_CALL.WITHOUT_SUGGESTION", + message_parameters={"className": "SQLQueryContext", "methodName": "callSite"}, + sql_state="0A000", + server_stacktrace=None, + display_server_stacktrace=False, + query_contexts=[], + ) + + def summary(self) -> str: + return str(self._q.summary) + + +class DataFrameQueryContext(BaseQueryContext): + def __init__(self, q: pb2.FetchErrorDetailsResponse.QueryContext): + self._q = q + + def contextType(self) -> QueryContextType: + return QueryContextType.DataFrame + + def objectType(self) -> str: + raise UnsupportedOperationException( + "", + error_class="UNSUPPORTED_CALL.WITHOUT_SUGGESTION", + message_parameters={"className": "DataFrameQueryContext", "methodName": "objectType"}, + sql_state="0A000", + server_stacktrace=None, + display_server_stacktrace=False, + query_contexts=[], + ) + + def objectName(self) -> str: + raise UnsupportedOperationException( + "", + error_class="UNSUPPORTED_CALL.WITHOUT_SUGGESTION", + message_parameters={"className": "DataFrameQueryContext", "methodName": "objectName"}, + sql_state="0A000", + server_stacktrace=None, + display_server_stacktrace=False, + query_contexts=[], + ) + + def startIndex(self) -> int: + raise UnsupportedOperationException( + "", + error_class="UNSUPPORTED_CALL.WITHOUT_SUGGESTION", + message_parameters={"className": "DataFrameQueryContext", "methodName": "startIndex"}, + sql_state="0A000", + server_stacktrace=None, + display_server_stacktrace=False, + query_contexts=[], + ) + + def stopIndex(self) -> int: + raise UnsupportedOperationException( + "", + error_class="UNSUPPORTED_CALL.WITHOUT_SUGGESTION", + message_parameters={"className": "DataFrameQueryContext", "methodName": "stopIndex"}, + sql_state="0A000", + server_stacktrace=None, + display_server_stacktrace=False, + query_contexts=[], + ) + + def fragment(self) -> str: + return str(self._q.fragment) + def callSite(self) -> str: return str(self._q.call_site) diff --git a/python/pyspark/errors/utils.py b/python/pyspark/errors/utils.py index e1f249506dd02..89721d23c3858 100644 --- a/python/pyspark/errors/utils.py +++ b/python/pyspark/errors/utils.py @@ -16,14 +16,42 @@ # import re -from typing import Dict, Match - +import functools +import inspect +import os +import threading +from typing import Any, Callable, Dict, Match, TypeVar, Type, Optional, TYPE_CHECKING +import pyspark from pyspark.errors.error_classes import ERROR_CLASSES_MAP +if TYPE_CHECKING: + from pyspark.sql import SparkSession + +T = TypeVar("T") + +_current_origin = threading.local() + + +def current_origin() -> 
threading.local: + global _current_origin + + if not hasattr(_current_origin, "fragment"): + _current_origin.fragment = None + if not hasattr(_current_origin, "call_site"): + _current_origin.call_site = None + return _current_origin + + +def set_current_origin(fragment: Optional[str], call_site: Optional[str]) -> None: + global _current_origin + + _current_origin.fragment = fragment + _current_origin.call_site = call_site + class ErrorClassesReader: """ - A reader to load error information from error_classes.py. + A reader to load error information from error-conditions.json. """ def __init__(self) -> None: @@ -51,11 +79,11 @@ def replace_match(match: Match[str]) -> str: def get_message_template(self, error_class: str) -> str: """ - Returns the message template for corresponding error class from error_classes.py. + Returns the message template for corresponding error class from error-conditions.json. For example, when given `error_class` is "EXAMPLE_ERROR_CLASS", - and corresponding error class in error_classes.py looks like the below: + and corresponding error class in error-conditions.json looks like the below: .. code-block:: python @@ -69,7 +97,7 @@ def get_message_template(self, error_class: str) -> str: "Problem because of ." For sub error class, when given `error_class` is "EXAMPLE_ERROR_CLASS.SUB_ERROR_CLASS", - and corresponding error class in error_classes.py looks like the below: + and corresponding error class in error-conditions.json looks like the below: .. code-block:: python @@ -119,3 +147,124 @@ def get_message_template(self, error_class: str) -> str: message_template = main_message_template + " " + sub_message_template return message_template + + +def _capture_call_site(spark_session: "SparkSession", depth: int) -> str: + """ + Capture the call site information including file name, line number, and function name. + This function updates the thread-local storage from JVM side (PySparkCurrentOrigin) + with the current call site information when a PySpark API function is called. + + Parameters + ---------- + spark_session : SparkSession + Current active Spark session. + + Notes + ----- + The call site information is used to enhance error messages with the exact location + in the user code that led to the error. 
+ """ + # Filtering out PySpark code and keeping user code only + pyspark_root = os.path.dirname(pyspark.__file__) + stack = [ + frame_info for frame_info in inspect.stack() if pyspark_root not in frame_info.filename + ] + + selected_frames = stack[:depth] + + # We try import here since IPython is not a required dependency + try: + import IPython + + # ipykernel is required for IPython + import ipykernel # type: ignore[import-not-found] + + ipython = IPython.get_ipython() + # Filtering out IPython related frames + ipy_root = os.path.dirname(IPython.__file__) + ipykernel_root = os.path.dirname(ipykernel.__file__) + selected_frames = [ + frame + for frame in selected_frames + if (ipy_root not in frame.filename) and (ipykernel_root not in frame.filename) + ] + except ImportError: + ipython = None + + # Identifying the cell is useful when the error is generated from IPython Notebook + if ipython: + call_sites = [ + f"line {frame.lineno} in cell [{ipython.execution_count}]" for frame in selected_frames + ] + else: + call_sites = [f"{frame.filename}:{frame.lineno}" for frame in selected_frames] + call_sites_str = "\n".join(call_sites) + + return call_sites_str + + +def _with_origin(func: Callable[..., Any]) -> Callable[..., Any]: + """ + A decorator to capture and provide the call site information to the server side + when PySpark API functions are invoked. + """ + + @functools.wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> Any: + from pyspark.sql import SparkSession + from pyspark.sql.utils import is_remote + + spark = SparkSession.getActiveSession() + if spark is not None and hasattr(func, "__name__"): + if is_remote(): + global current_origin + + # Getting the configuration requires RPC call. Uses the default value for now. + depth = 1 + set_current_origin(func.__name__, _capture_call_site(spark, depth)) + + try: + return func(*args, **kwargs) + finally: + set_current_origin(None, None) + else: + assert spark._jvm is not None + jvm_pyspark_origin = ( + spark._jvm.org.apache.spark.sql.catalyst.trees.PySparkCurrentOrigin + ) + depth = int( + spark.conf.get( # type: ignore[arg-type] + "spark.sql.stackTracesInDataFrameContext" + ) + ) + # Update call site when the function is called + jvm_pyspark_origin.set(func.__name__, _capture_call_site(spark, depth)) + + try: + return func(*args, **kwargs) + finally: + jvm_pyspark_origin.clear() + else: + return func(*args, **kwargs) + + return wrapper + + +def with_origin_to_class(cls: Type[T]) -> Type[T]: + """ + Decorate all methods of a class with `_with_origin` to capture call site information. + """ + if os.environ.get("PYSPARK_PIN_THREAD", "true").lower() == "true": + for name, method in cls.__dict__.items(): + # Excluding Python magic methods that do not utilize JVM functions. + if callable(method) and name not in ( + "__init__", + "__new__", + "__iter__", + "__nonzero__", + "__repr__", + "__bool__", + ): + setattr(cls, name, _with_origin(method)) + return cls diff --git a/python/pyspark/errors_doc_gen.py b/python/pyspark/errors_doc_gen.py index ad32745348127..e1bd94dcec4d0 100644 --- a/python/pyspark/errors_doc_gen.py +++ b/python/pyspark/errors_doc_gen.py @@ -41,7 +41,7 @@ def generate_errors_doc(output_rst_file_path: str) -> None: Error classes in PySpark ======================== -This is a list of common, named error classes returned by PySpark which are defined at `error_classes.py `_. +This is a list of common, named error classes returned by PySpark which are defined at `error-conditions.json `_. 
When writing PySpark errors, developers must use an error class from the list. If an appropriate error class is not available, add a new one into the list. For more information, please refer to `Contributing Error and Exception `_. """ # noqa diff --git a/python/pyspark/ml/connect/functions.py b/python/pyspark/ml/connect/functions.py index b305c04519ae8..6597e6c4118ad 100644 --- a/python/pyspark/ml/connect/functions.py +++ b/python/pyspark/ml/connect/functions.py @@ -15,7 +15,7 @@ # limitations under the License. # from pyspark.ml import functions as PyMLFunctions -from pyspark.sql.connect.column import Column +from pyspark.sql.column import Column from pyspark.sql.connect.functions.builtin import _invoke_function, _to_col, lit diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index 466d94ccc8889..32941b33c4603 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -28,7 +28,7 @@ pass # Let it throw a better error message later when the API is invoked. from pyspark.sql.functions import pandas_udf -from pyspark.sql.column import Column, _to_java_column +from pyspark.sql.column import Column from pyspark.sql.types import ( ArrayType, ByteType, @@ -116,6 +116,7 @@ def vector_to_array(col: Column, dtype: str = "float64") -> Column: StructField('oldVec', ArrayType(FloatType(), False), False)] """ from pyspark.core.context import SparkContext + from pyspark.sql.classic.column import Column, _to_java_column sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None @@ -159,6 +160,7 @@ def array_to_vector(col: Column) -> Column: [Row(vec1=DenseVector([1.0, 3.0]))] """ from pyspark.core.context import SparkContext + from pyspark.sql.classic.column import Column, _to_java_column sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None diff --git a/python/pyspark/ml/stat.py b/python/pyspark/ml/stat.py index ec5da94079ea3..4dcc961909520 100644 --- a/python/pyspark/ml/stat.py +++ b/python/pyspark/ml/stat.py @@ -22,7 +22,7 @@ from pyspark.ml.common import _java2py, _py2java from pyspark.ml.linalg import Matrix, Vector from pyspark.ml.wrapper import JavaWrapper, _jvm -from pyspark.sql.column import Column, _to_seq +from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame from pyspark.sql.functions import lit @@ -431,6 +431,7 @@ def metrics(*metrics: str) -> "SummaryBuilder": :py:class:`pyspark.ml.stat.SummaryBuilder` """ from pyspark.core.context import SparkContext + from pyspark.sql.classic.column import _to_seq sc = SparkContext._active_spark_context assert sc is not None diff --git a/python/pyspark/ml/tests/connect/test_connect_function.py b/python/pyspark/ml/tests/connect/test_connect_function.py index f503761106608..393d38fdc426a 100644 --- a/python/pyspark/ml/tests/connect/test_connect_function.py +++ b/python/pyspark/ml/tests/connect/test_connect_function.py @@ -19,7 +19,6 @@ from pyspark.util import is_remote_only from pyspark.sql import SparkSession as PySparkSession -from pyspark.sql.dataframe import DataFrame as SDF from pyspark.ml import functions as SF from pyspark.testing.sqlutils import SQLTestUtils from pyspark.testing.connectutils import ( @@ -55,6 +54,8 @@ def tearDownClass(cls): del os.environ["PYSPARK_NO_NAMESPACE_SHARE"] def compare_by_show(self, df1, df2, n: int = 20, truncate: int = 20): + from pyspark.sql.classic.dataframe import DataFrame as SDF + assert isinstance(df1, (SDF, CDF)) if isinstance(df1, SDF): str1 = df1._jdf.showString(n, 
truncate, False) diff --git a/python/pyspark/pandas/config.py b/python/pyspark/pandas/config.py index a66cb5a16d2dc..bfa88253dc6f4 100644 --- a/python/pyspark/pandas/config.py +++ b/python/pyspark/pandas/config.py @@ -169,7 +169,7 @@ def validate(self, v: Any) -> None: "can be expensive in general. So, if `compute.ops_on_diff_frames` variable is not " "True, that method throws an exception." ), - default=False, + default=True, types=bool, ), Option( diff --git a/python/pyspark/pandas/data_type_ops/base.py b/python/pyspark/pandas/data_type_ops/base.py index 2df40252965bc..b4a6b1abbcaf9 100644 --- a/python/pyspark/pandas/data_type_ops/base.py +++ b/python/pyspark/pandas/data_type_ops/base.py @@ -24,7 +24,7 @@ import pandas as pd from pandas.api.types import CategoricalDtype -from pyspark.sql import functions as F +from pyspark.sql import functions as F, Column as PySparkColumn from pyspark.sql.types import ( ArrayType, BinaryType, @@ -53,9 +53,6 @@ spark_type_to_pandas_dtype, ) -# For supporting Spark Connect -from pyspark.sql.utils import get_column_class - if extension_dtypes_available: from pandas import Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype @@ -485,16 +482,14 @@ def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: else: from pyspark.pandas.base import column_op - Column = get_column_class() - return column_op(Column.__eq__)(left, right) + return column_op(PySparkColumn.__eq__)(left, right) def ne(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: from pyspark.pandas.base import column_op _sanitize_list_like(right) - Column = get_column_class() - return column_op(Column.__ne__)(left, right) + return column_op(PySparkColumn.__ne__)(left, right) def invert(self, operand: IndexOpsLike) -> IndexOpsLike: raise TypeError("Unary ~ can not be applied to %s." 
% self.pretty_name) diff --git a/python/pyspark/pandas/data_type_ops/boolean_ops.py b/python/pyspark/pandas/data_type_ops/boolean_ops.py index 7e7ea7eb0738c..c91dcc913080b 100644 --- a/python/pyspark/pandas/data_type_ops/boolean_ops.py +++ b/python/pyspark/pandas/data_type_ops/boolean_ops.py @@ -35,10 +35,8 @@ _is_boolean_type, ) from pyspark.pandas.typedef.typehints import as_spark_type, extension_dtypes, pandas_on_spark_type -from pyspark.sql import functions as F -from pyspark.sql.column import Column as PySparkColumn +from pyspark.sql import functions as F, Column as PySparkColumn from pyspark.sql.types import BooleanType, StringType -from pyspark.sql.utils import get_column_class from pyspark.errors import PySparkValueError @@ -331,23 +329,19 @@ def abs(self, operand: IndexOpsLike) -> IndexOpsLike: def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) - Column = get_column_class() - return column_op(Column.__lt__)(left, right) + return column_op(PySparkColumn.__lt__)(left, right) def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) - Column = get_column_class() - return column_op(Column.__le__)(left, right) + return column_op(PySparkColumn.__le__)(left, right) def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) - Column = get_column_class() - return column_op(Column.__ge__)(left, right) + return column_op(PySparkColumn.__ge__)(left, right) def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) - Column = get_column_class() - return column_op(Column.__gt__)(left, right) + return column_op(PySparkColumn.__gt__)(left, right) def invert(self, operand: IndexOpsLike) -> IndexOpsLike: return operand._with_new_scol(~operand.spark.column, field=operand._internal.data_fields[0]) diff --git a/python/pyspark/pandas/data_type_ops/date_ops.py b/python/pyspark/pandas/data_type_ops/date_ops.py index 771b5d38a17ac..9a0b82de6ce8b 100644 --- a/python/pyspark/pandas/data_type_ops/date_ops.py +++ b/python/pyspark/pandas/data_type_ops/date_ops.py @@ -23,9 +23,8 @@ import pandas as pd from pandas.api.types import CategoricalDtype -from pyspark.sql import functions as F +from pyspark.sql import functions as F, Column as PySparkColumn from pyspark.sql.types import BooleanType, DateType, StringType -from pyspark.sql.utils import get_column_class from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex from pyspark.pandas.base import column_op, IndexOpsMixin from pyspark.pandas.data_type_ops.base import ( @@ -84,29 +83,25 @@ def lt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: from pyspark.pandas.base import column_op _sanitize_list_like(right) - Column = get_column_class() - return column_op(Column.__lt__)(left, right) + return column_op(PySparkColumn.__lt__)(left, right) def le(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: from pyspark.pandas.base import column_op _sanitize_list_like(right) - Column = get_column_class() - return column_op(Column.__le__)(left, right) + return column_op(PySparkColumn.__le__)(left, right) def ge(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: from pyspark.pandas.base import column_op _sanitize_list_like(right) - Column = get_column_class() - return column_op(Column.__ge__)(left, right) + return column_op(PySparkColumn.__ge__)(left, right) def gt(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: from pyspark.pandas.base import column_op _sanitize_list_like(right) - Column = 
get_column_class() - return column_op(Column.__gt__)(left, right) + return column_op(PySparkColumn.__gt__)(left, right) def astype(self, index_ops: IndexOpsLike, dtype: Union[str, type, Dtype]) -> IndexOpsLike: dtype, spark_type = pandas_on_spark_type(dtype) diff --git a/python/pyspark/pandas/data_type_ops/num_ops.py b/python/pyspark/pandas/data_type_ops/num_ops.py index 6f393c9652d7d..8e8dfee9990e3 100644 --- a/python/pyspark/pandas/data_type_ops/num_ops.py +++ b/python/pyspark/pandas/data_type_ops/num_ops.py @@ -43,8 +43,7 @@ _is_boolean_type, ) from pyspark.pandas.typedef.typehints import extension_dtypes, pandas_on_spark_type -from pyspark.sql import functions as F -from pyspark.sql import Column as PySparkColumn +from pyspark.sql import functions as F, Column as PySparkColumn from pyspark.sql.types import ( BooleanType, DataType, @@ -53,7 +52,7 @@ from pyspark.errors import PySparkValueError # For Supporting Spark Connect -from pyspark.sql.utils import pyspark_column_op, get_column_class +from pyspark.sql.utils import pyspark_column_op def _non_fractional_astype( @@ -82,8 +81,7 @@ def add(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError("Addition can not be applied to given types.") right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) - Column = get_column_class() - return column_op(Column.__add__)(left, right) + return column_op(PySparkColumn.__add__)(left, right) def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) @@ -91,8 +89,7 @@ def sub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError("Subtraction can not be applied to given types.") right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) - Column = get_column_class() - return column_op(Column.__sub__)(left, right) + return column_op(PySparkColumn.__sub__)(left, right) def mod(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) @@ -110,13 +107,11 @@ def pow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: if not is_valid_operand_for_numeric_arithmetic(right): raise TypeError("Exponentiation can not be applied to given types.") - Column = get_column_class() - - def pow_func(left: Column, right: Any) -> Column: # type: ignore[valid-type] + def pow_func(left: PySparkColumn, right: Any) -> PySparkColumn: return ( - F.when(left == 1, left) # type: ignore + F.when(left == 1, left) .when(F.lit(right) == 0, 1) - .otherwise(Column.__pow__(left, right)) + .otherwise(PySparkColumn.__pow__(left, right)) ) right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) @@ -127,34 +122,29 @@ def radd(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: if not isinstance(right, numbers.Number): raise TypeError("Addition can not be applied to given types.") right = transform_boolean_operand_to_numeric(right) - Column = get_column_class() - return column_op(Column.__radd__)(left, right) + return column_op(PySparkColumn.__radd__)(left, right) def rsub(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) if not isinstance(right, numbers.Number): raise TypeError("Subtraction can not be applied to given types.") right = transform_boolean_operand_to_numeric(right) - Column = get_column_class() - return column_op(Column.__rsub__)(left, right) + return column_op(PySparkColumn.__rsub__)(left, right) def rmul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) if not isinstance(right, 
numbers.Number): raise TypeError("Multiplication can not be applied to given types.") right = transform_boolean_operand_to_numeric(right) - Column = get_column_class() - return column_op(Column.__rmul__)(left, right) + return column_op(PySparkColumn.__rmul__)(left, right) def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) if not isinstance(right, numbers.Number): raise TypeError("Exponentiation can not be applied to given types.") - Column = get_column_class() - - def rpow_func(left: Column, right: Any) -> Column: # type: ignore[valid-type] - return F.when(F.lit(right == 1), right).otherwise(Column.__rpow__(left, right)) + def rpow_func(left: PySparkColumn, right: Any) -> PySparkColumn: + return F.when(F.lit(right == 1), right).otherwise(PySparkColumn.__rpow__(left, right)) right = transform_boolean_operand_to_numeric(right) return column_op(rpow_func)(left, right) @@ -250,8 +240,8 @@ def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError("Multiplication can not be applied to given types.") right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) - Column = get_column_class() - return column_op(Column.__mul__)(left, right) + + return column_op(PySparkColumn.__mul__)(left, right) def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) @@ -335,8 +325,8 @@ def mul(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: raise TypeError("Multiplication can not be applied to given types.") right = transform_boolean_operand_to_numeric(right, spark_type=left.spark.data_type) - Column = get_column_class() - return column_op(Column.__mul__)(left, right) + + return column_op(PySparkColumn.__mul__)(left, right) def truediv(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: _sanitize_list_like(right) @@ -496,13 +486,11 @@ def rpow(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex: if not isinstance(right, numbers.Number): raise TypeError("Exponentiation can not be applied to given types.") - Column = get_column_class() - - def rpow_func(left: Column, right: Any) -> Column: # type: ignore[valid-type] + def rpow_func(left: PySparkColumn, right: Any) -> PySparkColumn: return ( - F.when(left.isNull(), np.nan) # type: ignore + F.when(left.isNull(), np.nan) .when(F.lit(right == 1), right) - .otherwise(Column.__rpow__(left, right)) + .otherwise(PySparkColumn.__rpow__(left, right)) ) right = transform_boolean_operand_to_numeric(right) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index e8369544124eb..52f7a327b5be0 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -149,7 +149,6 @@ create_tuple_for_frame_type, ) from pyspark.pandas.plot import PandasOnSparkPlotAccessor -from pyspark.sql.utils import get_column_class, get_dataframe_class if TYPE_CHECKING: from pyspark.sql._typing import OptionalPrimitiveType @@ -491,7 +490,8 @@ class DataFrame(Frame, Generic[T]): >>> import pandas as pd >>> sdf = spark.createDataFrame([("Data", 1), ("Bricks", 2)], ["x", "y"]) - >>> ps.DataFrame(data=sdf, index=pd.Index([0, 1, 2])) + >>> with ps.option_context("compute.ops_on_diff_frames", False): + ... ps.DataFrame(data=sdf, index=pd.Index([0, 1, 2])) Traceback (most recent call last): ... ValueError: Cannot combine the series or dataframe...'compute.ops_on_diff_frames' option. 
@@ -509,7 +509,8 @@ class DataFrame(Frame, Generic[T]): >>> import pandas as pd >>> sdf = spark.createDataFrame([("Data", 1), ("Bricks", 2)], ["x", "y"]) - >>> ps.DataFrame(data=sdf, index=ps.Index([0, 1, 2])) + >>> with ps.option_context("compute.ops_on_diff_frames", False): + ... ps.DataFrame(data=sdf, index=ps.Index([0, 1, 2])) Traceback (most recent call last): ... ValueError: Cannot combine the series or dataframe...'compute.ops_on_diff_frames' option. @@ -527,7 +528,6 @@ class DataFrame(Frame, Generic[T]): def __init__( # type: ignore[no-untyped-def] self, data=None, index=None, columns=None, dtype=None, copy=False ): - SparkDataFrame = get_dataframe_class() index_assigned = False if isinstance(data, InternalFrame): assert columns is None @@ -535,7 +535,7 @@ def __init__( # type: ignore[no-untyped-def] assert not copy if index is None: internal = data - elif isinstance(data, SparkDataFrame): + elif isinstance(data, PySparkDataFrame): assert columns is None assert dtype is None assert not copy @@ -5627,10 +5627,9 @@ def _assign(self, kwargs: Any) -> "DataFrame": from pyspark.pandas.indexes import MultiIndex from pyspark.pandas.series import IndexOpsMixin - Column = get_column_class() for k, v in kwargs.items(): is_invalid_assignee = ( - not (isinstance(v, (IndexOpsMixin, Column)) or callable(v) or is_scalar(v)) + not (isinstance(v, (IndexOpsMixin, PySparkColumn)) or callable(v) or is_scalar(v)) ) or isinstance(v, MultiIndex) if is_invalid_assignee: raise TypeError( @@ -5644,7 +5643,7 @@ def _assign(self, kwargs: Any) -> "DataFrame": (v.spark.column, v._internal.data_fields[0]) if isinstance(v, IndexOpsMixin) and not isinstance(v, MultiIndex) else (v, None) - if isinstance(v, Column) + if isinstance(v, PySparkColumn) else (F.lit(v), None) ) for k, v in kwargs.items() @@ -7687,21 +7686,20 @@ def _sort( if na_position not in ("first", "last"): raise ValueError("invalid na_position: '{}'".format(na_position)) - Column = get_column_class() # Mapper: Get a spark colum # n function for (ascending, na_position) combination mapper = { - (True, "first"): Column.asc_nulls_first, - (True, "last"): Column.asc_nulls_last, - (False, "first"): Column.desc_nulls_first, - (False, "last"): Column.desc_nulls_last, + (True, "first"): PySparkColumn.asc_nulls_first, + (True, "last"): PySparkColumn.asc_nulls_last, + (False, "first"): PySparkColumn.desc_nulls_first, + (False, "last"): PySparkColumn.desc_nulls_last, } by = [mapper[(asc, na_position)](scol) for scol, asc in zip(by, ascending)] natural_order_scol = F.col(NATURAL_ORDER_COLUMN_NAME) if keep == "last": - natural_order_scol = Column.desc(natural_order_scol) + natural_order_scol = PySparkColumn.desc(natural_order_scol) elif keep == "all": raise NotImplementedError("`keep`=all is not implemented yet.") elif keep != "first": @@ -13626,14 +13624,6 @@ def _set_axis_fallback(self, *args: Any, **kwargs: Any) -> "DataFrame": _f = self._build_fallback_method("set_axis") return _f(*args, **kwargs) - def _to_feather_fallback(self, *args: Any, **kwargs: Any) -> None: - _f = self._build_fallback_driver_method("to_feather") - return _f(*args, **kwargs) - - def _to_stata_fallback(self, *args: Any, **kwargs: Any) -> None: - _f = self._build_fallback_driver_method("to_stata") - return _f(*args, **kwargs) - def __getattr__(self, key: str) -> Any: if key.startswith("__"): raise AttributeError(key) @@ -13738,8 +13728,7 @@ def _reduce_spark_multi(sdf: PySparkDataFrame, aggs: List[PySparkColumn]) -> Any """ Performs a reduction on a spark DataFrame, the functions being 
known SQL aggregate functions. """ - SparkDataFrame = get_dataframe_class() - assert isinstance(sdf, SparkDataFrame) + assert isinstance(sdf, PySparkDataFrame) sdf0 = sdf.agg(*aggs) lst = sdf0.limit(2).toPandas() assert len(lst) == 1, (sdf, lst) diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py index ec47ab75c43cf..55627a4c740c3 100644 --- a/python/pyspark/pandas/groupby.py +++ b/python/pyspark/pandas/groupby.py @@ -308,6 +308,7 @@ def aggregate( ) if not self._as_index: + index_cols = psdf._internal.column_labels should_drop_index = set( i for i, gkey in enumerate(self._groupkeys) if gkey._psdf is not self._psdf ) @@ -322,8 +323,12 @@ def aggregate( psdf = psdf.reset_index(level=should_drop_index, drop=drop) if len(should_drop_index) < len(self._groupkeys): psdf = psdf.reset_index() + index_cols = [c for c in psdf._internal.column_labels if c not in index_cols] + if relabeling: + psdf = psdf[pd.Index(index_cols + list(order))] + psdf.columns = pd.Index([c[0] for c in index_cols] + list(columns)) - if relabeling: + if relabeling and self._as_index: psdf = psdf[order] psdf.columns = columns # type: ignore[assignment] return psdf diff --git a/python/pyspark/pandas/indexes/multi.py b/python/pyspark/pandas/indexes/multi.py index 7d2712cbb531e..b5aae890d50a2 100644 --- a/python/pyspark/pandas/indexes/multi.py +++ b/python/pyspark/pandas/indexes/multi.py @@ -23,7 +23,6 @@ from pyspark.sql import functions as F, Column as PySparkColumn, Window from pyspark.sql.types import DataType -from pyspark.sql.utils import get_column_class from pyspark import pandas as ps from pyspark.pandas._typing import Label, Name, Scalar from pyspark.pandas.exceptions import PandasNotImplementedError @@ -514,7 +513,6 @@ def _is_monotonic_increasing(self) -> Series: cond = F.lit(True) has_not_null = F.lit(True) - Column = get_column_class() for scol in self._internal.index_spark_columns[::-1]: data_type = self._internal.spark_type_for(scol) prev = F.lag(scol, 1).over(window) @@ -522,7 +520,9 @@ def _is_monotonic_increasing(self) -> Series: # Since pandas 1.1.4, null value is not allowed at any levels of MultiIndex. # Therefore, we should check `has_not_null` over all levels. has_not_null = has_not_null & scol.isNotNull() - cond = F.when(scol.eqNullSafe(prev), cond).otherwise(compare(scol, prev, Column.__gt__)) + cond = F.when(scol.eqNullSafe(prev), cond).otherwise( + compare(scol, prev, PySparkColumn.__gt__) + ) cond = has_not_null & (prev.isNull() | cond) @@ -560,7 +560,6 @@ def _is_monotonic_decreasing(self) -> Series: cond = F.lit(True) has_not_null = F.lit(True) - Column = get_column_class() for scol in self._internal.index_spark_columns[::-1]: data_type = self._internal.spark_type_for(scol) prev = F.lag(scol, 1).over(window) @@ -568,7 +567,9 @@ def _is_monotonic_decreasing(self) -> Series: # Since pandas 1.1.4, null value is not allowed at any levels of MultiIndex. # Therefore, we should check `has_not_null` over all levels. 
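The monotonicity checks above compare each index level with its lagged value over a window, using a null-safe equality to fall through to the next level on ties. A stripped-down sketch of that lag/eqNullSafe pattern against a plain Spark DataFrame, with made-up column names and data:

    from pyspark.sql import SparkSession, Window, functions as F

    spark = SparkSession.builder.getOrCreate()
    sdf = spark.createDataFrame([(0, 1), (1, 2), (2, 2), (3, 5)], ["id", "x"])

    w = Window.orderBy("id")
    prev = F.lag("x", 1).over(w)

    # True where the value did not decrease relative to the previous row;
    # ties are handled by the null-safe equality before the > comparison.
    sdf.select(
        "id",
        F.when(F.col("x").eqNullSafe(prev), F.lit(True))
        .otherwise(F.col("x") > prev)
        .alias("non_decreasing"),
    ).show()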
has_not_null = has_not_null & scol.isNotNull() - cond = F.when(scol.eqNullSafe(prev), cond).otherwise(compare(scol, prev, Column.__lt__)) + cond = F.when(scol.eqNullSafe(prev), cond).otherwise( + compare(scol, prev, PySparkColumn.__lt__) + ) cond = has_not_null & (prev.isNull() | cond) diff --git a/python/pyspark/pandas/indexing.py b/python/pyspark/pandas/indexing.py index 24b7c53eea997..fada94cf383a0 100644 --- a/python/pyspark/pandas/indexing.py +++ b/python/pyspark/pandas/indexing.py @@ -50,7 +50,6 @@ spark_column_equals, verify_temp_column_name, ) -from pyspark.sql.utils import get_column_class if TYPE_CHECKING: from pyspark.pandas.frame import DataFrame @@ -259,12 +258,11 @@ def _select_rows( """ from pyspark.pandas.series import Series - Column = get_column_class() if rows_sel is None: return None, None, None elif isinstance(rows_sel, Series): return self._select_rows_by_series(rows_sel) - elif isinstance(rows_sel, Column): + elif isinstance(rows_sel, PySparkColumn): return self._select_rows_by_spark_column(rows_sel) elif isinstance(rows_sel, slice): if rows_sel == slice(None): @@ -306,7 +304,6 @@ def _select_cols( """ from pyspark.pandas.series import Series - Column = get_column_class() if cols_sel is None: column_labels = self._internal.column_labels data_spark_columns = self._internal.data_spark_columns @@ -314,7 +311,7 @@ def _select_cols( return column_labels, data_spark_columns, data_fields, False, None elif isinstance(cols_sel, Series): return self._select_cols_by_series(cols_sel, missing_keys) - elif isinstance(cols_sel, Column): + elif isinstance(cols_sel, PySparkColumn): return self._select_cols_by_spark_column(cols_sel, missing_keys) elif isinstance(cols_sel, slice): if cols_sel == slice(None): @@ -579,7 +576,6 @@ def __setitem__(self, key: Any, value: Any) -> None: from pyspark.pandas.frame import DataFrame from pyspark.pandas.series import Series, first_series - Column = get_column_class() if self._is_series: if ( isinstance(key, Series) @@ -639,7 +635,7 @@ def __setitem__(self, key: Any, value: Any) -> None: self._internal.spark_frame[cast(iLocIndexer, self)._sequence_col] < F.lit(limit) ) - if isinstance(value, (Series, Column)): + if isinstance(value, (Series, PySparkColumn)): if remaining_index is not None and remaining_index == 0: raise ValueError( "No axis named {} for object type {}".format(key, type(value).__name__) @@ -724,7 +720,7 @@ def __setitem__(self, key: Any, value: Any) -> None: self._internal.spark_frame[cast(iLocIndexer, self)._sequence_col] < F.lit(limit) ) - if isinstance(value, (Series, Column)): + if isinstance(value, (Series, PySparkColumn)): if remaining_index is not None and remaining_index == 0: raise ValueError("Incompatible indexer with Series") if len(data_spark_columns) > 1: @@ -1125,9 +1121,8 @@ def _select_rows_by_slice( ) )[::-1]: compare = MultiIndex._comparator_for_monotonic_increasing(dt) - Column = get_column_class() cond = F.when(scol.eqNullSafe(F.lit(value).cast(dt)), cond).otherwise( - compare(scol, F.lit(value).cast(dt), Column.__gt__) + compare(scol, F.lit(value).cast(dt), PySparkColumn.__gt__) ) conds.append(cond) if stop is not None: @@ -1140,9 +1135,8 @@ def _select_rows_by_slice( ) )[::-1]: compare = MultiIndex._comparator_for_monotonic_increasing(dt) - Column = get_column_class() cond = F.when(scol.eqNullSafe(F.lit(value).cast(dt)), cond).otherwise( - compare(scol, F.lit(value).cast(dt), Column.__lt__) + compare(scol, F.lit(value).cast(dt), PySparkColumn.__lt__) ) conds.append(cond) @@ -1300,12 +1294,11 @@ def 
_select_cols_by_iterable( ]: from pyspark.pandas.series import Series - Column = get_column_class() if all(isinstance(key, Series) for key in cols_sel): column_labels = [key._column_label for key in cols_sel] data_spark_columns = [key.spark.column for key in cols_sel] data_fields = [key._internal.data_fields[0] for key in cols_sel] - elif all(isinstance(key, Column) for key in cols_sel): + elif all(isinstance(key, PySparkColumn) for key in cols_sel): column_labels = [ (self._internal.spark_frame.select(col).columns[0],) for col in cols_sel ] @@ -1804,8 +1797,7 @@ def _select_cols_else( ) def __setitem__(self, key: Any, value: Any) -> None: - Column = get_column_class() - if not isinstance(value, Column) and is_list_like(value): + if not isinstance(value, PySparkColumn) and is_list_like(value): iloc_item = self[key] if not is_list_like(key) or not is_list_like(iloc_item): raise ValueError("setting an array element with a sequence.") diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py index 2966db073d0ca..c5fef3b138254 100644 --- a/python/pyspark/pandas/internal.py +++ b/python/pyspark/pandas/internal.py @@ -33,6 +33,7 @@ Window, ) from pyspark.sql.types import ( # noqa: F401 + _drop_metadata, BooleanType, DataType, LongType, @@ -40,8 +41,7 @@ StructType, StringType, ) -from pyspark.sql.utils import is_timestamp_ntz_preferred -from pyspark.sql.utils import is_remote, get_column_class, get_dataframe_class +from pyspark.sql.utils import is_timestamp_ntz_preferred, is_remote from pyspark import pandas as ps from pyspark.pandas._typing import Label from pyspark.pandas.spark.utils import as_nullable_spark_type, force_decimal_precision_scale @@ -619,8 +619,7 @@ def __init__( >>> internal.column_label_names [('column_labels_a',), ('column_labels_b',)] """ - SparkDataFrame = get_dataframe_class() - assert isinstance(spark_frame, SparkDataFrame) + assert isinstance(spark_frame, PySparkDataFrame) assert not spark_frame.isStreaming, "pandas-on-Spark does not support Structured Streaming." if not index_spark_columns: @@ -672,12 +671,12 @@ def __init__( self._sdf = spark_frame # index_spark_columns - Column = get_column_class() + assert all( - isinstance(index_scol, Column) for index_scol in index_spark_columns + isinstance(index_scol, PySparkColumn) for index_scol in index_spark_columns ), index_spark_columns - self._index_spark_columns: List[Column] = index_spark_columns # type: ignore[valid-type] + self._index_spark_columns: List[PySparkColumn] = index_spark_columns # data_spark_columns if data_spark_columns is None: @@ -691,9 +690,9 @@ def __init__( and col not in HIDDEN_COLUMNS ] else: - assert all(isinstance(scol, Column) for scol in data_spark_columns) + assert all(isinstance(scol, PySparkColumn) for scol in data_spark_columns) - self._data_spark_columns: List[Column] = data_spark_columns # type: ignore[valid-type] + self._data_spark_columns: List[PySparkColumn] = data_spark_columns # fields if index_fields is None: @@ -761,14 +760,8 @@ def __init__( # in a few tests when using Spark Connect. However, the function works properly. # Therefore, we temporarily perform Spark Connect tests by excluding metadata # until the issue is resolved. 
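The internal.py hunks below replace a local remove_metadata helper with _drop_metadata so that the schema assertions ignore column metadata under Spark Connect. The idea can be illustrated with a hand-rolled equivalent of the deleted helper; the asserts reflect that StructField equality compares metadata as well:

    from pyspark.sql.types import IntegerType, StructField

    def drop_metadata(field: StructField) -> StructField:
        # Rebuild the field without its metadata dict so comparisons only look
        # at name, data type, and nullability.
        return StructField(field.name, field.dataType, field.nullable)

    a = StructField("x", IntegerType(), True, metadata={"comment": "left"})
    b = StructField("x", IntegerType(), True, metadata={"comment": "right"})

    assert a != b                                 # metadata participates in ==
    assert drop_metadata(a) == drop_metadata(b)   # ... unless it is stripped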
- def remove_metadata(struct_field: StructField) -> StructField: - new_struct_field = StructField( - struct_field.name, struct_field.dataType, struct_field.nullable - ) - return new_struct_field - assert all( - remove_metadata(index_field.struct_field) == remove_metadata(struct_field) + _drop_metadata(index_field.struct_field) == _drop_metadata(struct_field) for index_field, struct_field in zip(index_fields, struct_fields) ), (index_fields, struct_fields) else: @@ -795,14 +788,8 @@ def remove_metadata(struct_field: StructField) -> StructField: # in a few tests when using Spark Connect. However, the function works properly. # Therefore, we temporarily perform Spark Connect tests by excluding metadata # until the issue is resolved. - def remove_metadata(struct_field: StructField) -> StructField: - new_struct_field = StructField( - struct_field.name, struct_field.dataType, struct_field.nullable - ) - return new_struct_field - assert all( - remove_metadata(data_field.struct_field) == remove_metadata(struct_field) + _drop_metadata(data_field.struct_field) == _drop_metadata(struct_field) for data_field, struct_field in zip(data_fields, struct_fields) ), (data_fields, struct_fields) else: @@ -959,7 +946,7 @@ def attach_distributed_sequence_column( return sdf.select( ConnectColumn(DistributedSequenceID()).alias(column_name), - "*", # type: ignore[call-overload] + "*", ) else: return PySparkDataFrame( @@ -985,27 +972,27 @@ def spark_column_for(self, label: Label) -> PySparkColumn: def spark_column_name_for(self, label_or_scol: Union[Label, PySparkColumn]) -> str: """Return the actual Spark column name for the given column label.""" - Column = get_column_class() - if isinstance(label_or_scol, Column): + + if isinstance(label_or_scol, PySparkColumn): return self.spark_frame.select(label_or_scol).columns[0] else: - return self.field_for(label_or_scol).name # type: ignore[arg-type] + return self.field_for(label_or_scol).name def spark_type_for(self, label_or_scol: Union[Label, PySparkColumn]) -> DataType: """Return DataType for the given column label.""" - Column = get_column_class() - if isinstance(label_or_scol, Column): + + if isinstance(label_or_scol, PySparkColumn): return self.spark_frame.select(label_or_scol).schema[0].dataType else: - return self.field_for(label_or_scol).spark_type # type: ignore[arg-type] + return self.field_for(label_or_scol).spark_type def spark_column_nullable_for(self, label_or_scol: Union[Label, PySparkColumn]) -> bool: """Return nullability for the given column label.""" - Column = get_column_class() - if isinstance(label_or_scol, Column): + + if isinstance(label_or_scol, PySparkColumn): return self.spark_frame.select(label_or_scol).schema[0].nullable else: - return self.field_for(label_or_scol).nullable # type: ignore[arg-type] + return self.field_for(label_or_scol).nullable def field_for(self, label: Label) -> InternalField: """Return InternalField for the given column label.""" diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index 42a0ce49faa56..4cea4b4fff225 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -94,9 +94,6 @@ from pyspark.pandas.indexes import Index, DatetimeIndex, TimedeltaIndex from pyspark.pandas.indexes.multi import MultiIndex -# For Supporting Spark Connect -from pyspark.sql.utils import get_column_class - __all__ = [ "from_pandas", "range", @@ -3398,8 +3395,7 @@ def rename(col: str) -> str: else: on = None - Column = get_column_class() - if tolerance is not None and not 
isinstance(tolerance, Column): + if tolerance is not None and not isinstance(tolerance, PySparkColumn): tolerance = F.lit(tolerance) as_of_joined_table = left_table._joinAsOf( @@ -3424,10 +3420,10 @@ def rename(col: str) -> str: data_columns = [] column_labels = [] - def left_scol_for(label: Label) -> Column: # type: ignore[valid-type] + def left_scol_for(label: Label) -> PySparkColumn: return scol_for(as_of_joined_table, left_internal.spark_column_name_for(label)) - def right_scol_for(label: Label) -> Column: # type: ignore[valid-type] + def right_scol_for(label: Label) -> PySparkColumn: return scol_for(as_of_joined_table, right_internal.spark_column_name_for(label)) for label in left_internal.column_labels: @@ -3441,7 +3437,7 @@ def right_scol_for(label: Label) -> Column: # type: ignore[valid-type] pass else: col = col + left_suffix - scol = scol.alias(col) # type: ignore[attr-defined] + scol = scol.alias(col) label = tuple([str(label[0]) + left_suffix] + list(label[1:])) exprs.append(scol) data_columns.append(col) @@ -3449,7 +3445,7 @@ def right_scol_for(label: Label) -> Column: # type: ignore[valid-type] for label in right_internal.column_labels: # recover `right_prefix` here. col = right_internal.spark_column_name_for(label)[len(right_prefix) :] - scol = right_scol_for(label).alias(col) # type: ignore[attr-defined] + scol = right_scol_for(label).alias(col) if label in duplicate_columns: spark_column_name = left_internal.spark_column_name_for(label) if spark_column_name in left_as_of_names + left_join_on_names and ( @@ -3458,7 +3454,7 @@ def right_scol_for(label: Label) -> Column: # type: ignore[valid-type] continue else: col = col + right_suffix - scol = scol.alias(col) # type: ignore[attr-defined] + scol = scol.alias(col) label = tuple([str(label[0]) + right_suffix] + list(label[1:])) exprs.append(scol) data_columns.append(col) diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py index 5bd2a67ed39bb..819ac02a51266 100644 --- a/python/pyspark/pandas/plot/core.py +++ b/python/pyspark/pandas/plot/core.py @@ -23,6 +23,7 @@ from pandas.core.dtypes.inference import is_integer from pyspark.sql import functions as F +from pyspark.sql.utils import is_remote from pyspark.pandas.missing import unsupported_function from pyspark.pandas.config import get_option from pyspark.pandas.utils import name_like_string @@ -571,10 +572,14 @@ def _get_plot_backend(backend=None): return module def __call__(self, kind="line", backend=None, **kwargs): + kind = {"density": "kde"}.get(kind, kind) + + if is_remote() and kind in ["hist", "kde"]: + return unsupported_function(class_name="pd.DataFrame", method_name=kind)() + plot_backend = PandasOnSparkPlotAccessor._get_plot_backend(backend) plot_data = self.data - kind = {"density": "kde"}.get(kind, kind) if hasattr(plot_backend, "plot_pandas_on_spark"): # use if there's pandas-on-Spark specific method. return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs) @@ -948,6 +953,9 @@ def hist(self, bins=10, **kwds): >>> df = ps.from_pandas(df) >>> df.plot.hist(bins=12, alpha=0.5) # doctest: +SKIP """ + if is_remote(): + return unsupported_function(class_name="pd.DataFrame", method_name="hist")() + return self(kind="hist", bins=bins, **kwds) def kde(self, bw_method=None, ind=None, **kwargs): @@ -1023,6 +1031,9 @@ def kde(self, bw_method=None, ind=None, **kwargs): ... 
}) >>> df.plot.kde(ind=[1, 2, 3, 4, 5, 6], bw_method=0.3) # doctest: +SKIP """ + if is_remote(): + return unsupported_function(class_name="pd.DataFrame", method_name="kde")() + return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) density = kde diff --git a/python/pyspark/pandas/resample.py b/python/pyspark/pandas/resample.py index 9683fc4f4e7ff..5557ca2af7738 100644 --- a/python/pyspark/pandas/resample.py +++ b/python/pyspark/pandas/resample.py @@ -56,7 +56,6 @@ scol_for, verify_temp_column_name, ) -from pyspark.pandas.spark.functions import timestampdiff class Resampler(Generic[FrameLike], metaclass=ABCMeta): @@ -279,7 +278,7 @@ def _bin_timestamp(self, origin: pd.Timestamp, ts_scol: Column) -> Column: truncated_ts_scol = F.date_trunc(unit_str, ts_scol) if isinstance(key_type, TimestampNTZType): truncated_ts_scol = F.to_timestamp_ntz(truncated_ts_scol) - diff = timestampdiff(unit_str, origin_scol, truncated_ts_scol) + diff = F.timestamp_diff(unit_str, origin_scol, truncated_ts_scol) mod = F.lit(0) if n == 1 else (diff % F.lit(n)) if rule_code in ["h", "H"]: diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index 8edc2c531b518..4e0f3ca349177 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -54,7 +54,12 @@ ) from pandas.tseries.frequencies import DateOffset -from pyspark.sql import functions as F, Column as PySparkColumn, DataFrame as SparkDataFrame +from pyspark.sql import ( + functions as F, + Column as PySparkColumn, + DataFrame as SparkDataFrame, + Window as PySparkWindow, +) from pyspark.sql.types import ( ArrayType, BooleanType, @@ -70,7 +75,6 @@ NullType, ) from pyspark.sql.window import Window -from pyspark.sql.utils import get_column_class, get_window_class from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. 
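The resample.py change above drops the pandas-on-Spark timestampdiff wrapper in favour of the built-in F.timestamp_diff. Assuming a Spark build where pyspark.sql.functions.timestamp_diff is available (as that hunk requires), a minimal usage sketch with made-up data:

    from pyspark.sql import SparkSession, functions as F

    spark = SparkSession.builder.getOrCreate()
    sdf = spark.createDataFrame(
        [("2024-01-01 00:00:00", "2024-01-03 12:00:00")], ["start", "end"]
    ).select(F.col("start").cast("timestamp"), F.col("end").cast("timestamp"))

    # Whole hours elapsed between the two timestamps, computed per row.
    sdf.select(
        F.timestamp_diff("HOUR", F.col("start"), F.col("end")).alias("hours")
    ).show()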
from pyspark.pandas._typing import Axis, Dtype, Label, Name, Scalar, T from pyspark.pandas.accessors import PandasOnSparkSeriesMethods @@ -2257,15 +2261,14 @@ def _interpolate( last_non_null = F.last(scol, True) null_index = SF.null_index(scol) - Window = get_window_class() - window_forward = Window.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween( - Window.unboundedPreceding, Window.currentRow + window_forward = PySparkWindow.orderBy(NATURAL_ORDER_COLUMN_NAME).rowsBetween( + PySparkWindow.unboundedPreceding, PySparkWindow.currentRow ) last_non_null_forward = last_non_null.over(window_forward) null_index_forward = null_index.over(window_forward) - window_backward = Window.orderBy(F.desc(NATURAL_ORDER_COLUMN_NAME)).rowsBetween( - Window.unboundedPreceding, Window.currentRow + window_backward = PySparkWindow.orderBy(F.desc(NATURAL_ORDER_COLUMN_NAME)).rowsBetween( + PySparkWindow.unboundedPreceding, PySparkWindow.currentRow ) last_non_null_backward = last_non_null.over(window_backward) null_index_backward = null_index.over(window_backward) @@ -4171,11 +4174,10 @@ def _rank( if self._internal.index_level > 1: raise NotImplementedError("rank do not support MultiIndex now") - Column = get_column_class() if ascending: - asc_func = Column.asc + asc_func = PySparkColumn.asc else: - asc_func = Column.desc + asc_func = PySparkColumn.desc if method == "first": window = ( diff --git a/python/pyspark/pandas/spark/accessors.py b/python/pyspark/pandas/spark/accessors.py index 1ac12bb59ddc4..7f3041cf79c7c 100644 --- a/python/pyspark/pandas/spark/accessors.py +++ b/python/pyspark/pandas/spark/accessors.py @@ -27,7 +27,6 @@ from pyspark.sql.types import DataType, StructType from pyspark.pandas._typing import IndexOpsLike from pyspark.pandas.internal import InternalField -from pyspark.sql.utils import get_column_class, get_dataframe_class if TYPE_CHECKING: from pyspark.sql._typing import OptionalPrimitiveType @@ -116,8 +115,7 @@ def transform(self, func: Callable[[PySparkColumn], PySparkColumn]) -> IndexOpsL if isinstance(self._data, MultiIndex): raise NotImplementedError("MultiIndex does not support spark.transform yet.") output = func(self._data.spark.column) - Column = get_column_class() - if not isinstance(output, Column): + if not isinstance(output, PySparkColumn): raise ValueError( "The output of the function [%s] should be of a " "pyspark.sql.Column; however, got [%s]." % (func, type(output)) @@ -192,8 +190,7 @@ def apply(self, func: Callable[[PySparkColumn], PySparkColumn]) -> "ps.Series": from pyspark.pandas.internal import HIDDEN_COLUMNS output = func(self._data.spark.column) - Column = get_column_class() - if not isinstance(output, Column): + if not isinstance(output, PySparkColumn): raise ValueError( "The output of the function [%s] should be of a " "pyspark.sql.Column; however, got [%s]." % (func, type(output)) @@ -240,7 +237,8 @@ def analyzed(self) -> "ps.Series": However, it won't work with the same anchor Series. - >>> ser + ser.spark.analyzed + >>> with ps.option_context('compute.ops_on_diff_frames', False): + ... ser + ser.spark.analyzed Traceback (most recent call last): ... ValueError: ... enable 'compute.ops_on_diff_frames' option. @@ -290,7 +288,8 @@ def analyzed(self) -> "ps.Index": However, it won't work with the same anchor Index. - >>> idx + idx.spark.analyzed + >>> with ps.option_context('compute.ops_on_diff_frames', False): + ... idx + idx.spark.analyzed Traceback (most recent call last): ... ValueError: ... enable 'compute.ops_on_diff_frames' option. 
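The interpolation hunk above leans on the classic forward-fill idiom: F.last with ignorenulls=True over an unbounded-preceding window carries the latest non-null value forward, and a descending window does the backward pass. A compact sketch of the forward pass with made-up data:

    from pyspark.sql import SparkSession, Window, functions as F

    spark = SparkSession.builder.getOrCreate()
    sdf = spark.createDataFrame(
        [(0, 1.0), (1, None), (2, None), (3, 4.0)], ["id", "v"]
    )

    w_fwd = Window.orderBy("id").rowsBetween(Window.unboundedPreceding, Window.currentRow)

    # The last non-null value seen so far, in row order: 1.0, 1.0, 1.0, 4.0.
    sdf.select("id", F.last("v", ignorenulls=True).over(w_fwd).alias("v_ffill")).show()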
@@ -936,8 +935,7 @@ def apply( 2 3 1 """ output = func(self.frame(index_col)) - SparkDataFrame = get_dataframe_class() - if not isinstance(output, SparkDataFrame): + if not isinstance(output, PySparkDataFrame): raise ValueError( "The output of the function [%s] should be of a " "pyspark.sql.DataFrame; however, got [%s]." % (func, type(output)) @@ -1148,7 +1146,8 @@ def analyzed(self) -> "ps.DataFrame": However, it won't work with the same anchor Series. - >>> df + df.spark.analyzed + >>> with ps.option_context('compute.ops_on_diff_frames', False): + ... df + df.spark.analyzed Traceback (most recent call last): ... ValueError: ... enable 'compute.ops_on_diff_frames' option. diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py index a598ff37c47f3..db1cc423078a7 100644 --- a/python/pyspark/pandas/spark/functions.py +++ b/python/pyspark/pandas/spark/functions.py @@ -25,9 +25,9 @@ def product(col: Column, dropna: bool) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns, lit - return _invoke_function_over_columns( # type: ignore[return-value] + return _invoke_function_over_columns( "pandas_product", - col, # type: ignore[arg-type] + col, lit(dropna), ) @@ -42,9 +42,9 @@ def stddev(col: Column, ddof: int) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns, lit - return _invoke_function_over_columns( # type: ignore[return-value] + return _invoke_function_over_columns( "pandas_stddev", - col, # type: ignore[arg-type] + col, lit(ddof), ) @@ -59,9 +59,9 @@ def var(col: Column, ddof: int) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns, lit - return _invoke_function_over_columns( # type: ignore[return-value] + return _invoke_function_over_columns( "pandas_var", - col, # type: ignore[arg-type] + col, lit(ddof), ) @@ -76,9 +76,9 @@ def skew(col: Column) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns - return _invoke_function_over_columns( # type: ignore[return-value] + return _invoke_function_over_columns( "pandas_skew", - col, # type: ignore[arg-type] + col, ) else: @@ -92,9 +92,9 @@ def kurt(col: Column) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns - return _invoke_function_over_columns( # type: ignore[return-value] + return _invoke_function_over_columns( "pandas_kurt", - col, # type: ignore[arg-type] + col, ) else: @@ -108,9 +108,9 @@ def mode(col: Column, dropna: bool) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns, lit - return _invoke_function_over_columns( # type: ignore[return-value] + return _invoke_function_over_columns( "pandas_mode", - col, # type: ignore[arg-type] + col, lit(dropna), ) @@ -125,10 +125,10 @@ def covar(col1: Column, col2: Column, ddof: int) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns, lit - return _invoke_function_over_columns( # type: ignore[return-value] + return _invoke_function_over_columns( "pandas_covar", - col1, # type: ignore[arg-type] - col2, # type: ignore[arg-type] + col1, + col2, lit(ddof), ) @@ -143,9 +143,9 @@ def ewm(col: Column, alpha: float, ignore_na: bool) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns, lit - return _invoke_function_over_columns( 
# type: ignore[return-value] + return _invoke_function_over_columns( "ewm", - col, # type: ignore[arg-type] + col, lit(alpha), lit(ignore_na), ) @@ -161,9 +161,9 @@ def null_index(col: Column) -> Column: if is_remote(): from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns - return _invoke_function_over_columns( # type: ignore[return-value] + return _invoke_function_over_columns( "null_index", - col, # type: ignore[arg-type] + col, ) else: @@ -171,21 +171,3 @@ def null_index(col: Column) -> Column: sc = SparkContext._active_spark_context return Column(sc._jvm.PythonSQLUtils.nullIndex(col._jc)) - - -def timestampdiff(unit: str, start: Column, end: Column) -> Column: - if is_remote(): - from pyspark.sql.connect.functions.builtin import _invoke_function_over_columns, lit - - return _invoke_function_over_columns( # type: ignore[return-value] - "timestampdiff", - lit(unit), - start, # type: ignore[arg-type] - end, # type: ignore[arg-type] - ) - - else: - from pyspark import SparkContext - - sc = SparkContext._active_spark_context - return Column(sc._jvm.PythonSQLUtils.timestampDiff(unit, start._jc, end._jc)) diff --git a/python/pyspark/pandas/sql_formatter.py b/python/pyspark/pandas/sql_formatter.py index 7e8263f552f0c..b6d48077675bd 100644 --- a/python/pyspark/pandas/sql_formatter.py +++ b/python/pyspark/pandas/sql_formatter.py @@ -27,10 +27,10 @@ from pyspark.pandas.namespace import _get_index_map from pyspark import pandas as ps from pyspark.sql import SparkSession +from pyspark.sql.utils import get_lit_sql_str from pyspark.pandas.utils import default_session from pyspark.pandas.frame import DataFrame from pyspark.pandas.series import Series -from pyspark.errors import PySparkTypeError from pyspark.sql.utils import is_remote @@ -203,15 +203,16 @@ def sql( session = default_session() formatter = PandasSQLStringFormatter(session) try: - # ps.DataFrame are not supported for Spark Connect currently. - if is_remote(): - for obj in kwargs.values(): - if isinstance(obj, ps.DataFrame): - raise PySparkTypeError( - error_class="UNSUPPORTED_DATA_TYPE", - message_parameters={"data_type": type(obj).__name__}, - ) - sdf = session.sql(formatter.format(query, **kwargs), args) + if not is_remote(): + sdf = session.sql(formatter.format(query, **kwargs), args) + else: + ps_query = formatter.format(query, **kwargs) + # here the new_kwargs stores the views + new_kwargs = {} + for psdf, name in formatter._temp_views: + new_kwargs[name] = psdf._to_spark() + # delegate views to spark.sql + sdf = session.sql(ps_query, args, **new_kwargs) finally: formatter.clear() @@ -264,30 +265,42 @@ def _convert_value(self, val: Any, name: str) -> Optional[str]: elif isinstance(val, (DataFrame, pd.DataFrame)): df_name = "_pandas_api_%s" % str(uuid.uuid4()).replace("-", "") - if isinstance(val, pd.DataFrame): - # Don't store temp view for plain pandas instances - # because it is unable to know which pandas DataFrame - # holds which Series. - val = ps.from_pandas(val) + if not is_remote(): + if isinstance(val, pd.DataFrame): + # Don't store temp view for plain pandas instances + # because it is unable to know which pandas DataFrame + # holds which Series. + val = ps.from_pandas(val) + else: + for df, n in self._temp_views: + if df is val: + return n + self._temp_views.append((val, df_name)) + val._to_spark().createOrReplaceTempView(df_name) + return df_name else: + if isinstance(val, pd.DataFrame): + # Always convert pd.DataFrame to ps.DataFrame, and record it in _temp_views. 
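The helpers in `python/pyspark/pandas/spark/functions.py` that these hunks touch all share the same two-way dispatch: under Spark Connect they build an unresolved function call via `_invoke_function_over_columns`, otherwise they call a JVM helper through the active `SparkContext`. A minimal sketch of that shape; `pandas_foo` and the JVM method `pandasFoo` are hypothetical names used only for illustration:

```python
# Sketch of the dispatch pattern shared by the helpers in
# pyspark/pandas/spark/functions.py. "pandas_foo" and the JVM helper
# "pandasFoo" are hypothetical names, not part of the patch.
from pyspark.sql.column import Column
from pyspark.sql.utils import is_remote


def pandas_foo(col: Column, dropna: bool) -> Column:
    if is_remote():
        # Spark Connect: build an unresolved function call on the client side.
        from pyspark.sql.connect.functions.builtin import (
            _invoke_function_over_columns,
            lit,
        )

        return _invoke_function_over_columns("pandas_foo", col, lit(dropna))
    else:
        # Classic: call a JVM helper through the active SparkContext.
        from pyspark import SparkContext
        from pyspark.sql.classic.column import Column as ClassicColumn

        sc = SparkContext._active_spark_context
        return ClassicColumn(sc._jvm.PythonSQLUtils.pandasFoo(col._jc, dropna))
```

The dropped `# type: ignore[return-value]` / `# type: ignore[arg-type]` suppressions appear to be possible because the Connect column type now fits the shared `pyspark.sql.Column` interface, so no cast is needed on either branch.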
+ val = ps.from_pandas(val) + for df, n in self._temp_views: if df is val: return n - self._temp_views.append((val, df_name)) - - val._to_spark().createOrReplaceTempView(df_name) - return df_name + self._temp_views.append((val, name)) + # In Spark Connect, keep the original view name here (not the UUID one), + # the reformatted query is like: 'select * from {tbl} where A > 1' + # and then delegate the view operations to spark.sql. + return "{" + name + "}" elif isinstance(val, str): - # This is matched to behavior from JVM implementation. - # See `sql` definition from `sql/catalyst/src/main/scala/org/apache/spark/ - # sql/catalyst/expressions/literals.scala` - return "'" + val.replace("\\", "\\\\").replace("'", "\\'") + "'" + return get_lit_sql_str(val) else: return val def clear(self) -> None: - for _, n in self._temp_views: - self._session.catalog.dropTempView(n) + # In Spark Connect, views are created and dropped in Connect Server + if not is_remote(): + for _, n in self._temp_views: + self._session.catalog.dropTempView(n) self._temp_views = [] self._ref_sers = [] diff --git a/python/pyspark/pandas/tests/computation/test_binary_ops.py b/python/pyspark/pandas/tests/computation/test_binary_ops.py index 966b3d9cee7ec..44aa9380c19ec 100644 --- a/python/pyspark/pandas/tests/computation/test_binary_ops.py +++ b/python/pyspark/pandas/tests/computation/test_binary_ops.py @@ -49,11 +49,12 @@ def test_binary_operators(self): self.assert_eq(psdf + psdf.loc[:, ["A", "B"]], pdf + pdf.loc[:, ["A", "B"]]) self.assert_eq(psdf.loc[:, ["A", "B"]] + psdf, pdf.loc[:, ["A", "B"]] + pdf) - self.assertRaisesRegex( - ValueError, - "it comes from a different dataframe", - lambda: ps.range(10).add(ps.range(10)), - ) + with ps.option_context("compute.ops_on_diff_frames", False): + self.assertRaisesRegex( + ValueError, + "it comes from a different dataframe", + lambda: ps.range(10).add(ps.range(10)), + ) self.assertRaisesRegex( TypeError, diff --git a/python/pyspark/pandas/tests/computation/test_corr.py b/python/pyspark/pandas/tests/computation/test_corr.py index 99dc1733539af..49cb84ca22e04 100644 --- a/python/pyspark/pandas/tests/computation/test_corr.py +++ b/python/pyspark/pandas/tests/computation/test_corr.py @@ -160,8 +160,9 @@ def test_series_corr(self): psser1 = ps.from_pandas(pser1) psser2 = ps.from_pandas(pser2) - with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): - psser1.corr(psser2) + with ps.option_context("compute.ops_on_diff_frames", False): + with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"): + psser1.corr(psser2) for method in ["pearson", "spearman", "kendall"]: with ps.option_context("compute.ops_on_diff_frames", True): diff --git a/python/pyspark/pandas/tests/connect/indexes/test_parity_default.py b/python/pyspark/pandas/tests/connect/indexes/test_parity_default.py index d6f0cadbf0cd1..4240eb8fdbc81 100644 --- a/python/pyspark/pandas/tests/connect/indexes/test_parity_default.py +++ b/python/pyspark/pandas/tests/connect/indexes/test_parity_default.py @@ -19,6 +19,7 @@ from pyspark.pandas.tests.indexes.test_default import DefaultIndexTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.testing.pandasutils import PandasOnSparkTestUtils +from pyspark.util import is_remote_only class DefaultIndexParityTests( @@ -26,7 +27,7 @@ class DefaultIndexParityTests( PandasOnSparkTestUtils, ReusedConnectTestCase, ): - @unittest.skip("Test depends on SparkContext which is not supported from Spark Connect.") + 
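With the `sql_formatter.py` change above, `ps.sql` on Spark Connect no longer rejects pandas-on-Spark frames: the formatter keeps `{name}` placeholders in the query and hands the backing Spark DataFrames to `session.sql` as keyword arguments, so temp-view creation and cleanup happen on the server side. A small usage sketch (data and column names are illustrative):

```python
import pyspark.pandas as ps

psdf = ps.DataFrame({"A": [1, 2, 3], "B": ["x", "y", "z"]})

# Works the same on classic Spark and on Spark Connect; on Connect the frame
# referenced as {tbl} is passed through to spark.sql(..., tbl=...) instead of
# being registered as a client-side temp view keyed by a UUID.
result = ps.sql("SELECT A, B FROM {tbl} WHERE A > 1", tbl=psdf)
print(result.sort_values("A").to_pandas())
```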
@unittest.skipIf(is_remote_only(), "Requires JVM access") def test_index_distributed_sequence_cleanup(self): super().test_index_distributed_sequence_cleanup() diff --git a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py index f093f48b16e9c..abb18d473bf8d 100644 --- a/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py +++ b/python/pyspark/pandas/tests/connect/plot/test_parity_series_plot_matplotlib.py @@ -24,6 +24,10 @@ class SeriesPlotMatplotlibParityTests( SeriesPlotMatplotlibTestsMixin, PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase ): + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") + def test_empty_hist(self): + super().test_empty_hist() + @unittest.skip("Test depends on Spark ML which is not supported from Spark Connect.") def test_hist(self): super().test_hist() diff --git a/python/pyspark/pandas/tests/connect/test_connect_plotting.py b/python/pyspark/pandas/tests/connect/test_connect_plotting.py new file mode 100644 index 0000000000000..9b7cfebfcd552 --- /dev/null +++ b/python/pyspark/pandas/tests/connect/test_connect_plotting.py @@ -0,0 +1,124 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import unittest + +import pandas as pd + +from pyspark import pandas as ps +from pyspark.pandas.exceptions import PandasNotImplementedError +from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.testing.pandasutils import PandasOnSparkTestUtils, TestUtils + + +class ConnectPlottingTests(PandasOnSparkTestUtils, TestUtils, ReusedConnectTestCase): + @property + def pdf1(self): + return pd.DataFrame( + [[1, 2], [4, 5], [7, 8]], + index=["cobra", "viper", None], + columns=["max_speed", "shield"], + ) + + @property + def psdf1(self): + return ps.from_pandas(self.pdf1) + + def test_unsupported_functions(self): + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.hist() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.hist(bins=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.kde() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.kde(bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.density() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot.density(bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.hist() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.hist(bins=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.kde() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.kde(bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.density() + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot.density(bw_method=3) + + def test_unsupported_kinds(self): + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="hist") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="hist", bins=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="kde") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="kde", bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="density") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.plot(kind="density", bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="hist") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="hist", bins=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="kde") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="kde", bw_method=3) + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="density") + + with self.assertRaises(PandasNotImplementedError): + self.psdf1.shield.plot(kind="density", bw_method=3) + + +if __name__ == "__main__": + from pyspark.pandas.tests.connect.test_connect_plotting import * # noqa: F401 + + try: + import xmlrunner # type: ignore[import] + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py b/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py index 24626a9164e84..4a8417382253e 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py +++ b/python/pyspark/pandas/tests/connect/test_parity_frame_spark.py @@ 
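The new `test_connect_plotting.py` pins down the current Spark Connect behavior: histogram, KDE, and density plots for pandas-on-Spark raise `PandasNotImplementedError` (the matplotlib parity skips note these depend on Spark ML, which is not available over Connect). What a caller sees, assuming an active Spark Connect session:

```python
import pandas as pd
import pyspark.pandas as ps
from pyspark.pandas.exceptions import PandasNotImplementedError

psdf = ps.from_pandas(
    pd.DataFrame({"max_speed": [1, 4, 7], "shield": [2, 5, 8]})
)

try:
    # Same outcome for .plot.kde(), .plot.density(), and kind="hist"/"kde"/"density".
    psdf.plot.hist(bins=3)
except PandasNotImplementedError as e:
    print("Not supported over Spark Connect:", e)
```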
-24,19 +24,19 @@ class SparkFrameMethodsParityTests( SparkFrameMethodsTestsMixin, TestUtils, PandasOnSparkTestUtils, ReusedConnectTestCase ): - @unittest.skip("Test depends on checkpoint which is not supported from Spark Connect.") + @unittest.skip("Test depends on SparkContext which is not supported from Spark Connect.") def test_checkpoint(self): super().test_checkpoint() - @unittest.skip("Test depends on RDD which is not supported from Spark Connect.") + @unittest.skip( + "Test depends on RDD, and cannot use SQL expression due to Catalyst optimization" + ) def test_coalesce(self): super().test_coalesce() - @unittest.skip("Test depends on localCheckpoint which is not supported from Spark Connect.") - def test_local_checkpoint(self): - super().test_local_checkpoint() - - @unittest.skip("Test depends on RDD which is not supported from Spark Connect.") + @unittest.skip( + "Test depends on RDD, and cannot use SQL expression due to Catalyst optimization" + ) def test_repartition(self): super().test_repartition() diff --git a/python/pyspark/pandas/tests/connect/test_parity_sql.py b/python/pyspark/pandas/tests/connect/test_parity_sql.py index 2e503cac07a8a..29abbda8c0ebb 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_sql.py +++ b/python/pyspark/pandas/tests/connect/test_parity_sql.py @@ -22,13 +22,7 @@ class SQLParityTests(SQLTestsMixin, PandasOnSparkTestUtils, ReusedConnectTestCase): - @unittest.skip("Test depends on temp view issue on JVM side.") - def test_sql_with_index_col(self): - super().test_sql_with_index_col() - - @unittest.skip("Test depends on temp view issue on JVM side.") - def test_sql_with_pandas_on_spark_objects(self): - super().test_sql_with_pandas_on_spark_objects() + pass if __name__ == "__main__": diff --git a/python/pyspark/pandas/tests/frame/test_constructor.py b/python/pyspark/pandas/tests/frame/test_constructor.py index ee010d8f023dd..d0d842d8264c7 100644 --- a/python/pyspark/pandas/tests/frame/test_constructor.py +++ b/python/pyspark/pandas/tests/frame/test_constructor.py @@ -137,13 +137,14 @@ def test_creation_index(self): pd.DataFrame(data=data, index=pd.Index([1, 2, 3, 5, 6])), ) - err_msg = "Cannot combine the series or dataframe" - with self.assertRaisesRegex(ValueError, err_msg): - # test ps.DataFrame with ps.Index - ps.DataFrame(data=ps.DataFrame([1, 2]), index=ps.Index([1, 2])) - with self.assertRaisesRegex(ValueError, err_msg): - # test ps.DataFrame with pd.Index - ps.DataFrame(data=ps.DataFrame([1, 2]), index=pd.Index([3, 4])) + with ps.option_context("compute.ops_on_diff_frames", False): + err_msg = "Cannot combine the series or dataframe" + with self.assertRaisesRegex(ValueError, err_msg): + # test ps.DataFrame with ps.Index + ps.DataFrame(data=ps.DataFrame([1, 2]), index=ps.Index([1, 2])) + with self.assertRaisesRegex(ValueError, err_msg): + # test ps.DataFrame with pd.Index + ps.DataFrame(data=ps.DataFrame([1, 2]), index=pd.Index([3, 4])) with ps.option_context("compute.ops_on_diff_frames", True): # test pd.DataFrame with pd.Index @@ -195,14 +196,14 @@ def test_creation_index(self): with ps.option_context("compute.ops_on_diff_frames", True): # test with ps.DataFrame and pd.Index self.assert_eq( - ps.DataFrame(data=psdf, index=pd.Index([2, 3, 4, 5, 6])), - pd.DataFrame(data=pdf, index=pd.Index([2, 3, 4, 5, 6])), + ps.DataFrame(data=psdf, index=pd.Index([2, 3, 4, 5, 6])).sort_index(), + pd.DataFrame(data=pdf, index=pd.Index([2, 3, 4, 5, 6])).sort_index(), ) # test with ps.DataFrame and ps.Index self.assert_eq( - ps.DataFrame(data=psdf, 
index=ps.Index([2, 3, 4, 5, 6])), - pd.DataFrame(data=pdf, index=pd.Index([2, 3, 4, 5, 6])), + ps.DataFrame(data=psdf, index=ps.Index([2, 3, 4, 5, 6])).sort_index(), + pd.DataFrame(data=pdf, index=pd.Index([2, 3, 4, 5, 6])).sort_index(), ) # test String Index @@ -269,11 +270,11 @@ def test_creation_index(self): ps.DataFrame( data=pdf, index=pd.DatetimeIndex(["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"]), - ), + ).sort_index(), pd.DataFrame( data=pdf, index=pd.DatetimeIndex(["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"]), - ), + ).sort_index(), ) # test with pd.DataFrame and ps.DatetimeIndex @@ -281,11 +282,11 @@ def test_creation_index(self): ps.DataFrame( data=pdf, index=ps.DatetimeIndex(["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"]), - ), + ).sort_index(), pd.DataFrame( data=pdf, index=pd.DatetimeIndex(["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"]), - ), + ).sort_index(), ) with ps.option_context("compute.ops_on_diff_frames", True): @@ -296,13 +297,13 @@ def test_creation_index(self): index=pd.DatetimeIndex( ["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"] ), - ), + ).sort_index(), pd.DataFrame( data=pdf, index=pd.DatetimeIndex( ["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"] ), - ), + ).sort_index(), ) # test with ps.DataFrame and ps.DatetimeIndex @@ -312,13 +313,13 @@ def test_creation_index(self): index=ps.DatetimeIndex( ["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"] ), - ), + ).sort_index(), pd.DataFrame( data=pdf, index=pd.DatetimeIndex( ["2022-08-31", "2022-09-02", "2022-09-03", "2022-09-05"] ), - ), + ).sort_index(), ) # test MultiIndex diff --git a/python/pyspark/pandas/tests/groupby/test_groupby.py b/python/pyspark/pandas/tests/groupby/test_groupby.py index 5867f7b62fa5e..b58bfddb4b996 100644 --- a/python/pyspark/pandas/tests/groupby/test_groupby.py +++ b/python/pyspark/pandas/tests/groupby/test_groupby.py @@ -451,6 +451,27 @@ def test_diff(self): pdf.groupby([("x", "a"), ("x", "b")]).diff().sort_index(), ) + def test_aggregate_relabel_index_false(self): + pdf = pd.DataFrame( + { + "A": [0, 0, 1, 1, 1], + "B": ["a", "a", "b", "a", "b"], + "C": [10, 15, 10, 20, 30], + } + ) + psdf = ps.from_pandas(pdf) + + self.assert_eq( + pdf.groupby(["B", "A"], as_index=False) + .agg(C_MAX=("C", "max")) + .sort_values(["B", "A"]) + .reset_index(drop=True), + psdf.groupby(["B", "A"], as_index=False) + .agg(C_MAX=("C", "max")) + .sort_values(["B", "A"]) + .reset_index(drop=True), + ) + class GroupByTests( GroupByTestsMixin, diff --git a/python/pyspark/pandas/tests/indexes/test_default.py b/python/pyspark/pandas/tests/indexes/test_default.py index 3d19eb407b42c..5cd9fae76dfbe 100644 --- a/python/pyspark/pandas/tests/indexes/test_default.py +++ b/python/pyspark/pandas/tests/indexes/test_default.py @@ -44,7 +44,7 @@ def test_index_distributed_sequence_cleanup(self): "compute.default_index_type", "distributed-sequence" ), ps.option_context("compute.ops_on_diff_frames", True): with ps.option_context("compute.default_index_cache", "LOCAL_CHECKPOINT"): - cached_rdd_ids = [rdd_id for rdd_id in self.spark._jsc.getPersistentRDDs()] + cached_rdd_ids = [rdd_id for rdd_id in self._legacy_sc._jsc.getPersistentRDDs()] psdf1 = ( self.spark.range(0, 100, 1, 10).withColumn("Key", F.col("id") % 33).pandas_api() @@ -61,13 +61,13 @@ def test_index_distributed_sequence_cleanup(self): self.assertTrue( any( rdd_id not in cached_rdd_ids - for rdd_id in self.spark._jsc.getPersistentRDDs() + for rdd_id in self._legacy_sc._jsc.getPersistentRDDs() ) ) for 
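The added `test_aggregate_relabel_index_false` exercises named aggregation combined with `as_index=False`, which keeps the grouping keys as ordinary columns. The pattern being tested, using the same toy data as the test:

```python
import pyspark.pandas as ps

psdf = ps.DataFrame(
    {
        "A": [0, 0, 1, 1, 1],
        "B": ["a", "a", "b", "a", "b"],
        "C": [10, 15, 10, 20, 30],
    }
)

# Named aggregation: the result has columns B, A, and C_MAX, matching pandas.
out = (
    psdf.groupby(["B", "A"], as_index=False)
    .agg(C_MAX=("C", "max"))
    .sort_values(["B", "A"])
)
print(out.to_pandas())
```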
storage_level in ["NONE", "DISK_ONLY_2", "MEMORY_AND_DISK_SER"]: with ps.option_context("compute.default_index_cache", storage_level): - cached_rdd_ids = [rdd_id for rdd_id in self.spark._jsc.getPersistentRDDs()] + cached_rdd_ids = [rdd_id for rdd_id in self._legacy_sc._jsc.getPersistentRDDs()] psdf1 = ( self.spark.range(0, 100, 1, 10) @@ -86,7 +86,7 @@ def test_index_distributed_sequence_cleanup(self): self.assertTrue( all( rdd_id in cached_rdd_ids - for rdd_id in self.spark._jsc.getPersistentRDDs() + for rdd_id in self._legacy_sc._jsc.getPersistentRDDs() ) ) diff --git a/python/pyspark/pandas/tests/indexes/test_indexing.py b/python/pyspark/pandas/tests/indexes/test_indexing.py index c7367492a20f5..3178e8b17665a 100644 --- a/python/pyspark/pandas/tests/indexes/test_indexing.py +++ b/python/pyspark/pandas/tests/indexes/test_indexing.py @@ -235,7 +235,9 @@ def test_insert(self): self.assert_eq(psdf.sort_index(), pdf.sort_index(), almost=True) psser = ps.Series([4, 5, 6]) - self.assertRaises(ValueError, lambda: psdf.insert(0, "y", psser)) + with ps.option_context("compute.ops_on_diff_frames", False): + self.assertRaises(ValueError, lambda: psdf.insert(0, "y", psser)) + self.assertRaisesRegex( ValueError, "cannot insert b, already exists", lambda: psdf.insert(1, "b", 10) ) @@ -256,7 +258,9 @@ def test_insert(self): ) self.assertRaises(ValueError, lambda: psdf.insert(0, "e", [7, 8, 9, 10])) - self.assertRaises(ValueError, lambda: psdf.insert(0, "f", ps.Series([7, 8]))) + with ps.option_context("compute.ops_on_diff_frames", False): + self.assertRaises(ValueError, lambda: psdf.insert(0, "f", ps.Series([7, 8]))) + self.assertRaises(AssertionError, lambda: psdf.insert(100, "y", psser)) self.assertRaises(AssertionError, lambda: psdf.insert(1, "y", psser, allow_duplicates=True)) diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py index 0fe2944bcabe9..fec45072cf93a 100644 --- a/python/pyspark/pandas/utils.py +++ b/python/pyspark/pandas/utils.py @@ -42,7 +42,7 @@ from pyspark.sql import functions as F, Column, DataFrame as PySparkDataFrame, SparkSession from pyspark.sql.types import DoubleType -from pyspark.sql.utils import is_remote, get_dataframe_class +from pyspark.sql.utils import is_remote from pyspark.errors import PySparkTypeError from pyspark import pandas as ps # noqa: F401 from pyspark.pandas._typing import ( @@ -915,8 +915,7 @@ def verify_temp_column_name( ) column_name = column_name_or_label - SparkDataFrame = get_dataframe_class() - assert isinstance(df, SparkDataFrame), type(df) + assert isinstance(df, PySparkDataFrame), type(df) assert ( column_name not in df.columns ), "The given column name `{}` already exists in the Spark DataFrame: {}".format( diff --git a/python/pyspark/resource/profile.py b/python/pyspark/resource/profile.py index a22afdf16c8b0..e9e6ef3520eea 100644 --- a/python/pyspark/resource/profile.py +++ b/python/pyspark/resource/profile.py @@ -201,14 +201,15 @@ class ResourceProfileBuilder: """ def __init__(self) -> None: - from pyspark.core.context import SparkContext + from pyspark.sql import is_remote - # TODO: ignore[attr-defined] will be removed, once SparkContext is inlined - _jvm = SparkContext._jvm + _jvm = None + if not is_remote(): + from pyspark.core.context import SparkContext - from pyspark.sql import is_remote + _jvm = SparkContext._jvm - if _jvm is not None and not is_remote(): + if _jvm is not None: self._jvm = _jvm self._java_resource_profile_builder = ( _jvm.org.apache.spark.resource.ResourceProfileBuilder() diff --git 
a/python/pyspark/resource/requests.py b/python/pyspark/resource/requests.py index 746fca9848393..fa8bb43ee2c49 100644 --- a/python/pyspark/resource/requests.py +++ b/python/pyspark/resource/requests.py @@ -164,14 +164,17 @@ def __init__( _jvm: Optional["JVMView"] = None, _requests: Optional[Dict[str, ExecutorResourceRequest]] = None, ): - from pyspark import SparkContext from pyspark.sql import is_remote - _jvm = _jvm or SparkContext._jvm + jvm = None + if not is_remote(): + from pyspark.core.context import SparkContext - if _jvm is not None and not is_remote(): + jvm = _jvm or SparkContext._jvm + + if jvm is not None: self._java_executor_resource_requests = ( - _jvm.org.apache.spark.resource.ExecutorResourceRequests() + jvm.org.apache.spark.resource.ExecutorResourceRequests() ) if _requests is not None: for k, v in _requests.items(): @@ -462,15 +465,18 @@ def __init__( _jvm: Optional["JVMView"] = None, _requests: Optional[Dict[str, TaskResourceRequest]] = None, ): - from pyspark import SparkContext from pyspark.sql import is_remote - _jvm = _jvm or SparkContext._jvm + jvm = None + if not is_remote(): + from pyspark.core.context import SparkContext + + jvm = _jvm or SparkContext._jvm - if _jvm is not None and not is_remote(): + if jvm is not None: self._java_task_resource_requests: Optional[ "JavaObject" - ] = _jvm.org.apache.spark.resource.TaskResourceRequests() + ] = jvm.org.apache.spark.resource.TaskResourceRequests() if _requests is not None: for k, v in _requests.items(): if k == self._CPUS: diff --git a/python/pyspark/resource/tests/test_connect_resources.py b/python/pyspark/resource/tests/test_connect_resources.py index 1529a33cb0ad0..90bae85c2a1b7 100644 --- a/python/pyspark/resource/tests/test_connect_resources.py +++ b/python/pyspark/resource/tests/test_connect_resources.py @@ -15,6 +15,7 @@ # limitations under the License. # import unittest +import os from pyspark.resource import ResourceProfileBuilder, TaskResourceRequests, ExecutorResourceRequests from pyspark.sql import SparkSession @@ -35,20 +36,20 @@ def test_profile_before_sc_for_connect(self): # check taskResources, similar to executorResources. self.assertEqual(rp.taskResources["cpus"].amount, 2.0) - # SparkContext is not initialized and is not remote. - with self.assertRaisesRegex( - RuntimeError, "SparkContext must be created to get the profile id." - ): + # SparkContext or SparkSesssion is not initialized. + with self.assertRaises(RuntimeError): rp.id # Remote mode. - spark = SparkSession.builder.remote("local-cluster[1, 2, 1024]").getOrCreate() + spark = SparkSession.builder.remote( + os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local-cluster[1, 2, 1024]") + ).getOrCreate() # Still can access taskResources, similar to executorResources. 
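After the `profile.py`/`requests.py` changes above, the resource-request classes only reach for the JVM when `is_remote()` is false, so a `ResourceProfile` can be assembled on a pure Spark Connect client before any session exists; its `id` is still resolved lazily, as the updated test checks. A short sketch of the builder usage the test exercises, with illustrative amounts:

```python
from pyspark.resource import (
    ExecutorResourceRequests,
    ResourceProfileBuilder,
    TaskResourceRequests,
)

# No SparkContext or JVM is needed to build the profile itself.
exec_reqs = ExecutorResourceRequests().cores(2).memory("6g")
task_reqs = TaskResourceRequests().cpus(2)

rp = ResourceProfileBuilder().require(exec_reqs).require(task_reqs).build
print(rp.taskResources["cpus"].amount)  # 2.0

# Accessing rp.id before a SparkContext or SparkSession exists raises RuntimeError.
```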
self.assertEqual(rp.taskResources["cpus"].amount, 2.0) rp.id df = spark.range(10) - df.mapInPandas(lambda x: x, df.schema, False, rp).collect() - df.mapInArrow(lambda x: x, df.schema, False, rp).collect() + df.mapInPandas(lambda x: x, df.schema, False, rp).show(n=10) + df.mapInArrow(lambda x: x, df.schema, False, rp).show(n=10) def assert_request_contents(exec_reqs, task_reqs): self.assertEqual(len(exec_reqs), 6) diff --git a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py index 5cebfa384045e..fb3bd53984959 100644 --- a/python/pyspark/sql/avro/functions.py +++ b/python/pyspark/sql/avro/functions.py @@ -22,7 +22,8 @@ from typing import Dict, Optional, TYPE_CHECKING, cast -from pyspark.sql.column import Column, _to_java_column +from pyspark.errors import PySparkTypeError +from pyspark.sql.column import Column from pyspark.sql.utils import get_active_spark_context, try_remote_avro_functions from pyspark.util import _print_missing_jar @@ -78,6 +79,26 @@ def from_avro( [Row(value=Row(avro=Row(age=2, name='Alice')))] """ from py4j.java_gateway import JVMView + from pyspark.sql.classic.column import _to_java_column + + if not isinstance(data, (Column, str)): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={ + "arg_name": "data", + "arg_type": "pyspark.sql.Column or str", + }, + ) + if not isinstance(jsonFormatSchema, str): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={"arg_name": "jsonFormatSchema", "arg_type": "str"}, + ) + if options is not None and not isinstance(options, dict): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={"arg_name": "options", "arg_type": "dict, optional"}, + ) sc = get_active_spark_context() try: @@ -128,6 +149,21 @@ def to_avro(data: "ColumnOrName", jsonFormatSchema: str = "") -> Column: [Row(suite=bytearray(b'\\x02\\x00'))] """ from py4j.java_gateway import JVMView + from pyspark.sql.classic.column import _to_java_column + + if not isinstance(data, (Column, str)): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={ + "arg_name": "data", + "arg_type": "pyspark.sql.Column or str", + }, + ) + if not isinstance(jsonFormatSchema, str): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={"arg_name": "jsonFormatSchema", "arg_type": "str"}, + ) sc = get_active_spark_context() try: diff --git a/python/pyspark/sql/classic/__init__.py b/python/pyspark/sql/classic/__init__.py new file mode 100644 index 0000000000000..f7ae391c3186d --- /dev/null +++ b/python/pyspark/sql/classic/__init__.py @@ -0,0 +1,18 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +"""Spark Classic specific""" diff --git a/python/pyspark/sql/classic/column.py b/python/pyspark/sql/classic/column.py new file mode 100644 index 0000000000000..7630cfed5c173 --- /dev/null +++ b/python/pyspark/sql/classic/column.py @@ -0,0 +1,637 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +import json +import warnings +from typing import ( + cast, + overload, + Any, + Callable, + Iterable, + List, + Optional, + Tuple, + TYPE_CHECKING, + Union, +) + +from pyspark.sql.column import Column as ParentColumn +from pyspark.errors import PySparkAttributeError, PySparkTypeError, PySparkValueError +from pyspark.errors.utils import with_origin_to_class +from pyspark.sql.types import DataType +from pyspark.sql.utils import get_active_spark_context + +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject + from pyspark.core.context import SparkContext + from pyspark.sql._typing import ColumnOrName, LiteralType, DecimalLiteral, DateTimeLiteral + from pyspark.sql.window import WindowSpec + +__all__ = ["Column"] + + +def _create_column_from_literal( + literal: Union["LiteralType", "DecimalLiteral", "DateTimeLiteral", "ParentColumn"] +) -> "JavaObject": + from py4j.java_gateway import JVMView + + sc = get_active_spark_context() + return cast(JVMView, sc._jvm).functions.lit(literal) + + +def _create_column_from_name(name: str) -> "JavaObject": + from py4j.java_gateway import JVMView + + sc = get_active_spark_context() + return cast(JVMView, sc._jvm).functions.col(name) + + +def _to_java_column(col: "ColumnOrName") -> "JavaObject": + if isinstance(col, Column): + jcol = col._jc + elif isinstance(col, str): + jcol = _create_column_from_name(col) + else: + raise PySparkTypeError( + error_class="NOT_COLUMN_OR_STR", + message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, + ) + return jcol + + +def _to_java_expr(col: "ColumnOrName") -> "JavaObject": + return _to_java_column(col).expr() + + +@overload +def _to_seq(sc: "SparkContext", cols: Iterable["JavaObject"]) -> "JavaObject": + ... + + +@overload +def _to_seq( + sc: "SparkContext", + cols: Iterable["ColumnOrName"], + converter: Optional[Callable[["ColumnOrName"], "JavaObject"]], +) -> "JavaObject": + ... + + +def _to_seq( + sc: "SparkContext", + cols: Union[Iterable["ColumnOrName"], Iterable["JavaObject"]], + converter: Optional[Callable[["ColumnOrName"], "JavaObject"]] = None, +) -> "JavaObject": + """ + Convert a list of Columns (or names) into a JVM Seq of Column. + + An optional `converter` could be used to convert items in `cols` + into JVM Column objects. 
+ """ + if converter: + cols = [converter(c) for c in cols] + assert sc._jvm is not None + return sc._jvm.PythonUtils.toSeq(cols) + + +def _to_list( + sc: "SparkContext", + cols: List["ColumnOrName"], + converter: Optional[Callable[["ColumnOrName"], "JavaObject"]] = None, +) -> "JavaObject": + """ + Convert a list of Columns (or names) into a JVM (Scala) List of Columns. + + An optional `converter` could be used to convert items in `cols` + into JVM Column objects. + """ + if converter: + cols = [converter(c) for c in cols] + assert sc._jvm is not None + return sc._jvm.PythonUtils.toList(cols) + + +def _unary_op(name: str, self: ParentColumn) -> ParentColumn: + """Create a method for given unary operator""" + + jc = getattr(self._jc, name)() + return Column(jc) + + +def _func_op(name: str, self: ParentColumn) -> ParentColumn: + from py4j.java_gateway import JVMView + + sc = get_active_spark_context() + jc = getattr(cast(JVMView, sc._jvm).functions, name)(self._jc) + return Column(jc) + + +def _bin_func_op( + name: str, + self: ParentColumn, + other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"], + reverse: bool = False, +) -> ParentColumn: + from py4j.java_gateway import JVMView + + sc = get_active_spark_context() + fn = getattr(cast(JVMView, sc._jvm).functions, name) + jc = other._jc if isinstance(other, ParentColumn) else _create_column_from_literal(other) + njc = fn(self._jc, jc) if not reverse else fn(jc, self._jc) + return Column(njc) + + +def _bin_op( + name: str, + self: ParentColumn, + other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"], +) -> ParentColumn: + """Create a method for given binary operator""" + jc = other._jc if isinstance(other, ParentColumn) else other + njc = getattr(self._jc, name)(jc) + return Column(njc) + + +def _reverse_op( + name: str, + self: ParentColumn, + other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"], +) -> ParentColumn: + """Create a method for binary operator (this object is on right side)""" + jother = _create_column_from_literal(other) + jc = getattr(jother, name)(self._jc) + return Column(jc) + + +@with_origin_to_class +class Column(ParentColumn): + def __new__( + cls, + jc: "JavaObject", + ) -> "Column": + self = object.__new__(cls) + self.__init__(jc) # type: ignore[misc] + return self + + def __init__(self, jc: "JavaObject") -> None: + self._jc = jc + + # arithmetic operators + def __neg__(self) -> ParentColumn: + return _func_op("negate", self) + + def __add__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("plus", self, other) + + def __sub__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("minus", self, other) + + def __mul__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("multiply", self, other) + + def __div__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("divide", self, other) + + def __truediv__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("divide", self, other) + + def __mod__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("mod", self, other) + + def __radd__( + self, other: 
Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("plus", self, other) + + def __rsub__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _reverse_op("minus", self, other) + + def __rmul__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("multiply", self, other) + + def __rdiv__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _reverse_op("divide", self, other) + + def __rtruediv__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _reverse_op("divide", self, other) + + def __rmod__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _reverse_op("mod", self, other) + + def __pow__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_func_op("pow", self, other) + + def __rpow__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_func_op("pow", self, other, reverse=True) + + # logistic operators + def __eq__( # type: ignore[override] + self, + other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"], + ) -> ParentColumn: + return _bin_op("equalTo", self, other) + + def __ne__( # type: ignore[override] + self, + other: Any, + ) -> ParentColumn: + return _bin_op("notEqual", self, other) + + def __lt__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("lt", self, other) + + def __le__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("leq", self, other) + + def __ge__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("geq", self, other) + + def __gt__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("gt", self, other) + + def eqNullSafe( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("eqNullSafe", self, other) + + # `and`, `or`, `not` cannot be overloaded in Python, + # so use bitwise operators as boolean operators + def __and__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("and", self, other) + + def __or__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("or", self, other) + + def __invert__(self) -> ParentColumn: + return _func_op("not", self) + + def __rand__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("and", self, other) + + def __ror__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("or", self, other) + + # container operators + def __contains__(self, item: Any) -> None: + raise PySparkValueError( + error_class="CANNOT_APPLY_IN_FOR_COLUMN", + message_parameters={}, + ) + + # bitwise operators + def bitwiseOR( + self, other: Union[ParentColumn, 
"LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("bitwiseOR", self, other) + + def bitwiseAND( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("bitwiseAND", self, other) + + def bitwiseXOR( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("bitwiseXOR", self, other) + + def getItem(self, key: Any) -> ParentColumn: + if isinstance(key, Column): + warnings.warn( + "A column as 'key' in getItem is deprecated as of Spark 3.0, and will not " + "be supported in the future release. Use `column[key]` or `column.key` syntax " + "instead.", + FutureWarning, + ) + return self[key] + + def getField(self, name: Any) -> ParentColumn: + if isinstance(name, Column): + warnings.warn( + "A column as 'name' in getField is deprecated as of Spark 3.0, and will not " + "be supported in the future release. Use `column[name]` or `column.name` syntax " + "instead.", + FutureWarning, + ) + return self[name] + + def withField(self, fieldName: str, col: ParentColumn) -> ParentColumn: + if not isinstance(fieldName, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "fieldName", "arg_type": type(fieldName).__name__}, + ) + + if not isinstance(col, Column): + raise PySparkTypeError( + error_class="NOT_COLUMN", + message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, + ) + + return Column(self._jc.withField(fieldName, col._jc)) + + def dropFields(self, *fieldNames: str) -> ParentColumn: + sc = get_active_spark_context() + jc = self._jc.dropFields(_to_seq(sc, fieldNames)) + return Column(jc) + + def __getattr__(self, item: Any) -> ParentColumn: + if item.startswith("__"): + raise PySparkAttributeError( + error_class="CANNOT_ACCESS_TO_DUNDER", + message_parameters={}, + ) + return self[item] + + def __getitem__(self, k: Any) -> ParentColumn: + if isinstance(k, slice): + if k.step is not None: + raise PySparkValueError( + error_class="SLICE_WITH_STEP", + message_parameters={}, + ) + return self.substr(k.start, k.stop) + else: + return _bin_op("apply", self, k) + + def __iter__(self) -> None: + raise PySparkTypeError( + error_class="NOT_ITERABLE", message_parameters={"objectName": "Column"} + ) + + # string methods + def contains( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("contains", self, other) + + def startswith( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("startsWith", self, other) + + def endswith( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("endsWith", self, other) + + def like(self: ParentColumn, other: str) -> ParentColumn: + njc = getattr(self._jc, "like")(other) + return Column(njc) + + def rlike(self: ParentColumn, other: str) -> ParentColumn: + njc = getattr(self._jc, "rlike")(other) + return Column(njc) + + def ilike(self: ParentColumn, other: str) -> ParentColumn: + njc = getattr(self._jc, "ilike")(other) + return Column(njc) + + def substr( + self, startPos: Union[int, ParentColumn], length: Union[int, ParentColumn] + ) -> ParentColumn: + if type(startPos) != type(length): + raise PySparkTypeError( + error_class="NOT_SAME_TYPE", + message_parameters={ + "arg_name1": "startPos", + "arg_name2": "length", + "arg_type1": 
type(startPos).__name__, + "arg_type2": type(length).__name__, + }, + ) + if isinstance(startPos, int): + jc = self._jc.substr(startPos, length) + elif isinstance(startPos, Column): + jc = self._jc.substr(startPos._jc, cast(ParentColumn, length)._jc) + else: + raise PySparkTypeError( + error_class="NOT_COLUMN_OR_INT", + message_parameters={"arg_name": "startPos", "arg_type": type(startPos).__name__}, + ) + return Column(jc) + + def isin(self, *cols: Any) -> ParentColumn: + if len(cols) == 1 and isinstance(cols[0], (list, set)): + cols = cast(Tuple, cols[0]) + cols = cast( + Tuple, + [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols], + ) + sc = get_active_spark_context() + jc = getattr(self._jc, "isin")(_to_seq(sc, cols)) + return Column(jc) + + # order + def asc(self) -> ParentColumn: + return _unary_op("asc", self) + + def asc_nulls_first(self) -> ParentColumn: + return _unary_op("asc_nulls_first", self) + + def asc_nulls_last(self) -> ParentColumn: + return _unary_op("asc_nulls_last", self) + + def desc(self) -> ParentColumn: + return _unary_op("desc", self) + + def desc_nulls_first(self) -> ParentColumn: + return _unary_op("desc_nulls_first", self) + + def desc_nulls_last(self) -> ParentColumn: + return _unary_op("desc_nulls_last", self) + + def isNull(self) -> ParentColumn: + return _unary_op("isNull", self) + + def isNotNull(self) -> ParentColumn: + return _unary_op("isNotNull", self) + + def isNaN(self) -> ParentColumn: + return _unary_op("isNaN", self) + + def alias(self, *alias: str, **kwargs: Any) -> ParentColumn: + metadata = kwargs.pop("metadata", None) + assert not kwargs, "Unexpected kwargs where passed: %s" % kwargs + + sc = get_active_spark_context() + if len(alias) == 1: + if metadata: + assert sc._jvm is not None + jmeta = sc._jvm.org.apache.spark.sql.types.Metadata.fromJson(json.dumps(metadata)) + return Column(getattr(self._jc, "as")(alias[0], jmeta)) + else: + return Column(getattr(self._jc, "as")(alias[0])) + else: + if metadata is not None: + raise PySparkValueError( + error_class="ONLY_ALLOWED_FOR_SINGLE_COLUMN", + message_parameters={"arg_name": "metadata"}, + ) + return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias)))) + + def name(self, *alias: str, **kwargs: Any) -> ParentColumn: + return self.alias(*alias, **kwargs) + + def cast(self, dataType: Union[DataType, str]) -> ParentColumn: + if isinstance(dataType, str): + jc = self._jc.cast(dataType) + elif isinstance(dataType, DataType): + from pyspark.sql import SparkSession + + spark = SparkSession._getActiveSessionOrCreate() + jdt = spark._jsparkSession.parseDataType(dataType.json()) + jc = self._jc.cast(jdt) + else: + raise PySparkTypeError( + error_class="NOT_DATATYPE_OR_STR", + message_parameters={"arg_name": "dataType", "arg_type": type(dataType).__name__}, + ) + return Column(jc) + + def try_cast(self, dataType: Union[DataType, str]) -> ParentColumn: + if isinstance(dataType, str): + jc = self._jc.try_cast(dataType) + elif isinstance(dataType, DataType): + from pyspark.sql import SparkSession + + spark = SparkSession._getActiveSessionOrCreate() + jdt = spark._jsparkSession.parseDataType(dataType.json()) + jc = self._jc.try_cast(jdt) + else: + raise PySparkTypeError( + error_class="NOT_DATATYPE_OR_STR", + message_parameters={"arg_name": "dataType", "arg_type": type(dataType).__name__}, + ) + return Column(jc) + + def astype(self, dataType: Union[DataType, str]) -> ParentColumn: + return self.cast(dataType) + + def between( + self, + lowerBound: Union[ParentColumn, 
"LiteralType", "DateTimeLiteral", "DecimalLiteral"], + upperBound: Union[ParentColumn, "LiteralType", "DateTimeLiteral", "DecimalLiteral"], + ) -> ParentColumn: + return (self >= lowerBound) & (self <= upperBound) + + def when(self, condition: ParentColumn, value: Any) -> ParentColumn: + if not isinstance(condition, Column): + raise PySparkTypeError( + error_class="NOT_COLUMN", + message_parameters={"arg_name": "condition", "arg_type": type(condition).__name__}, + ) + v = value._jc if isinstance(value, Column) else value + jc = self._jc.when(condition._jc, v) + return Column(jc) + + def otherwise(self, value: Any) -> ParentColumn: + v = value._jc if isinstance(value, Column) else value + jc = self._jc.otherwise(v) + return Column(jc) + + def over(self, window: "WindowSpec") -> ParentColumn: + from pyspark.sql.classic.window import WindowSpec + + if not isinstance(window, WindowSpec): + raise PySparkTypeError( + error_class="NOT_WINDOWSPEC", + message_parameters={"arg_name": "window", "arg_type": type(window).__name__}, + ) + jc = self._jc.over(window._jspec) + return Column(jc) + + def __nonzero__(self) -> None: + raise PySparkValueError( + error_class="CANNOT_CONVERT_COLUMN_INTO_BOOL", + message_parameters={}, + ) + + __bool__ = __nonzero__ + + def __repr__(self) -> str: + return "Column<'%s'>" % self._jc.toString() + + +def _test() -> None: + import doctest + from pyspark.sql import SparkSession + import pyspark.sql.column + + # It inherits docstrings but doctests cannot detect them so we run + # the parent classe's doctests here directly. + globs = pyspark.sql.column.__dict__.copy() + spark = ( + SparkSession.builder.master("local[4]").appName("sql.classic.column tests").getOrCreate() + ) + globs["spark"] = spark + + (failure_count, test_count) = doctest.testmod( + pyspark.sql.column, + globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF, + ) + spark.stop() + if failure_count: + sys.exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/classic/dataframe.py b/python/pyspark/sql/classic/dataframe.py new file mode 100644 index 0000000000000..1bedd624603e1 --- /dev/null +++ b/python/pyspark/sql/classic/dataframe.py @@ -0,0 +1,1990 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import json +import sys +import random +import warnings +from collections.abc import Iterable +from functools import reduce +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Optional, + Sequence, + Tuple, + Type, + Union, + cast, + overload, + TYPE_CHECKING, +) + +from pyspark import _NoValue +from pyspark.resource import ResourceProfile +from pyspark._globals import _NoValueType +from pyspark.errors import ( + PySparkTypeError, + PySparkValueError, + PySparkIndexError, + PySparkAttributeError, +) +from pyspark.util import ( + _load_from_socket, + _local_iterator_from_socket, +) +from pyspark.serializers import BatchedSerializer, CPickleSerializer, UTF8Deserializer +from pyspark.storagelevel import StorageLevel +from pyspark.traceback_utils import SCCallSiteSync +from pyspark.sql.column import Column +from pyspark.sql.classic.column import _to_seq, _to_list, _to_java_column +from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2 +from pyspark.sql.streaming import DataStreamWriter +from pyspark.sql.types import ( + StructType, + Row, + _parse_datatype_json_string, +) +from pyspark.sql.dataframe import ( + DataFrame as ParentDataFrame, + DataFrameNaFunctions as ParentDataFrameNaFunctions, + DataFrameStatFunctions as ParentDataFrameStatFunctions, +) +from pyspark.sql.utils import get_active_spark_context, toJArray +from pyspark.sql.pandas.conversion import PandasConversionMixin +from pyspark.sql.pandas.map_ops import PandasMapOpsMixin + +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject + import pyarrow as pa + from pyspark.core.rdd import RDD + from pyspark.core.context import SparkContext + from pyspark._typing import PrimitiveType + from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame + from pyspark.sql._typing import ( + ColumnOrName, + ColumnOrNameOrOrdinal, + LiteralType, + OptionalPrimitiveType, + ) + from pyspark.sql.pandas._typing import ( + PandasMapIterFunction, + ArrowMapIterFunction, + DataFrameLike as PandasDataFrameLike, + ) + from pyspark.sql.context import SQLContext + from pyspark.sql.session import SparkSession + from pyspark.sql.group import GroupedData + from pyspark.sql.observation import Observation + from pyspark.sql.metrics import ExecutionInfo + + +class DataFrame(ParentDataFrame, PandasMapOpsMixin, PandasConversionMixin): + def __new__( + cls, + jdf: "JavaObject", + sql_ctx: Union["SQLContext", "SparkSession"], + ) -> "DataFrame": + self = object.__new__(cls) + self.__init__(jdf, sql_ctx) # type: ignore[misc] + return self + + def __init__( + self, + jdf: "JavaObject", + sql_ctx: Union["SQLContext", "SparkSession"], + ): + from pyspark.sql.context import SQLContext + + self._sql_ctx: Optional["SQLContext"] = None + + if isinstance(sql_ctx, SQLContext): + assert not os.environ.get("SPARK_TESTING") # Sanity check for our internal usage. + assert isinstance(sql_ctx, SQLContext) + # We should remove this if-else branch in the future release, and rename + # sql_ctx to session in the constructor. This is an internal code path but + # was kept with a warning because it's used intensively by third-party libraries. + warnings.warn("DataFrame constructor is internal. 
Do not directly use it.") + self._sql_ctx = sql_ctx + session = sql_ctx.sparkSession + else: + session = sql_ctx + self._session: "SparkSession" = session + + self._sc: "SparkContext" = sql_ctx._sc + self._jdf: "JavaObject" = jdf + self.is_cached = False + # initialized lazily + self._schema: Optional[StructType] = None + self._lazy_rdd: Optional["RDD[Row]"] = None + # Check whether _repr_html is supported or not, we use it to avoid calling _jdf twice + # by __repr__ and _repr_html_ while eager evaluation opens. + self._support_repr_html = False + + @property + def sql_ctx(self) -> "SQLContext": + from pyspark.sql.context import SQLContext + + warnings.warn( + "DataFrame.sql_ctx is an internal property, and will be removed " + "in future releases. Use DataFrame.sparkSession instead." + ) + if self._sql_ctx is None: + self._sql_ctx = SQLContext._get_or_create(self._sc) + return self._sql_ctx + + @property + def sparkSession(self) -> "SparkSession": + return self._session + + @property + def rdd(self) -> "RDD[Row]": + from pyspark.core.rdd import RDD + + if self._lazy_rdd is None: + jrdd = self._jdf.javaToPython() + self._lazy_rdd = RDD( + jrdd, self.sparkSession._sc, BatchedSerializer(CPickleSerializer()) + ) + return self._lazy_rdd + + @property + def na(self) -> ParentDataFrameNaFunctions: + return DataFrameNaFunctions(self) + + @property + def stat(self) -> ParentDataFrameStatFunctions: + return DataFrameStatFunctions(self) + + def toJSON(self, use_unicode: bool = True) -> "RDD[str]": + from pyspark.core.rdd import RDD + + rdd = self._jdf.toJSON() + return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode)) + + def registerTempTable(self, name: str) -> None: + warnings.warn("Deprecated in 2.0, use createOrReplaceTempView instead.", FutureWarning) + self._jdf.createOrReplaceTempView(name) + + def createTempView(self, name: str) -> None: + self._jdf.createTempView(name) + + def createOrReplaceTempView(self, name: str) -> None: + self._jdf.createOrReplaceTempView(name) + + def createGlobalTempView(self, name: str) -> None: + self._jdf.createGlobalTempView(name) + + def createOrReplaceGlobalTempView(self, name: str) -> None: + self._jdf.createOrReplaceGlobalTempView(name) + + @property + def write(self) -> DataFrameWriter: + return DataFrameWriter(self) + + @property + def writeStream(self) -> DataStreamWriter: + return DataStreamWriter(self) + + @property + def schema(self) -> StructType: + if self._schema is None: + try: + self._schema = cast( + StructType, _parse_datatype_json_string(self._jdf.schema().json()) + ) + except Exception as e: + raise PySparkValueError( + error_class="CANNOT_PARSE_DATATYPE", + message_parameters={"error": str(e)}, + ) + return self._schema + + def printSchema(self, level: Optional[int] = None) -> None: + if level: + print(self._jdf.schema().treeString(level)) + else: + print(self._jdf.schema().treeString()) + + def explain( + self, extended: Optional[Union[bool, str]] = None, mode: Optional[str] = None + ) -> None: + if extended is not None and mode is not None: + raise PySparkValueError( + error_class="CANNOT_SET_TOGETHER", + message_parameters={"arg_list": "extended and mode"}, + ) + + # For the no argument case: df.explain() + is_no_argument = extended is None and mode is None + + # For the cases below: + # explain(True) + # explain(extended=False) + is_extended_case = isinstance(extended, bool) and mode is None + + # For the case when extended is mode: + # df.explain("formatted") + is_extended_as_mode = isinstance(extended, str) and mode is None 
+ + # For the mode specified: + # df.explain(mode="formatted") + is_mode_case = extended is None and isinstance(mode, str) + + if not (is_no_argument or is_extended_case or is_extended_as_mode or is_mode_case): + if (extended is not None) and (not isinstance(extended, (bool, str))): + raise PySparkTypeError( + error_class="NOT_BOOL_OR_STR", + message_parameters={ + "arg_name": "extended", + "arg_type": type(extended).__name__, + }, + ) + if (mode is not None) and (not isinstance(mode, str)): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "mode", "arg_type": type(mode).__name__}, + ) + + # Sets an explain mode depending on a given argument + if is_no_argument: + explain_mode = "simple" + elif is_extended_case: + explain_mode = "extended" if extended else "simple" + elif is_mode_case: + explain_mode = cast(str, mode) + elif is_extended_as_mode: + explain_mode = cast(str, extended) + assert self._sc._jvm is not None + print(self._sc._jvm.PythonSQLUtils.explainString(self._jdf.queryExecution(), explain_mode)) + + def exceptAll(self, other: ParentDataFrame) -> ParentDataFrame: + return DataFrame(self._jdf.exceptAll(other._jdf), self.sparkSession) + + def isLocal(self) -> bool: + return self._jdf.isLocal() + + @property + def isStreaming(self) -> bool: + return self._jdf.isStreaming() + + def isEmpty(self) -> bool: + return self._jdf.isEmpty() + + def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False) -> None: + print(self._show_string(n, truncate, vertical)) + + def _show_string( + self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False + ) -> str: + if not isinstance(n, int) or isinstance(n, bool): + raise PySparkTypeError( + error_class="NOT_INT", + message_parameters={"arg_name": "n", "arg_type": type(n).__name__}, + ) + + if not isinstance(vertical, bool): + raise PySparkTypeError( + error_class="NOT_BOOL", + message_parameters={"arg_name": "vertical", "arg_type": type(vertical).__name__}, + ) + + if isinstance(truncate, bool) and truncate: + return self._jdf.showString(n, 20, vertical) + else: + try: + int_truncate = int(truncate) + except ValueError: + raise PySparkTypeError( + error_class="NOT_BOOL", + message_parameters={ + "arg_name": "truncate", + "arg_type": type(truncate).__name__, + }, + ) + + return self._jdf.showString(n, int_truncate, vertical) + + def __repr__(self) -> str: + if not self._support_repr_html and self.sparkSession._jconf.isReplEagerEvalEnabled(): + vertical = False + return self._jdf.showString( + self.sparkSession._jconf.replEagerEvalMaxNumRows(), + self.sparkSession._jconf.replEagerEvalTruncate(), + vertical, + ) + else: + return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes)) + + def _repr_html_(self) -> Optional[str]: + """Returns a :class:`DataFrame` with html code when you enabled eager evaluation + by 'spark.sql.repl.eagerEval.enabled', this only called by REPL you are + using support eager evaluation with HTML. 
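`show` and `explain` validate their arguments in Python before delegating to the JVM (`showString` / `PythonSQLUtils.explainString`): an integer `truncate` becomes the cell width, and `explain` accepts a boolean, a mode string passed positionally, or an explicit `mode=`. Quick usage reference:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[2]").getOrCreate()
df = spark.createDataFrame([(i, "row-%d" % i) for i in range(30)], ["id", "label"])

df.show(5)                   # showString(5, 20, vertical=False)
df.show(5, truncate=10)      # int truncate becomes the cell width
df.show(5, vertical=True)    # one line per column value

df.explain()                 # "simple"
df.explain(True)             # "extended"
df.explain("formatted")      # the extended argument used as the mode
df.explain(mode="cost")      # explicit mode keyword
```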
+ """ + if not self._support_repr_html: + self._support_repr_html = True + if self.sparkSession._jconf.isReplEagerEvalEnabled(): + return self._jdf.htmlString( + self.sparkSession._jconf.replEagerEvalMaxNumRows(), + self.sparkSession._jconf.replEagerEvalTruncate(), + ) + else: + return None + + def checkpoint(self, eager: bool = True) -> ParentDataFrame: + jdf = self._jdf.checkpoint(eager) + return DataFrame(jdf, self.sparkSession) + + def localCheckpoint(self, eager: bool = True) -> ParentDataFrame: + jdf = self._jdf.localCheckpoint(eager) + return DataFrame(jdf, self.sparkSession) + + def withWatermark(self, eventTime: str, delayThreshold: str) -> ParentDataFrame: + if not eventTime or type(eventTime) is not str: + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "eventTime", "arg_type": type(eventTime).__name__}, + ) + if not delayThreshold or type(delayThreshold) is not str: + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={ + "arg_name": "delayThreshold", + "arg_type": type(delayThreshold).__name__, + }, + ) + jdf = self._jdf.withWatermark(eventTime, delayThreshold) + return DataFrame(jdf, self.sparkSession) + + def hint( + self, name: str, *parameters: Union["PrimitiveType", "Column", List["PrimitiveType"]] + ) -> ParentDataFrame: + if len(parameters) == 1 and isinstance(parameters[0], list): + parameters = parameters[0] # type: ignore[assignment] + + if not isinstance(name, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "name", "arg_type": type(name).__name__}, + ) + + allowed_types = (str, float, int, Column, list) + allowed_primitive_types = (str, float, int) + allowed_types_repr = ", ".join( + [t.__name__ for t in allowed_types[:-1]] + + ["list[" + t.__name__ + "]" for t in allowed_primitive_types] + ) + for p in parameters: + if not isinstance(p, allowed_types): + raise PySparkTypeError( + error_class="DISALLOWED_TYPE_FOR_CONTAINER", + message_parameters={ + "arg_name": "parameters", + "arg_type": type(parameters).__name__, + "allowed_types": allowed_types_repr, + "item_type": type(p).__name__, + }, + ) + if isinstance(p, list): + if not all(isinstance(e, allowed_primitive_types) for e in p): + raise PySparkTypeError( + error_class="DISALLOWED_TYPE_FOR_CONTAINER", + message_parameters={ + "arg_name": "parameters", + "arg_type": type(parameters).__name__, + "allowed_types": allowed_types_repr, + "item_type": type(p).__name__ + "[" + type(p[0]).__name__ + "]", + }, + ) + + def _converter(parameter: Union[str, list, float, int, Column]) -> Any: + if isinstance(parameter, Column): + return _to_java_column(parameter) + elif isinstance(parameter, list): + # for list input, we are assuming only one element type exist in the list. + # for empty list, we are converting it into an empty long[] in the JVM side. 
+ gateway = self._sc._gateway + assert gateway is not None + jclass = gateway.jvm.long + if len(parameter) >= 1: + mapping = { + str: gateway.jvm.java.lang.String, + float: gateway.jvm.double, + int: gateway.jvm.long, + } + jclass = mapping[type(parameter[0])] + return toJArray(gateway, jclass, parameter) + else: + return parameter + + jdf = self._jdf.hint(name, self._jseq(parameters, _converter)) + return DataFrame(jdf, self.sparkSession) + + def count(self) -> int: + return int(self._jdf.count()) + + def collect(self) -> List[Row]: + with SCCallSiteSync(self._sc): + sock_info = self._jdf.collectToPython() + return list(_load_from_socket(sock_info, BatchedSerializer(CPickleSerializer()))) + + def toLocalIterator(self, prefetchPartitions: bool = False) -> Iterator[Row]: + with SCCallSiteSync(self._sc): + sock_info = self._jdf.toPythonIterator(prefetchPartitions) + return _local_iterator_from_socket(sock_info, BatchedSerializer(CPickleSerializer())) + + def limit(self, num: int) -> ParentDataFrame: + jdf = self._jdf.limit(num) + return DataFrame(jdf, self.sparkSession) + + def offset(self, num: int) -> ParentDataFrame: + jdf = self._jdf.offset(num) + return DataFrame(jdf, self.sparkSession) + + def take(self, num: int) -> List[Row]: + return self.limit(num).collect() + + def tail(self, num: int) -> List[Row]: + with SCCallSiteSync(self._sc): + sock_info = self._jdf.tailToPython(num) + return list(_load_from_socket(sock_info, BatchedSerializer(CPickleSerializer()))) + + def foreach(self, f: Callable[[Row], None]) -> None: + self.rdd.foreach(f) + + def foreachPartition(self, f: Callable[[Iterator[Row]], None]) -> None: + self.rdd.foreachPartition(f) # type: ignore[arg-type] + + def cache(self) -> ParentDataFrame: + self.is_cached = True + self._jdf.cache() + return self + + def persist( + self, + storageLevel: StorageLevel = (StorageLevel.MEMORY_AND_DISK_DESER), + ) -> ParentDataFrame: + self.is_cached = True + javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel) + self._jdf.persist(javaStorageLevel) + return self + + @property + def storageLevel(self) -> StorageLevel: + java_storage_level = self._jdf.storageLevel() + storage_level = StorageLevel( + java_storage_level.useDisk(), + java_storage_level.useMemory(), + java_storage_level.useOffHeap(), + java_storage_level.deserialized(), + java_storage_level.replication(), + ) + return storage_level + + def unpersist(self, blocking: bool = False) -> ParentDataFrame: + self.is_cached = False + self._jdf.unpersist(blocking) + return self + + def coalesce(self, numPartitions: int) -> ParentDataFrame: + return DataFrame(self._jdf.coalesce(numPartitions), self.sparkSession) + + @overload + def repartition(self, numPartitions: int, *cols: "ColumnOrName") -> ParentDataFrame: + ... + + @overload + def repartition(self, *cols: "ColumnOrName") -> ParentDataFrame: + ... 
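# Usage sketch for DataFrame.hint above (assumes an active SparkSession bound to
# `spark`; the DataFrame names are illustrative, not from this patch). Parameters
# passed to hint() go through `_converter`: Columns via _to_java_column, lists into
# typed Java arrays, and primitives as-is.
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
left = spark.range(100).withColumnRenamed("id", "k")
right = spark.range(10).withColumnRenamed("id", "k")

# A parameterless hint: mark the smaller side for a broadcast join.
joined = left.join(right.hint("broadcast"), "k")
joined.explain()  # the physical plan should show a broadcast join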
+ + def repartition( # type: ignore[misc] + self, numPartitions: Union[int, "ColumnOrName"], *cols: "ColumnOrName" + ) -> ParentDataFrame: + if isinstance(numPartitions, int): + if len(cols) == 0: + return DataFrame(self._jdf.repartition(numPartitions), self.sparkSession) + else: + return DataFrame( + self._jdf.repartition(numPartitions, self._jcols(*cols)), + self.sparkSession, + ) + elif isinstance(numPartitions, (str, Column)): + cols = (numPartitions,) + cols + return DataFrame(self._jdf.repartition(self._jcols(*cols)), self.sparkSession) + else: + raise PySparkTypeError( + error_class="NOT_COLUMN_OR_STR", + message_parameters={ + "arg_name": "numPartitions", + "arg_type": type(numPartitions).__name__, + }, + ) + + @overload + def repartitionByRange(self, numPartitions: int, *cols: "ColumnOrName") -> ParentDataFrame: + ... + + @overload + def repartitionByRange(self, *cols: "ColumnOrName") -> ParentDataFrame: + ... + + def repartitionByRange( # type: ignore[misc] + self, numPartitions: Union[int, "ColumnOrName"], *cols: "ColumnOrName" + ) -> ParentDataFrame: + if isinstance(numPartitions, int): + if len(cols) == 0: + raise PySparkValueError( + error_class="CANNOT_BE_EMPTY", + message_parameters={"item": "partition-by expression"}, + ) + else: + return DataFrame( + self._jdf.repartitionByRange(numPartitions, self._jcols(*cols)), + self.sparkSession, + ) + elif isinstance(numPartitions, (str, Column)): + cols = (numPartitions,) + cols + return DataFrame(self._jdf.repartitionByRange(self._jcols(*cols)), self.sparkSession) + else: + raise PySparkTypeError( + error_class="NOT_COLUMN_OR_INT_OR_STR", + message_parameters={ + "arg_name": "numPartitions", + "arg_type": type(numPartitions).__name__, + }, + ) + + def distinct(self) -> ParentDataFrame: + return DataFrame(self._jdf.distinct(), self.sparkSession) + + @overload + def sample(self, fraction: float, seed: Optional[int] = ...) -> ParentDataFrame: + ... + + @overload + def sample( + self, + withReplacement: Optional[bool], + fraction: float, + seed: Optional[int] = ..., + ) -> ParentDataFrame: + ... 
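# Call-form sketch for repartition / repartitionByRange, matching the dispatch
# above: a leading int sets the partition count, Column/str arguments set the
# partitioning keys, and repartitionByRange additionally requires at least one key.
# `df` is assumed to be an existing DataFrame with columns "k" and "v".
df2 = df.repartition(8)               # by count only
df3 = df.repartition(8, "k")          # by count and key
df4 = df.repartition("k", df.v)       # keys only, default partition count
df5 = df.repartitionByRange(8, "k")   # range partitioning needs a key
print(df3.rdd.getNumPartitions())     # 8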
+ + def sample( # type: ignore[misc] + self, + withReplacement: Optional[Union[float, bool]] = None, + fraction: Optional[Union[int, float]] = None, + seed: Optional[int] = None, + ) -> ParentDataFrame: + # For the cases below: + # sample(True, 0.5 [, seed]) + # sample(True, fraction=0.5 [, seed]) + # sample(withReplacement=False, fraction=0.5 [, seed]) + is_withReplacement_set = type(withReplacement) == bool and isinstance(fraction, float) + + # For the case below: + # sample(faction=0.5 [, seed]) + is_withReplacement_omitted_kwargs = withReplacement is None and isinstance(fraction, float) + + # For the case below: + # sample(0.5 [, seed]) + is_withReplacement_omitted_args = isinstance(withReplacement, float) + + if not ( + is_withReplacement_set + or is_withReplacement_omitted_kwargs + or is_withReplacement_omitted_args + ): + argtypes = [type(arg).__name__ for arg in [withReplacement, fraction, seed]] + raise PySparkTypeError( + error_class="NOT_BOOL_OR_FLOAT_OR_INT", + message_parameters={ + "arg_name": "withReplacement (optional), " + + "fraction (required) and seed (optional)", + "arg_type": ", ".join(argtypes), + }, + ) + + if is_withReplacement_omitted_args: + if fraction is not None: + seed = cast(int, fraction) + fraction = withReplacement + withReplacement = None + + seed = int(seed) if seed is not None else None + args = [arg for arg in [withReplacement, fraction, seed] if arg is not None] + jdf = self._jdf.sample(*args) + return DataFrame(jdf, self.sparkSession) + + def sampleBy( + self, col: "ColumnOrName", fractions: Dict[Any, float], seed: Optional[int] = None + ) -> ParentDataFrame: + if isinstance(col, str): + col = Column(col) + elif not isinstance(col, Column): + raise PySparkTypeError( + error_class="NOT_COLUMN_OR_STR", + message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, + ) + if not isinstance(fractions, dict): + raise PySparkTypeError( + error_class="NOT_DICT", + message_parameters={"arg_name": "fractions", "arg_type": type(fractions).__name__}, + ) + for k, v in fractions.items(): + if not isinstance(k, (float, int, str)): + raise PySparkTypeError( + error_class="DISALLOWED_TYPE_FOR_CONTAINER", + message_parameters={ + "arg_name": "fractions", + "arg_type": type(fractions).__name__, + "allowed_types": "float, int, str", + "item_type": type(k).__name__, + }, + ) + fractions[k] = float(v) + col = col._jc + seed = seed if seed is not None else random.randint(0, sys.maxsize) + return DataFrame( + self._jdf.stat().sampleBy(col, self._jmap(fractions), seed), self.sparkSession + ) + + def randomSplit( + self, weights: List[float], seed: Optional[int] = None + ) -> List[ParentDataFrame]: + for w in weights: + if w < 0.0: + raise PySparkValueError( + error_class="VALUE_NOT_POSITIVE", + message_parameters={"arg_name": "weights", "arg_value": str(w)}, + ) + seed = seed if seed is not None else random.randint(0, sys.maxsize) + df_array = self._jdf.randomSplit( + _to_list(self.sparkSession._sc, cast(List["ColumnOrName"], weights)), int(seed) + ) + return [DataFrame(df, self.sparkSession) for df in df_array] + + @property + def dtypes(self) -> List[Tuple[str, str]]: + return [(str(f.name), f.dataType.simpleString()) for f in self.schema.fields] + + @property + def columns(self) -> List[str]: + return [f.name for f in self.schema.fields] + + def colRegex(self, colName: str) -> Column: + if not isinstance(colName, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "colName", "arg_type": type(colName).__name__}, + ) + jc 
= self._jdf.colRegex(colName) + return Column(jc) + + def to(self, schema: StructType) -> ParentDataFrame: + assert schema is not None + jschema = self._jdf.sparkSession().parseDataType(schema.json()) + return DataFrame(self._jdf.to(jschema), self.sparkSession) + + def alias(self, alias: str) -> ParentDataFrame: + assert isinstance(alias, str), "alias should be a string" + return DataFrame(getattr(self._jdf, "as")(alias), self.sparkSession) + + def crossJoin(self, other: ParentDataFrame) -> ParentDataFrame: + jdf = self._jdf.crossJoin(other._jdf) + return DataFrame(jdf, self.sparkSession) + + def join( + self, + other: ParentDataFrame, + on: Optional[Union[str, List[str], Column, List[Column]]] = None, + how: Optional[str] = None, + ) -> ParentDataFrame: + if on is not None and not isinstance(on, list): + on = [on] # type: ignore[assignment] + + if on is not None: + if isinstance(on[0], str): + on = self._jseq(cast(List[str], on)) + else: + assert isinstance(on[0], Column), "on should be Column or list of Column" + on = reduce(lambda x, y: x.__and__(y), cast(List[Column], on)) + on = on._jc + + if on is None and how is None: + jdf = self._jdf.join(other._jdf) + else: + if how is None: + how = "inner" + if on is None: + on = self._jseq([]) + assert isinstance(how, str), "how should be a string" + jdf = self._jdf.join(other._jdf, on, how) + return DataFrame(jdf, self.sparkSession) + + # TODO(SPARK-22947): Fix the DataFrame API. + def _joinAsOf( + self, + other: ParentDataFrame, + leftAsOfColumn: Union[str, Column], + rightAsOfColumn: Union[str, Column], + on: Optional[Union[str, List[str], Column, List[Column]]] = None, + how: Optional[str] = None, + *, + tolerance: Optional[Column] = None, + allowExactMatches: bool = True, + direction: str = "backward", + ) -> ParentDataFrame: + """ + Perform an as-of join. + + This is similar to a left-join except that we match on the nearest + key rather than equal keys. + + .. versionchanged:: 4.0.0 + Supports Spark Connect. + + Parameters + ---------- + other : :class:`DataFrame` + Right side of the join + leftAsOfColumn : str or :class:`Column` + a string for the as-of join column name, or a Column + rightAsOfColumn : str or :class:`Column` + a string for the as-of join column name, or a Column + on : str, list or :class:`Column`, optional + a string for the join column name, a list of column names, + a join expression (Column), or a list of Columns. + If `on` is a string or a list of strings indicating the name of the join column(s), + the column(s) must exist on both sides, and this performs an equi-join. + how : str, optional + default ``inner``. Must be one of: ``inner`` and ``left``. + tolerance : :class:`Column`, optional + an asof tolerance within this range; must be compatible + with the merge index. + allowExactMatches : bool, optional + default ``True``. + direction : str, optional + default ``backward``. Must be one of: ``backward``, ``forward``, and ``nearest``. + + Examples + -------- + The following performs an as-of join between ``left`` and ``right``. + + >>> left = spark.createDataFrame([(1, "a"), (5, "b"), (10, "c")], ["a", "left_val"]) + >>> right = spark.createDataFrame([(1, 1), (2, 2), (3, 3), (6, 6), (7, 7)], + ... ["a", "right_val"]) + >>> left._joinAsOf( + ... right, leftAsOfColumn="a", rightAsOfColumn="a" + ... 
).select(left.a, 'left_val', 'right_val').sort("a").collect() + [Row(a=1, left_val='a', right_val=1), + Row(a=5, left_val='b', right_val=3), + Row(a=10, left_val='c', right_val=7)] + + >>> from pyspark.sql import functions as sf + >>> left._joinAsOf( + ... right, leftAsOfColumn="a", rightAsOfColumn="a", tolerance=sf.lit(1) + ... ).select(left.a, 'left_val', 'right_val').sort("a").collect() + [Row(a=1, left_val='a', right_val=1)] + + >>> left._joinAsOf( + ... right, leftAsOfColumn="a", rightAsOfColumn="a", how="left", tolerance=sf.lit(1) + ... ).select(left.a, 'left_val', 'right_val').sort("a").collect() + [Row(a=1, left_val='a', right_val=1), + Row(a=5, left_val='b', right_val=None), + Row(a=10, left_val='c', right_val=None)] + + >>> left._joinAsOf( + ... right, leftAsOfColumn="a", rightAsOfColumn="a", allowExactMatches=False + ... ).select(left.a, 'left_val', 'right_val').sort("a").collect() + [Row(a=5, left_val='b', right_val=3), + Row(a=10, left_val='c', right_val=7)] + + >>> left._joinAsOf( + ... right, leftAsOfColumn="a", rightAsOfColumn="a", direction="forward" + ... ).select(left.a, 'left_val', 'right_val').sort("a").collect() + [Row(a=1, left_val='a', right_val=1), + Row(a=5, left_val='b', right_val=6)] + """ + if isinstance(leftAsOfColumn, str): + leftAsOfColumn = self[leftAsOfColumn] + left_as_of_jcol = leftAsOfColumn._jc + if isinstance(rightAsOfColumn, str): + rightAsOfColumn = other[rightAsOfColumn] + right_as_of_jcol = rightAsOfColumn._jc + + if on is not None and not isinstance(on, list): + on = [on] # type: ignore[assignment] + + if on is not None: + if isinstance(on[0], str): + on = self._jseq(cast(List[str], on)) + else: + assert isinstance(on[0], Column), "on should be Column or list of Column" + on = reduce(lambda x, y: x.__and__(y), cast(List[Column], on)) + on = on._jc + + if how is None: + how = "inner" + assert isinstance(how, str), "how should be a string" + + if tolerance is not None: + assert isinstance(tolerance, Column), "tolerance should be Column" + tolerance = tolerance._jc + + jdf = self._jdf.joinAsOf( + other._jdf, + left_as_of_jcol, + right_as_of_jcol, + on, + how, + tolerance, + allowExactMatches, + direction, + ) + return DataFrame(jdf, self.sparkSession) + + def sortWithinPartitions( + self, + *cols: Union[int, str, Column, List[Union[int, str, Column]]], + **kwargs: Any, + ) -> ParentDataFrame: + jdf = self._jdf.sortWithinPartitions(self._sort_cols(cols, kwargs)) + return DataFrame(jdf, self.sparkSession) + + def sort( + self, + *cols: Union[int, str, Column, List[Union[int, str, Column]]], + **kwargs: Any, + ) -> ParentDataFrame: + jdf = self._jdf.sort(self._sort_cols(cols, kwargs)) + return DataFrame(jdf, self.sparkSession) + + orderBy = sort + + def _jseq( + self, + cols: Sequence, + converter: Optional[Callable[..., Union["PrimitiveType", "JavaObject"]]] = None, + ) -> "JavaObject": + """Return a JVM Seq of Columns from a list of Column or names""" + return _to_seq(self.sparkSession._sc, cols, converter) + + def _jmap(self, jm: Dict) -> "JavaObject": + """Return a JVM Scala Map from a dict""" + return _to_scala_map(self.sparkSession._sc, jm) + + def _jcols(self, *cols: "ColumnOrName") -> "JavaObject": + """Return a JVM Seq of Columns from a list of Column or column names + + If `cols` has only one list in it, cols[0] will be used as the list. 
+ """ + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] + return self._jseq(cols, _to_java_column) + + def _jcols_ordinal(self, *cols: "ColumnOrNameOrOrdinal") -> "JavaObject": + """Return a JVM Seq of Columns from a list of Column or column names or column ordinals. + + If `cols` has only one list in it, cols[0] will be used as the list. + """ + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] + + _cols = [] + for c in cols: + if isinstance(c, int) and not isinstance(c, bool): + if c < 1: + raise PySparkIndexError( + error_class="INDEX_NOT_POSITIVE", message_parameters={"index": str(c)} + ) + # ordinal is 1-based + _cols.append(self[c - 1]) + else: + _cols.append(c) # type: ignore[arg-type] + return self._jseq(_cols, _to_java_column) + + def _sort_cols( + self, + cols: Sequence[Union[int, str, Column, List[Union[int, str, Column]]]], + kwargs: Dict[str, Any], + ) -> "JavaObject": + """Return a JVM Seq of Columns that describes the sort order""" + if not cols: + raise PySparkValueError( + error_class="CANNOT_BE_EMPTY", + message_parameters={"item": "column"}, + ) + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] + + jcols = [] + for c in cols: + if isinstance(c, int) and not isinstance(c, bool): + # ordinal is 1-based + if c > 0: + _c = self[c - 1] + # negative ordinal means sort by desc + elif c < 0: + _c = self[-c - 1].desc() + else: + raise PySparkIndexError( + error_class="ZERO_INDEX", + message_parameters={}, + ) + else: + _c = c # type: ignore[assignment] + jcols.append(_to_java_column(cast("ColumnOrName", _c))) + + ascending = kwargs.get("ascending", True) + if isinstance(ascending, (bool, int)): + if not ascending: + jcols = [jc.desc() for jc in jcols] + elif isinstance(ascending, list): + jcols = [jc if asc else jc.desc() for asc, jc in zip(ascending, jcols)] + else: + raise PySparkTypeError( + error_class="NOT_BOOL_OR_LIST", + message_parameters={"arg_name": "ascending", "arg_type": type(ascending).__name__}, + ) + return self._jseq(jcols) + + def describe(self, *cols: Union[str, List[str]]) -> ParentDataFrame: + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] # type: ignore[assignment] + jdf = self._jdf.describe(self._jseq(cols)) + return DataFrame(jdf, self.sparkSession) + + def summary(self, *statistics: str) -> ParentDataFrame: + if len(statistics) == 1 and isinstance(statistics[0], list): + statistics = statistics[0] + jdf = self._jdf.summary(self._jseq(statistics)) + return DataFrame(jdf, self.sparkSession) + + @overload + def head(self) -> Optional[Row]: + ... + + @overload + def head(self, n: int) -> List[Row]: + ... + + def head(self, n: Optional[int] = None) -> Union[Optional[Row], List[Row]]: + if n is None: + rs = self.head(1) + return rs[0] if rs else None + return self.take(n) + + def first(self) -> Optional[Row]: + return self.head() + + @overload + def __getitem__(self, item: Union[int, str]) -> Column: + ... + + @overload + def __getitem__(self, item: Union[Column, List, Tuple]) -> ParentDataFrame: + ... 
+ + def __getitem__( + self, item: Union[int, str, Column, List, Tuple] + ) -> Union[Column, ParentDataFrame]: + if isinstance(item, str): + jc = self._jdf.apply(item) + return Column(jc) + elif isinstance(item, Column): + return self.filter(item) + elif isinstance(item, (list, tuple)): + return self.select(*item) + elif isinstance(item, int): + jc = self._jdf.apply(self.columns[item]) + return Column(jc) + else: + raise PySparkTypeError( + error_class="NOT_COLUMN_OR_FLOAT_OR_INT_OR_LIST_OR_STR", + message_parameters={"arg_name": "item", "arg_type": type(item).__name__}, + ) + + def __getattr__(self, name: str) -> Column: + if name not in self.columns: + raise PySparkAttributeError( + error_class="ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": name} + ) + jc = self._jdf.apply(name) + return Column(jc) + + def __dir__(self) -> List[str]: + attrs = set(dir(DataFrame)) + attrs.update(filter(lambda s: s.isidentifier(), self.columns)) + return sorted(attrs) + + @overload + def select(self, *cols: "ColumnOrName") -> ParentDataFrame: + ... + + @overload + def select(self, __cols: Union[List[Column], List[str]]) -> ParentDataFrame: + ... + + def select(self, *cols: "ColumnOrName") -> ParentDataFrame: # type: ignore[misc] + jdf = self._jdf.select(self._jcols(*cols)) + return DataFrame(jdf, self.sparkSession) + + @overload + def selectExpr(self, *expr: str) -> ParentDataFrame: + ... + + @overload + def selectExpr(self, *expr: List[str]) -> ParentDataFrame: + ... + + def selectExpr(self, *expr: Union[str, List[str]]) -> ParentDataFrame: + if len(expr) == 1 and isinstance(expr[0], list): + expr = expr[0] # type: ignore[assignment] + jdf = self._jdf.selectExpr(self._jseq(expr)) + return DataFrame(jdf, self.sparkSession) + + def filter(self, condition: "ColumnOrName") -> ParentDataFrame: + if isinstance(condition, str): + jdf = self._jdf.filter(condition) + elif isinstance(condition, Column): + jdf = self._jdf.filter(condition._jc) + else: + raise PySparkTypeError( + error_class="NOT_COLUMN_OR_STR", + message_parameters={"arg_name": "condition", "arg_type": type(condition).__name__}, + ) + return DataFrame(jdf, self.sparkSession) + + @overload + def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": + ... + + @overload + def groupBy(self, __cols: Union[List[Column], List[str], List[int]]) -> "GroupedData": + ... + + def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ignore[misc] + jgd = self._jdf.groupBy(self._jcols_ordinal(*cols)) + from pyspark.sql.group import GroupedData + + return GroupedData(jgd, self) + + @overload + def rollup(self, *cols: "ColumnOrName") -> "GroupedData": + ... + + @overload + def rollup(self, __cols: Union[List[Column], List[str]]) -> "GroupedData": + ... + + def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ignore[misc] + jgd = self._jdf.rollup(self._jcols_ordinal(*cols)) + from pyspark.sql.group import GroupedData + + return GroupedData(jgd, self) + + @overload + def cube(self, *cols: "ColumnOrName") -> "GroupedData": + ... + + @overload + def cube(self, __cols: Union[List[Column], List[str]]) -> "GroupedData": + ... 
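# Column-access and selection sketch for the accessors above; assumes `df` has
# columns "name" and "age".
df["age"]                     # __getitem__ with a str  -> Column
df[df.age > 21]               # __getitem__ with a Column -> filtered DataFrame
df[["name", "age"]]           # __getitem__ with a list -> select
df.select(df.name, "age")
df.selectExpr("age + 1 AS age_next")
df.filter("age > 21").groupBy("name").count()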
+ + def cube(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] + jgd = self._jdf.cube(self._jcols_ordinal(*cols)) + from pyspark.sql.group import GroupedData + + return GroupedData(jgd, self) + + def groupingSets( + self, groupingSets: Sequence[Sequence["ColumnOrName"]], *cols: "ColumnOrName" + ) -> "GroupedData": + from pyspark.sql.group import GroupedData + + jgrouping_sets = _to_seq(self._sc, [self._jcols(*inner) for inner in groupingSets]) + + jgd = self._jdf.groupingSets(jgrouping_sets, self._jcols(*cols)) + return GroupedData(jgd, self) + + def unpivot( + self, + ids: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]], + values: Optional[Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]], + variableColumnName: str, + valueColumnName: str, + ) -> ParentDataFrame: + assert ids is not None, "ids must not be None" + + def to_jcols( + cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]] + ) -> "JavaObject": + if isinstance(cols, list): + return self._jcols(*cols) + if isinstance(cols, tuple): + return self._jcols(*list(cols)) + return self._jcols(cols) + + jids = to_jcols(ids) + if values is None: + jdf = self._jdf.unpivotWithSeq(jids, variableColumnName, valueColumnName) + else: + jvals = to_jcols(values) + jdf = self._jdf.unpivotWithSeq(jids, jvals, variableColumnName, valueColumnName) + + return DataFrame(jdf, self.sparkSession) + + def melt( + self, + ids: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]], + values: Optional[Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]], + variableColumnName: str, + valueColumnName: str, + ) -> ParentDataFrame: + return self.unpivot(ids, values, variableColumnName, valueColumnName) + + def agg(self, *exprs: Union[Column, Dict[str, str]]) -> ParentDataFrame: + return self.groupBy().agg(*exprs) # type: ignore[arg-type] + + def observe( + self, + observation: Union["Observation", str], + *exprs: Column, + ) -> ParentDataFrame: + from pyspark.sql import Observation + + if len(exprs) == 0: + raise PySparkValueError( + error_class="CANNOT_BE_EMPTY", + message_parameters={"item": "exprs"}, + ) + if not all(isinstance(c, Column) for c in exprs): + raise PySparkTypeError( + error_class="NOT_LIST_OF_COLUMN", + message_parameters={"arg_name": "exprs"}, + ) + + if isinstance(observation, Observation): + return observation._on(self, *exprs) + elif isinstance(observation, str): + return DataFrame( + self._jdf.observe( + observation, exprs[0]._jc, _to_seq(self._sc, [c._jc for c in exprs[1:]]) + ), + self.sparkSession, + ) + else: + raise PySparkTypeError( + error_class="NOT_LIST_OF_COLUMN", + message_parameters={ + "arg_name": "observation", + "arg_type": type(observation).__name__, + }, + ) + + def union(self, other: ParentDataFrame) -> ParentDataFrame: + return DataFrame(self._jdf.union(other._jdf), self.sparkSession) + + def unionAll(self, other: ParentDataFrame) -> ParentDataFrame: + return self.union(other) + + def unionByName( + self, other: ParentDataFrame, allowMissingColumns: bool = False + ) -> ParentDataFrame: + return DataFrame(self._jdf.unionByName(other._jdf, allowMissingColumns), self.sparkSession) + + def intersect(self, other: ParentDataFrame) -> ParentDataFrame: + return DataFrame(self._jdf.intersect(other._jdf), self.sparkSession) + + def intersectAll(self, other: ParentDataFrame) -> ParentDataFrame: + return DataFrame(self._jdf.intersectAll(other._jdf), self.sparkSession) + + def subtract(self, other: 
ParentDataFrame) -> ParentDataFrame: + return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sparkSession) + + def dropDuplicates(self, *subset: Union[str, List[str]]) -> ParentDataFrame: + # Acceptable args should be str, ... or a single List[str] + # So if subset length is 1, it can be either single str, or a list of str + # if subset length is greater than 1, it must be a sequence of str + if len(subset) > 1: + assert all(isinstance(c, str) for c in subset) + + if not subset: + jdf = self._jdf.dropDuplicates() + elif len(subset) == 1 and isinstance(subset[0], list): + jdf = self._jdf.dropDuplicates(self._jseq(subset[0])) + else: + jdf = self._jdf.dropDuplicates(self._jseq(subset)) + return DataFrame(jdf, self.sparkSession) + + drop_duplicates = dropDuplicates + + def dropDuplicatesWithinWatermark(self, *subset: Union[str, List[str]]) -> ParentDataFrame: + # Acceptable args should be str, ... or a single List[str] + # So if subset length is 1, it can be either single str, or a list of str + # if subset length is greater than 1, it must be a sequence of str + if len(subset) > 1: + assert all(isinstance(c, str) for c in subset) + + if not subset: + jdf = self._jdf.dropDuplicatesWithinWatermark() + elif len(subset) == 1 and isinstance(subset[0], list): + jdf = self._jdf.dropDuplicatesWithinWatermark(self._jseq(subset[0])) + else: + jdf = self._jdf.dropDuplicatesWithinWatermark(self._jseq(subset)) + return DataFrame(jdf, self.sparkSession) + + def dropna( + self, + how: str = "any", + thresh: Optional[int] = None, + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, + ) -> ParentDataFrame: + if how is not None and how not in ["any", "all"]: + raise PySparkValueError( + error_class="VALUE_NOT_ANY_OR_ALL", + message_parameters={"arg_name": "how", "arg_type": how}, + ) + + if subset is None: + subset = self.columns + elif isinstance(subset, str): + subset = [subset] + elif not isinstance(subset, (list, tuple)): + raise PySparkTypeError( + error_class="NOT_LIST_OR_STR_OR_TUPLE", + message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, + ) + + if thresh is None: + thresh = len(subset) if how == "any" else 1 + + return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sparkSession) + + @overload + def fillna( + self, + value: "LiteralType", + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = ..., + ) -> ParentDataFrame: + ... + + @overload + def fillna(self, value: Dict[str, "LiteralType"]) -> ParentDataFrame: + ... 
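# Wide-to-long sketch for unpivot/melt above: `ids` columns are kept, the `values`
# columns are folded into (variableColumnName, valueColumnName) pairs. Assumes an
# active SparkSession bound to `spark`.
wide = spark.createDataFrame([(1, 11.0, 12.0)], ["id", "x", "y"])
tall = wide.unpivot("id", ["x", "y"], "var", "val")
# expected rows: (1, "x", 11.0) and (1, "y", 12.0)
tall.show()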
+ + def fillna( + self, + value: Union["LiteralType", Dict[str, "LiteralType"]], + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, + ) -> ParentDataFrame: + if not isinstance(value, (float, int, str, bool, dict)): + raise PySparkTypeError( + error_class="NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_STR", + message_parameters={"arg_name": "value", "arg_type": type(value).__name__}, + ) + + # Note that bool validates isinstance(int), but we don't want to + # convert bools to floats + + if not isinstance(value, bool) and isinstance(value, int): + value = float(value) + + if isinstance(value, dict): + return DataFrame(self._jdf.na().fill(value), self.sparkSession) + elif subset is None: + return DataFrame(self._jdf.na().fill(value), self.sparkSession) + else: + if isinstance(subset, str): + subset = [subset] + elif not isinstance(subset, (list, tuple)): + raise PySparkTypeError( + error_class="NOT_LIST_OR_TUPLE", + message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, + ) + + return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sparkSession) + + @overload + def replace( + self, + to_replace: "LiteralType", + value: "OptionalPrimitiveType", + subset: Optional[List[str]] = ..., + ) -> ParentDataFrame: + ... + + @overload + def replace( + self, + to_replace: List["LiteralType"], + value: List["OptionalPrimitiveType"], + subset: Optional[List[str]] = ..., + ) -> ParentDataFrame: + ... + + @overload + def replace( + self, + to_replace: Dict["LiteralType", "OptionalPrimitiveType"], + subset: Optional[List[str]] = ..., + ) -> ParentDataFrame: + ... + + @overload + def replace( + self, + to_replace: List["LiteralType"], + value: "OptionalPrimitiveType", + subset: Optional[List[str]] = ..., + ) -> ParentDataFrame: + ... 
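# fillna sketch matching the value handling above: a scalar fills every compatible
# column (an int value is handed to the JVM as float unless it is a bool), a dict
# maps column name -> replacement, and `subset` limits which columns are touched.
# Assumes `df` has an int column "age" and a string column "name".
df.fillna(0)                              # numeric columns only
df.fillna({"age": 0, "name": "unknown"})  # per-column replacements
df.na.fill("unknown", subset=["name"])    # same thing via DataFrameNaFunctions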
+ + def replace( # type: ignore[misc] + self, + to_replace: Union[ + "LiteralType", List["LiteralType"], Dict["LiteralType", "OptionalPrimitiveType"] + ], + value: Optional[ + Union["OptionalPrimitiveType", List["OptionalPrimitiveType"], _NoValueType] + ] = _NoValue, + subset: Optional[List[str]] = None, + ) -> ParentDataFrame: + if value is _NoValue: + if isinstance(to_replace, dict): + value = None + else: + raise PySparkTypeError( + error_class="ARGUMENT_REQUIRED", + message_parameters={"arg_name": "value", "condition": "`to_replace` is dict"}, + ) + + # Helper functions + def all_of(types: Union[Type, Tuple[Type, ...]]) -> Callable[[Iterable], bool]: + """Given a type or tuple of types and a sequence of xs + check if each x is instance of type(s) + + >>> all_of(bool)([True, False]) + True + >>> all_of(str)(["a", 1]) + False + """ + + def all_of_(xs: Iterable) -> bool: + return all(isinstance(x, types) for x in xs) + + return all_of_ + + all_of_bool = all_of(bool) + all_of_str = all_of(str) + all_of_numeric = all_of((float, int)) + + # Validate input types + valid_types = (bool, float, int, str, list, tuple) + if not isinstance(to_replace, valid_types + (dict,)): + raise PySparkTypeError( + error_class="NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_LIST_OR_STR_OR_TUPLE", + message_parameters={ + "arg_name": "to_replace", + "arg_type": type(to_replace).__name__, + }, + ) + + if ( + not isinstance(value, valid_types) + and value is not None + and not isinstance(to_replace, dict) + ): + raise PySparkTypeError( + error_class="NOT_BOOL_OR_FLOAT_OR_INT_OR_LIST_OR_NONE_OR_STR_OR_TUPLE", + message_parameters={ + "arg_name": "value", + "arg_type": type(value).__name__, + }, + ) + + if isinstance(to_replace, (list, tuple)) and isinstance(value, (list, tuple)): + if len(to_replace) != len(value): + raise PySparkValueError( + error_class="LENGTH_SHOULD_BE_THE_SAME", + message_parameters={ + "arg1": "to_replace", + "arg2": "value", + "arg1_length": str(len(to_replace)), + "arg2_length": str(len(value)), + }, + ) + + if not (subset is None or isinstance(subset, (list, tuple, str))): + raise PySparkTypeError( + error_class="NOT_LIST_OR_STR_OR_TUPLE", + message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, + ) + + # Reshape input arguments if necessary + if isinstance(to_replace, (float, int, str)): + to_replace = [to_replace] + + if isinstance(to_replace, dict): + rep_dict = to_replace + if value is not None: + warnings.warn("to_replace is a dict and value is not None. value will be ignored.") + else: + if isinstance(value, (float, int, str)) or value is None: + value = [value for _ in range(len(to_replace))] + rep_dict = dict(zip(to_replace, cast("Iterable[Optional[Union[float, str]]]", value))) + + if isinstance(subset, str): + subset = [subset] + + # Verify we were not passed in mixed type generics. + if not any( + all_of_type(rep_dict.keys()) + and all_of_type(x for x in rep_dict.values() if x is not None) + for all_of_type in [all_of_bool, all_of_str, all_of_numeric] + ): + raise PySparkValueError( + error_class="MIXED_TYPE_REPLACEMENT", + message_parameters={}, + ) + + if subset is None: + return DataFrame(self._jdf.na().replace("*", rep_dict), self.sparkSession) + else: + return DataFrame( + self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), + self.sparkSession, + ) + + @overload + def approxQuantile( + self, + col: str, + probabilities: Union[List[float], Tuple[float]], + relativeError: float, + ) -> List[float]: + ... 
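# replace() call-form sketch reflecting the reshaping logic above: a scalar or list
# is zipped into a replacement dict, a dict argument is used directly (its values
# may be None, meaning "replace with null"), and mixed-type replacements are
# rejected. Assumes `df` has a string column "name" and a numeric column "age".
df.replace("Alice", "Bob")                        # single value, all string columns
df.replace(["Alice", "Tom"], ["A", "T"], "name")  # parallel lists, limited to "name"
df.replace({10: 20, 30: None}, subset=["age"])    # dict form; 30 becomes null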
+ + @overload + def approxQuantile( + self, + col: Union[List[str], Tuple[str]], + probabilities: Union[List[float], Tuple[float]], + relativeError: float, + ) -> List[List[float]]: + ... + + def approxQuantile( + self, + col: Union[str, List[str], Tuple[str]], + probabilities: Union[List[float], Tuple[float]], + relativeError: float, + ) -> Union[List[float], List[List[float]]]: + if not isinstance(col, (str, list, tuple)): + raise PySparkTypeError( + error_class="NOT_LIST_OR_STR_OR_TUPLE", + message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, + ) + + isStr = isinstance(col, str) + + if isinstance(col, tuple): + col = list(col) + elif isStr: + col = [cast(str, col)] + + for c in col: + if not isinstance(c, str): + raise PySparkTypeError( + error_class="DISALLOWED_TYPE_FOR_CONTAINER", + message_parameters={ + "arg_name": "col", + "arg_type": type(col).__name__, + "allowed_types": "str", + "item_type": type(c).__name__, + }, + ) + col = _to_list(self._sc, cast(List["ColumnOrName"], col)) + + if not isinstance(probabilities, (list, tuple)): + raise PySparkTypeError( + error_class="NOT_LIST_OR_TUPLE", + message_parameters={ + "arg_name": "probabilities", + "arg_type": type(probabilities).__name__, + }, + ) + if isinstance(probabilities, tuple): + probabilities = list(probabilities) + for p in probabilities: + if not isinstance(p, (float, int)) or p < 0 or p > 1: + raise PySparkTypeError( + error_class="NOT_LIST_OF_FLOAT_OR_INT", + message_parameters={ + "arg_name": "probabilities", + "arg_type": type(p).__name__, + }, + ) + probabilities = _to_list(self._sc, cast(List["ColumnOrName"], probabilities)) + + if not isinstance(relativeError, (float, int)): + raise PySparkTypeError( + error_class="NOT_FLOAT_OR_INT", + message_parameters={ + "arg_name": "relativeError", + "arg_type": type(relativeError).__name__, + }, + ) + if relativeError < 0: + raise PySparkValueError( + error_class="NEGATIVE_VALUE", + message_parameters={ + "arg_name": "relativeError", + "arg_value": str(relativeError), + }, + ) + relativeError = float(relativeError) + + jaq = self._jdf.stat().approxQuantile(col, probabilities, relativeError) + jaq_list = [list(j) for j in jaq] + return jaq_list[0] if isStr else jaq_list + + def corr(self, col1: str, col2: str, method: Optional[str] = None) -> float: + if not isinstance(col1, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "col1", "arg_type": type(col1).__name__}, + ) + if not isinstance(col2, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "col2", "arg_type": type(col2).__name__}, + ) + if not method: + method = "pearson" + if not method == "pearson": + raise PySparkValueError( + error_class="VALUE_NOT_PEARSON", + message_parameters={"arg_name": "method", "arg_value": method}, + ) + return self._jdf.stat().corr(col1, col2, method) + + def cov(self, col1: str, col2: str) -> float: + if not isinstance(col1, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "col1", "arg_type": type(col1).__name__}, + ) + if not isinstance(col2, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "col2", "arg_type": type(col2).__name__}, + ) + return self._jdf.stat().cov(col1, col2) + + def crosstab(self, col1: str, col2: str) -> ParentDataFrame: + if not isinstance(col1, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "col1", "arg_type": type(col1).__name__}, + ) + if not 
isinstance(col2, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "col2", "arg_type": type(col2).__name__}, + ) + return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sparkSession) + + def freqItems( + self, cols: Union[List[str], Tuple[str]], support: Optional[float] = None + ) -> ParentDataFrame: + if isinstance(cols, tuple): + cols = list(cols) + if not isinstance(cols, list): + raise PySparkTypeError( + error_class="NOT_LIST_OR_TUPLE", + message_parameters={"arg_name": "cols", "arg_type": type(cols).__name__}, + ) + if not support: + support = 0.01 + return DataFrame( + self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sparkSession + ) + + def _ipython_key_completions_(self) -> List[str]: + """Returns the names of columns in this :class:`DataFrame`. + + Examples + -------- + >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"]) + >>> df._ipython_key_completions_() + ['age', 'name'] + + Would return illegal identifiers. + >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age 1", "name?1"]) + >>> df._ipython_key_completions_() + ['age 1', 'name?1'] + """ + return self.columns + + def withColumns(self, *colsMap: Dict[str, Column]) -> ParentDataFrame: + # Below code is to help enable kwargs in future. + assert len(colsMap) == 1 + colsMap = colsMap[0] # type: ignore[assignment] + + if not isinstance(colsMap, dict): + raise PySparkTypeError( + error_class="NOT_DICT", + message_parameters={"arg_name": "colsMap", "arg_type": type(colsMap).__name__}, + ) + + col_names = list(colsMap.keys()) + cols = list(colsMap.values()) + + return DataFrame( + self._jdf.withColumns(_to_seq(self._sc, col_names), self._jcols(*cols)), + self.sparkSession, + ) + + def withColumn(self, colName: str, col: Column) -> ParentDataFrame: + if not isinstance(col, Column): + raise PySparkTypeError( + error_class="NOT_COLUMN", + message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, + ) + return DataFrame(self._jdf.withColumn(colName, col._jc), self.sparkSession) + + def withColumnRenamed(self, existing: str, new: str) -> ParentDataFrame: + return DataFrame(self._jdf.withColumnRenamed(existing, new), self.sparkSession) + + def withColumnsRenamed(self, colsMap: Dict[str, str]) -> ParentDataFrame: + if not isinstance(colsMap, dict): + raise PySparkTypeError( + error_class="NOT_DICT", + message_parameters={"arg_name": "colsMap", "arg_type": type(colsMap).__name__}, + ) + + col_names: List[str] = [] + new_col_names: List[str] = [] + for k, v in colsMap.items(): + col_names.append(k) + new_col_names.append(v) + + return DataFrame( + self._jdf.withColumnsRenamed( + _to_seq(self._sc, col_names), _to_seq(self._sc, new_col_names) + ), + self.sparkSession, + ) + + def withMetadata(self, columnName: str, metadata: Dict[str, Any]) -> ParentDataFrame: + from py4j.java_gateway import JVMView + + if not isinstance(metadata, dict): + raise PySparkTypeError( + error_class="NOT_DICT", + message_parameters={"arg_name": "metadata", "arg_type": type(metadata).__name__}, + ) + sc = get_active_spark_context() + jmeta = cast(JVMView, sc._jvm).org.apache.spark.sql.types.Metadata.fromJson( + json.dumps(metadata) + ) + return DataFrame(self._jdf.withMetadata(columnName, jmeta), self.sparkSession) + + @overload + def drop(self, cols: "ColumnOrName") -> ParentDataFrame: + ... + + @overload + def drop(self, *cols: str) -> ParentDataFrame: + ... 
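# Stat-helper sketch for the wrappers above; assumes `df` has numeric columns
# "age" and "height". relativeError trades accuracy for speed (0.0 is exact).
quartiles = df.approxQuantile("age", [0.25, 0.5, 0.75], 0.01)
corr_val = df.stat.corr("age", "height")            # Pearson only
pairs = df.stat.crosstab("age", "height")           # contingency-table DataFrame
frequent = df.stat.freqItems(["age"], support=0.25)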
+ + def drop(self, *cols: "ColumnOrName") -> ParentDataFrame: # type: ignore[misc] + column_names: List[str] = [] + java_columns: List["JavaObject"] = [] + + for c in cols: + if isinstance(c, str): + column_names.append(c) + elif isinstance(c, Column): + java_columns.append(c._jc) + else: + raise PySparkTypeError( + error_class="NOT_COLUMN_OR_STR", + message_parameters={"arg_name": "col", "arg_type": type(c).__name__}, + ) + + jdf = self._jdf + if len(java_columns) > 0: + first_column, *remaining_columns = java_columns + jdf = jdf.drop(first_column, self._jseq(remaining_columns)) + if len(column_names) > 0: + jdf = jdf.drop(self._jseq(column_names)) + + return DataFrame(jdf, self.sparkSession) + + def toDF(self, *cols: str) -> ParentDataFrame: + for col in cols: + if not isinstance(col, str): + raise PySparkTypeError( + error_class="NOT_LIST_OF_STR", + message_parameters={"arg_name": "cols", "arg_type": type(col).__name__}, + ) + jdf = self._jdf.toDF(self._jseq(cols)) + return DataFrame(jdf, self.sparkSession) + + def transform( + self, func: Callable[..., ParentDataFrame], *args: Any, **kwargs: Any + ) -> ParentDataFrame: + result = func(self, *args, **kwargs) + assert isinstance( + result, DataFrame + ), "Func returned an instance of type [%s], " "should have been DataFrame." % type(result) + return result + + def sameSemantics(self, other: ParentDataFrame) -> bool: + if not isinstance(other, DataFrame): + raise PySparkTypeError( + error_class="NOT_DATAFRAME", + message_parameters={"arg_name": "other", "arg_type": type(other).__name__}, + ) + return self._jdf.sameSemantics(other._jdf) + + def semanticHash(self) -> int: + return self._jdf.semanticHash() + + def inputFiles(self) -> List[str]: + return list(self._jdf.inputFiles()) + + def where(self, condition: "ColumnOrName") -> ParentDataFrame: + return self.filter(condition) + + # Two aliases below were added for pandas compatibility many years ago. + # There are too many differences compared to pandas and we cannot just + # make it "compatible" by adding aliases. Therefore, we stop adding such + # aliases as of Spark 3.0. Two methods below remain just + # for legacy users currently. + @overload + def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": + ... + + @overload + def groupby(self, __cols: Union[List[Column], List[str], List[int]]) -> "GroupedData": + ... 
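# transform() sketch: chain reusable DataFrame-to-DataFrame functions without
# breaking method chaining; extra positional/keyword arguments are forwarded to the
# function. The helper name below is illustrative and assumes `df` has an "age"
# column.
from pyspark.sql import functions as sf

def with_age_bucket(input_df, width=10):
    return input_df.withColumn("age_bucket", sf.floor(sf.col("age") / width))

result = df.transform(with_age_bucket, width=5)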
+ + def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ignore[misc] + return self.groupBy(*cols) + + def writeTo(self, table: str) -> DataFrameWriterV2: + return DataFrameWriterV2(self, table) + + def pandas_api( + self, index_col: Optional[Union[str, List[str]]] = None + ) -> "PandasOnSparkDataFrame": + from pyspark.pandas.namespace import _get_index_map + from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame + from pyspark.pandas.internal import InternalFrame + + index_spark_columns, index_names = _get_index_map(self, index_col) + internal = InternalFrame( + spark_frame=self, + index_spark_columns=index_spark_columns, + index_names=index_names, # type: ignore[arg-type] + ) + return PandasOnSparkDataFrame(internal) + + def mapInPandas( + self, + func: "PandasMapIterFunction", + schema: Union[StructType, str], + barrier: bool = False, + profile: Optional[ResourceProfile] = None, + ) -> ParentDataFrame: + return PandasMapOpsMixin.mapInPandas(self, func, schema, barrier, profile) + + def mapInArrow( + self, + func: "ArrowMapIterFunction", + schema: Union[StructType, str], + barrier: bool = False, + profile: Optional[ResourceProfile] = None, + ) -> ParentDataFrame: + return PandasMapOpsMixin.mapInArrow(self, func, schema, barrier, profile) + + def toArrow(self) -> "pa.Table": + return PandasConversionMixin.toArrow(self) + + def toPandas(self) -> "PandasDataFrameLike": + return PandasConversionMixin.toPandas(self) + + @property + def executionInfo(self) -> Optional["ExecutionInfo"]: + raise PySparkValueError( + error_class="CLASSIC_OPERATION_NOT_SUPPORTED_ON_DF", + message_parameters={"member": "queryExecution"}, + ) + + +def _to_scala_map(sc: "SparkContext", jm: Dict) -> "JavaObject": + """ + Convert a dict into a JVM Map. + """ + assert sc._jvm is not None + return sc._jvm.PythonUtils.toScalaMap(jm) + + +class DataFrameNaFunctions(ParentDataFrameNaFunctions): + def __init__(self, df: ParentDataFrame): + self.df = df + + def drop( + self, + how: str = "any", + thresh: Optional[int] = None, + subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, + ) -> ParentDataFrame: + return self.df.dropna(how=how, thresh=thresh, subset=subset) + + @overload + def fill(self, value: "LiteralType", subset: Optional[List[str]] = ...) -> ParentDataFrame: + ... + + @overload + def fill(self, value: Dict[str, "LiteralType"]) -> ParentDataFrame: + ... + + def fill( + self, + value: Union["LiteralType", Dict[str, "LiteralType"]], + subset: Optional[List[str]] = None, + ) -> ParentDataFrame: + return self.df.fillna(value=value, subset=subset) # type: ignore[arg-type] + + @overload + def replace( + self, + to_replace: List["LiteralType"], + value: List["OptionalPrimitiveType"], + subset: Optional[List[str]] = ..., + ) -> ParentDataFrame: + ... + + @overload + def replace( + self, + to_replace: Dict["LiteralType", "OptionalPrimitiveType"], + subset: Optional[List[str]] = ..., + ) -> ParentDataFrame: + ... + + @overload + def replace( + self, + to_replace: List["LiteralType"], + value: "OptionalPrimitiveType", + subset: Optional[List[str]] = ..., + ) -> ParentDataFrame: + ... 
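# mapInPandas sketch for the vectorized entry points above: the function receives an
# iterator of pandas DataFrames per partition and yields pandas DataFrames matching
# `schema`. Assumes pandas and PyArrow are installed and `df` has (name, age).
def double_age(batches):
    for pdf in batches:
        pdf["age"] = pdf["age"] * 2
        yield pdf

doubled = df.mapInPandas(double_age, schema="name string, age long")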
+ + def replace( # type: ignore[misc] + self, + to_replace: Union[List["LiteralType"], Dict["LiteralType", "OptionalPrimitiveType"]], + value: Optional[ + Union["OptionalPrimitiveType", List["OptionalPrimitiveType"], _NoValueType] + ] = _NoValue, + subset: Optional[List[str]] = None, + ) -> ParentDataFrame: + return self.df.replace(to_replace, value, subset) # type: ignore[arg-type] + + +class DataFrameStatFunctions(ParentDataFrameStatFunctions): + def __init__(self, df: ParentDataFrame): + self.df = df + + @overload + def approxQuantile( + self, + col: str, + probabilities: Union[List[float], Tuple[float]], + relativeError: float, + ) -> List[float]: + ... + + @overload + def approxQuantile( + self, + col: Union[List[str], Tuple[str]], + probabilities: Union[List[float], Tuple[float]], + relativeError: float, + ) -> List[List[float]]: + ... + + def approxQuantile( + self, + col: Union[str, List[str], Tuple[str]], + probabilities: Union[List[float], Tuple[float]], + relativeError: float, + ) -> Union[List[float], List[List[float]]]: + return self.df.approxQuantile(col, probabilities, relativeError) + + def corr(self, col1: str, col2: str, method: Optional[str] = None) -> float: + return self.df.corr(col1, col2, method) + + def cov(self, col1: str, col2: str) -> float: + return self.df.cov(col1, col2) + + def crosstab(self, col1: str, col2: str) -> ParentDataFrame: + return self.df.crosstab(col1, col2) + + def freqItems(self, cols: List[str], support: Optional[float] = None) -> ParentDataFrame: + return self.df.freqItems(cols, support) + + def sampleBy( + self, col: str, fractions: Dict[Any, float], seed: Optional[int] = None + ) -> ParentDataFrame: + return self.df.sampleBy(col, fractions, seed) + + +def _test() -> None: + import doctest + from pyspark.sql import SparkSession + import pyspark.sql.dataframe + + # It inherits docstrings but doctests cannot detect them so we run + # the parent classe's doctests here directly. + globs = pyspark.sql.dataframe.__dict__.copy() + spark = ( + SparkSession.builder.master("local[4]").appName("sql.classic.dataframe tests").getOrCreate() + ) + globs["spark"] = spark + (failure_count, test_count) = doctest.testmod( + pyspark.sql.dataframe, + globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF, + ) + spark.stop() + if failure_count: + sys.exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/classic/window.py b/python/pyspark/sql/classic/window.py new file mode 100644 index 0000000000000..b5c528eec10a1 --- /dev/null +++ b/python/pyspark/sql/classic/window.py @@ -0,0 +1,146 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import sys +from typing import cast, Iterable, List, Tuple, TYPE_CHECKING, Union + +from pyspark.sql.window import ( + Window as ParentWindow, + WindowSpec as ParentWindowSpec, +) +from pyspark.sql.utils import get_active_spark_context + +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject + from pyspark.sql._typing import ColumnOrName, ColumnOrName_ + + +__all__ = ["Window", "WindowSpec"] + + +def _to_java_cols(cols: Tuple[Union["ColumnOrName", List["ColumnOrName_"]], ...]) -> "JavaObject": + from pyspark.sql.classic.column import _to_seq, _to_java_column + + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] # type: ignore[assignment] + sc = get_active_spark_context() + return _to_seq(sc, cast(Iterable["ColumnOrName"], cols), _to_java_column) + + +class Window(ParentWindow): + @staticmethod + def partitionBy(*cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> ParentWindowSpec: + from py4j.java_gateway import JVMView + + sc = get_active_spark_context() + jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.partitionBy( + _to_java_cols(cols) + ) + return WindowSpec(jspec) + + @staticmethod + def orderBy(*cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> ParentWindowSpec: + from py4j.java_gateway import JVMView + + sc = get_active_spark_context() + jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.orderBy( + _to_java_cols(cols) + ) + return WindowSpec(jspec) + + @staticmethod + def rowsBetween(start: int, end: int) -> ParentWindowSpec: + from py4j.java_gateway import JVMView + + if start <= Window._PRECEDING_THRESHOLD: + start = Window.unboundedPreceding + if end >= Window._FOLLOWING_THRESHOLD: + end = Window.unboundedFollowing + sc = get_active_spark_context() + jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.rowsBetween( + start, end + ) + return WindowSpec(jspec) + + @staticmethod + def rangeBetween(start: int, end: int) -> ParentWindowSpec: + from py4j.java_gateway import JVMView + + if start <= Window._PRECEDING_THRESHOLD: + start = Window.unboundedPreceding + if end >= Window._FOLLOWING_THRESHOLD: + end = Window.unboundedFollowing + sc = get_active_spark_context() + jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.rangeBetween( + start, end + ) + return WindowSpec(jspec) + + +class WindowSpec(ParentWindowSpec): + def __new__(cls, jspec: "JavaObject") -> "WindowSpec": + self = object.__new__(cls) + self.__init__(jspec) # type: ignore[misc] + return self + + def __init__(self, jspec: "JavaObject") -> None: + self._jspec = jspec + + def partitionBy(self, *cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> ParentWindowSpec: + return WindowSpec(self._jspec.partitionBy(_to_java_cols(cols))) + + def orderBy(self, *cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> ParentWindowSpec: + return WindowSpec(self._jspec.orderBy(_to_java_cols(cols))) + + def rowsBetween(self, start: int, end: int) -> ParentWindowSpec: + if start <= Window._PRECEDING_THRESHOLD: + start = Window.unboundedPreceding + if end >= Window._FOLLOWING_THRESHOLD: + end = Window.unboundedFollowing + return WindowSpec(self._jspec.rowsBetween(start, end)) + + def rangeBetween(self, start: int, end: int) -> ParentWindowSpec: + if start <= Window._PRECEDING_THRESHOLD: + start = Window.unboundedPreceding + if end >= Window._FOLLOWING_THRESHOLD: + end = Window.unboundedFollowing + return WindowSpec(self._jspec.rangeBetween(start, end)) + + +def _test() -> None: + import doctest + from pyspark.sql import 
SparkSession + import pyspark.sql.window + + # It inherits docstrings but doctests cannot detect them so we run + # the parent classe's doctests here directly. + globs = pyspark.sql.window.__dict__.copy() + spark = ( + SparkSession.builder.master("local[4]").appName("sql.classic.window tests").getOrCreate() + ) + globs["spark"] = spark + (failure_count, test_count) = doctest.testmod( + pyspark.sql.window, + globs=globs, + optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF, + ) + spark.stop() + if failure_count: + sys.exit(-1) + + +if __name__ == "__main__": + _test() diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index fb266b03c2ffd..4ea621b626bb8 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -15,228 +15,28 @@ # limitations under the License. # +# mypy: disable-error-code="empty-body" + import sys -import json -import warnings -import inspect from typing import ( - cast, overload, Any, - Callable, - Iterable, - List, - Optional, - Tuple, TYPE_CHECKING, Union, ) -from pyspark.errors import PySparkAttributeError, PySparkTypeError, PySparkValueError +from pyspark.sql.utils import dispatch_col_method from pyspark.sql.types import DataType -from pyspark.sql.utils import get_active_spark_context +from pyspark.errors import PySparkValueError if TYPE_CHECKING: from py4j.java_gateway import JavaObject - from pyspark.core.context import SparkContext - from pyspark.sql._typing import ColumnOrName, LiteralType, DecimalLiteral, DateTimeLiteral + from pyspark.sql._typing import LiteralType, DecimalLiteral, DateTimeLiteral from pyspark.sql.window import WindowSpec __all__ = ["Column"] -def _create_column_from_literal(literal: Union["LiteralType", "DecimalLiteral"]) -> "Column": - from py4j.java_gateway import JVMView - - sc = get_active_spark_context() - return cast(JVMView, sc._jvm).functions.lit(literal) - - -def _create_column_from_name(name: str) -> "Column": - from py4j.java_gateway import JVMView - - sc = get_active_spark_context() - return cast(JVMView, sc._jvm).functions.col(name) - - -def _to_java_column(col: "ColumnOrName") -> "JavaObject": - if isinstance(col, Column): - jcol = col._jc - elif isinstance(col, str): - jcol = _create_column_from_name(col) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_STR", - message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, - ) - return jcol - - -def _to_java_expr(col: "ColumnOrName") -> "JavaObject": - return _to_java_column(col).expr() - - -@overload -def _to_seq(sc: "SparkContext", cols: Iterable["JavaObject"]) -> "JavaObject": - ... - - -@overload -def _to_seq( - sc: "SparkContext", - cols: Iterable["ColumnOrName"], - converter: Optional[Callable[["ColumnOrName"], "JavaObject"]], -) -> "JavaObject": - ... - - -def _to_seq( - sc: "SparkContext", - cols: Union[Iterable["ColumnOrName"], Iterable["JavaObject"]], - converter: Optional[Callable[["ColumnOrName"], "JavaObject"]] = None, -) -> "JavaObject": - """ - Convert a list of Columns (or names) into a JVM Seq of Column. - - An optional `converter` could be used to convert items in `cols` - into JVM Column objects. - """ - if converter: - cols = [converter(c) for c in cols] - assert sc._jvm is not None - return sc._jvm.PythonUtils.toSeq(cols) - - -def _to_list( - sc: "SparkContext", - cols: List["ColumnOrName"], - converter: Optional[Callable[["ColumnOrName"], "JavaObject"]] = None, -) -> "JavaObject": - """ - Convert a list of Columns (or names) into a JVM (Scala) List of Columns. 
- - An optional `converter` could be used to convert items in `cols` - into JVM Column objects. - """ - if converter: - cols = [converter(c) for c in cols] - assert sc._jvm is not None - return sc._jvm.PythonUtils.toList(cols) - - -def _unary_op( - name: str, - doc: str = "unary operator", -) -> Callable[["Column"], "Column"]: - """Create a method for given unary operator""" - - def _(self: "Column") -> "Column": - jc = getattr(self._jc, name)() - return Column(jc) - - _.__doc__ = doc - return _ - - -def _func_op(name: str, doc: str = "") -> Callable[["Column"], "Column"]: - def _(self: "Column") -> "Column": - from py4j.java_gateway import JVMView - - sc = get_active_spark_context() - jc = getattr(cast(JVMView, sc._jvm).functions, name)(self._jc) - return Column(jc) - - _.__doc__ = doc - return _ - - -def _bin_func_op( - name: str, - reverse: bool = False, - doc: str = "binary function", -) -> Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral"]], "Column"]: - def _(self: "Column", other: Union["Column", "LiteralType", "DecimalLiteral"]) -> "Column": - from py4j.java_gateway import JVMView - - sc = get_active_spark_context() - fn = getattr(cast(JVMView, sc._jvm).functions, name) - jc = other._jc if isinstance(other, Column) else _create_column_from_literal(other) - njc = fn(self._jc, jc) if not reverse else fn(jc, self._jc) - return Column(njc) - - _.__doc__ = doc - return _ - - -def _bin_op( - name: str, - doc: str = "binary operator", -) -> Callable[ - ["Column", Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"]], "Column" -]: - """Create a method for given binary operator""" - binary_operator_map = { - "plus": "+", - "minus": "-", - "divide": "/", - "multiply": "*", - "mod": "%", - "equalTo": "=", - "lt": "<", - "leq": "<=", - "geq": ">=", - "gt": ">", - "eqNullSafe": "<=>", - "bitwiseOR": "|", - "bitwiseAND": "&", - "bitwiseXOR": "^", - # Just following JVM rule even if the names of source and target are the same. - "and": "and", - "or": "or", - } - - def _( - self: "Column", - other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"], - ) -> "Column": - jc = other._jc if isinstance(other, Column) else other - if name in binary_operator_map: - from pyspark.sql import SparkSession - - spark = SparkSession._getActiveSessionOrCreate() - stack = list(reversed(inspect.stack())) - depth = int( - spark.conf.get("spark.sql.stackTracesInDataFrameContext") # type: ignore[arg-type] - ) - selected_frames = stack[:depth] - call_sites = [f"{frame.filename}:{frame.lineno}" for frame in selected_frames] - call_site_str = "\n".join(call_sites) - - njc = getattr(self._jc, "fn")(binary_operator_map[name], jc, name, call_site_str) - else: - njc = getattr(self._jc, name)(jc) - return Column(njc) - - _.__doc__ = doc - _.__name__ = name - return _ - - -def _reverse_op( - name: str, - doc: str = "binary operator", -) -> Callable[["Column", Union["LiteralType", "DecimalLiteral"]], "Column"]: - """Create a method for binary operator (this object is on right side)""" - - def _(self: "Column", other: Union["LiteralType", "DecimalLiteral"]) -> "Column": - jother = _create_column_from_literal(other) - jc = getattr(jother, name)(self._jc) - return Column(jc) - - _.__doc__ = doc - return _ - - class Column: """ @@ -268,146 +68,251 @@ class Column: Column<...> """ + # HACK ALERT!! this is to reduce the backward compatibility concern, and returns + # Spark Classic Column by default. This is NOT an API, and NOT supposed to + # be directly invoked. 
DO NOT use this constructor. + def __new__( + cls, + jc: "JavaObject", + ) -> "Column": + from pyspark.sql.classic.column import Column + + return Column.__new__(Column, jc) + def __init__(self, jc: "JavaObject") -> None: self._jc = jc # arithmetic operators - __neg__ = _func_op("negate") - __add__ = cast( - Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral"]], "Column"], - _bin_op("plus"), - ) - __sub__ = cast( - Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral"]], "Column"], - _bin_op("minus"), - ) - __mul__ = cast( - Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral"]], "Column"], - _bin_op("multiply"), - ) - __div__ = cast( - Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral"]], "Column"], - _bin_op("divide"), - ) - __truediv__ = cast( - Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral"]], "Column"], - _bin_op("divide"), - ) - __mod__ = cast( - Callable[["Column", Union["Column", "LiteralType", "DecimalLiteral"]], "Column"], - _bin_op("mod"), - ) - __radd__ = cast( - Callable[["Column", Union["LiteralType", "DecimalLiteral"]], "Column"], _bin_op("plus") - ) - __rsub__ = _reverse_op("minus") - __rmul__ = cast( - Callable[["Column", Union["LiteralType", "DecimalLiteral"]], "Column"], _bin_op("multiply") - ) - __rdiv__ = _reverse_op("divide") - __rtruediv__ = _reverse_op("divide") - __rmod__ = _reverse_op("mod") - - __pow__ = _bin_func_op("pow") - __rpow__ = cast( - Callable[["Column", Union["LiteralType", "DecimalLiteral"]], "Column"], - _bin_func_op("pow", reverse=True), - ) + @dispatch_col_method + def __neg__(self) -> "Column": + ... + + @dispatch_col_method + def __add__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __sub__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __mul__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __div__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __truediv__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __mod__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __radd__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __rsub__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __rmul__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __rdiv__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __rtruediv__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __rmod__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... 
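# --- Editor's note (illustrative aside, not part of the patch) ---
# The operator methods above are interface stubs: their bodies are `...` and
# `@dispatch_col_method` forwards each call to a concrete implementation. The
# decorator itself lives in pyspark.sql.utils and is not shown in this patch;
# the self-contained toy below (made-up names: `pick_backend`, `dispatch`,
# `Interface`, `UpperBackend`) only sketches the general stub-plus-dispatch
# shape under that assumption.
import functools
from typing import Any, Callable


def pick_backend() -> type:
    # A real dispatcher would choose between the classic (py4j-backed) and the
    # Spark Connect Column classes; the choice is hard-coded for this demo.
    return UpperBackend


def dispatch(f: Callable[..., Any]) -> Callable[..., Any]:
    @functools.wraps(f)
    def wrapped(*args: Any, **kwargs: Any) -> Any:
        # Resolve the same-named method on the chosen backend and call it.
        return getattr(pick_backend(), f.__name__)(*args, **kwargs)

    return wrapped


class Interface:
    @dispatch
    def shout(self, text: str) -> str:
        ...


class UpperBackend(Interface):
    def shout(self, text: str) -> str:
        return text.upper() + "!"


print(Interface().shout("hello"))  # prints: HELLO!
# --- End of editor's note ---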
+ + @dispatch_col_method + def __pow__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __rpow__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... # logistic operators + @dispatch_col_method def __eq__( # type: ignore[override] self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"], ) -> "Column": """binary function""" - return _bin_op("equalTo")(self, other) + ... + @dispatch_col_method def __ne__( # type: ignore[override] self, other: Any, ) -> "Column": """binary function""" - return _bin_op("notEqual")(self, other) + ... - __lt__ = _bin_op("lt") - __le__ = _bin_op("leq") - __ge__ = _bin_op("geq") - __gt__ = _bin_op("gt") + @dispatch_col_method + def __lt__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... - _eqNullSafe_doc = """ - Equality test that is safe for null values. + @dispatch_col_method + def __le__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... - .. versionadded:: 2.3.0 + @dispatch_col_method + def __ge__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... - .. versionchanged:: 3.4.0 - Supports Spark Connect. + @dispatch_col_method + def __gt__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... - Parameters - ---------- - other - a value or :class:`Column` + @dispatch_col_method + def eqNullSafe( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + """ + Equality test that is safe for null values. - Examples - -------- - >>> from pyspark.sql import Row - >>> df1 = spark.createDataFrame([ - ... Row(id=1, value='foo'), - ... Row(id=2, value=None) - ... ]) - >>> df1.select( - ... df1['value'] == 'foo', - ... df1['value'].eqNullSafe('foo'), - ... df1['value'].eqNullSafe(None) - ... ).show() - +-------------+---------------+----------------+ - |(value = foo)|(value <=> foo)|(value <=> NULL)| - +-------------+---------------+----------------+ - | true| true| false| - | NULL| false| true| - +-------------+---------------+----------------+ - >>> df2 = spark.createDataFrame([ - ... Row(value = 'bar'), - ... Row(value = None) - ... ]) - >>> df1.join(df2, df1["value"] == df2["value"]).count() - 0 - >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count() - 1 - >>> df2 = spark.createDataFrame([ - ... Row(id=1, value=float('NaN')), - ... Row(id=2, value=42.0), - ... Row(id=3, value=None) - ... ]) - >>> df2.select( - ... df2['value'].eqNullSafe(None), - ... df2['value'].eqNullSafe(float('NaN')), - ... df2['value'].eqNullSafe(42.0) - ... ).show() - +----------------+---------------+----------------+ - |(value <=> NULL)|(value <=> NaN)|(value <=> 42.0)| - +----------------+---------------+----------------+ - | false| true| false| - | false| false| true| - | true| false| false| - +----------------+---------------+----------------+ - - Notes - ----- - Unlike Pandas, PySpark doesn't consider NaN values to be NULL. See the - `NaN Semantics `_ - for details. - """ - eqNullSafe = _bin_op("eqNullSafe", _eqNullSafe_doc) + .. versionadded:: 2.3.0 + + .. versionchanged:: 3.4.0 + Supports Spark Connect. 
+ + Parameters + ---------- + other + a value or :class:`Column` + + Examples + -------- + >>> from pyspark.sql import Row + >>> df1 = spark.createDataFrame([ + ... Row(id=1, value='foo'), + ... Row(id=2, value=None) + ... ]) + >>> df1.select( + ... df1['value'] == 'foo', + ... df1['value'].eqNullSafe('foo'), + ... df1['value'].eqNullSafe(None) + ... ).show() + +-------------+---------------+----------------+ + |(value = foo)|(value <=> foo)|(value <=> NULL)| + +-------------+---------------+----------------+ + | true| true| false| + | NULL| false| true| + +-------------+---------------+----------------+ + >>> df2 = spark.createDataFrame([ + ... Row(value = 'bar'), + ... Row(value = None) + ... ]) + >>> df1.join(df2, df1["value"] == df2["value"]).count() + 0 + >>> df1.join(df2, df1["value"].eqNullSafe(df2["value"])).count() + 1 + >>> df2 = spark.createDataFrame([ + ... Row(id=1, value=float('NaN')), + ... Row(id=2, value=42.0), + ... Row(id=3, value=None) + ... ]) + >>> df2.select( + ... df2['value'].eqNullSafe(None), + ... df2['value'].eqNullSafe(float('NaN')), + ... df2['value'].eqNullSafe(42.0) + ... ).show() + +----------------+---------------+----------------+ + |(value <=> NULL)|(value <=> NaN)|(value <=> 42.0)| + +----------------+---------------+----------------+ + | false| true| false| + | false| false| true| + | true| false| false| + +----------------+---------------+----------------+ + + Notes + ----- + Unlike Pandas, PySpark doesn't consider NaN values to be NULL. See the + `NaN Semantics `_ + for details. + """ + ... # `and`, `or`, `not` cannot be overloaded in Python, # so use bitwise operators as boolean operators - __and__ = _bin_op("and") - __or__ = _bin_op("or") - __invert__ = _func_op("not") - __rand__ = _bin_op("and") - __ror__ = _bin_op("or") + @dispatch_col_method + def __and__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __or__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __invert__(self) -> "Column": + ... + + @dispatch_col_method + def __rand__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... + + @dispatch_col_method + def __ror__( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + ... # container operators + @dispatch_col_method def __contains__(self, item: Any) -> None: raise PySparkValueError( error_class="CANNOT_APPLY_IN_FOR_COLUMN", @@ -415,68 +320,82 @@ def __contains__(self, item: Any) -> None: ) # bitwise operators - _bitwiseOR_doc = """ - Compute bitwise OR of this expression with another expression. + @dispatch_col_method + def bitwiseOR( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + """ " + Compute bitwise OR of this expression with another expression. - .. versionchanged:: 3.4.0 - Supports Spark Connect. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Parameters - ---------- - other - a value or :class:`Column` to calculate bitwise or(|) with - this :class:`Column`. + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise or(|) with + this :class:`Column`. 
- Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(a=170, b=75)]) - >>> df.select(df.a.bitwiseOR(df.b)).collect() - [Row((a | b)=235)] - """ - _bitwiseAND_doc = """ - Compute bitwise AND of this expression with another expression. + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([Row(a=170, b=75)]) + >>> df.select(df.a.bitwiseOR(df.b)).collect() + [Row((a | b)=235)] + """ + ... - .. versionchanged:: 3.4.0 - Supports Spark Connect. + @dispatch_col_method + def bitwiseAND( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + """ + Compute bitwise AND of this expression with another expression. - Parameters - ---------- - other - a value or :class:`Column` to calculate bitwise and(&) with - this :class:`Column`. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(a=170, b=75)]) - >>> df.select(df.a.bitwiseAND(df.b)).collect() - [Row((a & b)=10)] - """ - _bitwiseXOR_doc = """ - Compute bitwise XOR of this expression with another expression. + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise and(&) with + this :class:`Column`. - .. versionchanged:: 3.4.0 - Supports Spark Connect. + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([Row(a=170, b=75)]) + >>> df.select(df.a.bitwiseAND(df.b)).collect() + [Row((a & b)=10)] + """ + ... + + @dispatch_col_method + def bitwiseXOR( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + """ + Compute bitwise XOR of this expression with another expression. - Parameters - ---------- - other - a value or :class:`Column` to calculate bitwise xor(^) with - this :class:`Column`. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(a=170, b=75)]) - >>> df.select(df.a.bitwiseXOR(df.b)).collect() - [Row((a ^ b)=225)] - """ + Parameters + ---------- + other + a value or :class:`Column` to calculate bitwise xor(^) with + this :class:`Column`. - bitwiseOR = _bin_op("bitwiseOR", _bitwiseOR_doc) - bitwiseAND = _bin_op("bitwiseAND", _bitwiseAND_doc) - bitwiseXOR = _bin_op("bitwiseXOR", _bitwiseXOR_doc) + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([Row(a=170, b=75)]) + >>> df.select(df.a.bitwiseXOR(df.b)).collect() + [Row((a ^ b)=225)] + """ + ... + @dispatch_col_method def getItem(self, key: Any) -> "Column": """ An expression that gets an item at position ``ordinal`` out of a list, @@ -511,15 +430,9 @@ def getItem(self, key: Any) -> "Column": | 1| value| +----+------+ """ - if isinstance(key, Column): - warnings.warn( - "A column as 'key' in getItem is deprecated as of Spark 3.0, and will not " - "be supported in the future release. Use `column[key]` or `column.key` syntax " - "instead.", - FutureWarning, - ) - return self[key] + ... + @dispatch_col_method def getField(self, name: Any) -> "Column": """ An expression that gets a field by name in a :class:`StructType`. @@ -559,15 +472,9 @@ def getField(self, name: Any) -> "Column": | 1| +---+ """ - if isinstance(name, Column): - warnings.warn( - "A column as 'name' in getField is deprecated as of Spark 3.0, and will not " - "be supported in the future release. 
Use `column[name]` or `column.name` syntax " - "instead.", - FutureWarning, - ) - return self[name] + ... + @dispatch_col_method def withField(self, fieldName: str, col: "Column") -> "Column": """ An expression that adds/replaces a field in :class:`StructType` by name. @@ -609,20 +516,9 @@ def withField(self, fieldName: str, col: "Column") -> "Column": | 4| +---+ """ - if not isinstance(fieldName, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "fieldName", "arg_type": type(fieldName).__name__}, - ) - - if not isinstance(col, Column): - raise PySparkTypeError( - error_class="NOT_COLUMN", - message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, - ) - - return Column(self._jc.withField(fieldName, col._jc)) + ... + @dispatch_col_method def dropFields(self, *fieldNames: str) -> "Column": """ An expression that drops fields in :class:`StructType` by name. @@ -687,10 +583,9 @@ def dropFields(self, *fieldNames: str) -> "Column": +--------------+ """ - sc = get_active_spark_context() - jc = self._jc.dropFields(_to_seq(sc, fieldNames)) - return Column(jc) + ... + @dispatch_col_method def __getattr__(self, item: Any) -> "Column": """ An expression that gets an item at position ``ordinal`` out of a list, @@ -721,13 +616,9 @@ def __getattr__(self, item: Any) -> "Column": | value| +------+ """ - if item.startswith("__"): - raise PySparkAttributeError( - error_class="CANNOT_ACCESS_TO_DUNDER", - message_parameters={}, - ) - return self[item] + ... + @dispatch_col_method def __getitem__(self, k: Any) -> "Column": """ An expression that gets an item at position ``ordinal`` out of a list, @@ -759,85 +650,90 @@ def __getitem__(self, k: Any) -> "Column": | abc| value| +---------------+------+ """ - if isinstance(k, slice): - if k.step is not None: - raise PySparkValueError( - error_class="SLICE_WITH_STEP", - message_parameters={}, - ) - return self.substr(k.start, k.stop) - else: - return _bin_op("apply")(self, k) + ... + @dispatch_col_method def __iter__(self) -> None: - raise PySparkTypeError( - error_class="NOT_ITERABLE", message_parameters={"objectName": "Column"} - ) + ... # string methods - _contains_doc = """ - Contains the other element. Returns a boolean :class:`Column` based on a string match. + @dispatch_col_method + def contains( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + """ + Contains the other element. Returns a boolean :class:`Column` based on a string match. - .. versionchanged:: 3.4.0 - Supports Spark Connect. + .. versionchanged:: 3.4.0 + Supports Spark Connect. + + Parameters + ---------- + other + string in line. A value as a literal or a :class:`Column`. - Parameters - ---------- - other - string in line. A value as a literal or a :class:`Column`. + Examples + -------- + >>> df = spark.createDataFrame( + ... [(2, "Alice"), (5, "Bob")], ["age", "name"]) + >>> df.filter(df.name.contains('o')).collect() + [Row(age=5, name='Bob')] + """ + ... - Examples - -------- - >>> df = spark.createDataFrame( - ... [(2, "Alice"), (5, "Bob")], ["age", "name"]) - >>> df.filter(df.name.contains('o')).collect() - [Row(age=5, name='Bob')] - """ - _startswith_doc = """ - String starts with. Returns a boolean :class:`Column` based on a string match. + @dispatch_col_method + def startswith( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + """ + String starts with. Returns a boolean :class:`Column` based on a string match. - .. 
versionchanged:: 3.4.0 - Supports Spark Connect. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Parameters - ---------- - other : :class:`Column` or str - string at start of line (do not use a regex `^`) + Parameters + ---------- + other : :class:`Column` or str + string at start of line (do not use a regex `^`) - Examples - -------- - >>> df = spark.createDataFrame( - ... [(2, "Alice"), (5, "Bob")], ["age", "name"]) - >>> df.filter(df.name.startswith('Al')).collect() - [Row(age=2, name='Alice')] - >>> df.filter(df.name.startswith('^Al')).collect() - [] - """ - _endswith_doc = """ - String ends with. Returns a boolean :class:`Column` based on a string match. + Examples + -------- + >>> df = spark.createDataFrame( + ... [(2, "Alice"), (5, "Bob")], ["age", "name"]) + >>> df.filter(df.name.startswith('Al')).collect() + [Row(age=2, name='Alice')] + >>> df.filter(df.name.startswith('^Al')).collect() + [] + """ + ... - .. versionchanged:: 3.4.0 - Supports Spark Connect. + @dispatch_col_method + def endswith( + self, other: Union["Column", "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> "Column": + """ + String ends with. Returns a boolean :class:`Column` based on a string match. - Parameters - ---------- - other : :class:`Column` or str - string at end of line (do not use a regex `$`) + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Examples - -------- - >>> df = spark.createDataFrame( - ... [(2, "Alice"), (5, "Bob")], ["age", "name"]) - >>> df.filter(df.name.endswith('ice')).collect() - [Row(age=2, name='Alice')] - >>> df.filter(df.name.endswith('ice$')).collect() - [] - """ + Parameters + ---------- + other : :class:`Column` or str + string at end of line (do not use a regex `$`) - contains = _bin_op("contains", _contains_doc) - startswith = _bin_op("startsWith", _startswith_doc) - endswith = _bin_op("endsWith", _endswith_doc) + Examples + -------- + >>> df = spark.createDataFrame( + ... [(2, "Alice"), (5, "Bob")], ["age", "name"]) + >>> df.filter(df.name.endswith('ice')).collect() + [Row(age=2, name='Alice')] + >>> df.filter(df.name.endswith('ice$')).collect() + [] + """ + ... + @dispatch_col_method def like(self: "Column", other: str) -> "Column": """ SQL like expression. Returns a boolean :class:`Column` based on a SQL LIKE match. @@ -867,9 +763,9 @@ def like(self: "Column", other: str) -> "Column": >>> df.filter(df.name.like('Al%')).collect() [Row(age=2, name='Alice')] """ - njc = getattr(self._jc, "like")(other) - return Column(njc) + ... + @dispatch_col_method def rlike(self: "Column", other: str) -> "Column": """ SQL RLIKE expression (LIKE with Regex). Returns a boolean :class:`Column` based on a regex @@ -896,9 +792,9 @@ def rlike(self: "Column", other: str) -> "Column": >>> df.filter(df.name.rlike('ice$')).collect() [Row(age=2, name='Alice')] """ - njc = getattr(self._jc, "rlike")(other) - return Column(njc) + ... + @dispatch_col_method def ilike(self: "Column", other: str) -> "Column": """ SQL ILIKE expression (case insensitive LIKE). Returns a boolean :class:`Column` @@ -931,8 +827,7 @@ def ilike(self: "Column", other: str) -> "Column": >>> df.filter(df.name.ilike('%Ice')).collect() [Row(age=2, name='Alice')] """ - njc = getattr(self._jc, "ilike")(other) - return Column(njc) + ... @overload def substr(self, startPos: int, length: int) -> "Column": @@ -942,6 +837,7 @@ def substr(self, startPos: int, length: int) -> "Column": def substr(self, startPos: "Column", length: "Column") -> "Column": ... 
+ @dispatch_col_method def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) -> "Column": """ Return a :class:`Column` which is a substring of the column. @@ -980,27 +876,9 @@ def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) - >>> df.select(df.name.substr(df.sidx, df.eidx).alias("col")).collect() [Row(col='ice'), Row(col='ob')] """ - if type(startPos) != type(length): - raise PySparkTypeError( - error_class="NOT_SAME_TYPE", - message_parameters={ - "arg_name1": "startPos", - "arg_name2": "length", - "arg_type1": type(startPos).__name__, - "arg_type2": type(length).__name__, - }, - ) - if isinstance(startPos, int): - jc = self._jc.substr(startPos, length) - elif isinstance(startPos, Column): - jc = self._jc.substr(startPos._jc, cast("Column", length)._jc) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_INT", - message_parameters={"arg_name": "startPos", "arg_type": type(startPos).__name__}, - ) - return Column(jc) + ... + @dispatch_col_method def isin(self, *cols: Any) -> "Column": """ A boolean expression that is evaluated to true if the value of this @@ -1054,164 +932,184 @@ def isin(self, *cols: Any) -> "Column": | 8|Mike| +---+----+ """ - if len(cols) == 1 and isinstance(cols[0], (list, set)): - cols = cast(Tuple, cols[0]) - cols = cast( - Tuple, - [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols], - ) - sc = get_active_spark_context() - jc = getattr(self._jc, "isin")(_to_seq(sc, cols)) - return Column(jc) + ... # order - _asc_doc = """ - Returns a sort expression based on the ascending order of the column. + @dispatch_col_method + def asc(self) -> "Column": + """ + Returns a sort expression based on the ascending order of the column. - .. versionchanged:: 3.4.0 - Supports Spark Connect. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"]) - >>> df.select(df.name).orderBy(df.name.asc()).collect() - [Row(name='Alice'), Row(name='Tom')] - """ - _asc_nulls_first_doc = """ - Returns a sort expression based on ascending order of the column, and null values - return before non-null values. + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"]) + >>> df.select(df.name).orderBy(df.name.asc()).collect() + [Row(name='Alice'), Row(name='Tom')] + """ + ... - .. versionadded:: 2.4.0 + @dispatch_col_method + def asc_nulls_first(self) -> "Column": + """ + Returns a sort expression based on ascending order of the column, and null values + return before non-null values. - .. versionchanged:: 3.4.0 - Supports Spark Connect. + .. versionadded:: 2.4.0 - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) - >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect() - [Row(name=None), Row(name='Alice'), Row(name='Tom')] + .. versionchanged:: 3.4.0 + Supports Spark Connect. - """ - _asc_nulls_last_doc = """ - Returns a sort expression based on ascending order of the column, and null values - appear after non-null values. + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame( + ... 
[('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) + >>> df.select(df.name).orderBy(df.name.asc_nulls_first()).collect() + [Row(name=None), Row(name='Alice'), Row(name='Tom')] - .. versionadded:: 2.4.0 + """ + ... - .. versionchanged:: 3.4.0 - Supports Spark Connect. + @dispatch_col_method + def asc_nulls_last(self) -> "Column": + """ + Returns a sort expression based on ascending order of the column, and null values + appear after non-null values. - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) - >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect() - [Row(name='Alice'), Row(name='Tom'), Row(name=None)] + .. versionadded:: 2.4.0 - """ - _desc_doc = """ - Returns a sort expression based on the descending order of the column. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - .. versionadded:: 2.4.0 + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame( + ... [('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) + >>> df.select(df.name).orderBy(df.name.asc_nulls_last()).collect() + [Row(name='Alice'), Row(name='Tom'), Row(name=None)] - .. versionchanged:: 3.4.0 - Supports Spark Connect. + """ + ... - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"]) - >>> df.select(df.name).orderBy(df.name.desc()).collect() - [Row(name='Tom'), Row(name='Alice')] - """ - _desc_nulls_first_doc = """ - Returns a sort expression based on the descending order of the column, and null values - appear before non-null values. + @dispatch_col_method + def desc(self) -> "Column": + """ + Returns a sort expression based on the descending order of the column. - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.0 - .. versionchanged:: 3.4.0 - Supports Spark Connect. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) - >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect() - [Row(name=None), Row(name='Tom'), Row(name='Alice')] + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([('Tom', 80), ('Alice', None)], ["name", "height"]) + >>> df.select(df.name).orderBy(df.name.desc()).collect() + [Row(name='Tom'), Row(name='Alice')] + """ + ... - """ - _desc_nulls_last_doc = """ - Returns a sort expression based on the descending order of the column, and null values - appear after non-null values. + @dispatch_col_method + def desc_nulls_first(self) -> "Column": + """ + Returns a sort expression based on the descending order of the column, and null values + appear before non-null values. - .. versionadded:: 2.4.0 + .. versionadded:: 2.4.0 - .. versionchanged:: 3.4.0 - Supports Spark Connect. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) - >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect() - [Row(name='Tom'), Row(name='Alice'), Row(name=None)] - """ + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame( + ... 
[('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) + >>> df.select(df.name).orderBy(df.name.desc_nulls_first()).collect() + [Row(name=None), Row(name='Tom'), Row(name='Alice')] - asc = _unary_op("asc", _asc_doc) - asc_nulls_first = _unary_op("asc_nulls_first", _asc_nulls_first_doc) - asc_nulls_last = _unary_op("asc_nulls_last", _asc_nulls_last_doc) - desc = _unary_op("desc", _desc_doc) - desc_nulls_first = _unary_op("desc_nulls_first", _desc_nulls_first_doc) - desc_nulls_last = _unary_op("desc_nulls_last", _desc_nulls_last_doc) + """ + ... - _isNull_doc = """ - True if the current expression is null. + @dispatch_col_method + def desc_nulls_last(self) -> "Column": + """ + Returns a sort expression based on the descending order of the column, and null values + appear after non-null values. - .. versionchanged:: 3.4.0 - Supports Spark Connect. + .. versionadded:: 2.4.0 - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)]) - >>> df.filter(df.height.isNull()).collect() - [Row(name='Alice', height=None)] - """ - _isNotNull_doc = """ - True if the current expression is NOT null. + .. versionchanged:: 3.4.0 + Supports Spark Connect. - .. versionchanged:: 3.4.0 - Supports Spark Connect. + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame( + ... [('Tom', 80), (None, 60), ('Alice', None)], ["name", "height"]) + >>> df.select(df.name).orderBy(df.name.desc_nulls_last()).collect() + [Row(name='Tom'), Row(name='Alice'), Row(name=None)] + """ + ... - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)]) - >>> df.filter(df.height.isNotNull()).collect() - [Row(name='Tom', height=80)] - """ - _isNaN_doc = """ - True if the current expression is NaN. + @dispatch_col_method + def isNull(self) -> "Column": + """ + True if the current expression is null. - .. versionadded:: 4.0.0 + .. versionchanged:: 3.4.0 + Supports Spark Connect. - Examples - -------- - >>> from pyspark.sql import Row - >>> df = spark.createDataFrame( - ... [Row(name='Tom', height=80.0), Row(name='Alice', height=float('nan'))]) - >>> df.filter(df.height.isNaN()).collect() - [Row(name='Alice', height=nan)] - """ + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)]) + >>> df.filter(df.height.isNull()).collect() + [Row(name='Alice', height=None)] + """ + ... + + @dispatch_col_method + def isNotNull(self) -> "Column": + """ + True if the current expression is NOT null. + + .. versionchanged:: 3.4.0 + Supports Spark Connect. + + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame([Row(name='Tom', height=80), Row(name='Alice', height=None)]) + >>> df.filter(df.height.isNotNull()).collect() + [Row(name='Tom', height=80)] + """ + ... + + @dispatch_col_method + def isNaN(self) -> "Column": + """ + True if the current expression is NaN. - isNull = _unary_op("isNull", _isNull_doc) - isNotNull = _unary_op("isNotNull", _isNotNull_doc) - isNaN = _unary_op("isNaN", _isNaN_doc) + .. versionadded:: 4.0.0 + Examples + -------- + >>> from pyspark.sql import Row + >>> df = spark.createDataFrame( + ... [Row(name='Tom', height=80.0), Row(name='Alice', height=float('nan'))]) + >>> df.filter(df.height.isNaN()).collect() + [Row(name='Alice', height=nan)] + """ + ... 
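# --- Editor's note (illustrative aside, not part of the patch) ---
# A minimal usage sketch of the Column API declared above. It assumes a local
# PySpark installation; the session and column names below are arbitrary. The
# same expressions work unchanged on classic and Connect sessions, because
# both concrete Column classes implement these stubs.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("column-api-sketch").getOrCreate()
df = spark.createDataFrame([(1, None), (2, 3.5)], ["a", "b"])
df.select(
    (df.a + 1).alias("a_plus_1"),          # __add__ / alias
    df.b.isNull().alias("b_is_null"),      # isNull
    ((df.a % 2) == 0).alias("a_is_even"),  # __mod__ / __eq__
).orderBy(df.a.desc_nulls_last()).show()   # desc_nulls_last
spark.stop()
# --- End of editor's note ---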
+ + @dispatch_col_method def alias(self, *alias: str, **kwargs: Any) -> "Column": """ Returns this column aliased with a new name or names (in the case of expressions that @@ -1251,34 +1149,18 @@ def alias(self, *alias: str, **kwargs: Any) -> "Column": >>> df.select(df.age.alias("age3", metadata={'max': 99})).schema['age3'].metadata['max'] 99 """ + ... - metadata = kwargs.pop("metadata", None) - assert not kwargs, "Unexpected kwargs where passed: %s" % kwargs - - sc = get_active_spark_context() - if len(alias) == 1: - if metadata: - assert sc._jvm is not None - jmeta = sc._jvm.org.apache.spark.sql.types.Metadata.fromJson(json.dumps(metadata)) - return Column(getattr(self._jc, "as")(alias[0], jmeta)) - else: - return Column(getattr(self._jc, "as")(alias[0])) - else: - if metadata is not None: - raise PySparkValueError( - error_class="ONLY_ALLOWED_FOR_SINGLE_COLUMN", - message_parameters={"arg_name": "metadata"}, - ) - return Column(getattr(self._jc, "as")(_to_seq(sc, list(alias)))) - + @dispatch_col_method def name(self, *alias: str, **kwargs: Any) -> "Column": """ :func:`name` is an alias for :func:`alias`. .. versionadded:: 2.0.0 """ - return self.alias(*alias, **kwargs) + ... + @dispatch_col_method def cast(self, dataType: Union[DataType, str]) -> "Column": """ Casts the column into type ``dataType``. @@ -1309,21 +1191,9 @@ def cast(self, dataType: Union[DataType, str]) -> "Column": >>> df.select(df.age.cast(StringType()).alias('ages')).collect() [Row(ages='2'), Row(ages='5')] """ - if isinstance(dataType, str): - jc = self._jc.cast(dataType) - elif isinstance(dataType, DataType): - from pyspark.sql import SparkSession - - spark = SparkSession._getActiveSessionOrCreate() - jdt = spark._jsparkSession.parseDataType(dataType.json()) - jc = self._jc.cast(jdt) - else: - raise PySparkTypeError( - error_class="NOT_DATATYPE_OR_STR", - message_parameters={"arg_name": "dataType", "arg_type": type(dataType).__name__}, - ) - return Column(jc) + ... + @dispatch_col_method def try_cast(self, dataType: Union[DataType, str]) -> "Column": """ This is a special version of `cast` that performs the same operation, but returns a NULL @@ -1371,29 +1241,18 @@ def try_cast(self, dataType: Union[DataType, str]) -> "Column": | NULL| +-----+ """ - if isinstance(dataType, str): - jc = self._jc.try_cast(dataType) - elif isinstance(dataType, DataType): - from pyspark.sql import SparkSession - - spark = SparkSession._getActiveSessionOrCreate() - jdt = spark._jsparkSession.parseDataType(dataType.json()) - jc = self._jc.try_cast(jdt) - else: - raise PySparkTypeError( - error_class="NOT_DATATYPE_OR_STR", - message_parameters={"arg_name": "dataType", "arg_type": type(dataType).__name__}, - ) - return Column(jc) + ... + @dispatch_col_method def astype(self, dataType: Union[DataType, str]) -> "Column": """ :func:`astype` is an alias for :func:`cast`. .. versionadded:: 1.4.0 """ - return self.cast(dataType) + ... + @dispatch_col_method def between( self, lowerBound: Union["Column", "LiteralType", "DateTimeLiteral", "DecimalLiteral"], @@ -1501,8 +1360,9 @@ def between( | Bob| true| +-----+------------------------------------------------------------------+ """ - return (self >= lowerBound) & (self <= upperBound) + ... + @dispatch_col_method def when(self, condition: "Column", value: Any) -> "Column": """ Evaluates a list of conditions and returns one of multiple possible result expressions. 
@@ -1576,15 +1436,9 @@ def when(self, condition: "Column", value: Any) -> "Column": -------- pyspark.sql.functions.when """ - if not isinstance(condition, Column): - raise PySparkTypeError( - error_class="NOT_COLUMN", - message_parameters={"arg_name": "condition", "arg_type": type(condition).__name__}, - ) - v = value._jc if isinstance(value, Column) else value - jc = self._jc.when(condition._jc, v) - return Column(jc) + ... + @dispatch_col_method def otherwise(self, value: Any) -> "Column": """ Evaluates a list of conditions and returns one of multiple possible result expressions. @@ -1622,10 +1476,9 @@ def otherwise(self, value: Any) -> "Column": -------- pyspark.sql.functions.when """ - v = value._jc if isinstance(value, Column) else value - jc = self._jc.otherwise(v) - return Column(jc) + ... + @dispatch_col_method def over(self, window: "WindowSpec") -> "Column": """ Define a windowing column. @@ -1666,26 +1519,19 @@ def over(self, window: "WindowSpec") -> "Column": | 2|Alice| 1| 2| +---+-----+----+---+ """ - from pyspark.sql.window import WindowSpec - - if not isinstance(window, WindowSpec): - raise PySparkTypeError( - error_class="NOT_WINDOWSPEC", - message_parameters={"arg_name": "window", "arg_type": type(window).__name__}, - ) - jc = self._jc.over(window._jspec) - return Column(jc) + ... + @dispatch_col_method def __nonzero__(self) -> None: - raise PySparkValueError( - error_class="CANNOT_CONVERT_COLUMN_INTO_BOOL", - message_parameters={}, - ) + ... - __bool__ = __nonzero__ + @dispatch_col_method + def __bool__(self) -> None: + ... + @dispatch_col_method def __repr__(self) -> str: - return "Column<'%s'>" % self._jc.toString() + ... def _test() -> None: diff --git a/python/pyspark/sql/connect/_typing.py b/python/pyspark/sql/connect/_typing.py index 1b8516427dbdd..806476af1eb60 100644 --- a/python/pyspark/sql/connect/_typing.py +++ b/python/pyspark/sql/connect/_typing.py @@ -15,19 +15,20 @@ # limitations under the License. 
# from types import FunctionType -from typing import Any, Callable, Iterable, Union, Optional, NewType, Protocol, Tuple +from typing import Any, Callable, Iterable, Union, Optional, NewType, Protocol, Tuple, TypeVar import datetime import decimal import pyarrow from pandas.core.frame import DataFrame as PandasDataFrame -from pyspark.sql.connect.column import Column +from pyspark.sql.column import Column from pyspark.sql.connect.types import DataType from pyspark.sql.streaming.state import GroupState ColumnOrName = Union[Column, str] +ColumnOrName_ = TypeVar("ColumnOrName_", bound=ColumnOrName) ColumnOrNameOrOrdinal = Union[Column, str, int] diff --git a/python/pyspark/sql/connect/avro/functions.py b/python/pyspark/sql/connect/avro/functions.py index 43088333b1086..da350f92a531a 100644 --- a/python/pyspark/sql/connect/avro/functions.py +++ b/python/pyspark/sql/connect/avro/functions.py @@ -19,6 +19,7 @@ A collections of builtin avro functions """ +from pyspark.errors import PySparkTypeError from pyspark.sql.connect.utils import check_dependencies check_dependencies(__name__) @@ -26,8 +27,7 @@ from typing import Dict, Optional, TYPE_CHECKING from pyspark.sql.avro import functions as PyAvroFunctions - -from pyspark.sql.connect.column import Column +from pyspark.sql.column import Column from pyspark.sql.connect.functions.builtin import _invoke_function, _to_col, _options_to_col, lit if TYPE_CHECKING: @@ -37,6 +37,25 @@ def from_avro( data: "ColumnOrName", jsonFormatSchema: str, options: Optional[Dict[str, str]] = None ) -> Column: + if not isinstance(data, (Column, str)): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={ + "arg_name": "data", + "arg_type": "pyspark.sql.Column or str", + }, + ) + if not isinstance(jsonFormatSchema, str): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={"arg_name": "jsonFormatSchema", "arg_type": "str"}, + ) + if options is not None and not isinstance(options, dict): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={"arg_name": "options", "arg_type": "dict, optional"}, + ) + if options is None: return _invoke_function("from_avro", _to_col(data), lit(jsonFormatSchema)) else: @@ -49,6 +68,20 @@ def from_avro( def to_avro(data: "ColumnOrName", jsonFormatSchema: str = "") -> Column: + if not isinstance(data, (Column, str)): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={ + "arg_name": "data", + "arg_type": "pyspark.sql.Column or str", + }, + ) + if not isinstance(jsonFormatSchema, str): + raise PySparkTypeError( + error_class="INVALID_TYPE", + message_parameters={"arg_name": "jsonFormatSchema", "arg_type": "str"}, + ) + if jsonFormatSchema == "": return _invoke_function("to_avro", _to_col(data)) else: @@ -80,15 +113,8 @@ def _test() -> None: import doctest from pyspark.sql import SparkSession as PySparkSession import pyspark.sql.connect.avro.functions - from pyspark.util import is_remote_only globs = pyspark.sql.connect.avro.functions.__dict__.copy() - - # TODO(SPARK-47760): Reeanble Avro function doctests - if is_remote_only(): - del pyspark.sql.connect.avro.functions.from_avro - del pyspark.sql.connect.avro.functions.to_avro - globs["spark"] = ( PySparkSession.builder.appName("sql.connect.avro.functions tests") .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]")) diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 0bdfb4bb7910d..e91324150cbd8 100644 --- 
a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -61,12 +61,17 @@ from pyspark.loose_version import LooseVersion from pyspark.version import __version__ from pyspark.resource.information import ResourceInformation +from pyspark.sql.metrics import MetricValue, PlanMetrics, ExecutionInfo, ObservedMetrics from pyspark.sql.connect.client.artifact import ArtifactManager from pyspark.sql.connect.client.logging import logger from pyspark.sql.connect.profiler import ConnectProfilerCollector from pyspark.sql.connect.client.reattach import ExecutePlanResponseReattachableIterator from pyspark.sql.connect.client.retries import RetryPolicy, Retrying, DefaultPolicy -from pyspark.sql.connect.conversion import storage_level_to_proto, proto_to_storage_level +from pyspark.sql.connect.conversion import ( + storage_level_to_proto, + proto_to_storage_level, + proto_to_remote_cached_dataframe, +) import pyspark.sql.connect.proto as pb2 import pyspark.sql.connect.proto.base_pb2_grpc as grpc_lib import pyspark.sql.connect.types as types @@ -443,56 +448,7 @@ def toChannel(self) -> grpc.Channel: return self._secure_channel(self.endpoint, creds) -class MetricValue: - def __init__(self, name: str, value: Union[int, float], type: str): - self._name = name - self._type = type - self._value = value - - def __repr__(self) -> str: - return f"<{self._name}={self._value} ({self._type})>" - - @property - def name(self) -> str: - return self._name - - @property - def value(self) -> Union[int, float]: - return self._value - - @property - def metric_type(self) -> str: - return self._type - - -class PlanMetrics: - def __init__(self, name: str, id: int, parent: int, metrics: List[MetricValue]): - self._name = name - self._id = id - self._parent_id = parent - self._metrics = metrics - - def __repr__(self) -> str: - return f"Plan({self._name})={self._metrics}" - - @property - def name(self) -> str: - return self._name - - @property - def plan_id(self) -> int: - return self._id - - @property - def parent_plan_id(self) -> int: - return self._parent_id - - @property - def metrics(self) -> List[MetricValue]: - return self._metrics - - -class PlanObservedMetrics: +class PlanObservedMetrics(ObservedMetrics): def __init__(self, name: str, metrics: List[pb2.Expression.Literal], keys: List[str]): self._name = name self._metrics = metrics @@ -509,6 +465,13 @@ def name(self) -> str: def metrics(self) -> List[pb2.Expression.Literal]: return self._metrics + @property + def pairs(self) -> dict[str, Any]: + result = {} + for x in range(len(self._metrics)): + result[self.keys[x]] = LiteralExpression._to_value(self.metrics[x]) + return result + @property def keys(self) -> List[str]: return self._keys @@ -655,12 +618,7 @@ def __init__( use_reattachable_execute: bool Enable reattachable execution. """ - - class ClientThreadLocals(threading.local): - tags: set = set() - inside_error_handling: bool = False - - self.thread_local = ClientThreadLocals() + self.thread_local = threading.local() # Parse the connection string. 
self._builder = ( @@ -889,7 +847,7 @@ def _resources(self) -> Dict[str, ResourceInformation]: logger.info("Fetching the resources") cmd = pb2.Command() cmd.get_resources_command.SetInParent() - (_, properties) = self.execute_command(cmd) + (_, properties, _) = self.execute_command(cmd) resources = properties["get_resources_command_result"] return resources @@ -916,18 +874,23 @@ def to_table_as_iterator( def to_table( self, plan: pb2.Plan, observations: Dict[str, Observation] - ) -> Tuple["pa.Table", Optional[StructType]]: + ) -> Tuple["pa.Table", Optional[StructType], ExecutionInfo]: """ Return given plan as a PyArrow Table. """ logger.info(f"Executing plan {self._proto_to_string(plan)}") req = self._execute_plan_request_with_metadata() req.plan.CopyFrom(plan) - table, schema, _, _, _ = self._execute_and_fetch(req, observations) + table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(req, observations) + + # Create a query execution object. + ei = ExecutionInfo(metrics, observed_metrics) assert table is not None - return table, schema + return table, schema, ei - def to_pandas(self, plan: pb2.Plan, observations: Dict[str, Observation]) -> "pd.DataFrame": + def to_pandas( + self, plan: pb2.Plan, observations: Dict[str, Observation] + ) -> Tuple["pd.DataFrame", "ExecutionInfo"]: """ Return given plan as a pandas DataFrame. """ @@ -942,6 +905,7 @@ def to_pandas(self, plan: pb2.Plan, observations: Dict[str, Observation]) -> "pd req, observations, self_destruct=self_destruct ) assert table is not None + ei = ExecutionInfo(metrics, observed_metrics) schema = schema or from_arrow_schema(table.schema, prefer_timestamp_ntz=True) assert schema is not None and isinstance(schema, StructType) @@ -1008,7 +972,7 @@ def to_pandas(self, plan: pb2.Plan, observations: Dict[str, Observation]) -> "pd pdf.attrs["metrics"] = metrics if len(observed_metrics) > 0: pdf.attrs["observed_metrics"] = observed_metrics - return pdf + return pdf, ei def _proto_to_string(self, p: google.protobuf.message.Message) -> str: """ @@ -1052,7 +1016,7 @@ def explain_string(self, plan: pb2.Plan, explain_mode: str = "extended") -> str: def execute_command( self, command: pb2.Command, observations: Optional[Dict[str, Observation]] = None - ) -> Tuple[Optional[pd.DataFrame], Dict[str, Any]]: + ) -> Tuple[Optional[pd.DataFrame], Dict[str, Any], ExecutionInfo]: """ Execute given command. """ @@ -1061,11 +1025,15 @@ def execute_command( if self._user_id: req.user_context.user_id = self._user_id req.plan.command.CopyFrom(command) - data, _, _, _, properties = self._execute_and_fetch(req, observations or {}) + data, _, metrics, observed_metrics, properties = self._execute_and_fetch( + req, observations or {} + ) + # Create a query execution object. 
+ ei = ExecutionInfo(metrics, observed_metrics) if data is not None: - return (data.to_pandas(), properties) + return (data.to_pandas(), properties, ei) else: - return (None, properties) + return (None, properties, ei) def execute_command_as_iterator( self, command: pb2.Command, observations: Optional[Dict[str, Observation]] = None @@ -1400,6 +1368,12 @@ def handle_response( if b.HasField("create_resource_profile_command_result"): profile_id = b.create_resource_profile_command_result.profile_id yield {"create_resource_profile_command_result": profile_id} + if b.HasField("checkpoint_command_result"): + yield { + "checkpoint_command_result": proto_to_remote_cached_dataframe( + b.checkpoint_command_result.relation + ) + } try: if self._use_reattachable_execute: @@ -1683,7 +1657,7 @@ def _handle_error(self, error: Exception) -> NoReturn: Throws the appropriate internal Python exception. """ - if self.thread_local.inside_error_handling: + if getattr(self.thread_local, "inside_error_handling", False): # We are already inside error handling routine, # avoid recursive error processing (with potentially infinite recursion) raise error @@ -1763,6 +1737,9 @@ def _handle_rpc_error(self, rpc_error: grpc.RpcError) -> NoReturn: info = error_details_pb2.ErrorInfo() d.Unpack(info) + if info.metadata["errorClass"] == "INVALID_HANDLE.SESSION_CHANGED": + self._closed = True + raise convert_exception( info, status.message, @@ -1826,6 +1803,7 @@ def _verify_response_integrity( response.server_side_session_id and response.server_side_session_id != self._server_session_id ): + self._closed = True raise PySparkAssertionError( "Received incorrect server side session identifier for request. " "Please create a new Spark Session to reconnect. (" @@ -1840,6 +1818,6 @@ def _create_profile(self, profile: pb2.ResourceProfile) -> int: logger.info("Creating the ResourceProfile") cmd = pb2.Command() cmd.create_resource_profile_command.profile.CopyFrom(profile) - (_, properties) = self.execute_command(cmd) + (_, properties, _) = self.execute_command(cmd) profile_id = properties["create_resource_profile_command_result"] return profile_id diff --git a/python/pyspark/sql/connect/client/reattach.py b/python/pyspark/sql/connect/client/reattach.py index 4468582ca80ea..82c7ae9772188 100644 --- a/python/pyspark/sql/connect/client/reattach.py +++ b/python/pyspark/sql/connect/client/reattach.py @@ -58,7 +58,20 @@ class ExecutePlanResponseReattachableIterator(Generator): # Lock to manage the pool _lock: ClassVar[RLock] = RLock() - _release_thread_pool: Optional[ThreadPool] = ThreadPool(os.cpu_count() if os.cpu_count() else 8) + _release_thread_pool_instance: Optional[ThreadPool] = None + + @classmethod # type: ignore[misc] + @property + def _release_thread_pool(cls) -> ThreadPool: + # Perform a first check outside the critical path. + if cls._release_thread_pool_instance is not None: + return cls._release_thread_pool_instance + with cls._lock: + if cls._release_thread_pool_instance is None: + cls._release_thread_pool_instance = ThreadPool( + os.cpu_count() if os.cpu_count() else 8 + ) + return cls._release_thread_pool_instance @classmethod def shutdown(cls: Type["ExecutePlanResponseReattachableIterator"]) -> None: @@ -67,19 +80,10 @@ def shutdown(cls: Type["ExecutePlanResponseReattachableIterator"]) -> None: outstanding calls are closed. 
""" with cls._lock: - if cls._release_thread_pool is not None: - cls._release_thread_pool.close() - cls._release_thread_pool.join() - cls._release_thread_pool = None - - @classmethod - def _initialize_pool_if_necessary(cls: Type["ExecutePlanResponseReattachableIterator"]) -> None: - """ - If the processing pool for the release calls is None, initialize the pool exactly once. - """ - with cls._lock: - if cls._release_thread_pool is None: - cls._release_thread_pool = ThreadPool(os.cpu_count() if os.cpu_count() else 8) + if cls._release_thread_pool_instance is not None: + cls._release_thread_pool.close() # type: ignore[attr-defined] + cls._release_thread_pool.join() # type: ignore[attr-defined] + cls._release_thread_pool_instance = None def __init__( self, @@ -88,7 +92,7 @@ def __init__( retrying: Callable[[], Retrying], metadata: Iterable[Tuple[str, str]], ): - ExecutePlanResponseReattachableIterator._initialize_pool_if_necessary() + self._release_thread_pool # Trigger initialization self._request = request self._retrying = retrying if request.operation_id: @@ -206,8 +210,9 @@ def target() -> None: except Exception as e: warnings.warn(f"ReleaseExecute failed with exception: {e}.") - if ExecutePlanResponseReattachableIterator._release_thread_pool is not None: - ExecutePlanResponseReattachableIterator._release_thread_pool.apply_async(target) + with self._lock: + if self._release_thread_pool_instance is not None: + self._release_thread_pool.apply_async(target) def _release_all(self) -> None: """ @@ -230,8 +235,9 @@ def target() -> None: except Exception as e: warnings.warn(f"ReleaseExecute failed with exception: {e}.") - if ExecutePlanResponseReattachableIterator._release_thread_pool is not None: - ExecutePlanResponseReattachableIterator._release_thread_pool.apply_async(target) + with self._lock: + if self._release_thread_pool_instance is not None: + self._release_thread_pool.apply_async(target) self._result_complete = True def _call_iter(self, iter_fun: Callable) -> Any: @@ -254,7 +260,10 @@ def _call_iter(self, iter_fun: Callable) -> Any: return iter_fun() except grpc.RpcError as e: status = rpc_status.from_call(cast(grpc.Call, e)) - if status is not None and "INVALID_HANDLE.OPERATION_NOT_FOUND" in status.message: + if status is not None and ( + "INVALID_HANDLE.OPERATION_NOT_FOUND" in status.message + or "INVALID_HANDLE.SESSION_NOT_FOUND" in status.message + ): if self._last_returned_response_id is not None: raise PySparkRuntimeError( error_class="RESPONSE_ALREADY_RECEIVED", diff --git a/python/pyspark/sql/connect/column.py b/python/pyspark/sql/connect/column.py index 4436b36907a96..ef48091a35b0c 100644 --- a/python/pyspark/sql/connect/column.py +++ b/python/pyspark/sql/connect/column.py @@ -24,16 +24,14 @@ from typing import ( TYPE_CHECKING, - Callable, Any, Union, - overload, Optional, ) +from pyspark.sql.column import Column as ParentColumn from pyspark.errors import PySparkTypeError, PySparkAttributeError, PySparkValueError from pyspark.sql.types import DataType -from pyspark.sql.column import Column as PySparkColumn import pyspark.sql.connect.proto as proto from pyspark.sql.connect.expressions import ( @@ -48,6 +46,7 @@ WithField, DropField, ) +from pyspark.errors.utils import with_origin_to_class if TYPE_CHECKING: @@ -60,53 +59,57 @@ from pyspark.sql.connect.window import WindowSpec -def _func_op(name: str, doc: Optional[str] = "") -> Callable[["Column"], "Column"]: - def wrapped(self: "Column") -> "Column": - return Column(UnresolvedFunction(name, [self._expr])) - - wrapped.__doc__ = 
doc - return wrapped +def _func_op(name: str, self: ParentColumn) -> ParentColumn: + return Column(UnresolvedFunction(name, [self._expr])) # type: ignore[list-item] def _bin_op( - name: str, doc: Optional[str] = "binary function", reverse: bool = False -) -> Callable[["Column", Any], "Column"]: - def wrapped(self: "Column", other: Any) -> "Column": - if other is None or isinstance( - other, - ( - bool, - float, - int, - str, - datetime.datetime, - datetime.date, - decimal.Decimal, - datetime.timedelta, - ), - ): - other_expr = LiteralExpression._from_value(other) - else: - other_expr = other._expr - - if not reverse: - return Column(UnresolvedFunction(name, [self._expr, other_expr])) - else: - return Column(UnresolvedFunction(name, [other_expr, self._expr])) - - wrapped.__doc__ = doc - return wrapped - - -def _unary_op(name: str, doc: Optional[str] = "unary function") -> Callable[["Column"], "Column"]: - def wrapped(self: "Column") -> "Column": - return Column(UnresolvedFunction(name, [self._expr])) - - wrapped.__doc__ = doc - return wrapped - + name: str, + self: ParentColumn, + other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"], + reverse: bool = False, +) -> ParentColumn: + if other is None or isinstance( + other, + ( + bool, + float, + int, + str, + datetime.datetime, + datetime.date, + decimal.Decimal, + datetime.timedelta, + ), + ): + other_expr = LiteralExpression._from_value(other) + else: + other_expr = other._expr # type: ignore[assignment] + + if not reverse: + return Column(UnresolvedFunction(name, [self._expr, other_expr])) # type: ignore[list-item] + else: + return Column(UnresolvedFunction(name, [other_expr, self._expr])) # type: ignore[list-item] + + +def _unary_op(name: str, self: ParentColumn) -> ParentColumn: + return Column(UnresolvedFunction(name, [self._expr])) # type: ignore[list-item] + + +def _to_expr(v: Any) -> Expression: + return v._expr if isinstance(v, Column) else LiteralExpression._from_value(v) + + +@with_origin_to_class +class Column(ParentColumn): + def __new__( + cls, + expr: "Expression", + ) -> "Column": + self = object.__new__(cls) + self.__init__(expr) # type: ignore[misc] + return self -class Column: def __init__(self, expr: "Expression") -> None: if not isinstance(expr, Expression): raise PySparkTypeError( @@ -115,36 +118,128 @@ def __init__(self, expr: "Expression") -> None: ) self._expr = expr - __gt__ = _bin_op(">") - __lt__ = _bin_op("<") - __add__ = _bin_op("+") - __sub__ = _bin_op("-") - __mul__ = _bin_op("*") - __div__ = _bin_op("/") - __truediv__ = _bin_op("/") - __mod__ = _bin_op("%") - __radd__ = _bin_op("+", reverse=True) - __rsub__ = _bin_op("-", reverse=True) - __rmul__ = _bin_op("*", reverse=True) - __rdiv__ = _bin_op("/", reverse=True) - __rtruediv__ = _bin_op("/", reverse=True) - __rmod__ = _bin_op("%", reverse=True) - __pow__ = _bin_op("power") - __rpow__ = _bin_op("power", reverse=True) - __ge__ = _bin_op(">=") - __le__ = _bin_op("<=") - - eqNullSafe = _bin_op("<=>", PySparkColumn.eqNullSafe.__doc__) - - __neg__ = _func_op("negative") + def __gt__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op(">", self, other) + + def __lt__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("<", self, other) + + def __add__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("+", self, other) + 
+ def __sub__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("-", self, other) + + def __mul__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("*", self, other) + + def __div__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("/", self, other) + + def __truediv__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("/", self, other) + + def __mod__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("%", self, other) + + def __radd__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("+", self, other, reverse=True) + + def __rsub__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("-", self, other, reverse=True) + + def __rmul__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("*", self, other, reverse=True) + + def __rdiv__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("/", self, other, reverse=True) + + def __rtruediv__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("/", self, other, reverse=True) + + def __rmod__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("%", self, other, reverse=True) + + def __pow__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("power", self, other) + + def __rpow__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("power", self, other, reverse=True) + + def __ge__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op(">=", self, other) + + def __le__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("<=", self, other) + + def eqNullSafe( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("<=>", self, other) + + def __neg__(self) -> ParentColumn: + return _func_op("negative", self) # `and`, `or`, `not` cannot be overloaded in Python, # so use bitwise operators as boolean operators - __and__ = _bin_op("and") - __or__ = _bin_op("or") - __invert__ = _func_op("not") - __rand__ = _bin_op("and") - __ror__ = _bin_op("or") + def __and__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("and", self, other) + + def __or__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("or", self, other) + + def __invert__(self) -> ParentColumn: + return _func_op("not", self) + + def __rand__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("and", self, other) + 
+ def __ror__( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("or", self, other) # container operators def __contains__(self, item: Any) -> None: @@ -154,27 +249,53 @@ def __contains__(self, item: Any) -> None: ) # bitwise operators - bitwiseOR = _bin_op("|", PySparkColumn.bitwiseOR.__doc__) - bitwiseAND = _bin_op("&", PySparkColumn.bitwiseAND.__doc__) - bitwiseXOR = _bin_op("^", PySparkColumn.bitwiseXOR.__doc__) + def bitwiseOR( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("|", self, other) + + def bitwiseAND( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("&", self, other) + + def bitwiseXOR( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("^", self, other) + + def isNull(self) -> ParentColumn: + return _unary_op("isNull", self) + + def isNotNull(self) -> ParentColumn: + return _unary_op("isNotNull", self) - isNull = _unary_op("isnull", PySparkColumn.isNull.__doc__) - isNotNull = _unary_op("isnotnull", PySparkColumn.isNotNull.__doc__) - isNaN = _unary_op("isNaN", PySparkColumn.isNaN.__doc__) + def isNaN(self) -> ParentColumn: + return _unary_op("isNaN", self) def __ne__( # type: ignore[override] self, other: Any, - ) -> "Column": - """binary function""" - return _func_op("not")(_bin_op("==")(self, other)) + ) -> ParentColumn: + return _func_op("not", _bin_op("==", self, other)) # string methods - contains = _bin_op("contains", PySparkColumn.contains.__doc__) - startswith = _bin_op("startswith", PySparkColumn.startswith.__doc__) - endswith = _bin_op("endswith", PySparkColumn.endswith.__doc__) - - def when(self, condition: "Column", value: Any) -> "Column": + def contains( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("contains", self, other) + + def startswith( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("startsWith", self, other) + + def endswith( + self, other: Union[ParentColumn, "LiteralType", "DecimalLiteral", "DateTimeLiteral"] + ) -> ParentColumn: + return _bin_op("endsWith", self, other) + + def when(self, condition: ParentColumn, value: Any) -> ParentColumn: if not isinstance(condition, Column): raise PySparkTypeError( error_class="NOT_COLUMN", @@ -193,18 +314,14 @@ def when(self, condition: "Column", value: Any) -> "Column": message_parameters={}, ) - if isinstance(value, Column): - _value = value._expr - else: - _value = LiteralExpression._from_value(value) - - _branches = self._expr._branches + [(condition._expr, _value)] - - return Column(CaseWhen(branches=_branches, else_value=None)) - - when.__doc__ = PySparkColumn.when.__doc__ + return Column( + CaseWhen( + branches=self._expr._branches + [(condition._expr, _to_expr(value))], + else_value=None, + ) + ) - def otherwise(self, value: Any) -> "Column": + def otherwise(self, value: Any) -> ParentColumn: if not isinstance(self._expr, CaseWhen): raise PySparkTypeError( "otherwise() can only be applied on a Column previously generated by when()" @@ -215,28 +332,25 @@ def otherwise(self, value: Any) -> "Column": "otherwise() can only be applied once on a Column previously generated by when()" ) - if isinstance(value, Column): - _value = value._expr - else: - _value = 
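Since `and`, `or` and `not` cannot be overloaded in Python, the __and__/__or__/__invert__ definitions above are what back the user-facing &, | and ~ operators; each comparison needs its own parentheses because the bitwise operators bind more tightly. A small usage sketch, assuming an existing DataFrame `df` with `age` and `name` columns:

from pyspark.sql import functions as F

# & / | / ~ route through the methods above and build "and"/"or"/"not" expressions.
adults_named = df.filter((F.col("age") >= 18) & ~F.col("name").isNull())
extremes = df.filter((F.col("age") < 18) | (F.col("age") > 65))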
LiteralExpression._from_value(value) - - return Column(CaseWhen(branches=self._expr._branches, else_value=_value)) - - otherwise.__doc__ = PySparkColumn.otherwise.__doc__ + return Column( + CaseWhen( + branches=self._expr._branches, + else_value=_to_expr(value), + ) + ) - like = _bin_op("like", PySparkColumn.like.__doc__) - rlike = _bin_op("rlike", PySparkColumn.rlike.__doc__) - ilike = _bin_op("ilike", PySparkColumn.ilike.__doc__) + def like(self: ParentColumn, other: str) -> ParentColumn: + return _bin_op("like", self, other) - @overload - def substr(self, startPos: int, length: int) -> "Column": - ... + def rlike(self: ParentColumn, other: str) -> ParentColumn: + return _bin_op("rlike", self, other) - @overload - def substr(self, startPos: "Column", length: "Column") -> "Column": - ... + def ilike(self: ParentColumn, other: str) -> ParentColumn: + return _bin_op("ilike", self, other) - def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) -> "Column": + def substr( + self, startPos: Union[int, ParentColumn], length: Union[int, ParentColumn] + ) -> ParentColumn: if type(startPos) != type(length): raise PySparkTypeError( error_class="NOT_SAME_TYPE", @@ -248,12 +362,9 @@ def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) - }, ) - if isinstance(length, Column): - length_expr = length._expr - start_expr = startPos._expr # type: ignore[union-attr] - elif isinstance(length, int): - length_expr = LiteralExpression._from_value(length) - start_expr = LiteralExpression._from_value(startPos) + if isinstance(length, (Column, int)): + length_expr = _to_expr(length) + start_expr = _to_expr(startPos) else: raise PySparkTypeError( error_class="NOT_COLUMN_OR_INT", @@ -261,12 +372,7 @@ def substr(self, startPos: Union[int, "Column"], length: Union[int, "Column"]) - ) return Column(UnresolvedFunction("substr", [self._expr, start_expr, length_expr])) - substr.__doc__ = PySparkColumn.substr.__doc__ - - def __eq__(self, other: Any) -> "Column": # type: ignore[override] - """Returns a binary expression with the current column as the left - side and the other expression as the right side. 
- """ + def __eq__(self, other: Any) -> ParentColumn: # type: ignore[override] if other is None or isinstance( other, (bool, float, int, str, datetime.datetime, datetime.date, decimal.Decimal) ): @@ -279,46 +385,30 @@ def __eq__(self, other: Any) -> "Column": # type: ignore[override] def to_plan(self, session: "SparkConnectClient") -> proto.Expression: return self._expr.to_plan(session) - def alias(self, *alias: str, **kwargs: Any) -> "Column": + def alias(self, *alias: str, **kwargs: Any) -> ParentColumn: return Column(self._expr.alias(*alias, **kwargs)) - alias.__doc__ = PySparkColumn.alias.__doc__ - name = alias - name.__doc__ = PySparkColumn.name.__doc__ - - def asc(self) -> "Column": + def asc(self) -> ParentColumn: return self.asc_nulls_first() - asc.__doc__ = PySparkColumn.asc.__doc__ - - def asc_nulls_first(self) -> "Column": + def asc_nulls_first(self) -> ParentColumn: return Column(SortOrder(self._expr, ascending=True, nullsFirst=True)) - asc_nulls_first.__doc__ = PySparkColumn.asc_nulls_first.__doc__ - - def asc_nulls_last(self) -> "Column": + def asc_nulls_last(self) -> ParentColumn: return Column(SortOrder(self._expr, ascending=True, nullsFirst=False)) - asc_nulls_last.__doc__ = PySparkColumn.asc_nulls_last.__doc__ - - def desc(self) -> "Column": + def desc(self) -> ParentColumn: return self.desc_nulls_last() - desc.__doc__ = PySparkColumn.desc.__doc__ - - def desc_nulls_first(self) -> "Column": + def desc_nulls_first(self) -> ParentColumn: return Column(SortOrder(self._expr, ascending=False, nullsFirst=True)) - desc_nulls_first.__doc__ = PySparkColumn.desc_nulls_first.__doc__ - - def desc_nulls_last(self) -> "Column": + def desc_nulls_last(self) -> ParentColumn: return Column(SortOrder(self._expr, ascending=False, nullsFirst=False)) - desc_nulls_last.__doc__ = PySparkColumn.desc_nulls_last.__doc__ - - def cast(self, dataType: Union[DataType, str]) -> "Column": + def cast(self, dataType: Union[DataType, str]) -> ParentColumn: if isinstance(dataType, (DataType, str)): return Column(CastExpression(expr=self._expr, data_type=dataType)) else: @@ -327,11 +417,9 @@ def cast(self, dataType: Union[DataType, str]) -> "Column": message_parameters={"arg_name": "dataType", "arg_type": type(dataType).__name__}, ) - cast.__doc__ = PySparkColumn.cast.__doc__ - astype = cast - def try_cast(self, dataType: Union[DataType, str]) -> "Column": + def try_cast(self, dataType: Union[DataType, str]) -> ParentColumn: if isinstance(dataType, (DataType, str)): return Column( CastExpression( @@ -346,12 +434,10 @@ def try_cast(self, dataType: Union[DataType, str]) -> "Column": message_parameters={"arg_name": "dataType", "arg_type": type(dataType).__name__}, ) - try_cast.__doc__ = PySparkColumn.try_cast.__doc__ - def __repr__(self) -> str: return "Column<'%s'>" % self._expr.__repr__() - def over(self, window: "WindowSpec") -> "Column": + def over(self, window: "WindowSpec") -> ParentColumn: # type: ignore[override] from pyspark.sql.connect.window import WindowSpec if not isinstance(window, WindowSpec): @@ -362,35 +448,22 @@ def over(self, window: "WindowSpec") -> "Column": return Column(WindowExpression(windowFunction=self._expr, windowSpec=window)) - over.__doc__ = PySparkColumn.over.__doc__ - - def isin(self, *cols: Any) -> "Column": + def isin(self, *cols: Any) -> ParentColumn: if len(cols) == 1 and isinstance(cols[0], (list, set)): _cols = list(cols[0]) else: _cols = list(cols) - _exprs = [self._expr] - for c in _cols: - if isinstance(c, Column): - _exprs.append(c._expr) - else: - 
_exprs.append(LiteralExpression._from_value(c)) - - return Column(UnresolvedFunction("in", _exprs)) - - isin.__doc__ = PySparkColumn.isin.__doc__ + return Column(UnresolvedFunction("in", [self._expr] + [_to_expr(c) for c in _cols])) def between( self, - lowerBound: Union["Column", "LiteralType", "DateTimeLiteral", "DecimalLiteral"], - upperBound: Union["Column", "LiteralType", "DateTimeLiteral", "DecimalLiteral"], - ) -> "Column": + lowerBound: Union[ParentColumn, "LiteralType", "DateTimeLiteral", "DecimalLiteral"], + upperBound: Union[ParentColumn, "LiteralType", "DateTimeLiteral", "DecimalLiteral"], + ) -> ParentColumn: return (self >= lowerBound) & (self <= upperBound) - between.__doc__ = PySparkColumn.between.__doc__ - - def getItem(self, key: Any) -> "Column": + def getItem(self, key: Any) -> ParentColumn: if isinstance(key, Column): warnings.warn( "A column as 'key' in getItem is deprecated as of Spark 3.0, and will not " @@ -400,9 +473,7 @@ def getItem(self, key: Any) -> "Column": ) return self[key] - getItem.__doc__ = PySparkColumn.getItem.__doc__ - - def getField(self, name: Any) -> "Column": + def getField(self, name: Any) -> ParentColumn: if isinstance(name, Column): warnings.warn( "A column as 'name' in getField is deprecated as of Spark 3.0, and will not " @@ -412,9 +483,7 @@ def getField(self, name: Any) -> "Column": ) return self[name] - getField.__doc__ = PySparkColumn.getField.__doc__ - - def withField(self, fieldName: str, col: "Column") -> "Column": + def withField(self, fieldName: str, col: ParentColumn) -> ParentColumn: if not isinstance(fieldName, str): raise PySparkTypeError( error_class="NOT_STR", @@ -429,9 +498,7 @@ def withField(self, fieldName: str, col: "Column") -> "Column": return Column(WithField(self._expr, fieldName, col._expr)) - withField.__doc__ = PySparkColumn.withField.__doc__ - - def dropFields(self, *fieldNames: str) -> "Column": + def dropFields(self, *fieldNames: str) -> ParentColumn: dropField: Optional[DropField] = None for fieldName in fieldNames: if not isinstance(fieldName, str): @@ -458,9 +525,7 @@ def dropFields(self, *fieldNames: str) -> "Column": return Column(dropField) - dropFields.__doc__ = PySparkColumn.dropFields.__doc__ - - def __getattr__(self, item: Any) -> "Column": + def __getattr__(self, item: Any) -> ParentColumn: if item == "_jc": raise PySparkAttributeError( error_class="JVM_ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": "_jc"} @@ -471,7 +536,7 @@ def __getattr__(self, item: Any) -> "Column": ) return self[item] - def __getitem__(self, k: Any) -> "Column": + def __getitem__(self, k: Any) -> ParentColumn: if isinstance(k, slice): if k.step is not None: raise PySparkValueError( @@ -479,10 +544,8 @@ def __getitem__(self, k: Any) -> "Column": message_parameters={}, ) return self.substr(k.start, k.stop) - elif isinstance(k, Column): - return Column(UnresolvedExtractValue(self._expr, k._expr)) else: - return Column(UnresolvedExtractValue(self._expr, LiteralExpression._from_value(k))) + return Column(UnresolvedExtractValue(self._expr, _to_expr(k))) def __iter__(self) -> None: raise PySparkTypeError( @@ -499,17 +562,14 @@ def __nonzero__(self) -> None: __bool__ = __nonzero__ -Column.__doc__ = PySparkColumn.__doc__ - - def _test() -> None: import os import sys import doctest from pyspark.sql import SparkSession as PySparkSession - import pyspark.sql.connect.column + import pyspark.sql.column - globs = pyspark.sql.connect.column.__dict__.copy() + globs = pyspark.sql.column.__dict__.copy() globs["spark"] = ( 
PySparkSession.builder.appName("sql.connect.column tests") .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]")) @@ -517,7 +577,7 @@ def _test() -> None: ) (failure_count, test_count) = doctest.testmod( - pyspark.sql.connect.column, + pyspark.sql.column, globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE diff --git a/python/pyspark/sql/connect/conversion.py b/python/pyspark/sql/connect/conversion.py index 9b1007c41f9c0..1c205586d6096 100644 --- a/python/pyspark/sql/connect/conversion.py +++ b/python/pyspark/sql/connect/conversion.py @@ -48,12 +48,10 @@ import pyspark.sql.connect.proto as pb2 from pyspark.sql.pandas.types import to_arrow_schema, _dedup_names, _deduplicate_field_names -from typing import ( - Any, - Callable, - Sequence, - List, -) +from typing import Any, Callable, Sequence, List, TYPE_CHECKING + +if TYPE_CHECKING: + from pyspark.sql.connect.dataframe import DataFrame class LocalDataToArrowConversion: @@ -570,3 +568,17 @@ def proto_to_storage_level(storage_level: pb2.StorageLevel) -> StorageLevel: deserialized=storage_level.deserialized, replication=storage_level.replication, ) + + +def proto_to_remote_cached_dataframe(relation: pb2.CachedRemoteRelation) -> "DataFrame": + assert relation is not None and isinstance(relation, pb2.CachedRemoteRelation) + + from pyspark.sql.connect.dataframe import DataFrame + from pyspark.sql.connect.session import SparkSession + import pyspark.sql.connect.plan as plan + + session = SparkSession.active() + return DataFrame( + plan=plan.CachedRemoteRelation(relation.relation_id, session), + session=session, + ) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index f0dc412760a4a..46698c2530eab 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -14,6 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
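between() above is defined purely in terms of the comparison and boolean operators, so the two filters below build the same expression (usage sketch, assuming a DataFrame `df` with an `age` column):

from pyspark.sql import functions as F

a = df.filter(F.col("age").between(20, 30))
b = df.filter((F.col("age") >= 20) & (F.col("age") <= 30))
# between() simply expands to the expression used in b.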
# + +# mypy: disable-error-code="override" from pyspark.errors.exceptions.base import ( SessionNotSameException, PySparkIndexError, @@ -40,21 +42,23 @@ Type, ) +import copy import sys import random -import pandas import pyarrow as pa import json import warnings from collections.abc import Iterable +import functools from pyspark import _NoValue from pyspark._globals import _NoValueType +from pyspark.util import is_remote_only from pyspark.sql.types import Row, StructType, _create_row from pyspark.sql.dataframe import ( - DataFrame as PySparkDataFrame, - DataFrameNaFunctions as PySparkDataFrameNaFunctions, - DataFrameStatFunctions as PySparkDataFrameStatFunctions, + DataFrame as ParentDataFrame, + DataFrameNaFunctions as ParentDataFrameNaFunctions, + DataFrameStatFunctions as ParentDataFrameStatFunctions, ) from pyspark.errors import ( @@ -71,14 +75,15 @@ from pyspark.sql.connect.group import GroupedData from pyspark.sql.connect.readwriter import DataFrameWriter, DataFrameWriterV2 from pyspark.sql.connect.streaming.readwriter import DataStreamWriter -from pyspark.sql.connect.column import Column +from pyspark.sql.column import Column from pyspark.sql.connect.expressions import ( ColumnReference, UnresolvedRegex, UnresolvedStar, ) from pyspark.sql.connect.functions import builtin as F -from pyspark.sql.pandas.types import from_arrow_schema +from pyspark.sql.pandas.types import from_arrow_schema, to_arrow_schema +from pyspark.sql.pandas.functions import _validate_pandas_udf # type: ignore[attr-defined] if TYPE_CHECKING: @@ -91,12 +96,24 @@ PandasMapIterFunction, ArrowMapIterFunction, ) + from pyspark.core.rdd import RDD + from pyspark.sql.pandas._typing import DataFrameLike as PandasDataFrameLike from pyspark.sql.connect.observation import Observation from pyspark.sql.connect.session import SparkSession from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame + from pyspark.sql.metrics import ExecutionInfo + +class DataFrame(ParentDataFrame): + def __new__( + cls, + plan: plan.LogicalPlan, + session: "SparkSession", + ) -> "DataFrame": + self = object.__new__(cls) + self.__init__(plan, session) # type: ignore[misc] + return self -class DataFrame: def __init__( self, plan: plan.LogicalPlan, @@ -110,7 +127,7 @@ def __init__( message_parameters={"operator": "__init__"}, ) - self._session: "SparkSession" = session + self._session: "SparkSession" = session # type: ignore[assignment] if self._session is None: raise PySparkRuntimeError( error_class="NO_ACTIVE_SESSION", @@ -121,6 +138,7 @@ def __init__( # by __repr__ and _repr_html_ while eager evaluation opens. self._support_repr_html = False self._cached_schema: Optional[StructType] = None + self._execution_info: Optional["ExecutionInfo"] = None def __reduce__(self) -> Tuple: """ @@ -188,30 +206,39 @@ def _repr_html_(self) -> Optional[str]: else: return None - _repr_html_.__doc__ = PySparkDataFrame._repr_html_.__doc__ - @property def write(self) -> "DataFrameWriter": - return DataFrameWriter(self._plan, self._session) + def cb(qe: "ExecutionInfo") -> None: + self._execution_info = qe - write.__doc__ = PySparkDataFrame.write.__doc__ + return DataFrameWriter(self._plan, self._session, cb) + @functools.cache def isEmpty(self) -> bool: return len(self.select().take(1)) == 0 - isEmpty.__doc__ = PySparkDataFrame.isEmpty.__doc__ + @overload + def select(self, *cols: "ColumnOrName") -> ParentDataFrame: + ... + + @overload + def select(self, __cols: Union[List[Column], List[str]]) -> ParentDataFrame: + ... 
- def select(self, *cols: "ColumnOrName") -> "DataFrame": + def select(self, *cols: "ColumnOrName") -> ParentDataFrame: # type: ignore[misc] if len(cols) == 1 and isinstance(cols[0], list): cols = cols[0] + if any(not isinstance(c, (str, Column)) for c in cols): + raise PySparkTypeError( + error_class="NOT_LIST_OF_COLUMN_OR_STR", + message_parameters={"arg_name": "columns"}, + ) return DataFrame( plan.Project(self._plan, [F._to_col(c) for c in cols]), session=self._session, ) - select.__doc__ = PySparkDataFrame.select.__doc__ - - def selectExpr(self, *expr: Union[str, List[str]]) -> "DataFrame": + def selectExpr(self, *expr: Union[str, List[str]]) -> ParentDataFrame: sql_expr = [] if len(expr) == 1 and isinstance(expr[0], list): expr = expr[0] # type: ignore[assignment] @@ -223,9 +250,7 @@ def selectExpr(self, *expr: Union[str, List[str]]) -> "DataFrame": return DataFrame(plan.Project(self._plan, sql_expr), session=self._session) - selectExpr.__doc__ = PySparkDataFrame.selectExpr.__doc__ - - def agg(self, *exprs: Union[Column, Dict[str, str]]) -> "DataFrame": + def agg(self, *exprs: Union[Column, Dict[str, str]]) -> ParentDataFrame: if not exprs: raise PySparkValueError( error_class="CANNOT_BE_EMPTY", @@ -241,87 +266,79 @@ def agg(self, *exprs: Union[Column, Dict[str, str]]) -> "DataFrame": exprs = cast(Tuple[Column, ...], exprs) return self.groupBy().agg(*exprs) - agg.__doc__ = PySparkDataFrame.agg.__doc__ - - def alias(self, alias: str) -> "DataFrame": - return DataFrame(plan.SubqueryAlias(self._plan, alias), session=self._session) - - alias.__doc__ = PySparkDataFrame.alias.__doc__ + def alias(self, alias: str) -> ParentDataFrame: + res = DataFrame(plan.SubqueryAlias(self._plan, alias), session=self._session) + res._cached_schema = self._cached_schema + return res def colRegex(self, colName: str) -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + if not isinstance(colName, str): raise PySparkTypeError( error_class="NOT_STR", message_parameters={"arg_name": "colName", "arg_type": type(colName).__name__}, ) - return Column(UnresolvedRegex(colName, self._plan._plan_id)) - - colRegex.__doc__ = PySparkDataFrame.colRegex.__doc__ + return ConnectColumn(UnresolvedRegex(colName, self._plan._plan_id)) @property def dtypes(self) -> List[Tuple[str, str]]: return [(str(f.name), f.dataType.simpleString()) for f in self.schema.fields] - dtypes.__doc__ = PySparkDataFrame.dtypes.__doc__ - @property def columns(self) -> List[str]: return self.schema.names - columns.__doc__ = PySparkDataFrame.columns.__doc__ - @property def sparkSession(self) -> "SparkSession": return self._session - sparkSession.__doc__ = PySparkDataFrame.sparkSession.__doc__ - def count(self) -> int: - table, _ = self.agg(F._invoke_function("count", F.lit(1)))._to_table() + table, _ = self.agg( + F._invoke_function("count", F.lit(1)) + )._to_table() # type: ignore[operator] return table[0][0].as_py() - count.__doc__ = PySparkDataFrame.count.__doc__ - - def crossJoin(self, other: "DataFrame") -> "DataFrame": + def crossJoin(self, other: ParentDataFrame) -> ParentDataFrame: self._check_same_session(other) return DataFrame( - plan.Join(left=self._plan, right=other._plan, on=None, how="cross"), + plan.Join( + left=self._plan, right=other._plan, on=None, how="cross" # type: ignore[arg-type] + ), session=self._session, ) - crossJoin.__doc__ = PySparkDataFrame.crossJoin.__doc__ - - def _check_same_session(self, other: "DataFrame") -> None: - if self._session.session_id != other._session.session_id: + def 
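One reading of the __new__ override above, stated here as an assumption rather than a description of the parent class: when the parent type's construction involves factory-style dispatch, a subclass can override __new__ and call __init__ explicitly to guarantee it is instantiated directly. A standalone toy sketch of that pattern (Base and Remote are hypothetical names):

class Base:
    # Hypothetical parent whose __new__ acts as a factory.
    def __new__(cls, *args: object, **kwargs: object) -> "Base":
        if cls is Base:
            return object.__new__(Remote)
        return object.__new__(cls)


class Remote(Base):
    # Overriding __new__ and invoking __init__ explicitly, as the Connect
    # DataFrame does above, bypasses the parent's factory logic entirely.
    def __new__(cls, plan: str, session: str) -> "Remote":
        self = object.__new__(cls)
        self.__init__(plan, session)
        return self

    def __init__(self, plan: str, session: str) -> None:
        self.plan = plan
        self.session = session


r = Remote("some-plan", "some-session")
print(type(r).__name__, r.plan)  # Remote some-plan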
_check_same_session(self, other: ParentDataFrame) -> None: + if self._session.session_id != other._session.session_id: # type: ignore[attr-defined] raise SessionNotSameException( error_class="SESSION_NOT_SAME", message_parameters={}, ) - def coalesce(self, numPartitions: int) -> "DataFrame": + def coalesce(self, numPartitions: int) -> ParentDataFrame: if not numPartitions > 0: raise PySparkValueError( error_class="VALUE_NOT_POSITIVE", message_parameters={"arg_name": "numPartitions", "arg_value": str(numPartitions)}, ) - return DataFrame( + res = DataFrame( plan.Repartition(self._plan, num_partitions=numPartitions, shuffle=False), self._session, ) - - coalesce.__doc__ = PySparkDataFrame.coalesce.__doc__ + res._cached_schema = self._cached_schema + return res @overload - def repartition(self, numPartitions: int, *cols: "ColumnOrName") -> "DataFrame": + def repartition(self, numPartitions: int, *cols: "ColumnOrName") -> ParentDataFrame: ... @overload - def repartition(self, *cols: "ColumnOrName") -> "DataFrame": + def repartition(self, *cols: "ColumnOrName") -> ParentDataFrame: ... def repartition( # type: ignore[misc] self, numPartitions: Union[int, "ColumnOrName"], *cols: "ColumnOrName" - ) -> "DataFrame": + ) -> ParentDataFrame: if isinstance(numPartitions, int): if not numPartitions > 0: raise PySparkValueError( @@ -332,12 +349,12 @@ def repartition( # type: ignore[misc] }, ) if len(cols) == 0: - return DataFrame( + res = DataFrame( plan.Repartition(self._plan, numPartitions, shuffle=True), self._session, ) else: - return DataFrame( + res = DataFrame( plan.RepartitionByExpression( self._plan, numPartitions, [F._to_col(c) for c in cols] ), @@ -345,7 +362,7 @@ def repartition( # type: ignore[misc] ) elif isinstance(numPartitions, (str, Column)): cols = (numPartitions,) + cols - return DataFrame( + res = DataFrame( plan.RepartitionByExpression(self._plan, None, [F._to_col(c) for c in cols]), self.sparkSession, ) @@ -358,19 +375,20 @@ def repartition( # type: ignore[misc] }, ) - repartition.__doc__ = PySparkDataFrame.repartition.__doc__ + res._cached_schema = self._cached_schema + return res @overload - def repartitionByRange(self, numPartitions: int, *cols: "ColumnOrName") -> "DataFrame": + def repartitionByRange(self, numPartitions: int, *cols: "ColumnOrName") -> ParentDataFrame: ... @overload - def repartitionByRange(self, *cols: "ColumnOrName") -> "DataFrame": + def repartitionByRange(self, *cols: "ColumnOrName") -> ParentDataFrame: ... 
def repartitionByRange( # type: ignore[misc] self, numPartitions: Union[int, "ColumnOrName"], *cols: "ColumnOrName" - ) -> "DataFrame": + ) -> ParentDataFrame: if isinstance(numPartitions, int): if not numPartitions > 0: raise PySparkValueError( @@ -386,14 +404,14 @@ def repartitionByRange( # type: ignore[misc] message_parameters={"item": "cols"}, ) else: - return DataFrame( + res = DataFrame( plan.RepartitionByExpression( self._plan, numPartitions, [F._sort_col(c) for c in cols] ), self.sparkSession, ) elif isinstance(numPartitions, (str, Column)): - return DataFrame( + res = DataFrame( plan.RepartitionByExpression( self._plan, None, [F._sort_col(c) for c in [numPartitions] + list(cols)] ), @@ -408,58 +426,79 @@ def repartitionByRange( # type: ignore[misc] }, ) - repartitionByRange.__doc__ = PySparkDataFrame.repartitionByRange.__doc__ + res._cached_schema = self._cached_schema + return res - def dropDuplicates(self, subset: Optional[List[str]] = None) -> "DataFrame": - if subset is not None and not isinstance(subset, (list, tuple)): - raise PySparkTypeError( - error_class="NOT_LIST_OR_TUPLE", - message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, - ) + def dropDuplicates(self, *subset: Union[str, List[str]]) -> ParentDataFrame: + # Acceptable args should be str, ... or a single List[str] + # So if subset length is 1, it can be either single str, or a list of str + # if subset length is greater than 1, it must be a sequence of str + if len(subset) > 1: + assert all(isinstance(c, str) for c in subset) - if subset is None: - return DataFrame( + if not subset: + res = DataFrame( plan.Deduplicate(child=self._plan, all_columns_as_keys=True), session=self._session ) + elif len(subset) == 1 and isinstance(subset[0], list): + res = DataFrame( + plan.Deduplicate(child=self._plan, column_names=subset[0]), + session=self._session, + ) else: - return DataFrame( - plan.Deduplicate(child=self._plan, column_names=subset), session=self._session + res = DataFrame( + plan.Deduplicate(child=self._plan, column_names=cast(List[str], subset)), + session=self._session, ) - dropDuplicates.__doc__ = PySparkDataFrame.dropDuplicates.__doc__ + res._cached_schema = self._cached_schema + return res drop_duplicates = dropDuplicates - def dropDuplicatesWithinWatermark(self, subset: Optional[List[str]] = None) -> "DataFrame": - if subset is not None and not isinstance(subset, (list, tuple)): - raise PySparkTypeError( - error_class="NOT_LIST_OR_TUPLE", - message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, - ) + def dropDuplicatesWithinWatermark(self, *subset: Union[str, List[str]]) -> ParentDataFrame: + # Acceptable args should be str, ... 
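With the widened signature above, dropDuplicates accepts the key columns either as a single list or as varargs; with no arguments it still deduplicates on all columns. Usage sketch, assuming a DataFrame `df` with `name` and `age` columns:

# Both spellings deduplicate on the same subset after this change.
df.dropDuplicates(["name", "age"]).show()
df.dropDuplicates("name", "age").show()

# No arguments: deduplicate on every column.
df.dropDuplicates().show()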
or a single List[str] + # So if subset length is 1, it can be either single str, or a list of str + # if subset length is greater than 1, it must be a sequence of str + if len(subset) > 1: + assert all(isinstance(c, str) for c in subset) - if subset is None: + if not subset: return DataFrame( plan.Deduplicate(child=self._plan, all_columns_as_keys=True, within_watermark=True), session=self._session, ) + elif len(subset) == 1 and isinstance(subset[0], list): + return DataFrame( + plan.Deduplicate(child=self._plan, column_names=subset[0], within_watermark=True), + session=self._session, + ) else: return DataFrame( - plan.Deduplicate(child=self._plan, column_names=subset, within_watermark=True), + plan.Deduplicate( + child=self._plan, + column_names=cast(List[str], subset), + within_watermark=True, + ), session=self._session, ) - dropDuplicatesWithinWatermark.__doc__ = PySparkDataFrame.dropDuplicatesWithinWatermark.__doc__ - - drop_duplicates_within_watermark = dropDuplicatesWithinWatermark - - def distinct(self) -> "DataFrame": - return DataFrame( + def distinct(self) -> ParentDataFrame: + res = DataFrame( plan.Deduplicate(child=self._plan, all_columns_as_keys=True), session=self._session ) + res._cached_schema = self._cached_schema + return res - distinct.__doc__ = PySparkDataFrame.distinct.__doc__ + @overload + def drop(self, cols: "ColumnOrName") -> ParentDataFrame: + ... - def drop(self, *cols: "ColumnOrName") -> "DataFrame": + @overload + def drop(self, *cols: str) -> ParentDataFrame: + ... + + def drop(self, *cols: "ColumnOrName") -> ParentDataFrame: # type: ignore[misc] _cols = list(cols) if any(not isinstance(c, (str, Column)) for c in _cols): raise PySparkTypeError( @@ -475,21 +514,25 @@ def drop(self, *cols: "ColumnOrName") -> "DataFrame": session=self._session, ) - drop.__doc__ = PySparkDataFrame.drop.__doc__ - - def filter(self, condition: Union[Column, str]) -> "DataFrame": + def filter(self, condition: Union[Column, str]) -> ParentDataFrame: if isinstance(condition, str): expr = F.expr(condition) else: expr = condition - return DataFrame(plan.Filter(child=self._plan, filter=expr), session=self._session) - - filter.__doc__ = PySparkDataFrame.filter.__doc__ + res = DataFrame(plan.Filter(child=self._plan, filter=expr), session=self._session) + res._cached_schema = self._cached_schema + return res def first(self) -> Optional[Row]: return self.head() - first.__doc__ = PySparkDataFrame.first.__doc__ + @overload # type: ignore[no-overload-impl] + def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": + ... + + @overload + def groupby(self, __cols: Union[List[Column], List[str], List[int]]) -> "GroupedData": + ... def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> GroupedData: if len(cols) == 1 and isinstance(cols[0], list): @@ -516,11 +559,17 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> GroupedData: return GroupedData(df=self, group_type="groupby", grouping_cols=_cols) - groupBy.__doc__ = PySparkDataFrame.groupBy.__doc__ - - groupby = groupBy + groupby = groupBy # type: ignore[assignment] + @overload def rollup(self, *cols: "ColumnOrName") -> "GroupedData": + ... + + @overload + def rollup(self, __cols: Union[List[Column], List[str]]) -> "GroupedData": + ... 
+ + def rollup(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] _cols: List[Column] = [] for c in cols: if isinstance(c, Column): @@ -542,9 +591,15 @@ def rollup(self, *cols: "ColumnOrName") -> "GroupedData": return GroupedData(df=self, group_type="rollup", grouping_cols=_cols) - rollup.__doc__ = PySparkDataFrame.rollup.__doc__ - + @overload def cube(self, *cols: "ColumnOrName") -> "GroupedData": + ... + + @overload + def cube(self, __cols: Union[List[Column], List[str]]) -> "GroupedData": + ... + + def cube(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] _cols: List[Column] = [] for c in cols: if isinstance(c, Column): @@ -566,8 +621,6 @@ def cube(self, *cols: "ColumnOrName") -> "GroupedData": return GroupedData(df=self, group_type="cube", grouping_cols=_cols) - cube.__doc__ = PySparkDataFrame.cube.__doc__ - def groupingSets( self, groupingSets: Sequence[Sequence["ColumnOrName"]], *cols: "ColumnOrName" ) -> "GroupedData": @@ -605,8 +658,6 @@ def groupingSets( df=self, group_type="grouping_sets", grouping_cols=gcols, grouping_sets=gsets ) - groupingSets.__doc__ = PySparkDataFrame.groupingSets.__doc__ - @overload def head(self) -> Optional[Row]: ... @@ -621,32 +672,26 @@ def head(self, n: Optional[int] = None) -> Union[Optional[Row], List[Row]]: return rs[0] if rs else None return self.take(n) - head.__doc__ = PySparkDataFrame.head.__doc__ - def take(self, num: int) -> List[Row]: return self.limit(num).collect() - take.__doc__ = PySparkDataFrame.take.__doc__ - def join( self, - other: "DataFrame", + other: ParentDataFrame, on: Optional[Union[str, List[str], Column, List[Column]]] = None, how: Optional[str] = None, - ) -> "DataFrame": + ) -> ParentDataFrame: self._check_same_session(other) if how is not None and isinstance(how, str): how = how.lower().replace("_", "") return DataFrame( - plan.Join(left=self._plan, right=other._plan, on=on, how=how), + plan.Join(left=self._plan, right=other._plan, on=on, how=how), # type: ignore[arg-type] session=self._session, ) - join.__doc__ = PySparkDataFrame.join.__doc__ - def _joinAsOf( self, - other: "DataFrame", + other: ParentDataFrame, leftAsOfColumn: Union[str, Column], rightAsOfColumn: Union[str, Column], on: Optional[Union[str, List[str], Column, List[Column]]] = None, @@ -655,7 +700,7 @@ def _joinAsOf( tolerance: Optional[Column] = None, allowExactMatches: bool = True, direction: str = "backward", - ) -> "DataFrame": + ) -> ParentDataFrame: self._check_same_session(other) if how is None: how = "inner" @@ -664,16 +709,16 @@ def _joinAsOf( if tolerance is not None: assert isinstance(tolerance, Column), "tolerance should be Column" - def _convert_col(df: "DataFrame", col: "ColumnOrName") -> Column: + def _convert_col(df: ParentDataFrame, col: "ColumnOrName") -> Column: if isinstance(col, Column): return col else: - return df._col(col) + return df._col(col) # type: ignore[operator] return DataFrame( plan.AsOfJoin( left=self._plan, - right=other._plan, + right=other._plan, # type: ignore[arg-type] left_as_of=_convert_col(self, leftAsOfColumn), right_as_of=_convert_col(other, rightAsOfColumn), on=on, @@ -685,18 +730,14 @@ def _convert_col(df: "DataFrame", col: "ColumnOrName") -> Column: session=self._session, ) - _joinAsOf.__doc__ = PySparkDataFrame._joinAsOf.__doc__ - - def limit(self, n: int) -> "DataFrame": - return DataFrame(plan.Limit(child=self._plan, limit=n), session=self._session) - - limit.__doc__ = PySparkDataFrame.limit.__doc__ + def limit(self, n: int) -> ParentDataFrame: + res = 
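The added overloads above preserve both existing call styles for the grouping APIs, varargs and a single list. Usage sketch, assuming a DataFrame `df` with `name` and `age` columns:

from pyspark.sql import functions as F

df.groupBy("name").agg(F.avg("age")).show()
df.groupBy(["name", "age"]).count().show()   # single-list form
df.rollup("name", "age").count().show()      # hierarchical subtotals
df.cube("name", "age").count().show()        # all grouping-set combinations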
DataFrame(plan.Limit(child=self._plan, limit=n), session=self._session) + res._cached_schema = self._cached_schema + return res def tail(self, num: int) -> List[Row]: return DataFrame(plan.Tail(child=self._plan, limit=num), session=self._session).collect() - tail.__doc__ = PySparkDataFrame.tail.__doc__ - def _sort_cols( self, cols: Sequence[Union[int, str, Column, List[Union[int, str, Column]]]], @@ -748,8 +789,8 @@ def sort( self, *cols: Union[int, str, Column, List[Union[int, str, Column]]], **kwargs: Any, - ) -> "DataFrame": - return DataFrame( + ) -> ParentDataFrame: + res = DataFrame( plan.Sort( self._plan, columns=self._sort_cols(cols, kwargs), @@ -757,8 +798,8 @@ def sort( ), session=self._session, ) - - sort.__doc__ = PySparkDataFrame.sort.__doc__ + res._cached_schema = self._cached_schema + return res orderBy = sort @@ -766,8 +807,8 @@ def sortWithinPartitions( self, *cols: Union[int, str, Column, List[Union[int, str, Column]]], **kwargs: Any, - ) -> "DataFrame": - return DataFrame( + ) -> ParentDataFrame: + res = DataFrame( plan.Sort( self._plan, columns=self._sort_cols(cols, kwargs), @@ -775,15 +816,15 @@ def sortWithinPartitions( ), session=self._session, ) - - sortWithinPartitions.__doc__ = PySparkDataFrame.sortWithinPartitions.__doc__ + res._cached_schema = self._cached_schema + return res def sample( self, withReplacement: Optional[Union[float, bool]] = None, fraction: Optional[Union[int, float]] = None, seed: Optional[int] = None, - ) -> "DataFrame": + ) -> ParentDataFrame: # For the cases below: # sample(True, 0.5 [, seed]) # sample(True, fraction=0.5 [, seed]) @@ -822,9 +863,9 @@ def sample( if withReplacement is None: withReplacement = False - seed = int(seed) if seed is not None else None + seed = int(seed) if seed is not None else random.randint(0, sys.maxsize) - return DataFrame( + res = DataFrame( plan.Sample( child=self._plan, lower_bound=0.0, @@ -834,15 +875,13 @@ def sample( ), session=self._session, ) + res._cached_schema = self._cached_schema + return res - sample.__doc__ = PySparkDataFrame.sample.__doc__ - - def withColumnRenamed(self, existing: str, new: str) -> "DataFrame": + def withColumnRenamed(self, existing: str, new: str) -> ParentDataFrame: return self.withColumnsRenamed({existing: new}) - withColumnRenamed.__doc__ = PySparkDataFrame.withColumnRenamed.__doc__ - - def withColumnsRenamed(self, colsMap: Dict[str, str]) -> "DataFrame": + def withColumnsRenamed(self, colsMap: Dict[str, str]) -> ParentDataFrame: if not isinstance(colsMap, dict): raise PySparkTypeError( error_class="NOT_DICT", @@ -851,8 +890,6 @@ def withColumnsRenamed(self, colsMap: Dict[str, str]) -> "DataFrame": return DataFrame(plan.WithColumnsRenamed(self._plan, colsMap), self._session) - withColumnsRenamed.__doc__ = PySparkDataFrame.withColumnsRenamed.__doc__ - def _show_string( self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False ) -> str: @@ -893,7 +930,7 @@ def _show_string( )._to_table() return table[0][0].as_py() - def withColumns(self, colsMap: Dict[str, Column]) -> "DataFrame": + def withColumns(self, colsMap: Dict[str, Column]) -> ParentDataFrame: if not isinstance(colsMap, dict): raise PySparkTypeError( error_class="NOT_DICT", @@ -915,9 +952,7 @@ def withColumns(self, colsMap: Dict[str, Column]) -> "DataFrame": session=self._session, ) - withColumns.__doc__ = PySparkDataFrame.withColumns.__doc__ - - def withColumn(self, colName: str, col: Column) -> "DataFrame": + def withColumn(self, colName: str, col: Column) -> ParentDataFrame: if not isinstance(col, 
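Note the sample() change above: when no seed is passed, one is now drawn on the client while the plan is built, which I read as keeping re-executions of that same DataFrame consistent with each other; pass an explicit seed for reproducibility across separately built plans. Usage sketch, assuming a DataFrame `df`:

# Unseeded: a seed is chosen once, client-side, when the plan is constructed.
s1 = df.sample(fraction=0.1)

# Seeded: reproducible even across separately constructed plans.
s2 = df.sample(fraction=0.1, seed=42)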
Column): raise PySparkTypeError( error_class="NOT_COLUMN", @@ -932,9 +967,7 @@ def withColumn(self, colName: str, col: Column) -> "DataFrame": session=self._session, ) - withColumn.__doc__ = PySparkDataFrame.withColumn.__doc__ - - def withMetadata(self, columnName: str, metadata: Dict[str, Any]) -> "DataFrame": + def withMetadata(self, columnName: str, metadata: Dict[str, Any]) -> ParentDataFrame: if not isinstance(metadata, dict): raise PySparkTypeError( error_class="NOT_DICT", @@ -951,15 +984,13 @@ def withMetadata(self, columnName: str, metadata: Dict[str, Any]) -> "DataFrame" session=self._session, ) - withMetadata.__doc__ = PySparkDataFrame.withMetadata.__doc__ - def unpivot( self, ids: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]], values: Optional[Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]]], variableColumnName: str, valueColumnName: str, - ) -> "DataFrame": + ) -> ParentDataFrame: assert ids is not None, "ids must not be None" def _convert_cols( @@ -983,11 +1014,9 @@ def _convert_cols( self._session, ) - unpivot.__doc__ = PySparkDataFrame.unpivot.__doc__ - melt = unpivot - def withWatermark(self, eventTime: str, delayThreshold: str) -> "DataFrame": + def withWatermark(self, eventTime: str, delayThreshold: str) -> ParentDataFrame: # TODO: reuse error handling code in sql.DataFrame.withWatermark() if not eventTime or type(eventTime) is not str: raise PySparkTypeError( @@ -1012,11 +1041,9 @@ def withWatermark(self, eventTime: str, delayThreshold: str) -> "DataFrame": session=self._session, ) - withWatermark.__doc__ = PySparkDataFrame.withWatermark.__doc__ - def hint( self, name: str, *parameters: Union["PrimitiveType", "Column", List["PrimitiveType"]] - ) -> "DataFrame": + ) -> ParentDataFrame: if len(parameters) == 1 and isinstance(parameters[0], list): parameters = parameters[0] # type: ignore[assignment] @@ -1053,18 +1080,18 @@ def hint( }, ) - return DataFrame( + res = DataFrame( plan.Hint(self._plan, name, [F.lit(p) for p in list(parameters)]), session=self._session, ) - - hint.__doc__ = PySparkDataFrame.hint.__doc__ + res._cached_schema = self._cached_schema + return res def randomSplit( self, weights: List[float], seed: Optional[int] = None, - ) -> List["DataFrame"]: + ) -> List[ParentDataFrame]: for w in weights: if w < 0.0: raise PySparkValueError( @@ -1099,18 +1126,17 @@ def randomSplit( ), session=self._session, ) + samplePlan._cached_schema = self._cached_schema splits.append(samplePlan) j += 1 - return splits - - randomSplit.__doc__ = PySparkDataFrame.randomSplit.__doc__ + return splits # type: ignore[return-value] def observe( self, observation: Union["Observation", str], *exprs: Column, - ) -> "DataFrame": + ) -> ParentDataFrame: from pyspark.sql.connect.observation import Observation if len(exprs) == 0: @@ -1125,9 +1151,9 @@ def observe( ) if isinstance(observation, Observation): - return observation._on(self, *exprs) + res = observation._on(self, *exprs) elif isinstance(observation, str): - return DataFrame( + res = DataFrame( plan.CollectMetrics(self._plan, observation, list(exprs)), self._session, ) @@ -1140,78 +1166,96 @@ def observe( }, ) - observe.__doc__ = PySparkDataFrame.observe.__doc__ + res._cached_schema = self._cached_schema + return res def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False) -> None: print(self._show_string(n, truncate, vertical)) - show.__doc__ = PySparkDataFrame.show.__doc__ + def _merge_cached_schema(self, other: ParentDataFrame) -> 
Optional[StructType]: + # to avoid type coercion, only propagate the schema + # when the cached schemas are exactly the same + if self._cached_schema is not None and self._cached_schema == other._cached_schema: + return self.schema + return None - def union(self, other: "DataFrame") -> "DataFrame": + def union(self, other: ParentDataFrame) -> ParentDataFrame: self._check_same_session(other) return self.unionAll(other) - union.__doc__ = PySparkDataFrame.union.__doc__ - - def unionAll(self, other: "DataFrame") -> "DataFrame": + def unionAll(self, other: ParentDataFrame) -> ParentDataFrame: self._check_same_session(other) - return DataFrame( - plan.SetOperation(self._plan, other._plan, "union", is_all=True), session=self._session + res = DataFrame( + plan.SetOperation( + self._plan, other._plan, "union", is_all=True # type: ignore[arg-type] + ), + session=self._session, ) + res._cached_schema = self._merge_cached_schema(other) + return res - unionAll.__doc__ = PySparkDataFrame.unionAll.__doc__ - - def unionByName(self, other: "DataFrame", allowMissingColumns: bool = False) -> "DataFrame": + def unionByName( + self, other: ParentDataFrame, allowMissingColumns: bool = False + ) -> ParentDataFrame: self._check_same_session(other) - return DataFrame( + res = DataFrame( plan.SetOperation( self._plan, - other._plan, + other._plan, # type: ignore[arg-type] "union", by_name=True, allow_missing_columns=allowMissingColumns, ), session=self._session, ) + res._cached_schema = self._merge_cached_schema(other) + return res - unionByName.__doc__ = PySparkDataFrame.unionByName.__doc__ - - def subtract(self, other: "DataFrame") -> "DataFrame": + def subtract(self, other: ParentDataFrame) -> ParentDataFrame: self._check_same_session(other) - return DataFrame( - plan.SetOperation(self._plan, other._plan, "except", is_all=False), + res = DataFrame( + plan.SetOperation( + self._plan, other._plan, "except", is_all=False # type: ignore[arg-type] + ), session=self._session, ) + res._cached_schema = self._merge_cached_schema(other) + return res - subtract.__doc__ = PySparkDataFrame.subtract.__doc__ - - def exceptAll(self, other: "DataFrame") -> "DataFrame": + def exceptAll(self, other: ParentDataFrame) -> ParentDataFrame: self._check_same_session(other) - return DataFrame( - plan.SetOperation(self._plan, other._plan, "except", is_all=True), session=self._session + res = DataFrame( + plan.SetOperation( + self._plan, other._plan, "except", is_all=True # type: ignore[arg-type] + ), + session=self._session, ) + res._cached_schema = self._merge_cached_schema(other) + return res - exceptAll.__doc__ = PySparkDataFrame.exceptAll.__doc__ - - def intersect(self, other: "DataFrame") -> "DataFrame": + def intersect(self, other: ParentDataFrame) -> ParentDataFrame: self._check_same_session(other) - return DataFrame( - plan.SetOperation(self._plan, other._plan, "intersect", is_all=False), + res = DataFrame( + plan.SetOperation( + self._plan, other._plan, "intersect", is_all=False # type: ignore[arg-type] + ), session=self._session, ) + res._cached_schema = self._merge_cached_schema(other) + return res - intersect.__doc__ = PySparkDataFrame.intersect.__doc__ - - def intersectAll(self, other: "DataFrame") -> "DataFrame": + def intersectAll(self, other: ParentDataFrame) -> ParentDataFrame: self._check_same_session(other) - return DataFrame( - plan.SetOperation(self._plan, other._plan, "intersect", is_all=True), + res = DataFrame( + plan.SetOperation( + self._plan, other._plan, "intersect", is_all=True # type: ignore[arg-type] + ), 
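The set operations above only keep a cached schema when both inputs cached exactly the same one; anything else falls back to a server round trip, since the result type may involve coercion. A minimal standalone sketch of that rule, with plain dicts standing in for StructType:

from typing import Optional


def merge_cached_schema(left: Optional[dict], right: Optional[dict]) -> Optional[dict]:
    # Propagate only on exact equality; a mismatch (e.g. int vs double column)
    # means the server may coerce types, so the schema must be re-analyzed.
    if left is not None and left == right:
        return left
    return None


print(merge_cached_schema({"a": "int"}, {"a": "int"}))     # {'a': 'int'}
print(merge_cached_schema({"a": "int"}, {"a": "double"}))  # None
print(merge_cached_schema(None, None))                     # None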
session=self._session, ) + res._cached_schema = self._merge_cached_schema(other) + return res - intersectAll.__doc__ = PySparkDataFrame.intersectAll.__doc__ - - def where(self, condition: Union[Column, str]) -> "DataFrame": + def where(self, condition: Union[Column, str]) -> ParentDataFrame: if not isinstance(condition, (str, Column)): raise PySparkTypeError( error_class="NOT_COLUMN_OR_STR", @@ -1219,19 +1263,15 @@ def where(self, condition: Union[Column, str]) -> "DataFrame": ) return self.filter(condition) - where.__doc__ = PySparkDataFrame.where.__doc__ - @property - def na(self) -> "DataFrameNaFunctions": + def na(self) -> ParentDataFrameNaFunctions: return DataFrameNaFunctions(self) - na.__doc__ = PySparkDataFrame.na.__doc__ - def fillna( self, value: Union["LiteralType", Dict[str, "LiteralType"]], subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, - ) -> "DataFrame": + ) -> ParentDataFrame: if not isinstance(value, (float, int, str, bool, dict)): raise PySparkTypeError( error_class="NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_STR", @@ -1290,14 +1330,12 @@ def fillna( session=self._session, ) - fillna.__doc__ = PySparkDataFrame.fillna.__doc__ - def dropna( self, how: str = "any", thresh: Optional[int] = None, subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, - ) -> "DataFrame": + ) -> ParentDataFrame: min_non_nulls: Optional[int] = None if how is not None: @@ -1349,8 +1387,6 @@ def dropna( session=self._session, ) - dropna.__doc__ = PySparkDataFrame.dropna.__doc__ - def replace( self, to_replace: Union[ @@ -1360,7 +1396,7 @@ def replace( Union["OptionalPrimitiveType", List["OptionalPrimitiveType"], _NoValueType] ] = _NoValue, subset: Optional[List[str]] = None, - ) -> "DataFrame": + ) -> ParentDataFrame: if value is _NoValue: if isinstance(to_replace, dict): value = None @@ -1478,15 +1514,11 @@ def _convert_int_to_float(v: Any) -> Any: session=self._session, ) - replace.__doc__ = PySparkDataFrame.replace.__doc__ - @property - def stat(self) -> "DataFrameStatFunctions": + def stat(self) -> ParentDataFrameStatFunctions: return DataFrameStatFunctions(self) - stat.__doc__ = PySparkDataFrame.stat.__doc__ - - def summary(self, *statistics: str) -> "DataFrame": + def summary(self, *statistics: str) -> ParentDataFrame: _statistics: List[str] = list(statistics) for s in _statistics: if not isinstance(s, str): @@ -1499,9 +1531,7 @@ def summary(self, *statistics: str) -> "DataFrame": session=self._session, ) - summary.__doc__ = PySparkDataFrame.summary.__doc__ - - def describe(self, *cols: Union[str, List[str]]) -> "DataFrame": + def describe(self, *cols: Union[str, List[str]]) -> ParentDataFrame: if len(cols) == 1 and isinstance(cols[0], list): cols = cols[0] # type: ignore[assignment] @@ -1516,8 +1546,6 @@ def describe(self, *cols: Union[str, List[str]]) -> "DataFrame": session=self._session, ) - describe.__doc__ = PySparkDataFrame.describe.__doc__ - def cov(self, col1: str, col2: str) -> float: if not isinstance(col1, str): raise PySparkTypeError( @@ -1535,8 +1563,6 @@ def cov(self, col1: str, col2: str) -> float: )._to_table() return table[0][0].as_py() - cov.__doc__ = PySparkDataFrame.cov.__doc__ - def corr(self, col1: str, col2: str, method: Optional[str] = None) -> float: if not isinstance(col1, str): raise PySparkTypeError( @@ -1561,8 +1587,6 @@ def corr(self, col1: str, col2: str, method: Optional[str] = None) -> float: )._to_table() return table[0][0].as_py() - corr.__doc__ = PySparkDataFrame.corr.__doc__ - def approxQuantile( self, col: Union[str, List[str], 
Tuple[str]], @@ -1639,9 +1663,7 @@ def approxQuantile( jaq_list = [list(j) for j in jaq] return jaq_list[0] if isStr else jaq_list - approxQuantile.__doc__ = PySparkDataFrame.approxQuantile.__doc__ - - def crosstab(self, col1: str, col2: str) -> "DataFrame": + def crosstab(self, col1: str, col2: str) -> ParentDataFrame: if not isinstance(col1, str): raise PySparkTypeError( error_class="NOT_STR", @@ -1657,11 +1679,9 @@ def crosstab(self, col1: str, col2: str) -> "DataFrame": session=self._session, ) - crosstab.__doc__ = PySparkDataFrame.crosstab.__doc__ - def freqItems( self, cols: Union[List[str], Tuple[str]], support: Optional[float] = None - ) -> "DataFrame": + ) -> ParentDataFrame: if isinstance(cols, tuple): cols = list(cols) if not isinstance(cols, list): @@ -1676,11 +1696,9 @@ def freqItems( session=self._session, ) - freqItems.__doc__ = PySparkDataFrame.freqItems.__doc__ - def sampleBy( self, col: "ColumnOrName", fractions: Dict[Any, float], seed: Optional[int] = None - ) -> "DataFrame": + ) -> ParentDataFrame: if not isinstance(col, (str, Column)): raise PySparkTypeError( error_class="NOT_COLUMN_OR_STR", @@ -1717,21 +1735,27 @@ def sampleBy( session=self._session, ) - sampleBy.__doc__ = PySparkDataFrame.sampleBy.__doc__ + def _ipython_key_completions_(self) -> List[str]: + """Returns the names of columns in this :class:`DataFrame`. + + Examples + -------- + >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age", "name"]) + >>> df._ipython_key_completions_() + ['age', 'name'] + + Would return illegal identifiers. + >>> df = spark.createDataFrame([(2, "Alice"), (5, "Bob")], ["age 1", "name?1"]) + >>> df._ipython_key_completions_() + ['age 1', 'name?1'] + """ + return self.columns def __getattr__(self, name: str) -> "Column": if name in ["_jseq", "_jdf", "_jmap", "_jcols", "rdd", "toJSON"]: raise PySparkAttributeError( error_class="JVM_ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": name} ) - elif name in [ - "checkpoint", - "localCheckpoint", - ]: - raise PySparkNotImplementedError( - error_class="NOT_IMPLEMENTED", - message_parameters={"feature": f"{name}()"}, - ) if name not in self.columns: raise PySparkAttributeError( @@ -1740,20 +1764,22 @@ def __getattr__(self, name: str) -> "Column": return self._col(name) - __getattr__.__doc__ = PySparkDataFrame.__getattr__.__doc__ - @overload def __getitem__(self, item: Union[int, str]) -> Column: ... @overload - def __getitem__(self, item: Union[Column, List, Tuple]) -> "DataFrame": + def __getitem__(self, item: Union[Column, List, Tuple]) -> ParentDataFrame: ... 
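The _ipython_key_completions_ hook added above is the standard IPython protocol for completing keys inside df["..."], which is why it can return names that are not valid attribute identifiers. A minimal standalone sketch of the protocol:

from typing import List


class ToyFrame:
    def __init__(self, columns: List[str]) -> None:
        self.columns = list(columns)

    def __getitem__(self, key: str) -> str:
        return f"column<{key}>"

    def _ipython_key_completions_(self) -> List[str]:
        # IPython consults this method when completing toy["<TAB>"].
        return self.columns


tf = ToyFrame(["age 1", "name?1"])
print(tf._ipython_key_completions_())  # ['age 1', 'name?1']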
- def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Column, "DataFrame"]: + def __getitem__( + self, item: Union[int, str, Column, List, Tuple] + ) -> Union[Column, ParentDataFrame]: + from pyspark.sql.connect.column import Column as ConnectColumn + if isinstance(item, str): if item == "*": - return Column( + return ConnectColumn( UnresolvedStar( unparsed_target=None, plan_id=self._plan._plan_id, @@ -1764,7 +1790,7 @@ def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Colum # if (sparkSession.sessionState.conf.supportQuotedRegexColumnName) { # colRegex(colName) # } else { - # Column(addDataFrameIdToCol(resolve(colName))) + # ConnectColumn(addDataFrameIdToCol(resolve(colName))) # } # validate the column name @@ -1790,7 +1816,9 @@ def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Colum ) def _col(self, name: str) -> Column: - return Column( + from pyspark.sql.connect.column import Column as ConnectColumn + + return ConnectColumn( ColumnReference( unparsed_identifier=name, plan_id=self._plan._plan_id, @@ -1798,34 +1826,40 @@ def _col(self, name: str) -> Column: ) def __dir__(self) -> List[str]: - attrs = set(super().__dir__()) + attrs = set(dir(DataFrame)) attrs.update(self.columns) return sorted(attrs) - __dir__.__doc__ = PySparkDataFrame.__dir__.__doc__ - def collect(self) -> List[Row]: table, schema = self._to_table() - schema = schema or from_arrow_schema(table.schema, prefer_timestamp_ntz=True) + # not all datatypes are supported in arrow based collect + # here always verify the schema by from_arrow_schema + schema2 = from_arrow_schema(table.schema, prefer_timestamp_ntz=True) + schema = schema or schema2 assert schema is not None and isinstance(schema, StructType) return ArrowTableToRowsConversion.convert(table, schema) - collect.__doc__ = PySparkDataFrame.collect.__doc__ - def _to_table(self) -> Tuple["pa.Table", Optional[StructType]]: query = self._plan.to_proto(self._session.client) - table, schema = self._session.client.to_table(query, self._plan.observations) + table, schema, self._execution_info = self._session.client.to_table( + query, self._plan.observations + ) assert table is not None return (table, schema) - def toPandas(self) -> "pandas.DataFrame": - query = self._plan.to_proto(self._session.client) - return self._session.client.to_pandas(query, self._plan.observations) + def toArrow(self) -> "pa.Table": + schema = to_arrow_schema(self.schema, error_on_duplicated_field_names_in_struct=True) + table, _ = self._to_table() + return table.cast(schema) - toPandas.__doc__ = PySparkDataFrame.toPandas.__doc__ + def toPandas(self) -> "PandasDataFrameLike": + query = self._plan.to_proto(self._session.client) + pdf, ei = self._session.client.to_pandas(query, self._plan.observations) + self._execution_info = ei + return pdf @property def schema(self) -> StructType: @@ -1837,58 +1871,45 @@ def schema(self) -> StructType: if self._cached_schema is None: query = self._plan.to_proto(self._session.client) self._cached_schema = self._session.client.schema(query) - return self._cached_schema - - schema.__doc__ = PySparkDataFrame.schema.__doc__ + return copy.deepcopy(self._cached_schema) + @functools.cache def isLocal(self) -> bool: query = self._plan.to_proto(self._session.client) result = self._session.client._analyze(method="is_local", plan=query).is_local assert result is not None return result - isLocal.__doc__ = PySparkDataFrame.isLocal.__doc__ - - @property + @functools.cached_property def isStreaming(self) -> bool: 
query = self._plan.to_proto(self._session.client) result = self._session.client._analyze(method="is_streaming", plan=query).is_streaming assert result is not None return result - isStreaming.__doc__ = PySparkDataFrame.isStreaming.__doc__ - - def _tree_string(self, level: Optional[int] = None) -> str: - query = self._plan.to_proto(self._session.client) - result = self._session.client._analyze( - method="tree_string", plan=query, level=level - ).tree_string - assert result is not None - return result - def printSchema(self, level: Optional[int] = None) -> None: - print(self._tree_string(level)) - - printSchema.__doc__ = PySparkDataFrame.printSchema.__doc__ + if level: + print(self.schema.treeString(level)) + else: + print(self.schema.treeString()) + @functools.cache def inputFiles(self) -> List[str]: query = self._plan.to_proto(self._session.client) result = self._session.client._analyze(method="input_files", plan=query).input_files assert result is not None return result - inputFiles.__doc__ = PySparkDataFrame.inputFiles.__doc__ - - def to(self, schema: StructType) -> "DataFrame": + def to(self, schema: StructType) -> ParentDataFrame: assert schema is not None - return DataFrame( + res = DataFrame( plan.ToSchema(child=self._plan, schema=schema), session=self._session, ) + res._cached_schema = schema + return res - to.__doc__ = PySparkDataFrame.to.__doc__ - - def toDF(self, *cols: str) -> "DataFrame": + def toDF(self, *cols: str) -> ParentDataFrame: for col_ in cols: if not isinstance(col_, str): raise PySparkTypeError( @@ -1897,17 +1918,15 @@ def toDF(self, *cols: str) -> "DataFrame": ) return DataFrame(plan.ToDF(self._plan, list(cols)), self._session) - toDF.__doc__ = PySparkDataFrame.toDF.__doc__ - - def transform(self, func: Callable[..., "DataFrame"], *args: Any, **kwargs: Any) -> "DataFrame": + def transform( + self, func: Callable[..., ParentDataFrame], *args: Any, **kwargs: Any + ) -> ParentDataFrame: result = func(self, *args, **kwargs) assert isinstance( result, DataFrame ), "Func returned an instance of type [%s], " "should have been DataFrame." 
% type(result) return result - transform.__doc__ = PySparkDataFrame.transform.__doc__ - def _explain_string( self, extended: Optional[Union[bool, str]] = None, mode: Optional[str] = None ) -> str: @@ -1961,57 +1980,47 @@ def explain( ) -> None: print(self._explain_string(extended=extended, mode=mode)) - explain.__doc__ = PySparkDataFrame.explain.__doc__ - def createTempView(self, name: str) -> None: command = plan.CreateView( child=self._plan, name=name, is_global=False, replace=False ).command(session=self._session.client) - self._session.client.execute_command(command, self._plan.observations) - - createTempView.__doc__ = PySparkDataFrame.createTempView.__doc__ + _, _, ei = self._session.client.execute_command(command, self._plan.observations) + self._execution_info = ei def createOrReplaceTempView(self, name: str) -> None: command = plan.CreateView( child=self._plan, name=name, is_global=False, replace=True ).command(session=self._session.client) - self._session.client.execute_command(command, self._plan.observations) - - createOrReplaceTempView.__doc__ = PySparkDataFrame.createOrReplaceTempView.__doc__ + _, _, ei = self._session.client.execute_command(command, self._plan.observations) + self._execution_info = ei def createGlobalTempView(self, name: str) -> None: command = plan.CreateView( child=self._plan, name=name, is_global=True, replace=False ).command(session=self._session.client) - self._session.client.execute_command(command, self._plan.observations) - - createGlobalTempView.__doc__ = PySparkDataFrame.createGlobalTempView.__doc__ + _, _, ei = self._session.client.execute_command(command, self._plan.observations) + self._execution_info = ei def createOrReplaceGlobalTempView(self, name: str) -> None: command = plan.CreateView( child=self._plan, name=name, is_global=True, replace=True ).command(session=self._session.client) - self._session.client.execute_command(command, self._plan.observations) - - createOrReplaceGlobalTempView.__doc__ = PySparkDataFrame.createOrReplaceGlobalTempView.__doc__ + _, _, ei = self._session.client.execute_command(command, self._plan.observations) + self._execution_info = ei - def cache(self) -> "DataFrame": + def cache(self) -> ParentDataFrame: return self.persist() - cache.__doc__ = PySparkDataFrame.cache.__doc__ - def persist( self, storageLevel: StorageLevel = (StorageLevel.MEMORY_AND_DISK_DESER), - ) -> "DataFrame": + ) -> ParentDataFrame: relation = self._plan.plan(self._session.client) self._session.client._analyze( method="persist", relation=relation, storage_level=storageLevel ) return self - persist.__doc__ = PySparkDataFrame.persist.__doc__ - @property def storageLevel(self) -> StorageLevel: relation = self._plan.plan(self._session.client) @@ -2021,15 +2030,11 @@ def storageLevel(self) -> StorageLevel: assert storage_level is not None return storage_level - storageLevel.__doc__ = PySparkDataFrame.storageLevel.__doc__ - - def unpersist(self, blocking: bool = False) -> "DataFrame": + def unpersist(self, blocking: bool = False) -> ParentDataFrame: relation = self._plan.plan(self._session.client) self._session.client._analyze(method="unpersist", relation=relation, blocking=blocking) return self - unpersist.__doc__ = PySparkDataFrame.unpivot.__doc__ - @property def is_cached(self) -> bool: return self.storageLevel != StorageLevel.NONE @@ -2051,8 +2056,6 @@ def toLocalIterator(self, prefetchPartitions: bool = False) -> Iterator[Row]: schema = from_arrow_schema(table.schema, prefer_timestamp_ntz=True) yield from 
ArrowTableToRowsConversion.convert(table, schema) - toLocalIterator.__doc__ = PySparkDataFrame.toLocalIterator.__doc__ - def pandas_api( self, index_col: Optional[Union[str, List[str]]] = None ) -> "PandasOnSparkDataFrame": @@ -2060,22 +2063,18 @@ def pandas_api( from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame from pyspark.pandas.internal import InternalFrame - index_spark_columns, index_names = _get_index_map(self, index_col) # type: ignore[arg-type] + index_spark_columns, index_names = _get_index_map(self, index_col) internal = InternalFrame( - spark_frame=self, # type: ignore[arg-type] + spark_frame=self, index_spark_columns=index_spark_columns, index_names=index_names, # type: ignore[arg-type] ) return PandasOnSparkDataFrame(internal) - pandas_api.__doc__ = PySparkDataFrame.pandas_api.__doc__ - def registerTempTable(self, name: str) -> None: warnings.warn("Deprecated in 2.0, use createOrReplaceTempView instead.", FutureWarning) self.createOrReplaceTempView(name) - registerTempTable.__doc__ = PySparkDataFrame.registerTempTable.__doc__ - def _map_partitions( self, func: "PandasMapIterFunction", @@ -2083,16 +2082,17 @@ def _map_partitions( evalType: int, barrier: bool, profile: Optional[ResourceProfile], - ) -> "DataFrame": + ) -> ParentDataFrame: from pyspark.sql.connect.udf import UserDefinedFunction + _validate_pandas_udf(func, evalType) udf_obj = UserDefinedFunction( func, returnType=schema, evalType=evalType, ) - return DataFrame( + res = DataFrame( plan.MapPartitions( child=self._plan, function=udf_obj, @@ -2102,6 +2102,9 @@ def _map_partitions( ), session=self._session, ) + if isinstance(schema, StructType): + res._cached_schema = schema + return res def mapInPandas( self, @@ -2109,26 +2112,22 @@ def mapInPandas( schema: Union[StructType, str], barrier: bool = False, profile: Optional[ResourceProfile] = None, - ) -> "DataFrame": + ) -> ParentDataFrame: return self._map_partitions( func, schema, PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, barrier, profile ) - mapInPandas.__doc__ = PySparkDataFrame.mapInPandas.__doc__ - def mapInArrow( self, func: "ArrowMapIterFunction", schema: Union[StructType, str], barrier: bool = False, profile: Optional[ResourceProfile] = None, - ) -> "DataFrame": + ) -> ParentDataFrame: return self._map_partitions( func, schema, PythonEvalType.SQL_MAP_ARROW_ITER_UDF, barrier, profile ) - mapInArrow.__doc__ = PySparkDataFrame.mapInArrow.__doc__ - def foreach(self, f: Callable[[Row], None]) -> None: def foreach_func(row: Any) -> None: f(row) @@ -2137,8 +2136,6 @@ def foreach_func(row: Any) -> None: F.udf(foreach_func, StructType())("row") # type: ignore[arg-type] ).collect() - foreach.__doc__ = PySparkDataFrame.foreach.__doc__ - def foreachPartition(self, f: Callable[[Iterator[Row]], None]) -> None: schema = self.schema field_converters = [ @@ -2161,15 +2158,11 @@ def flatten() -> Iterator[Row]: self.mapInArrow(foreach_partition_func, schema=StructType()).collect() - foreachPartition.__doc__ = PySparkDataFrame.foreachPartition.__doc__ - @property def writeStream(self) -> DataStreamWriter: return DataStreamWriter(plan=self._plan, session=self._session) - writeStream.__doc__ = PySparkDataFrame.writeStream.__doc__ - - def sameSemantics(self, other: "DataFrame") -> bool: + def sameSemantics(self, other: ParentDataFrame) -> bool: if not isinstance(other, DataFrame): raise PySparkTypeError( error_class="NOT_DATAFRAME", @@ -2181,50 +2174,80 @@ def sameSemantics(self, other: "DataFrame") -> bool: other=other._plan.to_proto(other._session.client), ) - 
sameSemantics.__doc__ = PySparkDataFrame.sameSemantics.__doc__ - + @functools.cache def semanticHash(self) -> int: return self._session.client.semantic_hash( plan=self._plan.to_proto(self._session.client), ) - semanticHash.__doc__ = PySparkDataFrame.semanticHash.__doc__ - def writeTo(self, table: str) -> "DataFrameWriterV2": - return DataFrameWriterV2(self._plan, self._session, table) + def cb(ei: "ExecutionInfo") -> None: + self._execution_info = ei - writeTo.__doc__ = PySparkDataFrame.writeTo.__doc__ + return DataFrameWriterV2(self._plan, self._session, table, cb) - # SparkConnect specific API - def offset(self, n: int) -> "DataFrame": + def offset(self, n: int) -> ParentDataFrame: return DataFrame(plan.Offset(child=self._plan, offset=n), session=self._session) - offset.__doc__ = PySparkDataFrame.offset.__doc__ + def checkpoint(self, eager: bool = True) -> "DataFrame": + cmd = plan.Checkpoint(child=self._plan, local=False, eager=eager) + _, properties, self._execution_info = self._session.client.execute_command( + cmd.command(self._session.client) + ) + assert "checkpoint_command_result" in properties + checkpointed = properties["checkpoint_command_result"] + assert isinstance(checkpointed._plan, plan.CachedRemoteRelation) + return checkpointed + + def localCheckpoint(self, eager: bool = True) -> "DataFrame": + cmd = plan.Checkpoint(child=self._plan, local=True, eager=eager) + _, properties, self._execution_info = self._session.client.execute_command( + cmd.command(self._session.client) + ) + assert "checkpoint_command_result" in properties + checkpointed = properties["checkpoint_command_result"] + assert isinstance(checkpointed._plan, plan.CachedRemoteRelation) + return checkpointed + + if not is_remote_only(): + + def toJSON(self, use_unicode: bool = True) -> "RDD[str]": + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "toJSON()"}, + ) + + @property + def rdd(self) -> "RDD[Row]": + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "rdd"}, + ) + + @property + def executionInfo(self) -> Optional["ExecutionInfo"]: + return self._execution_info -class DataFrameNaFunctions: - def __init__(self, df: DataFrame): +class DataFrameNaFunctions(ParentDataFrameNaFunctions): + def __init__(self, df: ParentDataFrame): self.df = df def fill( self, value: Union["LiteralType", Dict[str, "LiteralType"]], subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, - ) -> DataFrame: - return self.df.fillna(value=value, subset=subset) - - fill.__doc__ = DataFrame.fillna.__doc__ + ) -> ParentDataFrame: + return self.df.fillna(value=value, subset=subset) # type: ignore[arg-type] def drop( self, how: str = "any", thresh: Optional[int] = None, subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, - ) -> DataFrame: + ) -> ParentDataFrame: return self.df.dropna(how=how, thresh=thresh, subset=subset) - drop.__doc__ = DataFrame.dropna.__doc__ - def replace( self, to_replace: Union[List["LiteralType"], Dict["LiteralType", "OptionalPrimitiveType"]], @@ -2232,29 +2255,20 @@ def replace( Union["OptionalPrimitiveType", List["OptionalPrimitiveType"], _NoValueType] ] = _NoValue, subset: Optional[List[str]] = None, - ) -> DataFrame: - return self.df.replace(to_replace, value, subset) - - replace.__doc__ = DataFrame.replace.__doc__ - + ) -> ParentDataFrame: + return self.df.replace(to_replace, value, subset) # type: ignore[arg-type] -DataFrameNaFunctions.__doc__ = PySparkDataFrameNaFunctions.__doc__ - 
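# A minimal usage sketch of the checkpoint()/localCheckpoint() and executionInfo
# additions above, not part of the patch itself. It assumes a reachable Spark Connect
# endpoint ("sc://localhost" is a placeholder) and that pandas/pyarrow are installed.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()
df = spark.range(10)

# localCheckpoint() sends a CheckpointCommand; per the change above, the returned
# DataFrame is backed by a CachedRemoteRelation held on the server side.
checkpointed = df.localCheckpoint(eager=True)

# executionInfo is populated by actions such as toPandas()/collect(); before any
# action it would presumably still be unset.
pdf = checkpointed.toPandas()
print(checkpointed.executionInfo)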
-class DataFrameStatFunctions: - def __init__(self, df: DataFrame): +class DataFrameStatFunctions(ParentDataFrameStatFunctions): + def __init__(self, df: ParentDataFrame): self.df = df def cov(self, col1: str, col2: str) -> float: return self.df.cov(col1, col2) - cov.__doc__ = DataFrame.cov.__doc__ - def corr(self, col1: str, col2: str, method: Optional[str] = None) -> float: return self.df.corr(col1, col2, method) - corr.__doc__ = DataFrame.corr.__doc__ - def approxQuantile( self, col: Union[str, List[str], Tuple[str]], @@ -2263,41 +2277,37 @@ def approxQuantile( ) -> Union[List[float], List[List[float]]]: return self.df.approxQuantile(col, probabilities, relativeError) - approxQuantile.__doc__ = DataFrame.approxQuantile.__doc__ - - def crosstab(self, col1: str, col2: str) -> DataFrame: + def crosstab(self, col1: str, col2: str) -> ParentDataFrame: return self.df.crosstab(col1, col2) - crosstab.__doc__ = DataFrame.crosstab.__doc__ - def freqItems( self, cols: Union[List[str], Tuple[str]], support: Optional[float] = None - ) -> DataFrame: + ) -> ParentDataFrame: return self.df.freqItems(cols, support) - freqItems.__doc__ = DataFrame.freqItems.__doc__ - def sampleBy( self, col: str, fractions: Dict[Any, float], seed: Optional[int] = None - ) -> DataFrame: + ) -> ParentDataFrame: return self.df.sampleBy(col, fractions, seed) - sampleBy.__doc__ = DataFrame.sampleBy.__doc__ - - -DataFrameStatFunctions.__doc__ = PySparkDataFrameStatFunctions.__doc__ - def _test() -> None: import os import sys import doctest + from pyspark.util import is_remote_only from pyspark.sql import SparkSession as PySparkSession - import pyspark.sql.connect.dataframe + import pyspark.sql.dataframe + # It inherits docstrings but doctests cannot detect them, so we run + # the parent class's doctests here directly. os.chdir(os.environ["SPARK_HOME"]) - globs = pyspark.sql.connect.dataframe.__dict__.copy() + globs = pyspark.sql.dataframe.__dict__.copy() + + if not is_remote_only(): + del pyspark.sql.dataframe.DataFrame.toJSON.__doc__ + del pyspark.sql.dataframe.DataFrame.rdd.__doc__ globs["spark"] = ( PySparkSession.builder.appName("sql.connect.dataframe tests") @@ -2306,7 +2316,7 @@ def _test() -> None: ) (failure_count, test_count) = doctest.testmod( - pyspark.sql.connect.dataframe, + pyspark.sql.dataframe, globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py index b1735f65f520e..c10bef56c3b83 100644 --- a/python/pyspark/sql/connect/expressions.py +++ b/python/pyspark/sql/connect/expressions.py @@ -15,7 +15,6 @@ # limitations under the License.
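# A small sketch of the delegation pattern in DataFrameStatFunctions above, not part of
# the patch: each method simply forwards to the corresponding DataFrame method, so
# df.stat.crosstab(...) and df.crosstab(...) should be interchangeable. The endpoint and
# data below are placeholders.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()
df = spark.createDataFrame([(1, "x"), (2, "y"), (1, "y")], ["a", "b"])

# Both calls go through the same crosstab plan, so their results should match.
assert sorted(df.stat.crosstab("a", "b").collect()) == sorted(df.crosstab("a", "b").collect())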
# from pyspark.sql.connect.utils import check_dependencies -from pyspark.sql.utils import is_timestamp_ntz_preferred check_dependencies(__name__) @@ -61,7 +60,7 @@ ) import pyspark.sql.connect.proto as proto -from pyspark.sql.connect.types import ( +from pyspark.util import ( JVM_BYTE_MIN, JVM_BYTE_MAX, JVM_SHORT_MIN, @@ -70,11 +69,15 @@ JVM_INT_MAX, JVM_LONG_MIN, JVM_LONG_MAX, +) +from pyspark.sql.connect.types import ( UnparsedDataType, pyspark_types_to_proto_types, proto_schema_to_pyspark_data_type, ) from pyspark.errors import PySparkTypeError, PySparkValueError +from pyspark.errors.utils import current_origin +from pyspark.sql.utils import is_timestamp_ntz_preferred if TYPE_CHECKING: from pyspark.sql.connect.client import SparkConnectClient @@ -87,7 +90,16 @@ class Expression: """ def __init__(self) -> None: - pass + origin = current_origin() + fragment = origin.fragment + call_site = origin.call_site + self.origin = None + if fragment is not None and call_site is not None: + self.origin = proto.Origin( + python_origin=proto.PythonOrigin( + fragment=origin.fragment, call_site=origin.call_site + ) + ) def to_plan( # type: ignore[empty-body] self, session: "SparkConnectClient" @@ -110,6 +122,12 @@ def alias(self, *alias: str, **kwargs: Any) -> "ColumnAlias": def name(self) -> str: # type: ignore[empty-body] ... + def _create_proto_expression(self) -> proto.Expression: + plan = proto.Expression() + if self.origin is not None: + plan.common.origin.CopyFrom(self.origin) + return plan + class CaseWhen(Expression): def __init__( @@ -151,18 +169,18 @@ def __repr__(self) -> str: class ColumnAlias(Expression): - def __init__(self, parent: Expression, alias: Sequence[str], metadata: Any): + def __init__(self, child: Expression, alias: Sequence[str], metadata: Any): super().__init__() self._alias = alias self._metadata = metadata - self._parent = parent + self._child = child def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": if len(self._alias) == 1: - exp = proto.Expression() + exp = self._create_proto_expression() exp.alias.name.append(self._alias[0]) - exp.alias.expr.CopyFrom(self._parent.to_plan(session)) + exp.alias.expr.CopyFrom(self._child.to_plan(session)) if self._metadata: exp.alias.metadata = json.dumps(self._metadata) @@ -173,13 +191,13 @@ def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": error_class="CANNOT_PROVIDE_METADATA", message_parameters={}, ) - exp = proto.Expression() + exp = self._create_proto_expression() exp.alias.name.extend(self._alias) - exp.alias.expr.CopyFrom(self._parent.to_plan(session)) + exp.alias.expr.CopyFrom(self._child.to_plan(session)) return exp def __repr__(self) -> str: - return f"{self._parent} AS {','.join(self._alias)}" + return f"{self._child} AS {','.join(self._alias)}" class LiteralExpression(Expression): @@ -405,7 +423,7 @@ def _to_value( def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": """Converts the literal expression to the literal in proto.""" - expr = proto.Expression() + expr = self._create_proto_expression() if self._value is None: expr.literal.null.CopyFrom(pyspark_types_to_proto_types(self._dataType)) @@ -455,7 +473,10 @@ def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": return expr def __repr__(self) -> str: - return f"{self._value}" + if self._value is None: + return "NULL" + else: + return f"{self._value}" class ColumnReference(Expression): @@ -478,7 +499,7 @@ def name(self) -> str: def to_plan(self, session: "SparkConnectClient") -> 
proto.Expression: """Returns the Proto representation of the expression.""" - expr = proto.Expression() + expr = self._create_proto_expression() expr.unresolved_attribute.unparsed_identifier = self._unparsed_identifier if self._plan_id is not None: expr.unresolved_attribute.plan_id = self._plan_id @@ -507,7 +528,7 @@ def __init__(self, unparsed_target: Optional[str], plan_id: Optional[int] = None self._plan_id = plan_id def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": - expr = proto.Expression() + expr = self._create_proto_expression() expr.unresolved_star.SetInParent() if self._unparsed_target is not None: expr.unresolved_star.unparsed_target = self._unparsed_target @@ -536,17 +557,21 @@ class SQLExpression(Expression): def __init__(self, expr: str) -> None: super().__init__() + assert isinstance(expr, str) self._expr: str = expr def to_plan(self, session: "SparkConnectClient") -> proto.Expression: """Returns the Proto representation of the SQL expression.""" - expr = proto.Expression() + expr = self._create_proto_expression() expr.expression_string.expression = self._expr return expr def __eq__(self, other: Any) -> bool: return other is not None and isinstance(other, SQLExpression) and other._expr == self._expr + def __repr__(self) -> str: + return self._expr + class SortOrder(Expression): def __init__(self, child: Expression, ascending: bool = True, nullsFirst: bool = True) -> None: @@ -563,7 +588,7 @@ def __repr__(self) -> str: ) def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - sort = proto.Expression() + sort = self._create_proto_expression() sort.sort_order.child.CopyFrom(self._child.to_plan(session)) if self._ascending: @@ -602,7 +627,7 @@ def __init__( self._is_distinct = is_distinct def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - fun = proto.Expression() + fun = self._create_proto_expression() fun.unresolved_function.function_name = self._name if len(self._args) > 0: fun.unresolved_function.arguments.extend([arg.to_plan(session) for arg in self._args]) @@ -699,7 +724,7 @@ def __init__( self._function = function def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": - expr = proto.Expression() + expr = self._create_proto_expression() expr.common_inline_user_defined_function.function_name = self._function_name expr.common_inline_user_defined_function.deterministic = self._deterministic if len(self._arguments) > 0: @@ -753,7 +778,7 @@ def __init__( self._valueExpr = valueExpr def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - expr = proto.Expression() + expr = self._create_proto_expression() expr.update_fields.struct_expression.CopyFrom(self._structExpr.to_plan(session)) expr.update_fields.field_name = self._fieldName expr.update_fields.value_expression.CopyFrom(self._valueExpr.to_plan(session)) @@ -778,7 +803,7 @@ def __init__( self._fieldName = fieldName def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - expr = proto.Expression() + expr = self._create_proto_expression() expr.update_fields.struct_expression.CopyFrom(self._structExpr.to_plan(session)) expr.update_fields.field_name = self._fieldName return expr @@ -802,7 +827,7 @@ def __init__( self._extraction = extraction def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - expr = proto.Expression() + expr = self._create_proto_expression() expr.unresolved_extract_value.child.CopyFrom(self._child.to_plan(session)) 
expr.unresolved_extract_value.extraction.CopyFrom(self._extraction.to_plan(session)) return expr @@ -822,7 +847,7 @@ def __init__(self, col_name: str, plan_id: Optional[int] = None) -> None: self._plan_id = plan_id def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - expr = proto.Expression() + expr = self._create_proto_expression() expr.unresolved_regex.col_name = self.col_name if self._plan_id is not None: expr.unresolved_regex.plan_id = self._plan_id @@ -841,6 +866,7 @@ def __init__( ) -> None: super().__init__() self._expr = expr + assert isinstance(data_type, (DataType, str)) self._data_type = data_type if eval_mode is not None: assert isinstance(eval_mode, str) @@ -848,7 +874,7 @@ def __init__( self._eval_mode = eval_mode def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - fun = proto.Expression() + fun = self._create_proto_expression() fun.cast.expr.CopyFrom(self._expr.to_plan(session)) if isinstance(self._data_type, str): fun.cast.type_str = self._data_type @@ -866,7 +892,18 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: return fun def __repr__(self) -> str: - return f"({self._expr} ({self._data_type}))" + # We cannot guarantee the string representations be exactly the same, e.g. + # str(sf.col("a").cast("long")): + # Column<'CAST(a AS BIGINT)'> <- Spark Classic + # Column<'CAST(a AS LONG)'> <- Spark Connect + if isinstance(self._data_type, DataType): + str_data_type = self._data_type.simpleString().upper() + else: + str_data_type = str(self._data_type).upper() + if self._eval_mode is not None and self._eval_mode == "try": + return f"TRY_CAST({self._expr} AS {str_data_type})" + else: + return f"CAST({self._expr} AS {str_data_type})" class UnresolvedNamedLambdaVariable(Expression): @@ -888,12 +925,12 @@ def __init__( self._name_parts = name_parts def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - expr = proto.Expression() + expr = self._create_proto_expression() expr.unresolved_named_lambda_variable.name_parts.extend(self._name_parts) return expr def __repr__(self) -> str: - return f"(UnresolvedNamedLambdaVariable({', '.join(self._name_parts)})" + return ", ".join(self._name_parts) @staticmethod def fresh_var_name(name: str) -> str: @@ -930,7 +967,7 @@ def __init__( self._arguments = arguments def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - expr = proto.Expression() + expr = self._create_proto_expression() expr.lambda_function.function.CopyFrom(self._function.to_plan(session)) expr.lambda_function.arguments.extend( [arg.to_plan(session).unresolved_named_lambda_variable for arg in self._arguments] @@ -938,7 +975,10 @@ def to_plan(self, session: "SparkConnectClient") -> proto.Expression: return expr def __repr__(self) -> str: - return f"(LambdaFunction({str(self._function)}, {', '.join(self._arguments)})" + return ( + f"LambdaFunction({str(self._function)}, " + + f"{', '.join([str(arg) for arg in self._arguments])})" + ) class WindowExpression(Expression): @@ -960,7 +1000,7 @@ def __init__( self._windowSpec = windowSpec def to_plan(self, session: "SparkConnectClient") -> proto.Expression: - expr = proto.Expression() + expr = self._create_proto_expression() expr.window.window_function.CopyFrom(self._windowFunction.to_plan(session)) @@ -1067,7 +1107,7 @@ def __init__(self, name: str, args: Sequence["Expression"]): self._args = args def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": - expr = proto.Expression() + expr = self._create_proto_expression() 
expr.call_function.function_name = self._name if len(self._args) > 0: expr.call_function.arguments.extend([arg.to_plan(session) for arg in self._args]) @@ -1091,7 +1131,7 @@ def __init__(self, key: str, value: Expression): self._value = value def to_plan(self, session: "SparkConnectClient") -> "proto.Expression": - expr = proto.Expression() + expr = self._create_proto_expression() expr.named_argument_expression.key = self._key expr.named_argument_expression.value.CopyFrom(self._value.to_plan(session)) return expr diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index 2b40b31b75280..8d3442b6496f7 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -42,7 +42,8 @@ import numpy as np from pyspark.errors import PySparkTypeError, PySparkValueError -from pyspark.sql.connect.column import Column +from pyspark.sql.dataframe import DataFrame as ParentDataFrame +from pyspark.sql import Column from pyspark.sql.connect.expressions import ( CaseWhen, SortOrder, @@ -80,7 +81,6 @@ DataTypeOrString, UserDefinedFunctionLike, ) - from pyspark.sql.connect.dataframe import DataFrame from pyspark.sql.connect.udtf import UserDefinedTableFunction @@ -112,14 +112,16 @@ def _invoke_function(name: str, *args: Union[Column, Expression]) -> Column: ------- :class:`Column` """ + from pyspark.sql.connect.column import Column as ConnectColumn + expressions: List[Expression] = [] for arg in args: assert isinstance(arg, (Column, Expression)) if isinstance(arg, Column): - expressions.append(arg._expr) + expressions.append(arg._expr) # type: ignore[arg-type] else: expressions.append(arg) - return Column(UnresolvedFunction(name, expressions)) + return ConnectColumn(UnresolvedFunction(name, expressions)) def _invoke_function_over_columns(name: str, *cols: "ColumnOrName") -> Column: @@ -180,6 +182,8 @@ def _create_lambda(f: Callable) -> LambdaFunction: - (Column, Column) -> Column: ... - (Column, Column, Column) -> Column: ... 
""" + from pyspark.sql.connect.column import Column as ConnectColumn + parameters = _get_lambda_parameters(f) arg_names = ["x", "y", "z"][: len(parameters)] @@ -187,7 +191,7 @@ def _create_lambda(f: Callable) -> LambdaFunction: UnresolvedNamedLambdaVariable([UnresolvedNamedLambdaVariable.fresh_var_name(arg_name)]) for arg_name in arg_names ] - arg_cols = [Column(arg_expr) for arg_expr in arg_exprs] + arg_cols = [ConnectColumn(arg_expr) for arg_expr in arg_exprs] result = f(*arg_cols) @@ -197,7 +201,7 @@ def _create_lambda(f: Callable) -> LambdaFunction: message_parameters={"func_name": f.__name__, "return_type": type(result).__name__}, ) - return LambdaFunction(result._expr, arg_exprs) + return LambdaFunction(result._expr, arg_exprs) # type: ignore[arg-type] def _invoke_higher_order_function( @@ -234,12 +238,14 @@ def _options_to_col(options: Dict[str, Any]) -> Column: def col(col: str) -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + if col == "*": - return Column(UnresolvedStar(unparsed_target=None)) + return ConnectColumn(UnresolvedStar(unparsed_target=None)) elif col.endswith(".*"): - return Column(UnresolvedStar(unparsed_target=col)) + return ConnectColumn(UnresolvedStar(unparsed_target=col)) else: - return Column(ColumnReference(unparsed_identifier=col)) + return ConnectColumn(ColumnReference(unparsed_identifier=col)) col.__doc__ = pysparkfuncs.col.__doc__ @@ -249,6 +255,8 @@ def col(col: str) -> Column: def lit(col: Any) -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + if isinstance(col, Column): return col elif isinstance(col, list): @@ -272,7 +280,7 @@ def lit(col: Any) -> Column: return array(*[lit(c) for c in col]) else: - return Column(LiteralExpression._from_value(col)) + return ConnectColumn(LiteralExpression._from_value(col)) lit.__doc__ = pysparkfuncs.lit.__doc__ @@ -314,7 +322,7 @@ def getbit(col: "ColumnOrName", pos: "ColumnOrName") -> Column: getbit.__doc__ = pysparkfuncs.getbit.__doc__ -def broadcast(df: "DataFrame") -> "DataFrame": +def broadcast(df: "ParentDataFrame") -> "ParentDataFrame": from pyspark.sql.connect.dataframe import DataFrame if not isinstance(df, DataFrame): @@ -336,7 +344,9 @@ def coalesce(*cols: "ColumnOrName") -> Column: def expr(str: str) -> Column: - return Column(SQLExpression(str)) + from pyspark.sql.connect.column import Column as ConnectColumn + + return ConnectColumn(SQLExpression(str)) expr.__doc__ = pysparkfuncs.expr.__doc__ @@ -429,6 +439,8 @@ def spark_partition_id() -> Column: def when(condition: Column, value: Any) -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + # Explicitly not using ColumnOrName type here to make reading condition less opaque if not isinstance(condition, Column): raise PySparkTypeError( @@ -438,7 +450,12 @@ def when(condition: Column, value: Any) -> Column: value_col = value if isinstance(value, Column) else lit(value) - return Column(CaseWhen(branches=[(condition._expr, value_col._expr)], else_value=None)) + return ConnectColumn( + CaseWhen( + branches=[(condition._expr, value_col._expr)], # type: ignore[list-item] + else_value=None, + ) + ) when.__doc__ = pysparkfuncs.when.__doc__ @@ -917,6 +934,13 @@ def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column: try_divide.__doc__ = pysparkfuncs.try_divide.__doc__ +def try_remainder(left: "ColumnOrName", right: "ColumnOrName") -> Column: + return _invoke_function_over_columns("try_remainder", left, right) + + +try_remainder.__doc__ = pysparkfuncs.try_remainder.__doc__ + 
+ def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_multiply", left, right) @@ -1045,8 +1069,12 @@ def countDistinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: def count_distinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + _exprs = [_to_col(c)._expr for c in [col] + list(cols)] - return Column(UnresolvedFunction("count", _exprs, is_distinct=True)) + return ConnectColumn( + UnresolvedFunction("count", _exprs, is_distinct=True) # type: ignore[arg-type] + ) count_distinct.__doc__ = pysparkfuncs.count_distinct.__doc__ @@ -1167,20 +1195,10 @@ def percentile( percentage: Union[Column, float, List[float], Tuple[float]], frequency: Union[Column, int] = 1, ) -> Column: - if isinstance(percentage, Column): - _percentage = percentage - elif isinstance(percentage, (list, tuple)): - # Convert tuple to list - _percentage = lit(list(percentage)) - else: - # Probably scalar - _percentage = lit(percentage) + if isinstance(percentage, (list, tuple)): + percentage = list(percentage) - if isinstance(frequency, int): - _frequency = lit(frequency) - elif isinstance(frequency, Column): - _frequency = frequency - else: + if not isinstance(frequency, (int, Column)): raise PySparkTypeError( error_class="NOT_COLUMN_OR_INT", message_parameters={ @@ -1189,7 +1207,7 @@ def percentile( }, ) - return _invoke_function("percentile", _to_col(col), _percentage, _frequency) + return _invoke_function("percentile", _to_col(col), lit(percentage), lit(frequency)) percentile.__doc__ = pysparkfuncs.percentile.__doc__ @@ -1200,16 +1218,10 @@ def percentile_approx( percentage: Union[Column, float, List[float], Tuple[float]], accuracy: Union[Column, float] = 10000, ) -> Column: - if isinstance(percentage, Column): - percentage_col = percentage - elif isinstance(percentage, (list, tuple)): - # Convert tuple to list - percentage_col = lit(list(percentage)) - else: - # Probably scalar - percentage_col = lit(percentage) + if isinstance(percentage, (list, tuple)): + percentage = lit(list(percentage)) - return _invoke_function("percentile_approx", _to_col(col), percentage_col, lit(accuracy)) + return _invoke_function("percentile_approx", _to_col(col), lit(percentage), lit(accuracy)) percentile_approx.__doc__ = pysparkfuncs.percentile_approx.__doc__ @@ -1220,16 +1232,10 @@ def approx_percentile( percentage: Union[Column, float, List[float], Tuple[float]], accuracy: Union[Column, float] = 10000, ) -> Column: - if isinstance(percentage, Column): - percentage_col = percentage - elif isinstance(percentage, (list, tuple)): - # Convert tuple to list - percentage_col = lit(list(percentage)) - else: - # Probably scalar - percentage_col = lit(percentage) + if isinstance(percentage, (list, tuple)): + percentage = list(percentage) - return _invoke_function("approx_percentile", _to_col(col), percentage_col, lit(accuracy)) + return _invoke_function("approx_percentile", _to_col(col), lit(percentage), lit(accuracy)) approx_percentile.__doc__ = pysparkfuncs.approx_percentile.__doc__ @@ -1293,7 +1299,11 @@ def sumDistinct(col: "ColumnOrName") -> Column: def sum_distinct(col: "ColumnOrName") -> Column: - return Column(UnresolvedFunction("sum", [_to_col(col)._expr], is_distinct=True)) + from pyspark.sql.connect.column import Column as ConnectColumn + + return ConnectColumn( + UnresolvedFunction("sum", [_to_col(col)._expr], is_distinct=True) # type: ignore[list-item] + ) sum_distinct.__doc__ = 
pysparkfuncs.sum_distinct.__doc__ @@ -1853,12 +1863,10 @@ def from_json( schema: Union[ArrayType, StructType, Column, str], options: Optional[Dict[str, str]] = None, ) -> Column: - if isinstance(schema, Column): - _schema = schema + if isinstance(schema, (str, Column)): + _schema = lit(schema) elif isinstance(schema, DataType): _schema = lit(schema.json()) - elif isinstance(schema, str): - _schema = lit(schema) else: raise PySparkTypeError( error_class="NOT_COLUMN_OR_DATATYPE_OR_STR", @@ -1879,12 +1887,10 @@ def from_xml( schema: Union[StructType, Column, str], options: Optional[Dict[str, str]] = None, ) -> Column: - if isinstance(schema, Column): - _schema = schema + if isinstance(schema, (str, Column)): + _schema = lit(schema) elif isinstance(schema, StructType): _schema = lit(schema.json()) - elif isinstance(schema, str): - _schema = lit(schema) else: raise PySparkTypeError( error_class="NOT_COLUMN_OR_STR_OR_STRUCT", @@ -2041,6 +2047,13 @@ def str_to_map( str_to_map.__doc__ = pysparkfuncs.str_to_map.__doc__ +def try_parse_json(col: "ColumnOrName") -> Column: + return _invoke_function("try_parse_json", _to_col(col)) + + +try_parse_json.__doc__ = pysparkfuncs.try_parse_json.__doc__ + + def parse_json(col: "ColumnOrName") -> Column: return _invoke_function("parse_json", _to_col(col)) @@ -2048,6 +2061,41 @@ def parse_json(col: "ColumnOrName") -> Column: parse_json.__doc__ = pysparkfuncs.parse_json.__doc__ +def is_variant_null(v: "ColumnOrName") -> Column: + return _invoke_function("is_variant_null", _to_col(v)) + + +is_variant_null.__doc__ = pysparkfuncs.is_variant_null.__doc__ + + +def variant_get(v: "ColumnOrName", path: str, targetType: str) -> Column: + return _invoke_function("variant_get", _to_col(v), lit(path), lit(targetType)) + + +variant_get.__doc__ = pysparkfuncs.variant_get.__doc__ + + +def try_variant_get(v: "ColumnOrName", path: str, targetType: str) -> Column: + return _invoke_function("try_variant_get", _to_col(v), lit(path), lit(targetType)) + + +try_variant_get.__doc__ = pysparkfuncs.try_variant_get.__doc__ + + +def schema_of_variant(v: "ColumnOrName") -> Column: + return _invoke_function("schema_of_variant", _to_col(v)) + + +schema_of_variant.__doc__ = pysparkfuncs.schema_of_variant.__doc__ + + +def schema_of_variant_agg(v: "ColumnOrName") -> Column: + return _invoke_function("schema_of_variant_agg", _to_col(v)) + + +schema_of_variant_agg.__doc__ = pysparkfuncs.schema_of_variant_agg.__doc__ + + def posexplode(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("posexplode", col) @@ -2081,10 +2129,8 @@ def sequence( sequence.__doc__ = pysparkfuncs.sequence.__doc__ -def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: - if isinstance(csv, Column): - _csv = csv - elif isinstance(csv, str): +def schema_of_csv(csv: Union[str, Column], options: Optional[Dict[str, str]] = None) -> Column: + if isinstance(csv, (str, Column)): _csv = lit(csv) else: raise PySparkTypeError( @@ -2101,10 +2147,8 @@ def schema_of_csv(csv: "ColumnOrName", options: Optional[Dict[str, str]] = None) schema_of_csv.__doc__ = pysparkfuncs.schema_of_csv.__doc__ -def schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: - if isinstance(json, Column): - _json = json - elif isinstance(json, str): +def schema_of_json(json: Union[str, Column], options: Optional[Dict[str, str]] = None) -> Column: + if isinstance(json, (str, Column)): _json = lit(json) else: raise PySparkTypeError( @@ -2121,10 +2165,8 @@ def 
schema_of_json(json: "ColumnOrName", options: Optional[Dict[str, str]] = Non schema_of_json.__doc__ = pysparkfuncs.schema_of_json.__doc__ -def schema_of_xml(xml: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: - if isinstance(xml, Column): - _xml = xml - elif isinstance(xml, str): +def schema_of_xml(xml: Union[str, Column], options: Optional[Dict[str, str]] = None) -> Column: + if isinstance(xml, (str, Column)): _xml = lit(xml) else: raise PySparkTypeError( @@ -2476,8 +2518,13 @@ def repeat(col: "ColumnOrName", n: Union["ColumnOrName", int]) -> Column: repeat.__doc__ = pysparkfuncs.repeat.__doc__ -def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: - return _invoke_function("split", _to_col(str), lit(pattern), lit(limit)) +def split( + str: "ColumnOrName", + pattern: Union[Column, str], + limit: Union["ColumnOrName", int] = -1, +) -> Column: + limit = lit(limit) if isinstance(limit, int) else _to_col(limit) + return _invoke_function("split", _to_col(str), lit(pattern), limit) split.__doc__ = pysparkfuncs.split.__doc__ @@ -3350,6 +3397,20 @@ def timestamp_micros(col: "ColumnOrName") -> Column: timestamp_micros.__doc__ = pysparkfuncs.timestamp_micros.__doc__ +def timestamp_diff(unit: str, start: "ColumnOrName", end: "ColumnOrName") -> Column: + return _invoke_function_over_columns("timestampdiff", lit(unit), start, end) + + +timestamp_diff.__doc__ = pysparkfuncs.timestamp_diff.__doc__ + + +def timestamp_add(unit: str, quantity: "ColumnOrName", ts: "ColumnOrName") -> Column: + return _invoke_function_over_columns("timestampadd", lit(unit), quantity, ts) + + +timestamp_add.__doc__ = pysparkfuncs.timestamp_add.__doc__ + + def window( timeColumn: "ColumnOrName", windowDuration: str, @@ -3764,27 +3825,27 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column: sha2.__doc__ = pysparkfuncs.sha2.__doc__ -def hll_sketch_agg(col: "ColumnOrName", lgConfigK: Optional[Union[int, Column]] = None) -> Column: +def hll_sketch_agg( + col: "ColumnOrName", + lgConfigK: Optional[Union[int, Column]] = None, +) -> Column: if lgConfigK is None: return _invoke_function_over_columns("hll_sketch_agg", col) else: - _lgConfigK = lit(lgConfigK) if isinstance(lgConfigK, int) else lgConfigK - return _invoke_function_over_columns("hll_sketch_agg", col, _lgConfigK) + return _invoke_function_over_columns("hll_sketch_agg", col, lit(lgConfigK)) hll_sketch_agg.__doc__ = pysparkfuncs.hll_sketch_agg.__doc__ -def hll_union_agg(col: "ColumnOrName", allowDifferentLgConfigK: Optional[bool] = None) -> Column: +def hll_union_agg( + col: "ColumnOrName", + allowDifferentLgConfigK: Optional[Union[bool, Column]] = None, +) -> Column: if allowDifferentLgConfigK is None: return _invoke_function_over_columns("hll_union_agg", col) else: - _allowDifferentLgConfigK = ( - lit(allowDifferentLgConfigK) - if isinstance(allowDifferentLgConfigK, bool) - else allowDifferentLgConfigK - ) - return _invoke_function_over_columns("hll_union_agg", col, _allowDifferentLgConfigK) + return _invoke_function_over_columns("hll_union_agg", col, lit(allowDifferentLgConfigK)) hll_union_agg.__doc__ = pysparkfuncs.hll_union_agg.__doc__ @@ -4061,8 +4122,10 @@ def udtf( def call_function(funcName: str, *cols: "ColumnOrName") -> Column: + from pyspark.sql.connect.column import Column as ConnectColumn + expressions = [_to_col(c)._expr for c in cols] - return Column(CallFunction(funcName, expressions)) + return ConnectColumn(CallFunction(funcName, expressions)) # type: ignore[arg-type] call_function.__doc__ = 
pysparkfuncs.call_function.__doc__ diff --git a/python/pyspark/sql/connect/functions/partitioning.py b/python/pyspark/sql/connect/functions/partitioning.py index bfeddad7d5686..5d2dd58313bb6 100644 --- a/python/pyspark/sql/connect/functions/partitioning.py +++ b/python/pyspark/sql/connect/functions/partitioning.py @@ -22,7 +22,7 @@ from pyspark.errors import PySparkTypeError from pyspark.sql import functions as pysparkfuncs -from pyspark.sql.connect.column import Column +from pyspark.sql.column import Column from pyspark.sql.connect.functions.builtin import _to_col, _invoke_function_over_columns from pyspark.sql.connect.functions.builtin import lit, _invoke_function diff --git a/python/pyspark/sql/connect/group.py b/python/pyspark/sql/connect/group.py index b866f61efe4ae..85806b1a265b0 100644 --- a/python/pyspark/sql/connect/group.py +++ b/python/pyspark/sql/connect/group.py @@ -34,11 +34,12 @@ from pyspark.util import PythonEvalType from pyspark.sql.group import GroupedData as PySparkGroupedData from pyspark.sql.pandas.group_ops import PandasCogroupedOps as PySparkPandasCogroupedOps +from pyspark.sql.pandas.functions import _validate_pandas_udf # type: ignore[attr-defined] from pyspark.sql.types import NumericType from pyspark.sql.types import StructType import pyspark.sql.connect.plan as plan -from pyspark.sql.connect.column import Column +from pyspark.sql.column import Column from pyspark.sql.connect.functions import builtin as F from pyspark.errors import PySparkNotImplementedError, PySparkTypeError @@ -61,10 +62,10 @@ def __init__( self, df: "DataFrame", group_type: str, - grouping_cols: Sequence["Column"], - pivot_col: Optional["Column"] = None, + grouping_cols: Sequence[Column], + pivot_col: Optional[Column] = None, pivot_values: Optional[Sequence["LiteralType"]] = None, - grouping_sets: Optional[Sequence[Sequence["Column"]]] = None, + grouping_sets: Optional[Sequence[Sequence[Column]]] = None, ) -> None: from pyspark.sql.connect.dataframe import DataFrame @@ -293,13 +294,14 @@ def applyInPandas( from pyspark.sql.connect.udf import UserDefinedFunction from pyspark.sql.connect.dataframe import DataFrame + _validate_pandas_udf(func, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF) udf_obj = UserDefinedFunction( func, returnType=schema, evalType=PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF, ) - return DataFrame( + res = DataFrame( plan.GroupMap( child=self._df._plan, grouping_cols=self._grouping_cols, @@ -308,6 +310,9 @@ def applyInPandas( ), session=self._df._session, ) + if isinstance(schema, StructType): + res._cached_schema = schema + return res applyInPandas.__doc__ = PySparkGroupedData.applyInPandas.__doc__ @@ -322,6 +327,7 @@ def applyInPandasWithState( from pyspark.sql.connect.udf import UserDefinedFunction from pyspark.sql.connect.dataframe import DataFrame + _validate_pandas_udf(func, PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF_WITH_STATE) udf_obj = UserDefinedFunction( func, returnType=outputStructType, @@ -360,13 +366,14 @@ def applyInArrow( from pyspark.sql.connect.udf import UserDefinedFunction from pyspark.sql.connect.dataframe import DataFrame + _validate_pandas_udf(func, PythonEvalType.SQL_GROUPED_MAP_ARROW_UDF) udf_obj = UserDefinedFunction( func, returnType=schema, evalType=PythonEvalType.SQL_GROUPED_MAP_ARROW_UDF, ) - return DataFrame( + res = DataFrame( plan.GroupMap( child=self._df._plan, grouping_cols=self._grouping_cols, @@ -375,6 +382,9 @@ def applyInArrow( ), session=self._df._session, ) + if isinstance(schema, StructType): + res._cached_schema = schema + return res 
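# A short sketch of the applyInPandas change above, not part of the patch: when an
# explicit StructType is passed, the result DataFrame caches it (_cached_schema), so the
# intent appears to be that result.schema can be answered without an extra analyze round
# trip. Assumes a Spark Connect session plus pandas/pyarrow; names below are placeholders.
import pandas as pd
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, LongType, DoubleType

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()
df = spark.createDataFrame([(1, 1.0), (1, 2.0), (2, 3.0)], ["id", "v"])
schema = StructType([StructField("id", LongType()), StructField("mean_v", DoubleType())])

def mean_per_group(pdf: pd.DataFrame) -> pd.DataFrame:
    # One output row per group: the group key and the mean of "v".
    return pd.DataFrame({"id": [pdf["id"].iloc[0]], "mean_v": [pdf["v"].mean()]})

result = df.groupBy("id").applyInPandas(mean_per_group, schema=schema)
print(result.schema)  # with the change above, this can be served from the cached schema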
applyInArrow.__doc__ = PySparkGroupedData.applyInArrow.__doc__ @@ -399,13 +409,14 @@ def applyInPandas( from pyspark.sql.connect.udf import UserDefinedFunction from pyspark.sql.connect.dataframe import DataFrame + _validate_pandas_udf(func, PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF) udf_obj = UserDefinedFunction( func, returnType=schema, evalType=PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF, ) - return DataFrame( + res = DataFrame( plan.CoGroupMap( input=self._gd1._df._plan, input_grouping_cols=self._gd1._grouping_cols, @@ -415,6 +426,9 @@ def applyInPandas( ), session=self._gd1._df._session, ) + if isinstance(schema, StructType): + res._cached_schema = schema + return res applyInPandas.__doc__ = PySparkPandasCogroupedOps.applyInPandas.__doc__ @@ -424,13 +438,14 @@ def applyInArrow( from pyspark.sql.connect.udf import UserDefinedFunction from pyspark.sql.connect.dataframe import DataFrame + _validate_pandas_udf(func, PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF) udf_obj = UserDefinedFunction( func, returnType=schema, evalType=PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF, ) - return DataFrame( + res = DataFrame( plan.CoGroupMap( input=self._gd1._df._plan, input_grouping_cols=self._gd1._grouping_cols, @@ -440,6 +455,9 @@ def applyInArrow( ), session=self._gd1._df._session, ) + if isinstance(schema, StructType): + res._cached_schema = schema + return res applyInArrow.__doc__ = PySparkPandasCogroupedOps.applyInArrow.__doc__ diff --git a/python/pyspark/sql/connect/observation.py b/python/pyspark/sql/connect/observation.py index 4fefb8aac41fb..2471cf04cfbe7 100644 --- a/python/pyspark/sql/connect/observation.py +++ b/python/pyspark/sql/connect/observation.py @@ -23,7 +23,7 @@ IllegalArgumentException, PySparkAssertionError, ) -from pyspark.sql.connect.column import Column +from pyspark.sql.column import Column from pyspark.sql.connect.dataframe import DataFrame from pyspark.sql.observation import Observation as PySparkObservation import pyspark.sql.connect.plan as plan diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index 239ee23c2061c..19377515ed28c 100644 --- a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -14,6 +14,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
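# The cogrouped applyInPandas/applyInArrow changes above follow the same pattern. A brief
# usage sketch of the cogroup API they touch, not part of the patch, assuming a Spark
# Connect session and pandas; passing a StructType instead of the DDL string below would
# additionally let the result cache its schema as in the grouped case.
import pandas as pd
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://localhost").getOrCreate()
df1 = spark.createDataFrame([(1, 10), (2, 20)], ["id", "x"])
df2 = spark.createDataFrame([(1, 100), (2, 200)], ["id", "y"])

def join_groups(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame:
    # Merge the two per-key groups on "id".
    return pd.merge(left, right, on="id")

joined = (
    df1.groupBy("id")
    .cogroup(df2.groupBy("id"))
    .applyInPandas(join_groups, schema="id long, x long, y long")
)
joined.show()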
# + +# mypy: disable-error-code="operator" + from pyspark.resource import ResourceProfile from pyspark.sql.connect.utils import check_dependencies @@ -37,6 +40,7 @@ import pickle from threading import Lock from inspect import signature, isclass +import warnings import pyarrow as pa @@ -45,20 +49,22 @@ from pyspark.sql.types import DataType import pyspark.sql.connect.proto as proto +from pyspark.sql.column import Column +from pyspark.sql.connect.proto import base_pb2 as spark_dot_connect_dot_base__pb2 from pyspark.sql.connect.conversion import storage_level_to_proto -from pyspark.sql.connect.column import Column from pyspark.sql.connect.expressions import Expression from pyspark.sql.connect.types import pyspark_types_to_proto_types, UnparsedDataType from pyspark.errors import ( + AnalysisException, PySparkValueError, PySparkPicklingError, - IllegalArgumentException, ) if TYPE_CHECKING: from pyspark.sql.connect.client import SparkConnectClient from pyspark.sql.connect.udf import UserDefinedFunction from pyspark.sql.connect.observation import Observation + from pyspark.sql.connect.session import SparkSession class LogicalPlan: @@ -544,14 +550,49 @@ class CachedRemoteRelation(LogicalPlan): """Logical plan object for a DataFrame reference which represents a DataFrame that's been cached on the server with a given id.""" - def __init__(self, relationId: str): + def __init__(self, relation_id: str, spark_session: "SparkSession"): super().__init__(None) - self._relationId = relationId - - def plan(self, session: "SparkConnectClient") -> proto.Relation: - plan = self._create_proto_relation() - plan.cached_remote_relation.relation_id = self._relationId - return plan + self._relation_id = relation_id + # Needs to hold the session to make a request itself. + self._spark_session = spark_session + + def plan(self, session: "SparkConnectClient") -> proto.Relation: + plan = self._create_proto_relation() + plan.cached_remote_relation.relation_id = self._relation_id + return plan + + def __del__(self) -> None: + session = self._spark_session + # If the session is already closed, all cached DataFrames should be released. + if session is not None and not session.client.is_closed and self._relation_id is not None: + try: + command = RemoveRemoteCachedRelation(self).command(session=session.client) + req = session.client._execute_plan_request_with_metadata() + if session.client._user_id: + req.user_context.user_id = session.client._user_id + req.plan.command.CopyFrom(command) + + for attempt in session.client._retrying(): + with attempt: + # !!HACK ALERT!! + # unary_stream does not work at Python exit for unknown reasons. + # Therefore, we open a unary_unary channel here instead. + # See also :class:`SparkConnectServiceStub`.
+ request_serializer = ( + spark_dot_connect_dot_base__pb2.ExecutePlanRequest.SerializeToString + ) + response_deserializer = ( + spark_dot_connect_dot_base__pb2.ExecutePlanResponse.FromString + ) + channel = session.client._channel.unary_unary( + "/spark.connect.SparkConnectService/ExecutePlan", + request_serializer=request_serializer, + response_deserializer=response_deserializer, + ) + metadata = session.client._builder.metadata() + channel(req, metadata=metadata) # type: ignore[arg-type] + except Exception as e: + warnings.warn(f"RemoveRemoteCachedRelation failed with exception: {e}.") class Hint(LogicalPlan): @@ -641,7 +682,7 @@ def __init__( self, child: Optional["LogicalPlan"], all_columns_as_keys: bool = False, - column_names: Optional[List[str]] = None, + column_names: Optional[Sequence[str]] = None, within_watermark: bool = False, ) -> None: super().__init__(child) @@ -714,7 +755,7 @@ def __init__( lower_bound: float, upper_bound: float, with_replacement: bool, - seed: Optional[int], + seed: int, deterministic_order: bool = False, ) -> None: super().__init__(child) @@ -731,8 +772,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan.sample.lower_bound = self.lower_bound plan.sample.upper_bound = self.upper_bound plan.sample.with_replacement = self.with_replacement - if self.seed is not None: - plan.sample.seed = self.seed + plan.sample.seed = self.seed plan.sample.deterministic_order = self.deterministic_order return plan @@ -847,7 +887,7 @@ def __init__( elif how == "cross": join_type = proto.Join.JoinType.JOIN_TYPE_CROSS else: - raise IllegalArgumentException( + raise AnalysisException( error_class="UNSUPPORTED_JOIN_TYPE", message_parameters={"join_type": how}, ) @@ -1523,7 +1563,7 @@ def __init__( child: Optional["LogicalPlan"], col: Column, fractions: Sequence[Tuple[Column, float]], - seed: Optional[int], + seed: int, ) -> None: super().__init__(child) @@ -1551,8 +1591,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: fraction.stratum.CopyFrom(k.to_plan(session).literal) fraction.fraction = float(v) plan.sample_by.fractions.append(fraction) - if self._seed is not None: - plan.sample_by.seed = self._seed + plan.sample_by.seed = self._seed return plan @@ -1784,9 +1823,39 @@ def command(self, session: "SparkConnectClient") -> proto.Command: return cmd -# Catalog API (internal-only) +class RemoveRemoteCachedRelation(LogicalPlan): + def __init__(self, relation: CachedRemoteRelation) -> None: + super().__init__(None) + self._relation = relation + + def command(self, session: "SparkConnectClient") -> proto.Command: + plan = self._create_proto_relation() + plan.cached_remote_relation.relation_id = self._relation._relation_id + cmd = proto.Command() + cmd.remove_cached_remote_relation_command.relation.CopyFrom(plan.cached_remote_relation) + return cmd + + +class Checkpoint(LogicalPlan): + def __init__(self, child: Optional["LogicalPlan"], local: bool, eager: bool) -> None: + super().__init__(child) + self._local = local + self._eager = eager + + def command(self, session: "SparkConnectClient") -> proto.Command: + cmd = proto.Command() + assert self._child is not None + cmd.checkpoint_command.CopyFrom( + proto.CheckpointCommand( + relation=self._child.plan(session), + local=self._local, + eager=self._eager, + ) + ) + return cmd +# Catalog API (internal-only) class CurrentDatabase(LogicalPlan): def __init__(self) -> None: super().__init__(None) diff --git a/python/pyspark/sql/connect/proto/base_pb2.py 
b/python/pyspark/sql/connect/proto/base_pb2.py index 2a30ffe60a9f2..5243e55576f8e 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.py +++ b/python/pyspark/sql/connect/proto/base_pb2.py @@ -37,7 +37,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf8\x13\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x11 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 \x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 
\x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 
\x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa3\x05\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x01R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB)\n\'_client_observed_server_side_session_idB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\xe6\x15\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x87\x01\n&streaming_query_listener_events_result\x18\x10 \x01(\x0b\x32\x31.spark.connect.StreamingQueryListenerEventsResultH\x00R"streamingQueryListenerEventsResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x87\x01\n&create_resource_profile_command_result\x18\x11 \x01(\x0b\x32\x31.spark.connect.CreateResourceProfileCommandResultH\x00R"createResourceProfileCommandResult\x12\x65\n\x12\x65xecution_progress\x18\x12 \x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 
\x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1at\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x1a\x10\n\x0eResultComplete\x1a\xcd\x02\n\x11\x45xecutionProgress\x12V\n\x06stages\x18\x01 \x03(\x0b\x32>.spark.connect.ExecutePlanResponse.ExecutionProgress.StageInfoR\x06stages\x12,\n\x12num_inflight_tasks\x18\x02 \x01(\x03R\x10numInflightTasks\x1a\xb1\x01\n\tStageInfo\x12\x19\n\x08stage_id\x18\x01 \x01(\x03R\x07stageId\x12\x1b\n\tnum_tasks\x18\x02 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x03 \x01(\x03R\x11numCompletedTasks\x12(\n\x10input_bytes_read\x18\x04 \x01(\x03R\x0einputBytesRead\x12\x12\n\x04\x64one\x18\x05 \x01(\x08R\x04\x64oneB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x87\t\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 \x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 
\x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xea\x07\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x02R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc6\x02\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xdb\x04\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 
\x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x96\x03\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x06 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 \x01(\tH\x02R\x0elastResponseId\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc9\x04\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xcc\x02\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 
\x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 \x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 \x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a 
.spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x18spark/connect/base.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1cspark/connect/commands.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto\x1a\x19spark/connect/types.proto"t\n\x04Plan\x12-\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationH\x00R\x04root\x12\x32\n\x07\x63ommand\x18\x02 \x01(\x0b\x32\x16.spark.connect.CommandH\x00R\x07\x63ommandB\t\n\x07op_type"z\n\x0bUserContext\x12\x17\n\x07user_id\x18\x01 \x01(\tR\x06userId\x12\x1b\n\tuser_name\x18\x02 \x01(\tR\x08userName\x12\x35\n\nextensions\x18\xe7\x07 \x03(\x0b\x32\x14.google.protobuf.AnyR\nextensions"\xf8\x13\n\x12\x41nalyzePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x11 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12\x42\n\x06schema\x18\x04 \x01(\x0b\x32(.spark.connect.AnalyzePlanRequest.SchemaH\x00R\x06schema\x12\x45\n\x07\x65xplain\x18\x05 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.ExplainH\x00R\x07\x65xplain\x12O\n\x0btree_string\x18\x06 \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.TreeStringH\x00R\ntreeString\x12\x46\n\x08is_local\x18\x07 \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.IsLocalH\x00R\x07isLocal\x12R\n\x0cis_streaming\x18\x08 \x01(\x0b\x32-.spark.connect.AnalyzePlanRequest.IsStreamingH\x00R\x0bisStreaming\x12O\n\x0binput_files\x18\t \x01(\x0b\x32,.spark.connect.AnalyzePlanRequest.InputFilesH\x00R\ninputFiles\x12U\n\rspark_version\x18\n \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SparkVersionH\x00R\x0csparkVersion\x12I\n\tddl_parse\x18\x0b \x01(\x0b\x32*.spark.connect.AnalyzePlanRequest.DDLParseH\x00R\x08\x64\x64lParse\x12X\n\x0esame_semantics\x18\x0c \x01(\x0b\x32/.spark.connect.AnalyzePlanRequest.SameSemanticsH\x00R\rsameSemantics\x12U\n\rsemantic_hash\x18\r \x01(\x0b\x32..spark.connect.AnalyzePlanRequest.SemanticHashH\x00R\x0csemanticHash\x12\x45\n\x07persist\x18\x0e \x01(\x0b\x32).spark.connect.AnalyzePlanRequest.PersistH\x00R\x07persist\x12K\n\tunpersist\x18\x0f \x01(\x0b\x32+.spark.connect.AnalyzePlanRequest.UnpersistH\x00R\tunpersist\x12_\n\x11get_storage_level\x18\x10 \x01(\x0b\x32\x31.spark.connect.AnalyzePlanRequest.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x31\n\x06Schema\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\xbb\x02\n\x07\x45xplain\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12X\n\x0c\x65xplain_mode\x18\x02 
\x01(\x0e\x32\x35.spark.connect.AnalyzePlanRequest.Explain.ExplainModeR\x0b\x65xplainMode"\xac\x01\n\x0b\x45xplainMode\x12\x1c\n\x18\x45XPLAIN_MODE_UNSPECIFIED\x10\x00\x12\x17\n\x13\x45XPLAIN_MODE_SIMPLE\x10\x01\x12\x19\n\x15\x45XPLAIN_MODE_EXTENDED\x10\x02\x12\x18\n\x14\x45XPLAIN_MODE_CODEGEN\x10\x03\x12\x15\n\x11\x45XPLAIN_MODE_COST\x10\x04\x12\x1a\n\x16\x45XPLAIN_MODE_FORMATTED\x10\x05\x1aZ\n\nTreeString\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12\x19\n\x05level\x18\x02 \x01(\x05H\x00R\x05level\x88\x01\x01\x42\x08\n\x06_level\x1a\x32\n\x07IsLocal\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x36\n\x0bIsStreaming\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x35\n\nInputFiles\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x0e\n\x0cSparkVersion\x1a)\n\x08\x44\x44LParse\x12\x1d\n\nddl_string\x18\x01 \x01(\tR\tddlString\x1ay\n\rSameSemantics\x12\x34\n\x0btarget_plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\ntargetPlan\x12\x32\n\nother_plan\x18\x02 \x01(\x0b\x32\x13.spark.connect.PlanR\totherPlan\x1a\x37\n\x0cSemanticHash\x12\'\n\x04plan\x18\x01 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x1a\x97\x01\n\x07Persist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x45\n\rstorage_level\x18\x02 \x01(\x0b\x32\x1b.spark.connect.StorageLevelH\x00R\x0cstorageLevel\x88\x01\x01\x42\x10\n\x0e_storage_level\x1an\n\tUnpersist\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x1f\n\x08\x62locking\x18\x02 \x01(\x08H\x00R\x08\x62locking\x88\x01\x01\x42\x0b\n\t_blocking\x1a\x46\n\x0fGetStorageLevel\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relationB\t\n\x07\x61nalyzeB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xce\r\n\x13\x41nalyzePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12\x43\n\x06schema\x18\x02 \x01(\x0b\x32).spark.connect.AnalyzePlanResponse.SchemaH\x00R\x06schema\x12\x46\n\x07\x65xplain\x18\x03 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.ExplainH\x00R\x07\x65xplain\x12P\n\x0btree_string\x18\x04 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.TreeStringH\x00R\ntreeString\x12G\n\x08is_local\x18\x05 \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.IsLocalH\x00R\x07isLocal\x12S\n\x0cis_streaming\x18\x06 \x01(\x0b\x32..spark.connect.AnalyzePlanResponse.IsStreamingH\x00R\x0bisStreaming\x12P\n\x0binput_files\x18\x07 \x01(\x0b\x32-.spark.connect.AnalyzePlanResponse.InputFilesH\x00R\ninputFiles\x12V\n\rspark_version\x18\x08 \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SparkVersionH\x00R\x0csparkVersion\x12J\n\tddl_parse\x18\t \x01(\x0b\x32+.spark.connect.AnalyzePlanResponse.DDLParseH\x00R\x08\x64\x64lParse\x12Y\n\x0esame_semantics\x18\n \x01(\x0b\x32\x30.spark.connect.AnalyzePlanResponse.SameSemanticsH\x00R\rsameSemantics\x12V\n\rsemantic_hash\x18\x0b \x01(\x0b\x32/.spark.connect.AnalyzePlanResponse.SemanticHashH\x00R\x0csemanticHash\x12\x46\n\x07persist\x18\x0c \x01(\x0b\x32*.spark.connect.AnalyzePlanResponse.PersistH\x00R\x07persist\x12L\n\tunpersist\x18\r \x01(\x0b\x32,.spark.connect.AnalyzePlanResponse.UnpersistH\x00R\tunpersist\x12`\n\x11get_storage_level\x18\x0e \x01(\x0b\x32\x32.spark.connect.AnalyzePlanResponse.GetStorageLevelH\x00R\x0fgetStorageLevel\x1a\x39\n\x06Schema\x12/\n\x06schema\x18\x01 
\x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1a\x30\n\x07\x45xplain\x12%\n\x0e\x65xplain_string\x18\x01 \x01(\tR\rexplainString\x1a-\n\nTreeString\x12\x1f\n\x0btree_string\x18\x01 \x01(\tR\ntreeString\x1a$\n\x07IsLocal\x12\x19\n\x08is_local\x18\x01 \x01(\x08R\x07isLocal\x1a\x30\n\x0bIsStreaming\x12!\n\x0cis_streaming\x18\x01 \x01(\x08R\x0bisStreaming\x1a"\n\nInputFiles\x12\x14\n\x05\x66iles\x18\x01 \x03(\tR\x05\x66iles\x1a(\n\x0cSparkVersion\x12\x18\n\x07version\x18\x01 \x01(\tR\x07version\x1a;\n\x08\x44\x44LParse\x12/\n\x06parsed\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06parsed\x1a\'\n\rSameSemantics\x12\x16\n\x06result\x18\x01 \x01(\x08R\x06result\x1a&\n\x0cSemanticHash\x12\x16\n\x06result\x18\x01 \x01(\x05R\x06result\x1a\t\n\x07Persist\x1a\x0b\n\tUnpersist\x1aS\n\x0fGetStorageLevel\x12@\n\rstorage_level\x18\x01 \x01(\x0b\x32\x1b.spark.connect.StorageLevelR\x0cstorageLevelB\x08\n\x06result"\xa3\x05\n\x12\x45xecutePlanRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12&\n\x0coperation_id\x18\x06 \x01(\tH\x01R\x0boperationId\x88\x01\x01\x12\'\n\x04plan\x18\x03 \x01(\x0b\x32\x13.spark.connect.PlanR\x04plan\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12X\n\x0frequest_options\x18\x05 \x03(\x0b\x32/.spark.connect.ExecutePlanRequest.RequestOptionR\x0erequestOptions\x12\x12\n\x04tags\x18\x07 \x03(\tR\x04tags\x1a\xa5\x01\n\rRequestOption\x12K\n\x10reattach_options\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ReattachOptionsH\x00R\x0freattachOptions\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x10\n\x0erequest_optionB)\n\'_client_observed_server_side_session_idB\x0f\n\r_operation_idB\x0e\n\x0c_client_type"\xe6\x16\n\x13\x45xecutePlanResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x0f \x01(\tR\x13serverSideSessionId\x12!\n\x0coperation_id\x18\x0c \x01(\tR\x0boperationId\x12\x1f\n\x0bresponse_id\x18\r \x01(\tR\nresponseId\x12P\n\x0b\x61rrow_batch\x18\x02 \x01(\x0b\x32-.spark.connect.ExecutePlanResponse.ArrowBatchH\x00R\narrowBatch\x12\x63\n\x12sql_command_result\x18\x05 \x01(\x0b\x32\x33.spark.connect.ExecutePlanResponse.SqlCommandResultH\x00R\x10sqlCommandResult\x12~\n#write_stream_operation_start_result\x18\x08 \x01(\x0b\x32..spark.connect.WriteStreamOperationStartResultH\x00R\x1fwriteStreamOperationStartResult\x12q\n\x1estreaming_query_command_result\x18\t \x01(\x0b\x32*.spark.connect.StreamingQueryCommandResultH\x00R\x1bstreamingQueryCommandResult\x12k\n\x1cget_resources_command_result\x18\n \x01(\x0b\x32(.spark.connect.GetResourcesCommandResultH\x00R\x19getResourcesCommandResult\x12\x87\x01\n&streaming_query_manager_command_result\x18\x0b \x01(\x0b\x32\x31.spark.connect.StreamingQueryManagerCommandResultH\x00R"streamingQueryManagerCommandResult\x12\x87\x01\n&streaming_query_listener_events_result\x18\x10 \x01(\x0b\x32\x31.spark.connect.StreamingQueryListenerEventsResultH\x00R"streamingQueryListenerEventsResult\x12\\\n\x0fresult_complete\x18\x0e \x01(\x0b\x32\x31.spark.connect.ExecutePlanResponse.ResultCompleteH\x00R\x0eresultComplete\x12\x87\x01\n&create_resource_profile_command_result\x18\x11 \x01(\x0b\x32\x31.spark.connect.CreateResourceProfileCommandResultH\x00R"createResourceProfileCommandResult\x12\x65\n\x12\x65xecution_progress\x18\x12 
\x01(\x0b\x32\x34.spark.connect.ExecutePlanResponse.ExecutionProgressH\x00R\x11\x65xecutionProgress\x12\x64\n\x19\x63heckpoint_command_result\x18\x13 \x01(\x0b\x32&.spark.connect.CheckpointCommandResultH\x00R\x17\x63heckpointCommandResult\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x44\n\x07metrics\x18\x04 \x01(\x0b\x32*.spark.connect.ExecutePlanResponse.MetricsR\x07metrics\x12]\n\x10observed_metrics\x18\x06 \x03(\x0b\x32\x32.spark.connect.ExecutePlanResponse.ObservedMetricsR\x0fobservedMetrics\x12/\n\x06schema\x18\x07 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema\x1aG\n\x10SqlCommandResult\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x1av\n\nArrowBatch\x12\x1b\n\trow_count\x18\x01 \x01(\x03R\x08rowCount\x12\x12\n\x04\x64\x61ta\x18\x02 \x01(\x0cR\x04\x64\x61ta\x12&\n\x0cstart_offset\x18\x03 \x01(\x03H\x00R\x0bstartOffset\x88\x01\x01\x42\x0f\n\r_start_offset\x1a\x85\x04\n\x07Metrics\x12Q\n\x07metrics\x18\x01 \x03(\x0b\x32\x37.spark.connect.ExecutePlanResponse.Metrics.MetricObjectR\x07metrics\x1a\xcc\x02\n\x0cMetricObject\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x17\n\x07plan_id\x18\x02 \x01(\x03R\x06planId\x12\x16\n\x06parent\x18\x03 \x01(\x03R\x06parent\x12z\n\x11\x65xecution_metrics\x18\x04 \x03(\x0b\x32M.spark.connect.ExecutePlanResponse.Metrics.MetricObject.ExecutionMetricsEntryR\x10\x65xecutionMetrics\x1a{\n\x15\x45xecutionMetricsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ExecutePlanResponse.Metrics.MetricValueR\x05value:\x02\x38\x01\x1aX\n\x0bMetricValue\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x14\n\x05value\x18\x02 \x01(\x03R\x05value\x12\x1f\n\x0bmetric_type\x18\x03 \x01(\tR\nmetricType\x1a\x8d\x01\n\x0fObservedMetrics\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x12\x12\n\x04keys\x18\x03 \x03(\tR\x04keys\x12\x17\n\x07plan_id\x18\x04 \x01(\x03R\x06planId\x1a\x10\n\x0eResultComplete\x1a\xcd\x02\n\x11\x45xecutionProgress\x12V\n\x06stages\x18\x01 \x03(\x0b\x32>.spark.connect.ExecutePlanResponse.ExecutionProgress.StageInfoR\x06stages\x12,\n\x12num_inflight_tasks\x18\x02 \x01(\x03R\x10numInflightTasks\x1a\xb1\x01\n\tStageInfo\x12\x19\n\x08stage_id\x18\x01 \x01(\x03R\x07stageId\x12\x1b\n\tnum_tasks\x18\x02 \x01(\x03R\x08numTasks\x12.\n\x13num_completed_tasks\x18\x03 \x01(\x03R\x11numCompletedTasks\x12(\n\x10input_bytes_read\x18\x04 \x01(\x03R\x0einputBytesRead\x12\x12\n\x04\x64one\x18\x05 \x01(\x08R\x04\x64oneB\x0f\n\rresponse_type"A\n\x08KeyValue\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x19\n\x05value\x18\x02 \x01(\tH\x00R\x05value\x88\x01\x01\x42\x08\n\x06_value"\x87\t\n\rConfigRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x08 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x44\n\toperation\x18\x03 \x01(\x0b\x32&.spark.connect.ConfigRequest.OperationR\toperation\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x1a\xf2\x03\n\tOperation\x12\x34\n\x03set\x18\x01 \x01(\x0b\x32 .spark.connect.ConfigRequest.SetH\x00R\x03set\x12\x34\n\x03get\x18\x02 \x01(\x0b\x32 .spark.connect.ConfigRequest.GetH\x00R\x03get\x12W\n\x10get_with_default\x18\x03 \x01(\x0b\x32+.spark.connect.ConfigRequest.GetWithDefaultH\x00R\x0egetWithDefault\x12G\n\nget_option\x18\x04 
\x01(\x0b\x32&.spark.connect.ConfigRequest.GetOptionH\x00R\tgetOption\x12>\n\x07get_all\x18\x05 \x01(\x0b\x32#.spark.connect.ConfigRequest.GetAllH\x00R\x06getAll\x12:\n\x05unset\x18\x06 \x01(\x0b\x32".spark.connect.ConfigRequest.UnsetH\x00R\x05unset\x12P\n\ris_modifiable\x18\x07 \x01(\x0b\x32).spark.connect.ConfigRequest.IsModifiableH\x00R\x0cisModifiableB\t\n\x07op_type\x1a\x34\n\x03Set\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x19\n\x03Get\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a?\n\x0eGetWithDefault\x12-\n\x05pairs\x18\x01 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x1a\x1f\n\tGetOption\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a\x30\n\x06GetAll\x12\x1b\n\x06prefix\x18\x01 \x01(\tH\x00R\x06prefix\x88\x01\x01\x42\t\n\x07_prefix\x1a\x1b\n\x05Unset\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keys\x1a"\n\x0cIsModifiable\x12\x12\n\x04keys\x18\x01 \x03(\tR\x04keysB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xaf\x01\n\x0e\x43onfigResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x04 \x01(\tR\x13serverSideSessionId\x12-\n\x05pairs\x18\x02 \x03(\x0b\x32\x17.spark.connect.KeyValueR\x05pairs\x12\x1a\n\x08warnings\x18\x03 \x03(\tR\x08warnings"\xea\x07\n\x13\x41\x64\x64\x41rtifactsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12$\n\x0b\x63lient_type\x18\x06 \x01(\tH\x02R\nclientType\x88\x01\x01\x12@\n\x05\x62\x61tch\x18\x03 \x01(\x0b\x32(.spark.connect.AddArtifactsRequest.BatchH\x00R\x05\x62\x61tch\x12Z\n\x0b\x62\x65gin_chunk\x18\x04 \x01(\x0b\x32\x37.spark.connect.AddArtifactsRequest.BeginChunkedArtifactH\x00R\nbeginChunk\x12H\n\x05\x63hunk\x18\x05 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkH\x00R\x05\x63hunk\x1a\x35\n\rArtifactChunk\x12\x12\n\x04\x64\x61ta\x18\x01 \x01(\x0cR\x04\x64\x61ta\x12\x10\n\x03\x63rc\x18\x02 \x01(\x03R\x03\x63rc\x1ao\n\x13SingleChunkArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x44\n\x04\x64\x61ta\x18\x02 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x04\x64\x61ta\x1a]\n\x05\x42\x61tch\x12T\n\tartifacts\x18\x01 \x03(\x0b\x32\x36.spark.connect.AddArtifactsRequest.SingleChunkArtifactR\tartifacts\x1a\xc1\x01\n\x14\x42\x65ginChunkedArtifact\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1f\n\x0btotal_bytes\x18\x02 \x01(\x03R\ntotalBytes\x12\x1d\n\nnum_chunks\x18\x03 \x01(\x03R\tnumChunks\x12U\n\rinitial_chunk\x18\x04 \x01(\x0b\x32\x30.spark.connect.AddArtifactsRequest.ArtifactChunkR\x0cinitialChunkB\t\n\x07payloadB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x02\n\x14\x41\x64\x64\x41rtifactsResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\tartifacts\x18\x01 \x03(\x0b\x32\x33.spark.connect.AddArtifactsResponse.ArtifactSummaryR\tartifacts\x1aQ\n\x0f\x41rtifactSummary\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12*\n\x11is_crc_successful\x18\x02 \x01(\x08R\x0fisCrcSuccessful"\xc6\x02\n\x17\x41rtifactStatusesRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 
\x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x01R\nclientType\x88\x01\x01\x12\x14\n\x05names\x18\x04 \x03(\tR\x05namesB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xe0\x02\n\x18\x41rtifactStatusesResponse\x12\x1d\n\nsession_id\x18\x02 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12Q\n\x08statuses\x18\x01 \x03(\x0b\x32\x35.spark.connect.ArtifactStatusesResponse.StatusesEntryR\x08statuses\x1as\n\rStatusesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12L\n\x05value\x18\x02 \x01(\x0b\x32\x36.spark.connect.ArtifactStatusesResponse.ArtifactStatusR\x05value:\x02\x38\x01\x1a(\n\x0e\x41rtifactStatus\x12\x16\n\x06\x65xists\x18\x01 \x01(\x08R\x06\x65xists"\xdb\x04\n\x10InterruptRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x02R\nclientType\x88\x01\x01\x12T\n\x0einterrupt_type\x18\x04 \x01(\x0e\x32-.spark.connect.InterruptRequest.InterruptTypeR\rinterruptType\x12%\n\roperation_tag\x18\x05 \x01(\tH\x00R\x0coperationTag\x12#\n\x0coperation_id\x18\x06 \x01(\tH\x00R\x0boperationId"\x80\x01\n\rInterruptType\x12\x1e\n\x1aINTERRUPT_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12INTERRUPT_TYPE_ALL\x10\x01\x12\x16\n\x12INTERRUPT_TYPE_TAG\x10\x02\x12\x1f\n\x1bINTERRUPT_TYPE_OPERATION_ID\x10\x03\x42\x0b\n\tinterruptB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x90\x01\n\x11InterruptResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\'\n\x0finterrupted_ids\x18\x02 \x03(\tR\x0einterruptedIds"5\n\x0fReattachOptions\x12"\n\x0creattachable\x18\x01 \x01(\x08R\x0creattachable"\x96\x03\n\x16ReattachExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x06 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x12-\n\x10last_response_id\x18\x05 \x01(\tH\x02R\x0elastResponseId\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_typeB\x13\n\x11_last_response_id"\xc9\x04\n\x15ReleaseExecuteRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x07 \x01(\tH\x01R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12!\n\x0coperation_id\x18\x03 \x01(\tR\x0boperationId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x02R\nclientType\x88\x01\x01\x12R\n\x0brelease_all\x18\x05 \x01(\x0b\x32/.spark.connect.ReleaseExecuteRequest.ReleaseAllH\x00R\nreleaseAll\x12X\n\rrelease_until\x18\x06 \x01(\x0b\x32\x31.spark.connect.ReleaseExecuteRequest.ReleaseUntilH\x00R\x0creleaseUntil\x1a\x0c\n\nReleaseAll\x1a/\n\x0cReleaseUntil\x12\x1f\n\x0bresponse_id\x18\x01 \x01(\tR\nresponseIdB\t\n\x07releaseB)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\xa5\x01\n\x16ReleaseExecuteResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x03 
\x01(\tR\x13serverSideSessionId\x12&\n\x0coperation_id\x18\x02 \x01(\tH\x00R\x0boperationId\x88\x01\x01\x42\x0f\n\r_operation_id"\xab\x01\n\x15ReleaseSessionRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12$\n\x0b\x63lient_type\x18\x03 \x01(\tH\x00R\nclientType\x88\x01\x01\x42\x0e\n\x0c_client_type"l\n\x16ReleaseSessionResponse\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12\x33\n\x16server_side_session_id\x18\x02 \x01(\tR\x13serverSideSessionId"\xcc\x02\n\x18\x46\x65tchErrorDetailsRequest\x12\x1d\n\nsession_id\x18\x01 \x01(\tR\tsessionId\x12V\n&client_observed_server_side_session_id\x18\x05 \x01(\tH\x00R!clientObservedServerSideSessionId\x88\x01\x01\x12=\n\x0cuser_context\x18\x02 \x01(\x0b\x32\x1a.spark.connect.UserContextR\x0buserContext\x12\x19\n\x08\x65rror_id\x18\x03 \x01(\tR\x07\x65rrorId\x12$\n\x0b\x63lient_type\x18\x04 \x01(\tH\x01R\nclientType\x88\x01\x01\x42)\n\'_client_observed_server_side_session_idB\x0e\n\x0c_client_type"\x93\x0c\n\x19\x46\x65tchErrorDetailsResponse\x12\x33\n\x16server_side_session_id\x18\x03 \x01(\tR\x13serverSideSessionId\x12\x1d\n\nsession_id\x18\x04 \x01(\tR\tsessionId\x12)\n\x0eroot_error_idx\x18\x01 \x01(\x05H\x00R\x0crootErrorIdx\x88\x01\x01\x12\x46\n\x06\x65rrors\x18\x02 \x03(\x0b\x32..spark.connect.FetchErrorDetailsResponse.ErrorR\x06\x65rrors\x1a\xae\x01\n\x11StackTraceElement\x12\'\n\x0f\x64\x65\x63laring_class\x18\x01 \x01(\tR\x0e\x64\x65\x63laringClass\x12\x1f\n\x0bmethod_name\x18\x02 \x01(\tR\nmethodName\x12 \n\tfile_name\x18\x03 \x01(\tH\x00R\x08\x66ileName\x88\x01\x01\x12\x1f\n\x0bline_number\x18\x04 \x01(\x05R\nlineNumberB\x0c\n\n_file_name\x1a\xf0\x02\n\x0cQueryContext\x12\x64\n\x0c\x63ontext_type\x18\n \x01(\x0e\x32\x41.spark.connect.FetchErrorDetailsResponse.QueryContext.ContextTypeR\x0b\x63ontextType\x12\x1f\n\x0bobject_type\x18\x01 \x01(\tR\nobjectType\x12\x1f\n\x0bobject_name\x18\x02 \x01(\tR\nobjectName\x12\x1f\n\x0bstart_index\x18\x03 \x01(\x05R\nstartIndex\x12\x1d\n\nstop_index\x18\x04 \x01(\x05R\tstopIndex\x12\x1a\n\x08\x66ragment\x18\x05 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x06 \x01(\tR\x08\x63\x61llSite\x12\x18\n\x07summary\x18\x07 \x01(\tR\x07summary"%\n\x0b\x43ontextType\x12\x07\n\x03SQL\x10\x00\x12\r\n\tDATAFRAME\x10\x01\x1a\x99\x03\n\x0eSparkThrowable\x12$\n\x0b\x65rror_class\x18\x01 \x01(\tH\x00R\nerrorClass\x88\x01\x01\x12}\n\x12message_parameters\x18\x02 \x03(\x0b\x32N.spark.connect.FetchErrorDetailsResponse.SparkThrowable.MessageParametersEntryR\x11messageParameters\x12\\\n\x0equery_contexts\x18\x03 \x03(\x0b\x32\x35.spark.connect.FetchErrorDetailsResponse.QueryContextR\rqueryContexts\x12 \n\tsql_state\x18\x04 \x01(\tH\x01R\x08sqlState\x88\x01\x01\x1a\x44\n\x16MessageParametersEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\x0e\n\x0c_error_classB\x0c\n\n_sql_state\x1a\xdb\x02\n\x05\x45rror\x12\x30\n\x14\x65rror_type_hierarchy\x18\x01 \x03(\tR\x12\x65rrorTypeHierarchy\x12\x18\n\x07message\x18\x02 \x01(\tR\x07message\x12[\n\x0bstack_trace\x18\x03 \x03(\x0b\x32:.spark.connect.FetchErrorDetailsResponse.StackTraceElementR\nstackTrace\x12 \n\tcause_idx\x18\x04 \x01(\x05H\x00R\x08\x63\x61useIdx\x88\x01\x01\x12\x65\n\x0fspark_throwable\x18\x05 
\x01(\x0b\x32\x37.spark.connect.FetchErrorDetailsResponse.SparkThrowableH\x01R\x0esparkThrowable\x88\x01\x01\x42\x0c\n\n_cause_idxB\x12\n\x10_spark_throwableB\x11\n\x0f_root_error_idx"Z\n\x17\x43heckpointCommandResult\x12?\n\x08relation\x18\x01 \x01(\x0b\x32#.spark.connect.CachedRemoteRelationR\x08relation2\xb2\x07\n\x13SparkConnectService\x12X\n\x0b\x45xecutePlan\x12!.spark.connect.ExecutePlanRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12V\n\x0b\x41nalyzePlan\x12!.spark.connect.AnalyzePlanRequest\x1a".spark.connect.AnalyzePlanResponse"\x00\x12G\n\x06\x43onfig\x12\x1c.spark.connect.ConfigRequest\x1a\x1d.spark.connect.ConfigResponse"\x00\x12[\n\x0c\x41\x64\x64\x41rtifacts\x12".spark.connect.AddArtifactsRequest\x1a#.spark.connect.AddArtifactsResponse"\x00(\x01\x12\x63\n\x0e\x41rtifactStatus\x12&.spark.connect.ArtifactStatusesRequest\x1a\'.spark.connect.ArtifactStatusesResponse"\x00\x12P\n\tInterrupt\x12\x1f.spark.connect.InterruptRequest\x1a .spark.connect.InterruptResponse"\x00\x12`\n\x0fReattachExecute\x12%.spark.connect.ReattachExecuteRequest\x1a".spark.connect.ExecutePlanResponse"\x00\x30\x01\x12_\n\x0eReleaseExecute\x12$.spark.connect.ReleaseExecuteRequest\x1a%.spark.connect.ReleaseExecuteResponse"\x00\x12_\n\x0eReleaseSession\x12$.spark.connect.ReleaseSessionRequest\x1a%.spark.connect.ReleaseSessionResponse"\x00\x12h\n\x11\x46\x65tchErrorDetails\x12\'.spark.connect.FetchErrorDetailsRequest\x1a(.spark.connect.FetchErrorDetailsResponse"\x00\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -120,109 +120,111 @@ _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_start = 5196 _EXECUTEPLANREQUEST_REQUESTOPTION._serialized_end = 5361 _EXECUTEPLANRESPONSE._serialized_start = 5440 - _EXECUTEPLANRESPONSE._serialized_end = 8230 - _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 7030 - _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 7101 - _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 7103 - _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_end = 7221 - _EXECUTEPLANRESPONSE_METRICS._serialized_start = 7224 - _EXECUTEPLANRESPONSE_METRICS._serialized_end = 7741 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_start = 7319 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_end = 7651 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_start = 7528 - _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_end = 7651 - _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_start = 7653 - _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_end = 7741 - _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_start = 7743 - _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_end = 7859 - _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_start = 7861 - _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_end = 7877 - _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_start = 7880 - _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_end = 8213 - _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO._serialized_start = 8036 - _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO._serialized_end = 8213 - _KEYVALUE._serialized_start = 8232 - _KEYVALUE._serialized_end = 8297 - _CONFIGREQUEST._serialized_start = 8300 - _CONFIGREQUEST._serialized_end = 9459 - _CONFIGREQUEST_OPERATION._serialized_start = 8608 - _CONFIGREQUEST_OPERATION._serialized_end = 9106 - _CONFIGREQUEST_SET._serialized_start = 9108 - _CONFIGREQUEST_SET._serialized_end = 9160 - 
_CONFIGREQUEST_GET._serialized_start = 9162 - _CONFIGREQUEST_GET._serialized_end = 9187 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 9189 - _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 9252 - _CONFIGREQUEST_GETOPTION._serialized_start = 9254 - _CONFIGREQUEST_GETOPTION._serialized_end = 9285 - _CONFIGREQUEST_GETALL._serialized_start = 9287 - _CONFIGREQUEST_GETALL._serialized_end = 9335 - _CONFIGREQUEST_UNSET._serialized_start = 9337 - _CONFIGREQUEST_UNSET._serialized_end = 9364 - _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 9366 - _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 9400 - _CONFIGRESPONSE._serialized_start = 9462 - _CONFIGRESPONSE._serialized_end = 9637 - _ADDARTIFACTSREQUEST._serialized_start = 9640 - _ADDARTIFACTSREQUEST._serialized_end = 10642 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 10115 - _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 10168 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 10170 - _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 10281 - _ADDARTIFACTSREQUEST_BATCH._serialized_start = 10283 - _ADDARTIFACTSREQUEST_BATCH._serialized_end = 10376 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 10379 - _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 10572 - _ADDARTIFACTSRESPONSE._serialized_start = 10645 - _ADDARTIFACTSRESPONSE._serialized_end = 10917 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 10836 - _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 10917 - _ARTIFACTSTATUSESREQUEST._serialized_start = 10920 - _ARTIFACTSTATUSESREQUEST._serialized_end = 11246 - _ARTIFACTSTATUSESRESPONSE._serialized_start = 11249 - _ARTIFACTSTATUSESRESPONSE._serialized_end = 11601 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 11444 - _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 11559 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 11561 - _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 11601 - _INTERRUPTREQUEST._serialized_start = 11604 - _INTERRUPTREQUEST._serialized_end = 12207 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 12007 - _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 12135 - _INTERRUPTRESPONSE._serialized_start = 12210 - _INTERRUPTRESPONSE._serialized_end = 12354 - _REATTACHOPTIONS._serialized_start = 12356 - _REATTACHOPTIONS._serialized_end = 12409 - _REATTACHEXECUTEREQUEST._serialized_start = 12412 - _REATTACHEXECUTEREQUEST._serialized_end = 12818 - _RELEASEEXECUTEREQUEST._serialized_start = 12821 - _RELEASEEXECUTEREQUEST._serialized_end = 13406 - _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_start = 13275 - _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_end = 13287 - _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_start = 13289 - _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_end = 13336 - _RELEASEEXECUTERESPONSE._serialized_start = 13409 - _RELEASEEXECUTERESPONSE._serialized_end = 13574 - _RELEASESESSIONREQUEST._serialized_start = 13577 - _RELEASESESSIONREQUEST._serialized_end = 13748 - _RELEASESESSIONRESPONSE._serialized_start = 13750 - _RELEASESESSIONRESPONSE._serialized_end = 13858 - _FETCHERRORDETAILSREQUEST._serialized_start = 13861 - _FETCHERRORDETAILSREQUEST._serialized_end = 14193 - _FETCHERRORDETAILSRESPONSE._serialized_start = 14196 - _FETCHERRORDETAILSRESPONSE._serialized_end = 15751 - _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 14425 - _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 14599 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_start = 
14602 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_end = 14970 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_start = 14933 - _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_end = 14970 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_start = 14973 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_end = 15382 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_start = 15284 - _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_end = 15352 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 15385 - _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 15732 - _SPARKCONNECTSERVICE._serialized_start = 15754 - _SPARKCONNECTSERVICE._serialized_end = 16700 + _EXECUTEPLANRESPONSE._serialized_end = 8358 + _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_start = 7132 + _EXECUTEPLANRESPONSE_SQLCOMMANDRESULT._serialized_end = 7203 + _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_start = 7205 + _EXECUTEPLANRESPONSE_ARROWBATCH._serialized_end = 7323 + _EXECUTEPLANRESPONSE_METRICS._serialized_start = 7326 + _EXECUTEPLANRESPONSE_METRICS._serialized_end = 7843 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_start = 7421 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT._serialized_end = 7753 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_start = 7630 + _EXECUTEPLANRESPONSE_METRICS_METRICOBJECT_EXECUTIONMETRICSENTRY._serialized_end = 7753 + _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_start = 7755 + _EXECUTEPLANRESPONSE_METRICS_METRICVALUE._serialized_end = 7843 + _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_start = 7846 + _EXECUTEPLANRESPONSE_OBSERVEDMETRICS._serialized_end = 7987 + _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_start = 7989 + _EXECUTEPLANRESPONSE_RESULTCOMPLETE._serialized_end = 8005 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_start = 8008 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS._serialized_end = 8341 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO._serialized_start = 8164 + _EXECUTEPLANRESPONSE_EXECUTIONPROGRESS_STAGEINFO._serialized_end = 8341 + _KEYVALUE._serialized_start = 8360 + _KEYVALUE._serialized_end = 8425 + _CONFIGREQUEST._serialized_start = 8428 + _CONFIGREQUEST._serialized_end = 9587 + _CONFIGREQUEST_OPERATION._serialized_start = 8736 + _CONFIGREQUEST_OPERATION._serialized_end = 9234 + _CONFIGREQUEST_SET._serialized_start = 9236 + _CONFIGREQUEST_SET._serialized_end = 9288 + _CONFIGREQUEST_GET._serialized_start = 9290 + _CONFIGREQUEST_GET._serialized_end = 9315 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_start = 9317 + _CONFIGREQUEST_GETWITHDEFAULT._serialized_end = 9380 + _CONFIGREQUEST_GETOPTION._serialized_start = 9382 + _CONFIGREQUEST_GETOPTION._serialized_end = 9413 + _CONFIGREQUEST_GETALL._serialized_start = 9415 + _CONFIGREQUEST_GETALL._serialized_end = 9463 + _CONFIGREQUEST_UNSET._serialized_start = 9465 + _CONFIGREQUEST_UNSET._serialized_end = 9492 + _CONFIGREQUEST_ISMODIFIABLE._serialized_start = 9494 + _CONFIGREQUEST_ISMODIFIABLE._serialized_end = 9528 + _CONFIGRESPONSE._serialized_start = 9590 + _CONFIGRESPONSE._serialized_end = 9765 + _ADDARTIFACTSREQUEST._serialized_start = 9768 + _ADDARTIFACTSREQUEST._serialized_end = 10770 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_start = 10243 + _ADDARTIFACTSREQUEST_ARTIFACTCHUNK._serialized_end = 10296 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_start = 10298 + _ADDARTIFACTSREQUEST_SINGLECHUNKARTIFACT._serialized_end = 10409 + 
_ADDARTIFACTSREQUEST_BATCH._serialized_start = 10411 + _ADDARTIFACTSREQUEST_BATCH._serialized_end = 10504 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_start = 10507 + _ADDARTIFACTSREQUEST_BEGINCHUNKEDARTIFACT._serialized_end = 10700 + _ADDARTIFACTSRESPONSE._serialized_start = 10773 + _ADDARTIFACTSRESPONSE._serialized_end = 11045 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_start = 10964 + _ADDARTIFACTSRESPONSE_ARTIFACTSUMMARY._serialized_end = 11045 + _ARTIFACTSTATUSESREQUEST._serialized_start = 11048 + _ARTIFACTSTATUSESREQUEST._serialized_end = 11374 + _ARTIFACTSTATUSESRESPONSE._serialized_start = 11377 + _ARTIFACTSTATUSESRESPONSE._serialized_end = 11729 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_start = 11572 + _ARTIFACTSTATUSESRESPONSE_STATUSESENTRY._serialized_end = 11687 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_start = 11689 + _ARTIFACTSTATUSESRESPONSE_ARTIFACTSTATUS._serialized_end = 11729 + _INTERRUPTREQUEST._serialized_start = 11732 + _INTERRUPTREQUEST._serialized_end = 12335 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_start = 12135 + _INTERRUPTREQUEST_INTERRUPTTYPE._serialized_end = 12263 + _INTERRUPTRESPONSE._serialized_start = 12338 + _INTERRUPTRESPONSE._serialized_end = 12482 + _REATTACHOPTIONS._serialized_start = 12484 + _REATTACHOPTIONS._serialized_end = 12537 + _REATTACHEXECUTEREQUEST._serialized_start = 12540 + _REATTACHEXECUTEREQUEST._serialized_end = 12946 + _RELEASEEXECUTEREQUEST._serialized_start = 12949 + _RELEASEEXECUTEREQUEST._serialized_end = 13534 + _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_start = 13403 + _RELEASEEXECUTEREQUEST_RELEASEALL._serialized_end = 13415 + _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_start = 13417 + _RELEASEEXECUTEREQUEST_RELEASEUNTIL._serialized_end = 13464 + _RELEASEEXECUTERESPONSE._serialized_start = 13537 + _RELEASEEXECUTERESPONSE._serialized_end = 13702 + _RELEASESESSIONREQUEST._serialized_start = 13705 + _RELEASESESSIONREQUEST._serialized_end = 13876 + _RELEASESESSIONRESPONSE._serialized_start = 13878 + _RELEASESESSIONRESPONSE._serialized_end = 13986 + _FETCHERRORDETAILSREQUEST._serialized_start = 13989 + _FETCHERRORDETAILSREQUEST._serialized_end = 14321 + _FETCHERRORDETAILSRESPONSE._serialized_start = 14324 + _FETCHERRORDETAILSRESPONSE._serialized_end = 15879 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_start = 14553 + _FETCHERRORDETAILSRESPONSE_STACKTRACEELEMENT._serialized_end = 14727 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_start = 14730 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT._serialized_end = 15098 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_start = 15061 + _FETCHERRORDETAILSRESPONSE_QUERYCONTEXT_CONTEXTTYPE._serialized_end = 15098 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_start = 15101 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE._serialized_end = 15510 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_start = 15412 + _FETCHERRORDETAILSRESPONSE_SPARKTHROWABLE_MESSAGEPARAMETERSENTRY._serialized_end = 15480 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_start = 15513 + _FETCHERRORDETAILSRESPONSE_ERROR._serialized_end = 15860 + _CHECKPOINTCOMMANDRESULT._serialized_start = 15881 + _CHECKPOINTCOMMANDRESULT._serialized_end = 15971 + _SPARKCONNECTSERVICE._serialized_start = 15974 + _SPARKCONNECTSERVICE._serialized_end = 16920 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/base_pb2.pyi b/python/pyspark/sql/connect/proto/base_pb2.pyi index 
d22502f8839db..1f9dfbb3294d0 100644 --- a/python/pyspark/sql/connect/proto/base_pb2.pyi +++ b/python/pyspark/sql/connect/proto/base_pb2.pyi @@ -1406,6 +1406,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): NAME_FIELD_NUMBER: builtins.int VALUES_FIELD_NUMBER: builtins.int KEYS_FIELD_NUMBER: builtins.int + PLAN_ID_FIELD_NUMBER: builtins.int name: builtins.str @property def values( @@ -1417,6 +1418,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): def keys( self, ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: ... + plan_id: builtins.int def __init__( self, *, @@ -1426,11 +1428,12 @@ class ExecutePlanResponse(google.protobuf.message.Message): ] | None = ..., keys: collections.abc.Iterable[builtins.str] | None = ..., + plan_id: builtins.int = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal[ - "keys", b"keys", "name", b"name", "values", b"values" + "keys", b"keys", "name", b"name", "plan_id", b"plan_id", "values", b"values" ], ) -> None: ... @@ -1530,6 +1533,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): RESULT_COMPLETE_FIELD_NUMBER: builtins.int CREATE_RESOURCE_PROFILE_COMMAND_RESULT_FIELD_NUMBER: builtins.int EXECUTION_PROGRESS_FIELD_NUMBER: builtins.int + CHECKPOINT_COMMAND_RESULT_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int METRICS_FIELD_NUMBER: builtins.int OBSERVED_METRICS_FIELD_NUMBER: builtins.int @@ -1591,6 +1595,9 @@ class ExecutePlanResponse(google.protobuf.message.Message): def execution_progress(self) -> global___ExecutePlanResponse.ExecutionProgress: """(Optional) Intermediate query progress reports.""" @property + def checkpoint_command_result(self) -> global___CheckpointCommandResult: + """Response for command that checkpoints a DataFrame.""" + @property def extension(self) -> google.protobuf.any_pb2.Any: """Support arbitrary result objects.""" @property @@ -1631,6 +1638,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): create_resource_profile_command_result: pyspark.sql.connect.proto.commands_pb2.CreateResourceProfileCommandResult | None = ..., execution_progress: global___ExecutePlanResponse.ExecutionProgress | None = ..., + checkpoint_command_result: global___CheckpointCommandResult | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., metrics: global___ExecutePlanResponse.Metrics | None = ..., observed_metrics: collections.abc.Iterable[global___ExecutePlanResponse.ObservedMetrics] @@ -1642,6 +1650,8 @@ class ExecutePlanResponse(google.protobuf.message.Message): field_name: typing_extensions.Literal[ "arrow_batch", b"arrow_batch", + "checkpoint_command_result", + b"checkpoint_command_result", "create_resource_profile_command_result", b"create_resource_profile_command_result", "execution_progress", @@ -1675,6 +1685,8 @@ class ExecutePlanResponse(google.protobuf.message.Message): field_name: typing_extensions.Literal[ "arrow_batch", b"arrow_batch", + "checkpoint_command_result", + b"checkpoint_command_result", "create_resource_profile_command_result", b"create_resource_profile_command_result", "execution_progress", @@ -1727,6 +1739,7 @@ class ExecutePlanResponse(google.protobuf.message.Message): "result_complete", "create_resource_profile_command_result", "execution_progress", + "checkpoint_command_result", "extension", ] | None @@ -3700,3 +3713,24 @@ class FetchErrorDetailsResponse(google.protobuf.message.Message): ) -> typing_extensions.Literal["root_error_idx"] | None: ... 
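The stub additions above give `ExecutePlanResponse.ObservedMetrics` a `plan_id` and add a `checkpoint_command_result` arm to the response. A minimal client-side sketch of reading both, assuming the regenerated `pyspark.sql.connect.proto` modules from this patch are importable; `responses` is a hypothetical iterable of `ExecutePlanResponse` messages, not an API defined here:

```python
from pyspark.sql.connect.proto import base_pb2


def handle_responses(responses: "list[base_pb2.ExecutePlanResponse]") -> None:
    """Hypothetical handler over a stream of ExecutePlanResponse messages."""
    for resp in responses:
        # New in this patch: each observed metric now carries the plan id it belongs to.
        for metric in resp.observed_metrics:
            print(metric.name, metric.plan_id, list(metric.keys))
        # New response arm: the server's answer to a CheckpointCommand, carrying the
        # CachedRemoteRelation that now represents the checkpointed plan.
        if resp.HasField("checkpoint_command_result"):
            print("checkpointed:", resp.checkpoint_command_result.relation)
```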
global___FetchErrorDetailsResponse = FetchErrorDetailsResponse + +class CheckpointCommandResult(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + RELATION_FIELD_NUMBER: builtins.int + @property + def relation(self) -> pyspark.sql.connect.proto.relations_pb2.CachedRemoteRelation: + """(Required) The logical plan checkpointed.""" + def __init__( + self, + *, + relation: pyspark.sql.connect.proto.relations_pb2.CachedRemoteRelation | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["relation", b"relation"] + ) -> builtins.bool: ... + def ClearField( + self, field_name: typing_extensions.Literal["relation", b"relation"] + ) -> None: ... + +global___CheckpointCommandResult = CheckpointCommandResult diff --git a/python/pyspark/sql/connect/proto/commands_pb2.py b/python/pyspark/sql/connect/proto/commands_pb2.py index 50cd309dcd8da..8f67f817c3f00 100644 --- a/python/pyspark/sql/connect/proto/commands_pb2.py +++ b/python/pyspark/sql/connect/proto/commands_pb2.py @@ -35,7 +35,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1cspark/connect/commands.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto"\xd5\n\n\x07\x43ommand\x12]\n\x11register_function\x18\x01 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x10registerFunction\x12H\n\x0fwrite_operation\x18\x02 \x01(\x0b\x32\x1d.spark.connect.WriteOperationH\x00R\x0ewriteOperation\x12_\n\x15\x63reate_dataframe_view\x18\x03 \x01(\x0b\x32).spark.connect.CreateDataFrameViewCommandH\x00R\x13\x63reateDataframeView\x12O\n\x12write_operation_v2\x18\x04 \x01(\x0b\x32\x1f.spark.connect.WriteOperationV2H\x00R\x10writeOperationV2\x12<\n\x0bsql_command\x18\x05 \x01(\x0b\x32\x19.spark.connect.SqlCommandH\x00R\nsqlCommand\x12k\n\x1cwrite_stream_operation_start\x18\x06 \x01(\x0b\x32(.spark.connect.WriteStreamOperationStartH\x00R\x19writeStreamOperationStart\x12^\n\x17streaming_query_command\x18\x07 \x01(\x0b\x32$.spark.connect.StreamingQueryCommandH\x00R\x15streamingQueryCommand\x12X\n\x15get_resources_command\x18\x08 \x01(\x0b\x32".spark.connect.GetResourcesCommandH\x00R\x13getResourcesCommand\x12t\n\x1fstreaming_query_manager_command\x18\t \x01(\x0b\x32+.spark.connect.StreamingQueryManagerCommandH\x00R\x1cstreamingQueryManagerCommand\x12m\n\x17register_table_function\x18\n \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R\x15registerTableFunction\x12\x81\x01\n$streaming_query_listener_bus_command\x18\x0b \x01(\x0b\x32/.spark.connect.StreamingQueryListenerBusCommandH\x00R streamingQueryListenerBusCommand\x12\x64\n\x14register_data_source\x18\x0c \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R\x12registerDataSource\x12t\n\x1f\x63reate_resource_profile_command\x18\r \x01(\x0b\x32+.spark.connect.CreateResourceProfileCommandH\x00R\x1c\x63reateResourceProfileCommand\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x0e\n\x0c\x63ommand_type"\xaa\x04\n\nSqlCommand\x12\x14\n\x03sql\x18\x01 \x01(\tB\x02\x18\x01R\x03sql\x12;\n\x04\x61rgs\x18\x02 \x03(\x0b\x32#.spark.connect.SqlCommand.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12Z\n\x0fnamed_arguments\x18\x04 
\x03(\x0b\x32-.spark.connect.SqlCommand.NamedArgumentsEntryB\x02\x18\x01R\x0enamedArguments\x12\x42\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionB\x02\x18\x01R\x0cposArguments\x12-\n\x05input\x18\x06 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"\x96\x01\n\x1a\x43reateDataFrameViewCommand\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x1b\n\tis_global\x18\x03 \x01(\x08R\x08isGlobal\x12\x18\n\x07replace\x18\x04 \x01(\x08R\x07replace"\xca\x08\n\x0eWriteOperation\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1b\n\x06source\x18\x02 \x01(\tH\x01R\x06source\x88\x01\x01\x12\x14\n\x04path\x18\x03 \x01(\tH\x00R\x04path\x12?\n\x05table\x18\x04 \x01(\x0b\x32\'.spark.connect.WriteOperation.SaveTableH\x00R\x05table\x12:\n\x04mode\x18\x05 \x01(\x0e\x32&.spark.connect.WriteOperation.SaveModeR\x04mode\x12*\n\x11sort_column_names\x18\x06 \x03(\tR\x0fsortColumnNames\x12\x31\n\x14partitioning_columns\x18\x07 \x03(\tR\x13partitioningColumns\x12\x43\n\tbucket_by\x18\x08 \x01(\x0b\x32&.spark.connect.WriteOperation.BucketByR\x08\x62ucketBy\x12\x44\n\x07options\x18\t \x03(\x0b\x32*.spark.connect.WriteOperation.OptionsEntryR\x07options\x12-\n\x12\x63lustering_columns\x18\n \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x82\x02\n\tSaveTable\x12\x1d\n\ntable_name\x18\x01 \x01(\tR\ttableName\x12X\n\x0bsave_method\x18\x02 \x01(\x0e\x32\x37.spark.connect.WriteOperation.SaveTable.TableSaveMethodR\nsaveMethod"|\n\x0fTableSaveMethod\x12!\n\x1dTABLE_SAVE_METHOD_UNSPECIFIED\x10\x00\x12#\n\x1fTABLE_SAVE_METHOD_SAVE_AS_TABLE\x10\x01\x12!\n\x1dTABLE_SAVE_METHOD_INSERT_INTO\x10\x02\x1a[\n\x08\x42ucketBy\x12.\n\x13\x62ucket_column_names\x18\x01 \x03(\tR\x11\x62ucketColumnNames\x12\x1f\n\x0bnum_buckets\x18\x02 \x01(\x05R\nnumBuckets"\x89\x01\n\x08SaveMode\x12\x19\n\x15SAVE_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10SAVE_MODE_APPEND\x10\x01\x12\x17\n\x13SAVE_MODE_OVERWRITE\x10\x02\x12\x1d\n\x19SAVE_MODE_ERROR_IF_EXISTS\x10\x03\x12\x14\n\x10SAVE_MODE_IGNORE\x10\x04\x42\x0b\n\tsave_typeB\t\n\x07_source"\xdc\x06\n\x10WriteOperationV2\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\ntable_name\x18\x02 \x01(\tR\ttableName\x12\x1f\n\x08provider\x18\x03 \x01(\tH\x00R\x08provider\x88\x01\x01\x12L\n\x14partitioning_columns\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13partitioningColumns\x12\x46\n\x07options\x18\x05 \x03(\x0b\x32,.spark.connect.WriteOperationV2.OptionsEntryR\x07options\x12_\n\x10table_properties\x18\x06 \x03(\x0b\x32\x34.spark.connect.WriteOperationV2.TablePropertiesEntryR\x0ftableProperties\x12\x38\n\x04mode\x18\x07 \x01(\x0e\x32$.spark.connect.WriteOperationV2.ModeR\x04mode\x12J\n\x13overwrite_condition\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x12overwriteCondition\x12-\n\x12\x63lustering_columns\x18\t \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 
\x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"\x9f\x01\n\x04Mode\x12\x14\n\x10MODE_UNSPECIFIED\x10\x00\x12\x0f\n\x0bMODE_CREATE\x10\x01\x12\x12\n\x0eMODE_OVERWRITE\x10\x02\x12\x1d\n\x19MODE_OVERWRITE_PARTITIONS\x10\x03\x12\x0f\n\x0bMODE_APPEND\x10\x04\x12\x10\n\x0cMODE_REPLACE\x10\x05\x12\x1a\n\x16MODE_CREATE_OR_REPLACE\x10\x06\x42\x0b\n\t_provider"\xa0\x06\n\x19WriteStreamOperationStart\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06\x66ormat\x18\x02 \x01(\tR\x06\x66ormat\x12O\n\x07options\x18\x03 \x03(\x0b\x32\x35.spark.connect.WriteStreamOperationStart.OptionsEntryR\x07options\x12:\n\x19partitioning_column_names\x18\x04 \x03(\tR\x17partitioningColumnNames\x12:\n\x18processing_time_interval\x18\x05 \x01(\tH\x00R\x16processingTimeInterval\x12%\n\ravailable_now\x18\x06 \x01(\x08H\x00R\x0c\x61vailableNow\x12\x14\n\x04once\x18\x07 \x01(\x08H\x00R\x04once\x12\x46\n\x1e\x63ontinuous_checkpoint_interval\x18\x08 \x01(\tH\x00R\x1c\x63ontinuousCheckpointInterval\x12\x1f\n\x0boutput_mode\x18\t \x01(\tR\noutputMode\x12\x1d\n\nquery_name\x18\n \x01(\tR\tqueryName\x12\x14\n\x04path\x18\x0b \x01(\tH\x01R\x04path\x12\x1f\n\ntable_name\x18\x0c \x01(\tH\x01R\ttableName\x12N\n\x0e\x66oreach_writer\x18\r \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\rforeachWriter\x12L\n\rforeach_batch\x18\x0e \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\x0c\x66oreachBatch\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07triggerB\x12\n\x10sink_destination"\xb3\x01\n\x18StreamingForeachFunction\x12\x43\n\x0fpython_function\x18\x01 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x0epythonFunction\x12\x46\n\x0escala_function\x18\x02 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\rscalaFunctionB\n\n\x08\x66unction"\xd4\x01\n\x1fWriteStreamOperationStartResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12<\n\x18query_started_event_json\x18\x03 \x01(\tH\x00R\x15queryStartedEventJson\x88\x01\x01\x42\x1b\n\x19_query_started_event_json"A\n\x18StreamingQueryInstanceId\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x15\n\x06run_id\x18\x02 \x01(\tR\x05runId"\xf8\x04\n\x15StreamingQueryCommand\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x18\n\x06status\x18\x02 \x01(\x08H\x00R\x06status\x12%\n\rlast_progress\x18\x03 \x01(\x08H\x00R\x0clastProgress\x12)\n\x0frecent_progress\x18\x04 \x01(\x08H\x00R\x0erecentProgress\x12\x14\n\x04stop\x18\x05 \x01(\x08H\x00R\x04stop\x12\x34\n\x15process_all_available\x18\x06 \x01(\x08H\x00R\x13processAllAvailable\x12O\n\x07\x65xplain\x18\x07 \x01(\x0b\x32\x33.spark.connect.StreamingQueryCommand.ExplainCommandH\x00R\x07\x65xplain\x12\x1e\n\texception\x18\x08 \x01(\x08H\x00R\texception\x12k\n\x11\x61wait_termination\x18\t \x01(\x0b\x32<.spark.connect.StreamingQueryCommand.AwaitTerminationCommandH\x00R\x10\x61waitTermination\x1a,\n\x0e\x45xplainCommand\x12\x1a\n\x08\x65xtended\x18\x01 \x01(\x08R\x08\x65xtended\x1aL\n\x17\x41waitTerminationCommand\x12"\n\ntimeout_ms\x18\x02 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_msB\t\n\x07\x63ommand"\xf5\x08\n\x1bStreamingQueryCommandResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12Q\n\x06status\x18\x02 
\x01(\x0b\x32\x37.spark.connect.StreamingQueryCommandResult.StatusResultH\x00R\x06status\x12j\n\x0frecent_progress\x18\x03 \x01(\x0b\x32?.spark.connect.StreamingQueryCommandResult.RecentProgressResultH\x00R\x0erecentProgress\x12T\n\x07\x65xplain\x18\x04 \x01(\x0b\x32\x38.spark.connect.StreamingQueryCommandResult.ExplainResultH\x00R\x07\x65xplain\x12Z\n\texception\x18\x05 \x01(\x0b\x32:.spark.connect.StreamingQueryCommandResult.ExceptionResultH\x00R\texception\x12p\n\x11\x61wait_termination\x18\x06 \x01(\x0b\x32\x41.spark.connect.StreamingQueryCommandResult.AwaitTerminationResultH\x00R\x10\x61waitTermination\x1a\xaa\x01\n\x0cStatusResult\x12%\n\x0estatus_message\x18\x01 \x01(\tR\rstatusMessage\x12*\n\x11is_data_available\x18\x02 \x01(\x08R\x0fisDataAvailable\x12*\n\x11is_trigger_active\x18\x03 \x01(\x08R\x0fisTriggerActive\x12\x1b\n\tis_active\x18\x04 \x01(\x08R\x08isActive\x1aH\n\x14RecentProgressResult\x12\x30\n\x14recent_progress_json\x18\x05 \x03(\tR\x12recentProgressJson\x1a\'\n\rExplainResult\x12\x16\n\x06result\x18\x01 \x01(\tR\x06result\x1a\xc5\x01\n\x0f\x45xceptionResult\x12\x30\n\x11\x65xception_message\x18\x01 \x01(\tH\x00R\x10\x65xceptionMessage\x88\x01\x01\x12$\n\x0b\x65rror_class\x18\x02 \x01(\tH\x01R\nerrorClass\x88\x01\x01\x12$\n\x0bstack_trace\x18\x03 \x01(\tH\x02R\nstackTrace\x88\x01\x01\x42\x14\n\x12_exception_messageB\x0e\n\x0c_error_classB\x0e\n\x0c_stack_trace\x1a\x38\n\x16\x41waitTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminatedB\r\n\x0bresult_type"\xbd\x06\n\x1cStreamingQueryManagerCommand\x12\x18\n\x06\x61\x63tive\x18\x01 \x01(\x08H\x00R\x06\x61\x63tive\x12\x1d\n\tget_query\x18\x02 \x01(\tH\x00R\x08getQuery\x12|\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32\x46.spark.connect.StreamingQueryManagerCommand.AwaitAnyTerminationCommandH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12n\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0b\x61\x64\x64Listener\x12t\n\x0fremove_listener\x18\x06 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0eremoveListener\x12\'\n\x0elist_listeners\x18\x07 \x01(\x08H\x00R\rlistListeners\x1aO\n\x1a\x41waitAnyTerminationCommand\x12"\n\ntimeout_ms\x18\x01 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_ms\x1a\xcd\x01\n\x1dStreamingQueryListenerCommand\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x12U\n\x17python_listener_payload\x18\x02 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x15pythonListenerPayload\x88\x01\x01\x12\x0e\n\x02id\x18\x03 \x01(\tR\x02idB\x1a\n\x18_python_listener_payloadB\t\n\x07\x63ommand"\xb4\x08\n"StreamingQueryManagerCommandResult\x12X\n\x06\x61\x63tive\x18\x01 \x01(\x0b\x32>.spark.connect.StreamingQueryManagerCommandResult.ActiveResultH\x00R\x06\x61\x63tive\x12`\n\x05query\x18\x02 \x01(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceH\x00R\x05query\x12\x81\x01\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32K.spark.connect.StreamingQueryManagerCommandResult.AwaitAnyTerminationResultH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12#\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x08H\x00R\x0b\x61\x64\x64Listener\x12)\n\x0fremove_listener\x18\x06 \x01(\x08H\x00R\x0eremoveListener\x12{\n\x0elist_listeners\x18\x07 
\x01(\x0b\x32R.spark.connect.StreamingQueryManagerCommandResult.ListStreamingQueryListenerResultH\x00R\rlistListeners\x1a\x7f\n\x0c\x41\x63tiveResult\x12o\n\x0e\x61\x63tive_queries\x18\x01 \x03(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceR\ractiveQueries\x1as\n\x16StreamingQueryInstance\x12\x37\n\x02id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x02id\x12\x17\n\x04name\x18\x02 \x01(\tH\x00R\x04name\x88\x01\x01\x42\x07\n\x05_name\x1a;\n\x19\x41waitAnyTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminated\x1aK\n\x1eStreamingQueryListenerInstance\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x1a\x45\n ListStreamingQueryListenerResult\x12!\n\x0clistener_ids\x18\x01 \x03(\tR\x0blistenerIdsB\r\n\x0bresult_type"\xad\x01\n StreamingQueryListenerBusCommand\x12;\n\x19\x61\x64\x64_listener_bus_listener\x18\x01 \x01(\x08H\x00R\x16\x61\x64\x64ListenerBusListener\x12\x41\n\x1cremove_listener_bus_listener\x18\x02 \x01(\x08H\x00R\x19removeListenerBusListenerB\t\n\x07\x63ommand"\x83\x01\n\x1bStreamingQueryListenerEvent\x12\x1d\n\nevent_json\x18\x01 \x01(\tR\teventJson\x12\x45\n\nevent_type\x18\x02 \x01(\x0e\x32&.spark.connect.StreamingQueryEventTypeR\teventType"\xcc\x01\n"StreamingQueryListenerEventsResult\x12\x42\n\x06\x65vents\x18\x01 \x03(\x0b\x32*.spark.connect.StreamingQueryListenerEventR\x06\x65vents\x12\x42\n\x1blistener_bus_listener_added\x18\x02 \x01(\x08H\x00R\x18listenerBusListenerAdded\x88\x01\x01\x42\x1e\n\x1c_listener_bus_listener_added"\x15\n\x13GetResourcesCommand"\xd4\x01\n\x19GetResourcesCommandResult\x12U\n\tresources\x18\x01 \x03(\x0b\x32\x37.spark.connect.GetResourcesCommandResult.ResourcesEntryR\tresources\x1a`\n\x0eResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32".spark.connect.ResourceInformationR\x05value:\x02\x38\x01"X\n\x1c\x43reateResourceProfileCommand\x12\x38\n\x07profile\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ResourceProfileR\x07profile"C\n"CreateResourceProfileCommandResult\x12\x1d\n\nprofile_id\x18\x01 \x01(\x05R\tprofileId*\x85\x01\n\x17StreamingQueryEventType\x12\x1e\n\x1aQUERY_PROGRESS_UNSPECIFIED\x10\x00\x12\x18\n\x14QUERY_PROGRESS_EVENT\x10\x01\x12\x1a\n\x16QUERY_TERMINATED_EVENT\x10\x02\x12\x14\n\x10QUERY_IDLE_EVENT\x10\x03\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1cspark/connect/commands.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1aspark/connect/common.proto\x1a\x1fspark/connect/expressions.proto\x1a\x1dspark/connect/relations.proto"\xaf\x0c\n\x07\x43ommand\x12]\n\x11register_function\x18\x01 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x10registerFunction\x12H\n\x0fwrite_operation\x18\x02 \x01(\x0b\x32\x1d.spark.connect.WriteOperationH\x00R\x0ewriteOperation\x12_\n\x15\x63reate_dataframe_view\x18\x03 \x01(\x0b\x32).spark.connect.CreateDataFrameViewCommandH\x00R\x13\x63reateDataframeView\x12O\n\x12write_operation_v2\x18\x04 \x01(\x0b\x32\x1f.spark.connect.WriteOperationV2H\x00R\x10writeOperationV2\x12<\n\x0bsql_command\x18\x05 \x01(\x0b\x32\x19.spark.connect.SqlCommandH\x00R\nsqlCommand\x12k\n\x1cwrite_stream_operation_start\x18\x06 \x01(\x0b\x32(.spark.connect.WriteStreamOperationStartH\x00R\x19writeStreamOperationStart\x12^\n\x17streaming_query_command\x18\x07 \x01(\x0b\x32$.spark.connect.StreamingQueryCommandH\x00R\x15streamingQueryCommand\x12X\n\x15get_resources_command\x18\x08 
\x01(\x0b\x32".spark.connect.GetResourcesCommandH\x00R\x13getResourcesCommand\x12t\n\x1fstreaming_query_manager_command\x18\t \x01(\x0b\x32+.spark.connect.StreamingQueryManagerCommandH\x00R\x1cstreamingQueryManagerCommand\x12m\n\x17register_table_function\x18\n \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R\x15registerTableFunction\x12\x81\x01\n$streaming_query_listener_bus_command\x18\x0b \x01(\x0b\x32/.spark.connect.StreamingQueryListenerBusCommandH\x00R streamingQueryListenerBusCommand\x12\x64\n\x14register_data_source\x18\x0c \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R\x12registerDataSource\x12t\n\x1f\x63reate_resource_profile_command\x18\r \x01(\x0b\x32+.spark.connect.CreateResourceProfileCommandH\x00R\x1c\x63reateResourceProfileCommand\x12Q\n\x12\x63heckpoint_command\x18\x0e \x01(\x0b\x32 .spark.connect.CheckpointCommandH\x00R\x11\x63heckpointCommand\x12\x84\x01\n%remove_cached_remote_relation_command\x18\x0f \x01(\x0b\x32\x30.spark.connect.RemoveCachedRemoteRelationCommandH\x00R!removeCachedRemoteRelationCommand\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textensionB\x0e\n\x0c\x63ommand_type"\xaa\x04\n\nSqlCommand\x12\x14\n\x03sql\x18\x01 \x01(\tB\x02\x18\x01R\x03sql\x12;\n\x04\x61rgs\x18\x02 \x03(\x0b\x32#.spark.connect.SqlCommand.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12Z\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32-.spark.connect.SqlCommand.NamedArgumentsEntryB\x02\x18\x01R\x0enamedArguments\x12\x42\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionB\x02\x18\x01R\x0cposArguments\x12-\n\x05input\x18\x06 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"\x96\x01\n\x1a\x43reateDataFrameViewCommand\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x1b\n\tis_global\x18\x03 \x01(\x08R\x08isGlobal\x12\x18\n\x07replace\x18\x04 \x01(\x08R\x07replace"\xca\x08\n\x0eWriteOperation\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1b\n\x06source\x18\x02 \x01(\tH\x01R\x06source\x88\x01\x01\x12\x14\n\x04path\x18\x03 \x01(\tH\x00R\x04path\x12?\n\x05table\x18\x04 \x01(\x0b\x32\'.spark.connect.WriteOperation.SaveTableH\x00R\x05table\x12:\n\x04mode\x18\x05 \x01(\x0e\x32&.spark.connect.WriteOperation.SaveModeR\x04mode\x12*\n\x11sort_column_names\x18\x06 \x03(\tR\x0fsortColumnNames\x12\x31\n\x14partitioning_columns\x18\x07 \x03(\tR\x13partitioningColumns\x12\x43\n\tbucket_by\x18\x08 \x01(\x0b\x32&.spark.connect.WriteOperation.BucketByR\x08\x62ucketBy\x12\x44\n\x07options\x18\t \x03(\x0b\x32*.spark.connect.WriteOperation.OptionsEntryR\x07options\x12-\n\x12\x63lustering_columns\x18\n \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x82\x02\n\tSaveTable\x12\x1d\n\ntable_name\x18\x01 \x01(\tR\ttableName\x12X\n\x0bsave_method\x18\x02 
\x01(\x0e\x32\x37.spark.connect.WriteOperation.SaveTable.TableSaveMethodR\nsaveMethod"|\n\x0fTableSaveMethod\x12!\n\x1dTABLE_SAVE_METHOD_UNSPECIFIED\x10\x00\x12#\n\x1fTABLE_SAVE_METHOD_SAVE_AS_TABLE\x10\x01\x12!\n\x1dTABLE_SAVE_METHOD_INSERT_INTO\x10\x02\x1a[\n\x08\x42ucketBy\x12.\n\x13\x62ucket_column_names\x18\x01 \x03(\tR\x11\x62ucketColumnNames\x12\x1f\n\x0bnum_buckets\x18\x02 \x01(\x05R\nnumBuckets"\x89\x01\n\x08SaveMode\x12\x19\n\x15SAVE_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10SAVE_MODE_APPEND\x10\x01\x12\x17\n\x13SAVE_MODE_OVERWRITE\x10\x02\x12\x1d\n\x19SAVE_MODE_ERROR_IF_EXISTS\x10\x03\x12\x14\n\x10SAVE_MODE_IGNORE\x10\x04\x42\x0b\n\tsave_typeB\t\n\x07_source"\xdc\x06\n\x10WriteOperationV2\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\ntable_name\x18\x02 \x01(\tR\ttableName\x12\x1f\n\x08provider\x18\x03 \x01(\tH\x00R\x08provider\x88\x01\x01\x12L\n\x14partitioning_columns\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13partitioningColumns\x12\x46\n\x07options\x18\x05 \x03(\x0b\x32,.spark.connect.WriteOperationV2.OptionsEntryR\x07options\x12_\n\x10table_properties\x18\x06 \x03(\x0b\x32\x34.spark.connect.WriteOperationV2.TablePropertiesEntryR\x0ftableProperties\x12\x38\n\x04mode\x18\x07 \x01(\x0e\x32$.spark.connect.WriteOperationV2.ModeR\x04mode\x12J\n\x13overwrite_condition\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x12overwriteCondition\x12-\n\x12\x63lustering_columns\x18\t \x03(\tR\x11\x63lusteringColumns\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x42\n\x14TablePropertiesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"\x9f\x01\n\x04Mode\x12\x14\n\x10MODE_UNSPECIFIED\x10\x00\x12\x0f\n\x0bMODE_CREATE\x10\x01\x12\x12\n\x0eMODE_OVERWRITE\x10\x02\x12\x1d\n\x19MODE_OVERWRITE_PARTITIONS\x10\x03\x12\x0f\n\x0bMODE_APPEND\x10\x04\x12\x10\n\x0cMODE_REPLACE\x10\x05\x12\x1a\n\x16MODE_CREATE_OR_REPLACE\x10\x06\x42\x0b\n\t_provider"\xa0\x06\n\x19WriteStreamOperationStart\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06\x66ormat\x18\x02 \x01(\tR\x06\x66ormat\x12O\n\x07options\x18\x03 \x03(\x0b\x32\x35.spark.connect.WriteStreamOperationStart.OptionsEntryR\x07options\x12:\n\x19partitioning_column_names\x18\x04 \x03(\tR\x17partitioningColumnNames\x12:\n\x18processing_time_interval\x18\x05 \x01(\tH\x00R\x16processingTimeInterval\x12%\n\ravailable_now\x18\x06 \x01(\x08H\x00R\x0c\x61vailableNow\x12\x14\n\x04once\x18\x07 \x01(\x08H\x00R\x04once\x12\x46\n\x1e\x63ontinuous_checkpoint_interval\x18\x08 \x01(\tH\x00R\x1c\x63ontinuousCheckpointInterval\x12\x1f\n\x0boutput_mode\x18\t \x01(\tR\noutputMode\x12\x1d\n\nquery_name\x18\n \x01(\tR\tqueryName\x12\x14\n\x04path\x18\x0b \x01(\tH\x01R\x04path\x12\x1f\n\ntable_name\x18\x0c \x01(\tH\x01R\ttableName\x12N\n\x0e\x66oreach_writer\x18\r \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\rforeachWriter\x12L\n\rforeach_batch\x18\x0e \x01(\x0b\x32\'.spark.connect.StreamingForeachFunctionR\x0c\x66oreachBatch\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07triggerB\x12\n\x10sink_destination"\xb3\x01\n\x18StreamingForeachFunction\x12\x43\n\x0fpython_function\x18\x01 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x0epythonFunction\x12\x46\n\x0escala_function\x18\x02 
\x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\rscalaFunctionB\n\n\x08\x66unction"\xd4\x01\n\x1fWriteStreamOperationStartResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12<\n\x18query_started_event_json\x18\x03 \x01(\tH\x00R\x15queryStartedEventJson\x88\x01\x01\x42\x1b\n\x19_query_started_event_json"A\n\x18StreamingQueryInstanceId\x12\x0e\n\x02id\x18\x01 \x01(\tR\x02id\x12\x15\n\x06run_id\x18\x02 \x01(\tR\x05runId"\xf8\x04\n\x15StreamingQueryCommand\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12\x18\n\x06status\x18\x02 \x01(\x08H\x00R\x06status\x12%\n\rlast_progress\x18\x03 \x01(\x08H\x00R\x0clastProgress\x12)\n\x0frecent_progress\x18\x04 \x01(\x08H\x00R\x0erecentProgress\x12\x14\n\x04stop\x18\x05 \x01(\x08H\x00R\x04stop\x12\x34\n\x15process_all_available\x18\x06 \x01(\x08H\x00R\x13processAllAvailable\x12O\n\x07\x65xplain\x18\x07 \x01(\x0b\x32\x33.spark.connect.StreamingQueryCommand.ExplainCommandH\x00R\x07\x65xplain\x12\x1e\n\texception\x18\x08 \x01(\x08H\x00R\texception\x12k\n\x11\x61wait_termination\x18\t \x01(\x0b\x32<.spark.connect.StreamingQueryCommand.AwaitTerminationCommandH\x00R\x10\x61waitTermination\x1a,\n\x0e\x45xplainCommand\x12\x1a\n\x08\x65xtended\x18\x01 \x01(\x08R\x08\x65xtended\x1aL\n\x17\x41waitTerminationCommand\x12"\n\ntimeout_ms\x18\x02 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_msB\t\n\x07\x63ommand"\xf5\x08\n\x1bStreamingQueryCommandResult\x12\x42\n\x08query_id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x07queryId\x12Q\n\x06status\x18\x02 \x01(\x0b\x32\x37.spark.connect.StreamingQueryCommandResult.StatusResultH\x00R\x06status\x12j\n\x0frecent_progress\x18\x03 \x01(\x0b\x32?.spark.connect.StreamingQueryCommandResult.RecentProgressResultH\x00R\x0erecentProgress\x12T\n\x07\x65xplain\x18\x04 \x01(\x0b\x32\x38.spark.connect.StreamingQueryCommandResult.ExplainResultH\x00R\x07\x65xplain\x12Z\n\texception\x18\x05 \x01(\x0b\x32:.spark.connect.StreamingQueryCommandResult.ExceptionResultH\x00R\texception\x12p\n\x11\x61wait_termination\x18\x06 \x01(\x0b\x32\x41.spark.connect.StreamingQueryCommandResult.AwaitTerminationResultH\x00R\x10\x61waitTermination\x1a\xaa\x01\n\x0cStatusResult\x12%\n\x0estatus_message\x18\x01 \x01(\tR\rstatusMessage\x12*\n\x11is_data_available\x18\x02 \x01(\x08R\x0fisDataAvailable\x12*\n\x11is_trigger_active\x18\x03 \x01(\x08R\x0fisTriggerActive\x12\x1b\n\tis_active\x18\x04 \x01(\x08R\x08isActive\x1aH\n\x14RecentProgressResult\x12\x30\n\x14recent_progress_json\x18\x05 \x03(\tR\x12recentProgressJson\x1a\'\n\rExplainResult\x12\x16\n\x06result\x18\x01 \x01(\tR\x06result\x1a\xc5\x01\n\x0f\x45xceptionResult\x12\x30\n\x11\x65xception_message\x18\x01 \x01(\tH\x00R\x10\x65xceptionMessage\x88\x01\x01\x12$\n\x0b\x65rror_class\x18\x02 \x01(\tH\x01R\nerrorClass\x88\x01\x01\x12$\n\x0bstack_trace\x18\x03 \x01(\tH\x02R\nstackTrace\x88\x01\x01\x42\x14\n\x12_exception_messageB\x0e\n\x0c_error_classB\x0e\n\x0c_stack_trace\x1a\x38\n\x16\x41waitTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminatedB\r\n\x0bresult_type"\xbd\x06\n\x1cStreamingQueryManagerCommand\x12\x18\n\x06\x61\x63tive\x18\x01 \x01(\x08H\x00R\x06\x61\x63tive\x12\x1d\n\tget_query\x18\x02 \x01(\tH\x00R\x08getQuery\x12|\n\x15\x61wait_any_termination\x18\x03 
\x01(\x0b\x32\x46.spark.connect.StreamingQueryManagerCommand.AwaitAnyTerminationCommandH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12n\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0b\x61\x64\x64Listener\x12t\n\x0fremove_listener\x18\x06 \x01(\x0b\x32I.spark.connect.StreamingQueryManagerCommand.StreamingQueryListenerCommandH\x00R\x0eremoveListener\x12\'\n\x0elist_listeners\x18\x07 \x01(\x08H\x00R\rlistListeners\x1aO\n\x1a\x41waitAnyTerminationCommand\x12"\n\ntimeout_ms\x18\x01 \x01(\x03H\x00R\ttimeoutMs\x88\x01\x01\x42\r\n\x0b_timeout_ms\x1a\xcd\x01\n\x1dStreamingQueryListenerCommand\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x12U\n\x17python_listener_payload\x18\x02 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\x15pythonListenerPayload\x88\x01\x01\x12\x0e\n\x02id\x18\x03 \x01(\tR\x02idB\x1a\n\x18_python_listener_payloadB\t\n\x07\x63ommand"\xb4\x08\n"StreamingQueryManagerCommandResult\x12X\n\x06\x61\x63tive\x18\x01 \x01(\x0b\x32>.spark.connect.StreamingQueryManagerCommandResult.ActiveResultH\x00R\x06\x61\x63tive\x12`\n\x05query\x18\x02 \x01(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceH\x00R\x05query\x12\x81\x01\n\x15\x61wait_any_termination\x18\x03 \x01(\x0b\x32K.spark.connect.StreamingQueryManagerCommandResult.AwaitAnyTerminationResultH\x00R\x13\x61waitAnyTermination\x12+\n\x10reset_terminated\x18\x04 \x01(\x08H\x00R\x0fresetTerminated\x12#\n\x0c\x61\x64\x64_listener\x18\x05 \x01(\x08H\x00R\x0b\x61\x64\x64Listener\x12)\n\x0fremove_listener\x18\x06 \x01(\x08H\x00R\x0eremoveListener\x12{\n\x0elist_listeners\x18\x07 \x01(\x0b\x32R.spark.connect.StreamingQueryManagerCommandResult.ListStreamingQueryListenerResultH\x00R\rlistListeners\x1a\x7f\n\x0c\x41\x63tiveResult\x12o\n\x0e\x61\x63tive_queries\x18\x01 \x03(\x0b\x32H.spark.connect.StreamingQueryManagerCommandResult.StreamingQueryInstanceR\ractiveQueries\x1as\n\x16StreamingQueryInstance\x12\x37\n\x02id\x18\x01 \x01(\x0b\x32\'.spark.connect.StreamingQueryInstanceIdR\x02id\x12\x17\n\x04name\x18\x02 \x01(\tH\x00R\x04name\x88\x01\x01\x42\x07\n\x05_name\x1a;\n\x19\x41waitAnyTerminationResult\x12\x1e\n\nterminated\x18\x01 \x01(\x08R\nterminated\x1aK\n\x1eStreamingQueryListenerInstance\x12)\n\x10listener_payload\x18\x01 \x01(\x0cR\x0flistenerPayload\x1a\x45\n ListStreamingQueryListenerResult\x12!\n\x0clistener_ids\x18\x01 \x03(\tR\x0blistenerIdsB\r\n\x0bresult_type"\xad\x01\n StreamingQueryListenerBusCommand\x12;\n\x19\x61\x64\x64_listener_bus_listener\x18\x01 \x01(\x08H\x00R\x16\x61\x64\x64ListenerBusListener\x12\x41\n\x1cremove_listener_bus_listener\x18\x02 \x01(\x08H\x00R\x19removeListenerBusListenerB\t\n\x07\x63ommand"\x83\x01\n\x1bStreamingQueryListenerEvent\x12\x1d\n\nevent_json\x18\x01 \x01(\tR\teventJson\x12\x45\n\nevent_type\x18\x02 \x01(\x0e\x32&.spark.connect.StreamingQueryEventTypeR\teventType"\xcc\x01\n"StreamingQueryListenerEventsResult\x12\x42\n\x06\x65vents\x18\x01 \x03(\x0b\x32*.spark.connect.StreamingQueryListenerEventR\x06\x65vents\x12\x42\n\x1blistener_bus_listener_added\x18\x02 \x01(\x08H\x00R\x18listenerBusListenerAdded\x88\x01\x01\x42\x1e\n\x1c_listener_bus_listener_added"\x15\n\x13GetResourcesCommand"\xd4\x01\n\x19GetResourcesCommandResult\x12U\n\tresources\x18\x01 \x03(\x0b\x32\x37.spark.connect.GetResourcesCommandResult.ResourcesEntryR\tresources\x1a`\n\x0eResourcesEntry\x12\x10\n\x03key\x18\x01 
\x01(\tR\x03key\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32".spark.connect.ResourceInformationR\x05value:\x02\x38\x01"X\n\x1c\x43reateResourceProfileCommand\x12\x38\n\x07profile\x18\x01 \x01(\x0b\x32\x1e.spark.connect.ResourceProfileR\x07profile"C\n"CreateResourceProfileCommandResult\x12\x1d\n\nprofile_id\x18\x01 \x01(\x05R\tprofileId"d\n!RemoveCachedRemoteRelationCommand\x12?\n\x08relation\x18\x01 \x01(\x0b\x32#.spark.connect.CachedRemoteRelationR\x08relation"t\n\x11\x43heckpointCommand\x12\x33\n\x08relation\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x08relation\x12\x14\n\x05local\x18\x02 \x01(\x08R\x05local\x12\x14\n\x05\x65\x61ger\x18\x03 \x01(\x08R\x05\x65\x61ger*\x85\x01\n\x17StreamingQueryEventType\x12\x1e\n\x1aQUERY_PROGRESS_UNSPECIFIED\x10\x00\x12\x18\n\x14QUERY_PROGRESS_EVENT\x10\x01\x12\x1a\n\x16QUERY_TERMINATED_EVENT\x10\x02\x12\x14\n\x10QUERY_IDLE_EVENT\x10\x03\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -71,98 +71,102 @@ _WRITESTREAMOPERATIONSTART_OPTIONSENTRY._serialized_options = b"8\001" _GETRESOURCESCOMMANDRESULT_RESOURCESENTRY._options = None _GETRESOURCESCOMMANDRESULT_RESOURCESENTRY._serialized_options = b"8\001" - _STREAMINGQUERYEVENTTYPE._serialized_start = 10080 - _STREAMINGQUERYEVENTTYPE._serialized_end = 10213 + _STREAMINGQUERYEVENTTYPE._serialized_start = 10518 + _STREAMINGQUERYEVENTTYPE._serialized_end = 10651 _COMMAND._serialized_start = 167 - _COMMAND._serialized_end = 1532 - _SQLCOMMAND._serialized_start = 1535 - _SQLCOMMAND._serialized_end = 2089 - _SQLCOMMAND_ARGSENTRY._serialized_start = 1905 - _SQLCOMMAND_ARGSENTRY._serialized_end = 1995 - _SQLCOMMAND_NAMEDARGUMENTSENTRY._serialized_start = 1997 - _SQLCOMMAND_NAMEDARGUMENTSENTRY._serialized_end = 2089 - _CREATEDATAFRAMEVIEWCOMMAND._serialized_start = 2092 - _CREATEDATAFRAMEVIEWCOMMAND._serialized_end = 2242 - _WRITEOPERATION._serialized_start = 2245 - _WRITEOPERATION._serialized_end = 3343 - _WRITEOPERATION_OPTIONSENTRY._serialized_start = 2767 - _WRITEOPERATION_OPTIONSENTRY._serialized_end = 2825 - _WRITEOPERATION_SAVETABLE._serialized_start = 2828 - _WRITEOPERATION_SAVETABLE._serialized_end = 3086 - _WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD._serialized_start = 2962 - _WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD._serialized_end = 3086 - _WRITEOPERATION_BUCKETBY._serialized_start = 3088 - _WRITEOPERATION_BUCKETBY._serialized_end = 3179 - _WRITEOPERATION_SAVEMODE._serialized_start = 3182 - _WRITEOPERATION_SAVEMODE._serialized_end = 3319 - _WRITEOPERATIONV2._serialized_start = 3346 - _WRITEOPERATIONV2._serialized_end = 4206 - _WRITEOPERATIONV2_OPTIONSENTRY._serialized_start = 2767 - _WRITEOPERATIONV2_OPTIONSENTRY._serialized_end = 2825 - _WRITEOPERATIONV2_TABLEPROPERTIESENTRY._serialized_start = 3965 - _WRITEOPERATIONV2_TABLEPROPERTIESENTRY._serialized_end = 4031 - _WRITEOPERATIONV2_MODE._serialized_start = 4034 - _WRITEOPERATIONV2_MODE._serialized_end = 4193 - _WRITESTREAMOPERATIONSTART._serialized_start = 4209 - _WRITESTREAMOPERATIONSTART._serialized_end = 5009 - _WRITESTREAMOPERATIONSTART_OPTIONSENTRY._serialized_start = 2767 - _WRITESTREAMOPERATIONSTART_OPTIONSENTRY._serialized_end = 2825 - _STREAMINGFOREACHFUNCTION._serialized_start = 5012 - _STREAMINGFOREACHFUNCTION._serialized_end = 5191 - _WRITESTREAMOPERATIONSTARTRESULT._serialized_start = 5194 - _WRITESTREAMOPERATIONSTARTRESULT._serialized_end = 5406 - _STREAMINGQUERYINSTANCEID._serialized_start = 5408 - 
_STREAMINGQUERYINSTANCEID._serialized_end = 5473 - _STREAMINGQUERYCOMMAND._serialized_start = 5476 - _STREAMINGQUERYCOMMAND._serialized_end = 6108 - _STREAMINGQUERYCOMMAND_EXPLAINCOMMAND._serialized_start = 5975 - _STREAMINGQUERYCOMMAND_EXPLAINCOMMAND._serialized_end = 6019 - _STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND._serialized_start = 6021 - _STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND._serialized_end = 6097 - _STREAMINGQUERYCOMMANDRESULT._serialized_start = 6111 - _STREAMINGQUERYCOMMANDRESULT._serialized_end = 7252 - _STREAMINGQUERYCOMMANDRESULT_STATUSRESULT._serialized_start = 6694 - _STREAMINGQUERYCOMMANDRESULT_STATUSRESULT._serialized_end = 6864 - _STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT._serialized_start = 6866 - _STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT._serialized_end = 6938 - _STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT._serialized_start = 6940 - _STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT._serialized_end = 6979 - _STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT._serialized_start = 6982 - _STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT._serialized_end = 7179 - _STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT._serialized_start = 7181 - _STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT._serialized_end = 7237 - _STREAMINGQUERYMANAGERCOMMAND._serialized_start = 7255 - _STREAMINGQUERYMANAGERCOMMAND._serialized_end = 8084 - _STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND._serialized_start = 7786 - _STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND._serialized_end = 7865 - _STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND._serialized_start = 7868 - _STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND._serialized_end = 8073 - _STREAMINGQUERYMANAGERCOMMANDRESULT._serialized_start = 8087 - _STREAMINGQUERYMANAGERCOMMANDRESULT._serialized_end = 9163 - _STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT._serialized_start = 8695 - _STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT._serialized_end = 8822 - _STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE._serialized_start = 8824 - _STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE._serialized_end = 8939 - _STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT._serialized_start = 8941 - _STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT._serialized_end = 9000 - _STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYLISTENERINSTANCE._serialized_start = 9002 - _STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYLISTENERINSTANCE._serialized_end = 9077 - _STREAMINGQUERYMANAGERCOMMANDRESULT_LISTSTREAMINGQUERYLISTENERRESULT._serialized_start = 9079 - _STREAMINGQUERYMANAGERCOMMANDRESULT_LISTSTREAMINGQUERYLISTENERRESULT._serialized_end = 9148 - _STREAMINGQUERYLISTENERBUSCOMMAND._serialized_start = 9166 - _STREAMINGQUERYLISTENERBUSCOMMAND._serialized_end = 9339 - _STREAMINGQUERYLISTENEREVENT._serialized_start = 9342 - _STREAMINGQUERYLISTENEREVENT._serialized_end = 9473 - _STREAMINGQUERYLISTENEREVENTSRESULT._serialized_start = 9476 - _STREAMINGQUERYLISTENEREVENTSRESULT._serialized_end = 9680 - _GETRESOURCESCOMMAND._serialized_start = 9682 - _GETRESOURCESCOMMAND._serialized_end = 9703 - _GETRESOURCESCOMMANDRESULT._serialized_start = 9706 - _GETRESOURCESCOMMANDRESULT._serialized_end = 9918 - _GETRESOURCESCOMMANDRESULT_RESOURCESENTRY._serialized_start = 9822 - _GETRESOURCESCOMMANDRESULT_RESOURCESENTRY._serialized_end = 9918 - _CREATERESOURCEPROFILECOMMAND._serialized_start = 9920 - _CREATERESOURCEPROFILECOMMAND._serialized_end = 10008 - 
_CREATERESOURCEPROFILECOMMANDRESULT._serialized_start = 10010 - _CREATERESOURCEPROFILECOMMANDRESULT._serialized_end = 10077 + _COMMAND._serialized_end = 1750 + _SQLCOMMAND._serialized_start = 1753 + _SQLCOMMAND._serialized_end = 2307 + _SQLCOMMAND_ARGSENTRY._serialized_start = 2123 + _SQLCOMMAND_ARGSENTRY._serialized_end = 2213 + _SQLCOMMAND_NAMEDARGUMENTSENTRY._serialized_start = 2215 + _SQLCOMMAND_NAMEDARGUMENTSENTRY._serialized_end = 2307 + _CREATEDATAFRAMEVIEWCOMMAND._serialized_start = 2310 + _CREATEDATAFRAMEVIEWCOMMAND._serialized_end = 2460 + _WRITEOPERATION._serialized_start = 2463 + _WRITEOPERATION._serialized_end = 3561 + _WRITEOPERATION_OPTIONSENTRY._serialized_start = 2985 + _WRITEOPERATION_OPTIONSENTRY._serialized_end = 3043 + _WRITEOPERATION_SAVETABLE._serialized_start = 3046 + _WRITEOPERATION_SAVETABLE._serialized_end = 3304 + _WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD._serialized_start = 3180 + _WRITEOPERATION_SAVETABLE_TABLESAVEMETHOD._serialized_end = 3304 + _WRITEOPERATION_BUCKETBY._serialized_start = 3306 + _WRITEOPERATION_BUCKETBY._serialized_end = 3397 + _WRITEOPERATION_SAVEMODE._serialized_start = 3400 + _WRITEOPERATION_SAVEMODE._serialized_end = 3537 + _WRITEOPERATIONV2._serialized_start = 3564 + _WRITEOPERATIONV2._serialized_end = 4424 + _WRITEOPERATIONV2_OPTIONSENTRY._serialized_start = 2985 + _WRITEOPERATIONV2_OPTIONSENTRY._serialized_end = 3043 + _WRITEOPERATIONV2_TABLEPROPERTIESENTRY._serialized_start = 4183 + _WRITEOPERATIONV2_TABLEPROPERTIESENTRY._serialized_end = 4249 + _WRITEOPERATIONV2_MODE._serialized_start = 4252 + _WRITEOPERATIONV2_MODE._serialized_end = 4411 + _WRITESTREAMOPERATIONSTART._serialized_start = 4427 + _WRITESTREAMOPERATIONSTART._serialized_end = 5227 + _WRITESTREAMOPERATIONSTART_OPTIONSENTRY._serialized_start = 2985 + _WRITESTREAMOPERATIONSTART_OPTIONSENTRY._serialized_end = 3043 + _STREAMINGFOREACHFUNCTION._serialized_start = 5230 + _STREAMINGFOREACHFUNCTION._serialized_end = 5409 + _WRITESTREAMOPERATIONSTARTRESULT._serialized_start = 5412 + _WRITESTREAMOPERATIONSTARTRESULT._serialized_end = 5624 + _STREAMINGQUERYINSTANCEID._serialized_start = 5626 + _STREAMINGQUERYINSTANCEID._serialized_end = 5691 + _STREAMINGQUERYCOMMAND._serialized_start = 5694 + _STREAMINGQUERYCOMMAND._serialized_end = 6326 + _STREAMINGQUERYCOMMAND_EXPLAINCOMMAND._serialized_start = 6193 + _STREAMINGQUERYCOMMAND_EXPLAINCOMMAND._serialized_end = 6237 + _STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND._serialized_start = 6239 + _STREAMINGQUERYCOMMAND_AWAITTERMINATIONCOMMAND._serialized_end = 6315 + _STREAMINGQUERYCOMMANDRESULT._serialized_start = 6329 + _STREAMINGQUERYCOMMANDRESULT._serialized_end = 7470 + _STREAMINGQUERYCOMMANDRESULT_STATUSRESULT._serialized_start = 6912 + _STREAMINGQUERYCOMMANDRESULT_STATUSRESULT._serialized_end = 7082 + _STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT._serialized_start = 7084 + _STREAMINGQUERYCOMMANDRESULT_RECENTPROGRESSRESULT._serialized_end = 7156 + _STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT._serialized_start = 7158 + _STREAMINGQUERYCOMMANDRESULT_EXPLAINRESULT._serialized_end = 7197 + _STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT._serialized_start = 7200 + _STREAMINGQUERYCOMMANDRESULT_EXCEPTIONRESULT._serialized_end = 7397 + _STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT._serialized_start = 7399 + _STREAMINGQUERYCOMMANDRESULT_AWAITTERMINATIONRESULT._serialized_end = 7455 + _STREAMINGQUERYMANAGERCOMMAND._serialized_start = 7473 + _STREAMINGQUERYMANAGERCOMMAND._serialized_end = 8302 + 
_STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND._serialized_start = 8004 + _STREAMINGQUERYMANAGERCOMMAND_AWAITANYTERMINATIONCOMMAND._serialized_end = 8083 + _STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND._serialized_start = 8086 + _STREAMINGQUERYMANAGERCOMMAND_STREAMINGQUERYLISTENERCOMMAND._serialized_end = 8291 + _STREAMINGQUERYMANAGERCOMMANDRESULT._serialized_start = 8305 + _STREAMINGQUERYMANAGERCOMMANDRESULT._serialized_end = 9381 + _STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT._serialized_start = 8913 + _STREAMINGQUERYMANAGERCOMMANDRESULT_ACTIVERESULT._serialized_end = 9040 + _STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE._serialized_start = 9042 + _STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYINSTANCE._serialized_end = 9157 + _STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT._serialized_start = 9159 + _STREAMINGQUERYMANAGERCOMMANDRESULT_AWAITANYTERMINATIONRESULT._serialized_end = 9218 + _STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYLISTENERINSTANCE._serialized_start = 9220 + _STREAMINGQUERYMANAGERCOMMANDRESULT_STREAMINGQUERYLISTENERINSTANCE._serialized_end = 9295 + _STREAMINGQUERYMANAGERCOMMANDRESULT_LISTSTREAMINGQUERYLISTENERRESULT._serialized_start = 9297 + _STREAMINGQUERYMANAGERCOMMANDRESULT_LISTSTREAMINGQUERYLISTENERRESULT._serialized_end = 9366 + _STREAMINGQUERYLISTENERBUSCOMMAND._serialized_start = 9384 + _STREAMINGQUERYLISTENERBUSCOMMAND._serialized_end = 9557 + _STREAMINGQUERYLISTENEREVENT._serialized_start = 9560 + _STREAMINGQUERYLISTENEREVENT._serialized_end = 9691 + _STREAMINGQUERYLISTENEREVENTSRESULT._serialized_start = 9694 + _STREAMINGQUERYLISTENEREVENTSRESULT._serialized_end = 9898 + _GETRESOURCESCOMMAND._serialized_start = 9900 + _GETRESOURCESCOMMAND._serialized_end = 9921 + _GETRESOURCESCOMMANDRESULT._serialized_start = 9924 + _GETRESOURCESCOMMANDRESULT._serialized_end = 10136 + _GETRESOURCESCOMMANDRESULT_RESOURCESENTRY._serialized_start = 10040 + _GETRESOURCESCOMMANDRESULT_RESOURCESENTRY._serialized_end = 10136 + _CREATERESOURCEPROFILECOMMAND._serialized_start = 10138 + _CREATERESOURCEPROFILECOMMAND._serialized_end = 10226 + _CREATERESOURCEPROFILECOMMANDRESULT._serialized_start = 10228 + _CREATERESOURCEPROFILECOMMANDRESULT._serialized_end = 10295 + _REMOVECACHEDREMOTERELATIONCOMMAND._serialized_start = 10297 + _REMOVECACHEDREMOTERELATIONCOMMAND._serialized_end = 10397 + _CHECKPOINTCOMMAND._serialized_start = 10399 + _CHECKPOINTCOMMAND._serialized_end = 10515 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/commands_pb2.pyi b/python/pyspark/sql/connect/proto/commands_pb2.pyi index f86ae653508e3..04d50d5b5e4f4 100644 --- a/python/pyspark/sql/connect/proto/commands_pb2.pyi +++ b/python/pyspark/sql/connect/proto/commands_pb2.pyi @@ -101,6 +101,8 @@ class Command(google.protobuf.message.Message): STREAMING_QUERY_LISTENER_BUS_COMMAND_FIELD_NUMBER: builtins.int REGISTER_DATA_SOURCE_FIELD_NUMBER: builtins.int CREATE_RESOURCE_PROFILE_COMMAND_FIELD_NUMBER: builtins.int + CHECKPOINT_COMMAND_FIELD_NUMBER: builtins.int + REMOVE_CACHED_REMOTE_RELATION_COMMAND_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int @property def register_function( @@ -135,6 +137,12 @@ class Command(google.protobuf.message.Message): @property def create_resource_profile_command(self) -> global___CreateResourceProfileCommand: ... @property + def checkpoint_command(self) -> global___CheckpointCommand: ... 
+ @property + def remove_cached_remote_relation_command( + self, + ) -> global___RemoveCachedRemoteRelationCommand: ... + @property + def extension(self) -> google.protobuf.any_pb2.Any: """This field is used to mark extensions to the protocol. When plugins generate arbitrary Commands they can add them here. During the planning the correct resolution is done. """ @@ -159,11 +167,16 @@ class Command(google.protobuf.message.Message): register_data_source: pyspark.sql.connect.proto.relations_pb2.CommonInlineUserDefinedDataSource | None = ..., create_resource_profile_command: global___CreateResourceProfileCommand | None = ..., + checkpoint_command: global___CheckpointCommand | None = ..., + remove_cached_remote_relation_command: global___RemoveCachedRemoteRelationCommand + | None = ..., extension: google.protobuf.any_pb2.Any | None = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal[ + "checkpoint_command", + b"checkpoint_command", "command_type", b"command_type", "create_dataframe_view", @@ -180,6 +193,8 @@ class Command(google.protobuf.message.Message): b"register_function", "register_table_function", b"register_table_function", + "remove_cached_remote_relation_command", + b"remove_cached_remote_relation_command", "sql_command", b"sql_command", "streaming_query_command", @@ -199,6 +214,8 @@ class Command(google.protobuf.message.Message): def ClearField( self, field_name: typing_extensions.Literal[ + "checkpoint_command", + b"checkpoint_command", "command_type", b"command_type", "create_dataframe_view", @@ -215,6 +232,8 @@ class Command(google.protobuf.message.Message): b"register_function", "register_table_function", b"register_table_function", + "remove_cached_remote_relation_command", + b"remove_cached_remote_relation_command", "sql_command", b"sql_command", "streaming_query_command", @@ -248,6 +267,8 @@ class Command(google.protobuf.message.Message): "streaming_query_listener_bus_command", "register_data_source", "create_resource_profile_command", + "checkpoint_command", + "remove_cached_remote_relation_command", "extension", ] | None @@ -2119,3 +2140,60 @@ class CreateResourceProfileCommandResult(google.protobuf.message.Message): ) -> None: ... global___CreateResourceProfileCommandResult = CreateResourceProfileCommandResult + +class RemoveCachedRemoteRelationCommand(google.protobuf.message.Message): + """Command to remove `CachedRemoteRelation`""" + + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + RELATION_FIELD_NUMBER: builtins.int + @property + def relation(self) -> pyspark.sql.connect.proto.relations_pb2.CachedRemoteRelation: + """(Required) The cached remote relation to be removed.""" + def __init__( + self, + *, + relation: pyspark.sql.connect.proto.relations_pb2.CachedRemoteRelation | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["relation", b"relation"] + ) -> builtins.bool: ... + def ClearField( + self, field_name: typing_extensions.Literal["relation", b"relation"] + ) -> None: ...
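Together with the `CheckpointCommand` stub that follows, these entries add two new `command_type` arms on `Command`. A minimal construction sketch, assuming the regenerated proto modules; `plan` stands in for an already-resolved `relations_pb2.Relation`, and the empty `CachedRemoteRelation` would normally be the one returned in a `CheckpointCommandResult`:

```python
from pyspark.sql.connect.proto import commands_pb2, relations_pb2

plan = relations_pb2.Relation()  # placeholder for the DataFrame's resolved plan

# Ask the server to checkpoint the plan: local=False -> reliable checkpoint directory,
# eager=True -> materialize immediately rather than on first use.
checkpoint_cmd = commands_pb2.Command(
    checkpoint_command=commands_pb2.CheckpointCommand(relation=plan, local=False, eager=True)
)
assert checkpoint_cmd.WhichOneof("command_type") == "checkpoint_command"

# Later, release the server-side cached relation produced by the checkpoint.
release_cmd = commands_pb2.Command(
    remove_cached_remote_relation_command=commands_pb2.RemoveCachedRemoteRelationCommand(
        relation=relations_pb2.CachedRemoteRelation()  # placeholder for the returned relation
    )
)
```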
+ +global___RemoveCachedRemoteRelationCommand = RemoveCachedRemoteRelationCommand + +class CheckpointCommand(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + RELATION_FIELD_NUMBER: builtins.int + LOCAL_FIELD_NUMBER: builtins.int + EAGER_FIELD_NUMBER: builtins.int + @property + def relation(self) -> pyspark.sql.connect.proto.relations_pb2.Relation: + """(Required) The logical plan to checkpoint.""" + local: builtins.bool + """(Required) Locally checkpoint using a local temporary + directory in Spark Connect server (Spark Driver) + """ + eager: builtins.bool + """(Required) Whether to checkpoint this dataframe immediately.""" + def __init__( + self, + *, + relation: pyspark.sql.connect.proto.relations_pb2.Relation | None = ..., + local: builtins.bool = ..., + eager: builtins.bool = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["relation", b"relation"] + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "eager", b"eager", "local", b"local", "relation", b"relation" + ], + ) -> None: ... + +global___CheckpointCommand = CheckpointCommand diff --git a/python/pyspark/sql/connect/proto/common_pb2.py b/python/pyspark/sql/connect/proto/common_pb2.py index a77d1463e51da..fd528fae33691 100644 --- a/python/pyspark/sql/connect/proto/common_pb2.py +++ b/python/pyspark/sql/connect/proto/common_pb2.py @@ -29,7 +29,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1aspark/connect/common.proto\x12\rspark.connect"\xb0\x01\n\x0cStorageLevel\x12\x19\n\x08use_disk\x18\x01 \x01(\x08R\x07useDisk\x12\x1d\n\nuse_memory\x18\x02 \x01(\x08R\tuseMemory\x12 \n\x0cuse_off_heap\x18\x03 \x01(\x08R\nuseOffHeap\x12"\n\x0c\x64\x65serialized\x18\x04 \x01(\x08R\x0c\x64\x65serialized\x12 \n\x0breplication\x18\x05 \x01(\x05R\x0breplication"G\n\x13ResourceInformation\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1c\n\taddresses\x18\x02 \x03(\tR\taddresses"\xc3\x01\n\x17\x45xecutorResourceRequest\x12#\n\rresource_name\x18\x01 \x01(\tR\x0cresourceName\x12\x16\n\x06\x61mount\x18\x02 \x01(\x03R\x06\x61mount\x12.\n\x10\x64iscovery_script\x18\x03 \x01(\tH\x00R\x0f\x64iscoveryScript\x88\x01\x01\x12\x1b\n\x06vendor\x18\x04 \x01(\tH\x01R\x06vendor\x88\x01\x01\x42\x13\n\x11_discovery_scriptB\t\n\x07_vendor"R\n\x13TaskResourceRequest\x12#\n\rresource_name\x18\x01 \x01(\tR\x0cresourceName\x12\x16\n\x06\x61mount\x18\x02 \x01(\x01R\x06\x61mount"\xa5\x03\n\x0fResourceProfile\x12\x64\n\x12\x65xecutor_resources\x18\x01 \x03(\x0b\x32\x35.spark.connect.ResourceProfile.ExecutorResourcesEntryR\x11\x65xecutorResources\x12X\n\x0etask_resources\x18\x02 \x03(\x0b\x32\x31.spark.connect.ResourceProfile.TaskResourcesEntryR\rtaskResources\x1al\n\x16\x45xecutorResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12<\n\x05value\x18\x02 \x01(\x0b\x32&.spark.connect.ExecutorResourceRequestR\x05value:\x02\x38\x01\x1a\x64\n\x12TaskResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32".spark.connect.TaskResourceRequestR\x05value:\x02\x38\x01\x42\x36\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1aspark/connect/common.proto\x12\rspark.connect"\xb0\x01\n\x0cStorageLevel\x12\x19\n\x08use_disk\x18\x01 \x01(\x08R\x07useDisk\x12\x1d\n\nuse_memory\x18\x02 \x01(\x08R\tuseMemory\x12 \n\x0cuse_off_heap\x18\x03 \x01(\x08R\nuseOffHeap\x12"\n\x0c\x64\x65serialized\x18\x04 \x01(\x08R\x0c\x64\x65serialized\x12 \n\x0breplication\x18\x05 
\x01(\x05R\x0breplication"G\n\x13ResourceInformation\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x1c\n\taddresses\x18\x02 \x03(\tR\taddresses"\xc3\x01\n\x17\x45xecutorResourceRequest\x12#\n\rresource_name\x18\x01 \x01(\tR\x0cresourceName\x12\x16\n\x06\x61mount\x18\x02 \x01(\x03R\x06\x61mount\x12.\n\x10\x64iscovery_script\x18\x03 \x01(\tH\x00R\x0f\x64iscoveryScript\x88\x01\x01\x12\x1b\n\x06vendor\x18\x04 \x01(\tH\x01R\x06vendor\x88\x01\x01\x42\x13\n\x11_discovery_scriptB\t\n\x07_vendor"R\n\x13TaskResourceRequest\x12#\n\rresource_name\x18\x01 \x01(\tR\x0cresourceName\x12\x16\n\x06\x61mount\x18\x02 \x01(\x01R\x06\x61mount"\xa5\x03\n\x0fResourceProfile\x12\x64\n\x12\x65xecutor_resources\x18\x01 \x03(\x0b\x32\x35.spark.connect.ResourceProfile.ExecutorResourcesEntryR\x11\x65xecutorResources\x12X\n\x0etask_resources\x18\x02 \x03(\x0b\x32\x31.spark.connect.ResourceProfile.TaskResourcesEntryR\rtaskResources\x1al\n\x16\x45xecutorResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12<\n\x05value\x18\x02 \x01(\x0b\x32&.spark.connect.ExecutorResourceRequestR\x05value:\x02\x38\x01\x1a\x64\n\x12TaskResourcesEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32".spark.connect.TaskResourceRequestR\x05value:\x02\x38\x01"X\n\x06Origin\x12\x42\n\rpython_origin\x18\x01 \x01(\x0b\x32\x1b.spark.connect.PythonOriginH\x00R\x0cpythonOriginB\n\n\x08\x66unction"G\n\x0cPythonOrigin\x12\x1a\n\x08\x66ragment\x18\x01 \x01(\tR\x08\x66ragment\x12\x1b\n\tcall_site\x18\x02 \x01(\tR\x08\x63\x61llSiteB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -59,4 +59,8 @@ _RESOURCEPROFILE_EXECUTORRESOURCESENTRY._serialized_end = 899 _RESOURCEPROFILE_TASKRESOURCESENTRY._serialized_start = 901 _RESOURCEPROFILE_TASKRESOURCESENTRY._serialized_end = 1001 + _ORIGIN._serialized_start = 1003 + _ORIGIN._serialized_end = 1091 + _PYTHONORIGIN._serialized_start = 1093 + _PYTHONORIGIN._serialized_end = 1164 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/common_pb2.pyi b/python/pyspark/sql/connect/proto/common_pb2.pyi index 163781b41998f..eda172e26cf4e 100644 --- a/python/pyspark/sql/connect/proto/common_pb2.pyi +++ b/python/pyspark/sql/connect/proto/common_pb2.pyi @@ -296,3 +296,54 @@ class ResourceProfile(google.protobuf.message.Message): ) -> None: ... global___ResourceProfile = ResourceProfile + +class Origin(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + PYTHON_ORIGIN_FIELD_NUMBER: builtins.int + @property + def python_origin(self) -> global___PythonOrigin: ... + def __init__( + self, + *, + python_origin: global___PythonOrigin | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "function", b"function", "python_origin", b"python_origin" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "function", b"function", "python_origin", b"python_origin" + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["function", b"function"] + ) -> typing_extensions.Literal["python_origin"] | None: ... 
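The new `Origin` message wraps a `PythonOrigin` (defined just below) so the client can attach user-code call-site information. A sketch of filling it from the current stack, assuming the regenerated `common_pb2` module; the use of `traceback` and the example `fragment` value are illustrative choices, not prescribed by this patch:

```python
import traceback

from pyspark.sql.connect.proto import common_pb2


def current_origin(fragment: str) -> common_pb2.Origin:
    """Build an Origin describing where in user code a plan node was created."""
    return common_pb2.Origin(
        python_origin=common_pb2.PythonOrigin(
            fragment=fragment,  # e.g. the API method name that created the node
            call_site="".join(traceback.format_stack(limit=5)),  # shown to users in errors
        )
    )


origin = current_origin("DataFrame.checkpoint")
assert origin.WhichOneof("function") == "python_origin"
```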
+ +global___Origin = Origin + +class PythonOrigin(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + FRAGMENT_FIELD_NUMBER: builtins.int + CALL_SITE_FIELD_NUMBER: builtins.int + fragment: builtins.str + """(Required) Name of the origin, for example, the name of the function""" + call_site: builtins.str + """(Required) Callsite to show to end users, for example, stacktrace.""" + def __init__( + self, + *, + fragment: builtins.str = ..., + call_site: builtins.str = ..., + ) -> None: ... + def ClearField( + self, + field_name: typing_extensions.Literal["call_site", b"call_site", "fragment", b"fragment"], + ) -> None: ... + +global___PythonOrigin = PythonOrigin diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.py b/python/pyspark/sql/connect/proto/expressions_pb2.py index e42acbf49a7df..c8a183105fd11 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.py +++ b/python/pyspark/sql/connect/proto/expressions_pb2.py @@ -30,10 +30,11 @@ from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2 from pyspark.sql.connect.proto import types_pb2 as spark_dot_connect_dot_types__pb2 +from pyspark.sql.connect.proto import common_pb2 as spark_dot_connect_dot_common__pb2 DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto"\xde.\n\nExpression\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c \x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12\x35\n\textension\x18\xe7\x07 
\x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9b\x0c\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 \x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 
\x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lementsB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\xcc\x01\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 
\x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"\xec\x02\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdfB\n\n\x08\x66unction"\x9b\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer"\xb8\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05valueB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1fspark/connect/expressions.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x19spark/connect/types.proto\x1a\x1aspark/connect/common.proto"\x97/\n\nExpression\x12\x37\n\x06\x63ommon\x18\x12 \x01(\x0b\x32\x1f.spark.connect.ExpressionCommonR\x06\x63ommon\x12=\n\x07literal\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralH\x00R\x07literal\x12\x62\n\x14unresolved_attribute\x18\x02 \x01(\x0b\x32-.spark.connect.Expression.UnresolvedAttributeH\x00R\x13unresolvedAttribute\x12_\n\x13unresolved_function\x18\x03 \x01(\x0b\x32,.spark.connect.Expression.UnresolvedFunctionH\x00R\x12unresolvedFunction\x12Y\n\x11\x65xpression_string\x18\x04 \x01(\x0b\x32*.spark.connect.Expression.ExpressionStringH\x00R\x10\x65xpressionString\x12S\n\x0funresolved_star\x18\x05 \x01(\x0b\x32(.spark.connect.Expression.UnresolvedStarH\x00R\x0eunresolvedStar\x12\x37\n\x05\x61lias\x18\x06 \x01(\x0b\x32\x1f.spark.connect.Expression.AliasH\x00R\x05\x61lias\x12\x34\n\x04\x63\x61st\x18\x07 \x01(\x0b\x32\x1e.spark.connect.Expression.CastH\x00R\x04\x63\x61st\x12V\n\x10unresolved_regex\x18\x08 \x01(\x0b\x32).spark.connect.Expression.UnresolvedRegexH\x00R\x0funresolvedRegex\x12\x44\n\nsort_order\x18\t \x01(\x0b\x32#.spark.connect.Expression.SortOrderH\x00R\tsortOrder\x12S\n\x0flambda_function\x18\n \x01(\x0b\x32(.spark.connect.Expression.LambdaFunctionH\x00R\x0elambdaFunction\x12:\n\x06window\x18\x0b \x01(\x0b\x32 .spark.connect.Expression.WindowH\x00R\x06window\x12l\n\x18unresolved_extract_value\x18\x0c 
\x01(\x0b\x32\x30.spark.connect.Expression.UnresolvedExtractValueH\x00R\x16unresolvedExtractValue\x12M\n\rupdate_fields\x18\r \x01(\x0b\x32&.spark.connect.Expression.UpdateFieldsH\x00R\x0cupdateFields\x12\x82\x01\n unresolved_named_lambda_variable\x18\x0e \x01(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableH\x00R\x1dunresolvedNamedLambdaVariable\x12~\n#common_inline_user_defined_function\x18\x0f \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionH\x00R\x1f\x63ommonInlineUserDefinedFunction\x12\x42\n\rcall_function\x18\x10 \x01(\x0b\x32\x1b.spark.connect.CallFunctionH\x00R\x0c\x63\x61llFunction\x12\x64\n\x19named_argument_expression\x18\x11 \x01(\x0b\x32&.spark.connect.NamedArgumentExpressionH\x00R\x17namedArgumentExpression\x12\x35\n\textension\x18\xe7\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x1a\x8f\x06\n\x06Window\x12\x42\n\x0fwindow_function\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0ewindowFunction\x12@\n\x0epartition_spec\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\rpartitionSpec\x12\x42\n\norder_spec\x18\x03 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\torderSpec\x12K\n\nframe_spec\x18\x04 \x01(\x0b\x32,.spark.connect.Expression.Window.WindowFrameR\tframeSpec\x1a\xed\x03\n\x0bWindowFrame\x12U\n\nframe_type\x18\x01 \x01(\x0e\x32\x36.spark.connect.Expression.Window.WindowFrame.FrameTypeR\tframeType\x12P\n\x05lower\x18\x02 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05lower\x12P\n\x05upper\x18\x03 \x01(\x0b\x32:.spark.connect.Expression.Window.WindowFrame.FrameBoundaryR\x05upper\x1a\x91\x01\n\rFrameBoundary\x12!\n\x0b\x63urrent_row\x18\x01 \x01(\x08H\x00R\ncurrentRow\x12\x1e\n\tunbounded\x18\x02 \x01(\x08H\x00R\tunbounded\x12\x31\n\x05value\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionH\x00R\x05valueB\n\n\x08\x62oundary"O\n\tFrameType\x12\x18\n\x14\x46RAME_TYPE_UNDEFINED\x10\x00\x12\x12\n\x0e\x46RAME_TYPE_ROW\x10\x01\x12\x14\n\x10\x46RAME_TYPE_RANGE\x10\x02\x1a\xa9\x03\n\tSortOrder\x12/\n\x05\x63hild\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12O\n\tdirection\x18\x02 \x01(\x0e\x32\x31.spark.connect.Expression.SortOrder.SortDirectionR\tdirection\x12U\n\rnull_ordering\x18\x03 \x01(\x0e\x32\x30.spark.connect.Expression.SortOrder.NullOrderingR\x0cnullOrdering"l\n\rSortDirection\x12\x1e\n\x1aSORT_DIRECTION_UNSPECIFIED\x10\x00\x12\x1c\n\x18SORT_DIRECTION_ASCENDING\x10\x01\x12\x1d\n\x19SORT_DIRECTION_DESCENDING\x10\x02"U\n\x0cNullOrdering\x12\x1a\n\x16SORT_NULLS_UNSPECIFIED\x10\x00\x12\x14\n\x10SORT_NULLS_FIRST\x10\x01\x12\x13\n\x0fSORT_NULLS_LAST\x10\x02\x1a\xbb\x02\n\x04\x43\x61st\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12-\n\x04type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04type\x12\x1b\n\x08type_str\x18\x03 \x01(\tH\x00R\x07typeStr\x12\x44\n\teval_mode\x18\x04 \x01(\x0e\x32\'.spark.connect.Expression.Cast.EvalModeR\x08\x65valMode"b\n\x08\x45valMode\x12\x19\n\x15\x45VAL_MODE_UNSPECIFIED\x10\x00\x12\x14\n\x10\x45VAL_MODE_LEGACY\x10\x01\x12\x12\n\x0e\x45VAL_MODE_ANSI\x10\x02\x12\x11\n\rEVAL_MODE_TRY\x10\x03\x42\x0e\n\x0c\x63\x61st_to_type\x1a\x9b\x0c\n\x07Literal\x12-\n\x04null\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x04null\x12\x18\n\x06\x62inary\x18\x02 \x01(\x0cH\x00R\x06\x62inary\x12\x1a\n\x07\x62oolean\x18\x03 \x01(\x08H\x00R\x07\x62oolean\x12\x14\n\x04\x62yte\x18\x04 \x01(\x05H\x00R\x04\x62yte\x12\x16\n\x05short\x18\x05 \x01(\x05H\x00R\x05short\x12\x1a\n\x07integer\x18\x06 
\x01(\x05H\x00R\x07integer\x12\x14\n\x04long\x18\x07 \x01(\x03H\x00R\x04long\x12\x16\n\x05\x66loat\x18\n \x01(\x02H\x00R\x05\x66loat\x12\x18\n\x06\x64ouble\x18\x0b \x01(\x01H\x00R\x06\x64ouble\x12\x45\n\x07\x64\x65\x63imal\x18\x0c \x01(\x0b\x32).spark.connect.Expression.Literal.DecimalH\x00R\x07\x64\x65\x63imal\x12\x18\n\x06string\x18\r \x01(\tH\x00R\x06string\x12\x14\n\x04\x64\x61te\x18\x10 \x01(\x05H\x00R\x04\x64\x61te\x12\x1e\n\ttimestamp\x18\x11 \x01(\x03H\x00R\ttimestamp\x12%\n\rtimestamp_ntz\x18\x12 \x01(\x03H\x00R\x0ctimestampNtz\x12\x61\n\x11\x63\x61lendar_interval\x18\x13 \x01(\x0b\x32\x32.spark.connect.Expression.Literal.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12\x30\n\x13year_month_interval\x18\x14 \x01(\x05H\x00R\x11yearMonthInterval\x12,\n\x11\x64\x61y_time_interval\x18\x15 \x01(\x03H\x00R\x0f\x64\x61yTimeInterval\x12?\n\x05\x61rray\x18\x16 \x01(\x0b\x32\'.spark.connect.Expression.Literal.ArrayH\x00R\x05\x61rray\x12\x39\n\x03map\x18\x17 \x01(\x0b\x32%.spark.connect.Expression.Literal.MapH\x00R\x03map\x12\x42\n\x06struct\x18\x18 \x01(\x0b\x32(.spark.connect.Expression.Literal.StructH\x00R\x06struct\x1au\n\x07\x44\x65\x63imal\x12\x14\n\x05value\x18\x01 \x01(\tR\x05value\x12!\n\tprecision\x18\x02 \x01(\x05H\x00R\tprecision\x88\x01\x01\x12\x19\n\x05scale\x18\x03 \x01(\x05H\x01R\x05scale\x88\x01\x01\x42\x0c\n\n_precisionB\x08\n\x06_scale\x1a\x62\n\x10\x43\x61lendarInterval\x12\x16\n\x06months\x18\x01 \x01(\x05R\x06months\x12\x12\n\x04\x64\x61ys\x18\x02 \x01(\x05R\x04\x64\x61ys\x12"\n\x0cmicroseconds\x18\x03 \x01(\x03R\x0cmicroseconds\x1a\x82\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lements\x1a\xe3\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12\x35\n\x04keys\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x04keys\x12\x39\n\x06values\x18\x04 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1a\x81\x01\n\x06Struct\x12\x38\n\x0bstruct_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\nstructType\x12=\n\x08\x65lements\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x08\x65lementsB\x0e\n\x0cliteral_type\x1a\xba\x01\n\x13UnresolvedAttribute\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12\x31\n\x12is_metadata_column\x18\x03 \x01(\x08H\x01R\x10isMetadataColumn\x88\x01\x01\x42\n\n\x08_plan_idB\x15\n\x13_is_metadata_column\x1a\xcc\x01\n\x12UnresolvedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x1f\n\x0bis_distinct\x18\x03 \x01(\x08R\nisDistinct\x12\x37\n\x18is_user_defined_function\x18\x04 \x01(\x08R\x15isUserDefinedFunction\x1a\x32\n\x10\x45xpressionString\x12\x1e\n\nexpression\x18\x01 \x01(\tR\nexpression\x1a|\n\x0eUnresolvedStar\x12,\n\x0funparsed_target\x18\x01 \x01(\tH\x00R\x0eunparsedTarget\x88\x01\x01\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x01R\x06planId\x88\x01\x01\x42\x12\n\x10_unparsed_targetB\n\n\x08_plan_id\x1aV\n\x0fUnresolvedRegex\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id\x1a\x84\x01\n\x16UnresolvedExtractValue\x12/\n\x05\x63hild\x18\x01 
\x01(\x0b\x32\x19.spark.connect.ExpressionR\x05\x63hild\x12\x39\n\nextraction\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\nextraction\x1a\xbb\x01\n\x0cUpdateFields\x12\x46\n\x11struct_expression\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x10structExpression\x12\x1d\n\nfield_name\x18\x02 \x01(\tR\tfieldName\x12\x44\n\x10value_expression\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x0fvalueExpression\x1ax\n\x05\x41lias\x12-\n\x04\x65xpr\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x04\x65xpr\x12\x12\n\x04name\x18\x02 \x03(\tR\x04name\x12\x1f\n\x08metadata\x18\x03 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x9e\x01\n\x0eLambdaFunction\x12\x35\n\x08\x66unction\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08\x66unction\x12U\n\targuments\x18\x02 \x03(\x0b\x32\x37.spark.connect.Expression.UnresolvedNamedLambdaVariableR\targuments\x1a>\n\x1dUnresolvedNamedLambdaVariable\x12\x1d\n\nname_parts\x18\x01 \x03(\tR\tnamePartsB\x0b\n\texpr_type"A\n\x10\x45xpressionCommon\x12-\n\x06origin\x18\x01 \x01(\x0b\x32\x15.spark.connect.OriginR\x06origin"\xec\x02\n\x1f\x43ommonInlineUserDefinedFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12\x39\n\npython_udf\x18\x04 \x01(\x0b\x32\x18.spark.connect.PythonUDFH\x00R\tpythonUdf\x12I\n\x10scalar_scala_udf\x18\x05 \x01(\x0b\x32\x1d.spark.connect.ScalarScalaUDFH\x00R\x0escalarScalaUdf\x12\x33\n\x08java_udf\x18\x06 \x01(\x0b\x32\x16.spark.connect.JavaUDFH\x00R\x07javaUdfB\n\n\x08\x66unction"\xcc\x01\n\tPythonUDF\x12\x38\n\x0boutput_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVer\x12/\n\x13\x61\x64\x64itional_includes\x18\x05 \x03(\tR\x12\x61\x64\x64itionalIncludes"\xd6\x01\n\x0eScalarScalaUDF\x12\x18\n\x07payload\x18\x01 \x01(\x0cR\x07payload\x12\x37\n\ninputTypes\x18\x02 \x03(\x0b\x32\x17.spark.connect.DataTypeR\ninputTypes\x12\x37\n\noutputType\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeR\noutputType\x12\x1a\n\x08nullable\x18\x04 \x01(\x08R\x08nullable\x12\x1c\n\taggregate\x18\x05 \x01(\x08R\taggregate"\x95\x01\n\x07JavaUDF\x12\x1d\n\nclass_name\x18\x01 \x01(\tR\tclassName\x12=\n\x0boutput_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\noutputType\x88\x01\x01\x12\x1c\n\taggregate\x18\x03 \x01(\x08R\taggregateB\x0e\n\x0c_output_type"l\n\x0c\x43\x61llFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12\x37\n\targuments\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments"\\\n\x17NamedArgumentExpression\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05valueB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -45,68 +46,70 @@ DESCRIPTOR._serialized_options = ( b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" ) - _EXPRESSION._serialized_start = 105 - _EXPRESSION._serialized_end = 6087 - _EXPRESSION_WINDOW._serialized_start = 1645 - _EXPRESSION_WINDOW._serialized_end = 2428 - _EXPRESSION_WINDOW_WINDOWFRAME._serialized_start = 1935 - _EXPRESSION_WINDOW_WINDOWFRAME._serialized_end = 2428 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_start = 2202 - 
_EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_end = 2347 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_start = 2349 - _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_end = 2428 - _EXPRESSION_SORTORDER._serialized_start = 2431 - _EXPRESSION_SORTORDER._serialized_end = 2856 - _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_start = 2661 - _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_end = 2769 - _EXPRESSION_SORTORDER_NULLORDERING._serialized_start = 2771 - _EXPRESSION_SORTORDER_NULLORDERING._serialized_end = 2856 - _EXPRESSION_CAST._serialized_start = 2859 - _EXPRESSION_CAST._serialized_end = 3174 - _EXPRESSION_CAST_EVALMODE._serialized_start = 3060 - _EXPRESSION_CAST_EVALMODE._serialized_end = 3158 - _EXPRESSION_LITERAL._serialized_start = 3177 - _EXPRESSION_LITERAL._serialized_end = 4740 - _EXPRESSION_LITERAL_DECIMAL._serialized_start = 4012 - _EXPRESSION_LITERAL_DECIMAL._serialized_end = 4129 - _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_start = 4131 - _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_end = 4229 - _EXPRESSION_LITERAL_ARRAY._serialized_start = 4232 - _EXPRESSION_LITERAL_ARRAY._serialized_end = 4362 - _EXPRESSION_LITERAL_MAP._serialized_start = 4365 - _EXPRESSION_LITERAL_MAP._serialized_end = 4592 - _EXPRESSION_LITERAL_STRUCT._serialized_start = 4595 - _EXPRESSION_LITERAL_STRUCT._serialized_end = 4724 - _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_start = 4743 - _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_end = 4929 - _EXPRESSION_UNRESOLVEDFUNCTION._serialized_start = 4932 - _EXPRESSION_UNRESOLVEDFUNCTION._serialized_end = 5136 - _EXPRESSION_EXPRESSIONSTRING._serialized_start = 5138 - _EXPRESSION_EXPRESSIONSTRING._serialized_end = 5188 - _EXPRESSION_UNRESOLVEDSTAR._serialized_start = 5190 - _EXPRESSION_UNRESOLVEDSTAR._serialized_end = 5314 - _EXPRESSION_UNRESOLVEDREGEX._serialized_start = 5316 - _EXPRESSION_UNRESOLVEDREGEX._serialized_end = 5402 - _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_start = 5405 - _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_end = 5537 - _EXPRESSION_UPDATEFIELDS._serialized_start = 5540 - _EXPRESSION_UPDATEFIELDS._serialized_end = 5727 - _EXPRESSION_ALIAS._serialized_start = 5729 - _EXPRESSION_ALIAS._serialized_end = 5849 - _EXPRESSION_LAMBDAFUNCTION._serialized_start = 5852 - _EXPRESSION_LAMBDAFUNCTION._serialized_end = 6010 - _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_start = 6012 - _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_end = 6074 - _COMMONINLINEUSERDEFINEDFUNCTION._serialized_start = 6090 - _COMMONINLINEUSERDEFINEDFUNCTION._serialized_end = 6454 - _PYTHONUDF._serialized_start = 6457 - _PYTHONUDF._serialized_end = 6612 - _SCALARSCALAUDF._serialized_start = 6615 - _SCALARSCALAUDF._serialized_end = 6799 - _JAVAUDF._serialized_start = 6802 - _JAVAUDF._serialized_end = 6951 - _CALLFUNCTION._serialized_start = 6953 - _CALLFUNCTION._serialized_end = 7061 - _NAMEDARGUMENTEXPRESSION._serialized_start = 7063 - _NAMEDARGUMENTEXPRESSION._serialized_end = 7155 + _EXPRESSION._serialized_start = 133 + _EXPRESSION._serialized_end = 6172 + _EXPRESSION_WINDOW._serialized_start = 1730 + _EXPRESSION_WINDOW._serialized_end = 2513 + _EXPRESSION_WINDOW_WINDOWFRAME._serialized_start = 2020 + _EXPRESSION_WINDOW_WINDOWFRAME._serialized_end = 2513 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_start = 2287 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMEBOUNDARY._serialized_end = 2432 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_start = 2434 + _EXPRESSION_WINDOW_WINDOWFRAME_FRAMETYPE._serialized_end 
= 2513 + _EXPRESSION_SORTORDER._serialized_start = 2516 + _EXPRESSION_SORTORDER._serialized_end = 2941 + _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_start = 2746 + _EXPRESSION_SORTORDER_SORTDIRECTION._serialized_end = 2854 + _EXPRESSION_SORTORDER_NULLORDERING._serialized_start = 2856 + _EXPRESSION_SORTORDER_NULLORDERING._serialized_end = 2941 + _EXPRESSION_CAST._serialized_start = 2944 + _EXPRESSION_CAST._serialized_end = 3259 + _EXPRESSION_CAST_EVALMODE._serialized_start = 3145 + _EXPRESSION_CAST_EVALMODE._serialized_end = 3243 + _EXPRESSION_LITERAL._serialized_start = 3262 + _EXPRESSION_LITERAL._serialized_end = 4825 + _EXPRESSION_LITERAL_DECIMAL._serialized_start = 4097 + _EXPRESSION_LITERAL_DECIMAL._serialized_end = 4214 + _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_start = 4216 + _EXPRESSION_LITERAL_CALENDARINTERVAL._serialized_end = 4314 + _EXPRESSION_LITERAL_ARRAY._serialized_start = 4317 + _EXPRESSION_LITERAL_ARRAY._serialized_end = 4447 + _EXPRESSION_LITERAL_MAP._serialized_start = 4450 + _EXPRESSION_LITERAL_MAP._serialized_end = 4677 + _EXPRESSION_LITERAL_STRUCT._serialized_start = 4680 + _EXPRESSION_LITERAL_STRUCT._serialized_end = 4809 + _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_start = 4828 + _EXPRESSION_UNRESOLVEDATTRIBUTE._serialized_end = 5014 + _EXPRESSION_UNRESOLVEDFUNCTION._serialized_start = 5017 + _EXPRESSION_UNRESOLVEDFUNCTION._serialized_end = 5221 + _EXPRESSION_EXPRESSIONSTRING._serialized_start = 5223 + _EXPRESSION_EXPRESSIONSTRING._serialized_end = 5273 + _EXPRESSION_UNRESOLVEDSTAR._serialized_start = 5275 + _EXPRESSION_UNRESOLVEDSTAR._serialized_end = 5399 + _EXPRESSION_UNRESOLVEDREGEX._serialized_start = 5401 + _EXPRESSION_UNRESOLVEDREGEX._serialized_end = 5487 + _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_start = 5490 + _EXPRESSION_UNRESOLVEDEXTRACTVALUE._serialized_end = 5622 + _EXPRESSION_UPDATEFIELDS._serialized_start = 5625 + _EXPRESSION_UPDATEFIELDS._serialized_end = 5812 + _EXPRESSION_ALIAS._serialized_start = 5814 + _EXPRESSION_ALIAS._serialized_end = 5934 + _EXPRESSION_LAMBDAFUNCTION._serialized_start = 5937 + _EXPRESSION_LAMBDAFUNCTION._serialized_end = 6095 + _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_start = 6097 + _EXPRESSION_UNRESOLVEDNAMEDLAMBDAVARIABLE._serialized_end = 6159 + _EXPRESSIONCOMMON._serialized_start = 6174 + _EXPRESSIONCOMMON._serialized_end = 6239 + _COMMONINLINEUSERDEFINEDFUNCTION._serialized_start = 6242 + _COMMONINLINEUSERDEFINEDFUNCTION._serialized_end = 6606 + _PYTHONUDF._serialized_start = 6609 + _PYTHONUDF._serialized_end = 6813 + _SCALARSCALAUDF._serialized_start = 6816 + _SCALARSCALAUDF._serialized_end = 7030 + _JAVAUDF._serialized_start = 7033 + _JAVAUDF._serialized_end = 7182 + _CALLFUNCTION._serialized_start = 7184 + _CALLFUNCTION._serialized_end = 7292 + _NAMEDARGUMENTEXPRESSION._serialized_start = 7294 + _NAMEDARGUMENTEXPRESSION._serialized_end = 7386 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/expressions_pb2.pyi b/python/pyspark/sql/connect/proto/expressions_pb2.pyi index 183a839da9204..42031d47bb851 100644 --- a/python/pyspark/sql/connect/proto/expressions_pb2.pyi +++ b/python/pyspark/sql/connect/proto/expressions_pb2.pyi @@ -40,6 +40,7 @@ import google.protobuf.descriptor import google.protobuf.internal.containers import google.protobuf.internal.enum_type_wrapper import google.protobuf.message +import pyspark.sql.connect.proto.common_pb2 import pyspark.sql.connect.proto.types_pb2 import sys import typing @@ -1163,6 +1164,7 @@ class 
Expression(google.protobuf.message.Message): self, field_name: typing_extensions.Literal["name_parts", b"name_parts"] ) -> None: ... + COMMON_FIELD_NUMBER: builtins.int LITERAL_FIELD_NUMBER: builtins.int UNRESOLVED_ATTRIBUTE_FIELD_NUMBER: builtins.int UNRESOLVED_FUNCTION_FIELD_NUMBER: builtins.int @@ -1182,6 +1184,8 @@ class Expression(google.protobuf.message.Message): NAMED_ARGUMENT_EXPRESSION_FIELD_NUMBER: builtins.int EXTENSION_FIELD_NUMBER: builtins.int @property + def common(self) -> global___ExpressionCommon: ... + @property def literal(self) -> global___Expression.Literal: ... @property def unresolved_attribute(self) -> global___Expression.UnresolvedAttribute: ... @@ -1225,6 +1229,7 @@ class Expression(google.protobuf.message.Message): def __init__( self, *, + common: global___ExpressionCommon | None = ..., literal: global___Expression.Literal | None = ..., unresolved_attribute: global___Expression.UnresolvedAttribute | None = ..., unresolved_function: global___Expression.UnresolvedFunction | None = ..., @@ -1254,6 +1259,8 @@ class Expression(google.protobuf.message.Message): b"call_function", "cast", b"cast", + "common", + b"common", "common_inline_user_defined_function", b"common_inline_user_defined_function", "expr_type", @@ -1297,6 +1304,8 @@ class Expression(google.protobuf.message.Message): b"call_function", "cast", b"cast", + "common", + b"common", "common_inline_user_defined_function", b"common_inline_user_defined_function", "expr_type", @@ -1359,6 +1368,25 @@ class Expression(google.protobuf.message.Message): global___Expression = Expression +class ExpressionCommon(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + ORIGIN_FIELD_NUMBER: builtins.int + @property + def origin(self) -> pyspark.sql.connect.proto.common_pb2.Origin: + """(Required) Keep the information of the origin for this expression such as stacktrace.""" + def __init__( + self, + *, + origin: pyspark.sql.connect.proto.common_pb2.Origin | None = ..., + ) -> None: ... + def HasField( + self, field_name: typing_extensions.Literal["origin", b"origin"] + ) -> builtins.bool: ... + def ClearField(self, field_name: typing_extensions.Literal["origin", b"origin"]) -> None: ... + +global___ExpressionCommon = ExpressionCommon + class CommonInlineUserDefinedFunction(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor @@ -1438,6 +1466,7 @@ class PythonUDF(google.protobuf.message.Message): EVAL_TYPE_FIELD_NUMBER: builtins.int COMMAND_FIELD_NUMBER: builtins.int PYTHON_VER_FIELD_NUMBER: builtins.int + ADDITIONAL_INCLUDES_FIELD_NUMBER: builtins.int @property def output_type(self) -> pyspark.sql.connect.proto.types_pb2.DataType: """(Required) Output type of the Python UDF""" @@ -1447,6 +1476,11 @@ class PythonUDF(google.protobuf.message.Message): """(Required) The encoded commands of the Python UDF""" python_ver: builtins.str """(Required) Python version being used in the client.""" + @property + def additional_includes( + self, + ) -> google.protobuf.internal.containers.RepeatedScalarFieldContainer[builtins.str]: + """(Optional) Additional includes for the Python UDF.""" def __init__( self, *, @@ -1454,6 +1488,7 @@ class PythonUDF(google.protobuf.message.Message): eval_type: builtins.int = ..., command: builtins.bytes = ..., python_ver: builtins.str = ..., + additional_includes: collections.abc.Iterable[builtins.str] | None = ..., ) -> None: ... 
def HasField( self, field_name: typing_extensions.Literal["output_type", b"output_type"] @@ -1461,6 +1496,8 @@ class PythonUDF(google.protobuf.message.Message): def ClearField( self, field_name: typing_extensions.Literal[ + "additional_includes", + b"additional_includes", "command", b"command", "eval_type", @@ -1481,6 +1518,7 @@ class ScalarScalaUDF(google.protobuf.message.Message): INPUTTYPES_FIELD_NUMBER: builtins.int OUTPUTTYPE_FIELD_NUMBER: builtins.int NULLABLE_FIELD_NUMBER: builtins.int + AGGREGATE_FIELD_NUMBER: builtins.int payload: builtins.bytes """(Required) Serialized JVM object containing UDF definition, input encoders and output encoder""" @property @@ -1495,6 +1533,8 @@ class ScalarScalaUDF(google.protobuf.message.Message): """(Required) Output type of the UDF""" nullable: builtins.bool """(Required) True if the UDF can return null value""" + aggregate: builtins.bool + """(Required) Indicate if the UDF is an aggregate function""" def __init__( self, *, @@ -1503,6 +1543,7 @@ class ScalarScalaUDF(google.protobuf.message.Message): | None = ..., outputType: pyspark.sql.connect.proto.types_pb2.DataType | None = ..., nullable: builtins.bool = ..., + aggregate: builtins.bool = ..., ) -> None: ... def HasField( self, field_name: typing_extensions.Literal["outputType", b"outputType"] @@ -1510,6 +1551,8 @@ class ScalarScalaUDF(google.protobuf.message.Message): def ClearField( self, field_name: typing_extensions.Literal[ + "aggregate", + b"aggregate", "inputTypes", b"inputTypes", "nullable", diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py index 467d0610bbc60..9f4d1e717a28d 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.py +++ b/python/pyspark/sql/connect/proto/relations_pb2.py @@ -36,7 +36,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto"\xe9\x1a\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 
\x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! \x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x37\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00R\x08\x61sOfJoin\x12\x85\x01\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R!commonInlineUserDefinedDataSource\x12\x45\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00R\rwithRelations\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k 
\x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"[\n\x0eRelationCommon\x12\x1f\n\x0bsource_info\x18\x01 \x01(\tR\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id"\xde\x03\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12O\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntryR\x0enamedArguments\x12>\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cposArguments\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"u\n\rWithRelations\x12+\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04root\x12\x37\n\nreferences\x18\x02 \x03(\x0b\x32\x17.spark.connect.RelationR\nreferences"\x97\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x95\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 
\x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xfe\x05\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x12J\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSetsR\x0cgroupingSets\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1aL\n\x0cGroupingSets\x12<\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0bgroupingSet"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 
\x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 \x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 
\x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xfe\x02\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12i\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01R\x10renameColumnsMap\x12\x42\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.RenameR\x07renames\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x45\n\x06Rename\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12 \n\x0cnew_col_name\x18\x02 \x01(\tR\nnewColName"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xe8\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x12"\n\nprofile_id\x18\x04 \x01(\x05H\x01R\tprofileId\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id"\xfb\x04\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_conf"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 
\x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x97\x01\n!CommonInlineUserDefinedDataSource\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12O\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00R\x10pythonDataSourceB\r\n\x0b\x64\x61ta_source"K\n\x10PythonDataSource\x12\x18\n\x07\x63ommand\x18\x01 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x02 \x01(\tR\tpythonVer"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x84\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schema"\xdb\x03\n\x08\x41sOfJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12\x37\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08leftAsOf\x12\x39\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.ExpressionR\trightAsOf\x12\x36\n\tjoin_expr\x18\x05 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08joinExpr\x12#\n\rusing_columns\x18\x06 \x03(\tR\x0cusingColumns\x12\x1b\n\tjoin_type\x18\x07 \x01(\tR\x08joinType\x12\x37\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\ttolerance\x12.\n\x13\x61llow_exact_matches\x18\t \x01(\x08R\x11\x61llowExactMatches\x12\x1c\n\tdirection\x18\n \x01(\tR\tdirectionB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto\x1a\x1aspark/connect/common.proto"\xe9\x1a\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b 
\x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! 
\x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x37\n\nas_of_join\x18\' \x01(\x0b\x32\x17.spark.connect.AsOfJoinH\x00R\x08\x61sOfJoin\x12\x85\x01\n&common_inline_user_defined_data_source\x18( \x01(\x0b\x32\x30.spark.connect.CommonInlineUserDefinedDataSourceH\x00R!commonInlineUserDefinedDataSource\x12\x45\n\x0ewith_relations\x18) \x01(\x0b\x32\x1c.spark.connect.WithRelationsH\x00R\rwithRelations\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"\x8e\x01\n\x0eRelationCommon\x12#\n\x0bsource_info\x18\x01 \x01(\tB\x02\x18\x01R\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x12-\n\x06origin\x18\x03 \x01(\x0b\x32\x15.spark.connect.OriginR\x06originB\n\n\x08_plan_id"\xde\x03\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x34\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryB\x02\x18\x01R\x04\x61rgs\x12@\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralB\x02\x18\x01R\x07posArgs\x12O\n\x0fnamed_arguments\x18\x04 \x03(\x0b\x32&.spark.connect.SQL.NamedArgumentsEntryR\x0enamedArguments\x12>\n\rpos_arguments\x18\x05 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0cposArguments\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01\x1a\\\n\x13NamedArgumentsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12/\n\x05value\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x05value:\x02\x38\x01"u\n\rWithRelations\x12+\n\x04root\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04root\x12\x37\n\nreferences\x18\x02 
\x03(\x0b\x32\x17.spark.connect.RelationR\nreferences"\x97\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x95\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xfe\x05\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x12J\n\rgrouping_sets\x18\x06 \x03(\x0b\x32%.spark.connect.Aggregate.GroupingSetsR\x0cgroupingSets\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values\x1aL\n\x0cGroupingSets\x12<\n\x0cgrouping_set\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0bgroupingSet"\x9f\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04\x12\x1c\n\x18GROUP_TYPE_GROUPING_SETS\x10\x05"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 \x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 
\x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xfe\x02\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12i\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryB\x02\x18\x01R\x10renameColumnsMap\x12\x42\n\x07renames\x18\x03 \x03(\x0b\x32(.spark.connect.WithColumnsRenamed.RenameR\x07renames\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x45\n\x06Rename\x12\x19\n\x08\x63ol_name\x18\x01 \x01(\tR\x07\x63olName\x12 \n\x0cnew_col_name\x18\x02 \x01(\tR\nnewColName"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xe8\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x12"\n\nprofile_id\x18\x04 \x01(\x05H\x01R\tprofileId\x88\x01\x01\x42\r\n\x0b_is_barrierB\r\n\x0b_profile_id"\xfb\x04\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_conf"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x97\x01\n!CommonInlineUserDefinedDataSource\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12O\n\x12python_data_source\x18\x02 \x01(\x0b\x32\x1f.spark.connect.PythonDataSourceH\x00R\x10pythonDataSourceB\r\n\x0b\x64\x61ta_source"K\n\x10PythonDataSource\x12\x18\n\x07\x63ommand\x18\x01 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x02 \x01(\tR\tpythonVer"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x84\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 
\x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schema"\xdb\x03\n\x08\x41sOfJoin\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12\x37\n\nleft_as_of\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08leftAsOf\x12\x39\n\x0bright_as_of\x18\x04 \x01(\x0b\x32\x19.spark.connect.ExpressionR\trightAsOf\x12\x36\n\tjoin_expr\x18\x05 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x08joinExpr\x12#\n\rusing_columns\x18\x06 \x03(\tR\x0cusingColumns\x12\x1b\n\tjoin_type\x18\x07 \x01(\tR\x08joinType\x12\x37\n\ttolerance\x18\x08 \x01(\x0b\x32\x19.spark.connect.ExpressionR\ttolerance\x12.\n\x13\x61llow_exact_matches\x18\t \x01(\x08R\x11\x61llowExactMatches\x12\x1c\n\tdirection\x18\n \x01(\tR\tdirectionB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -48,6 +48,8 @@ DESCRIPTOR._serialized_options = ( b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" ) + _RELATIONCOMMON.fields_by_name["source_info"]._options = None + _RELATIONCOMMON.fields_by_name["source_info"]._serialized_options = b"\030\001" _SQL_ARGSENTRY._options = None _SQL_ARGSENTRY._serialized_options = b"8\001" _SQL_NAMEDARGUMENTSENTRY._options = None @@ -70,150 +72,150 @@ _RELATION._serialized_end = 3626 _UNKNOWN._serialized_start = 3628 _UNKNOWN._serialized_end = 3637 - _RELATIONCOMMON._serialized_start = 3639 - _RELATIONCOMMON._serialized_end = 3730 - _SQL._serialized_start = 3733 - _SQL._serialized_end = 4211 - _SQL_ARGSENTRY._serialized_start = 4027 - _SQL_ARGSENTRY._serialized_end = 4117 - _SQL_NAMEDARGUMENTSENTRY._serialized_start = 4119 - _SQL_NAMEDARGUMENTSENTRY._serialized_end = 4211 - _WITHRELATIONS._serialized_start = 4213 - _WITHRELATIONS._serialized_end = 4330 - _READ._serialized_start = 4333 - _READ._serialized_end = 4996 - _READ_NAMEDTABLE._serialized_start = 4511 - _READ_NAMEDTABLE._serialized_end = 4703 - _READ_NAMEDTABLE_OPTIONSENTRY._serialized_start = 4645 - _READ_NAMEDTABLE_OPTIONSENTRY._serialized_end = 4703 - _READ_DATASOURCE._serialized_start = 4706 - _READ_DATASOURCE._serialized_end = 4983 - _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 4645 - _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 4703 - _PROJECT._serialized_start = 4998 - _PROJECT._serialized_end = 5115 - _FILTER._serialized_start = 5117 - _FILTER._serialized_end = 5229 - _JOIN._serialized_start = 5232 - _JOIN._serialized_end = 5893 - _JOIN_JOINDATATYPE._serialized_start = 5571 - _JOIN_JOINDATATYPE._serialized_end = 5663 - _JOIN_JOINTYPE._serialized_start = 5666 - _JOIN_JOINTYPE._serialized_end = 5874 - _SETOPERATION._serialized_start = 5896 - _SETOPERATION._serialized_end = 6375 - _SETOPERATION_SETOPTYPE._serialized_start = 6212 - _SETOPERATION_SETOPTYPE._serialized_end = 6326 - _LIMIT._serialized_start = 6377 - _LIMIT._serialized_end = 6453 - _OFFSET._serialized_start = 6455 - _OFFSET._serialized_end = 6534 - _TAIL._serialized_start = 6536 - _TAIL._serialized_end = 6611 - _AGGREGATE._serialized_start = 6614 - _AGGREGATE._serialized_end = 7380 - _AGGREGATE_PIVOT._serialized_start = 7029 - _AGGREGATE_PIVOT._serialized_end = 7140 - _AGGREGATE_GROUPINGSETS._serialized_start = 7142 - _AGGREGATE_GROUPINGSETS._serialized_end = 7218 - _AGGREGATE_GROUPTYPE._serialized_start 
= 7221 - _AGGREGATE_GROUPTYPE._serialized_end = 7380 - _SORT._serialized_start = 7383 - _SORT._serialized_end = 7543 - _DROP._serialized_start = 7546 - _DROP._serialized_end = 7687 - _DEDUPLICATE._serialized_start = 7690 - _DEDUPLICATE._serialized_end = 7930 - _LOCALRELATION._serialized_start = 7932 - _LOCALRELATION._serialized_end = 8021 - _CACHEDLOCALRELATION._serialized_start = 8023 - _CACHEDLOCALRELATION._serialized_end = 8095 - _CACHEDREMOTERELATION._serialized_start = 8097 - _CACHEDREMOTERELATION._serialized_end = 8152 - _SAMPLE._serialized_start = 8155 - _SAMPLE._serialized_end = 8428 - _RANGE._serialized_start = 8431 - _RANGE._serialized_end = 8576 - _SUBQUERYALIAS._serialized_start = 8578 - _SUBQUERYALIAS._serialized_end = 8692 - _REPARTITION._serialized_start = 8695 - _REPARTITION._serialized_end = 8837 - _SHOWSTRING._serialized_start = 8840 - _SHOWSTRING._serialized_end = 8982 - _HTMLSTRING._serialized_start = 8984 - _HTMLSTRING._serialized_end = 9098 - _STATSUMMARY._serialized_start = 9100 - _STATSUMMARY._serialized_end = 9192 - _STATDESCRIBE._serialized_start = 9194 - _STATDESCRIBE._serialized_end = 9275 - _STATCROSSTAB._serialized_start = 9277 - _STATCROSSTAB._serialized_end = 9378 - _STATCOV._serialized_start = 9380 - _STATCOV._serialized_end = 9476 - _STATCORR._serialized_start = 9479 - _STATCORR._serialized_end = 9616 - _STATAPPROXQUANTILE._serialized_start = 9619 - _STATAPPROXQUANTILE._serialized_end = 9783 - _STATFREQITEMS._serialized_start = 9785 - _STATFREQITEMS._serialized_end = 9910 - _STATSAMPLEBY._serialized_start = 9913 - _STATSAMPLEBY._serialized_end = 10222 - _STATSAMPLEBY_FRACTION._serialized_start = 10114 - _STATSAMPLEBY_FRACTION._serialized_end = 10213 - _NAFILL._serialized_start = 10225 - _NAFILL._serialized_end = 10359 - _NADROP._serialized_start = 10362 - _NADROP._serialized_end = 10496 - _NAREPLACE._serialized_start = 10499 - _NAREPLACE._serialized_end = 10795 - _NAREPLACE_REPLACEMENT._serialized_start = 10654 - _NAREPLACE_REPLACEMENT._serialized_end = 10795 - _TODF._serialized_start = 10797 - _TODF._serialized_end = 10885 - _WITHCOLUMNSRENAMED._serialized_start = 10888 - _WITHCOLUMNSRENAMED._serialized_end = 11270 - _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 11132 - _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 11199 - _WITHCOLUMNSRENAMED_RENAME._serialized_start = 11201 - _WITHCOLUMNSRENAMED_RENAME._serialized_end = 11270 - _WITHCOLUMNS._serialized_start = 11272 - _WITHCOLUMNS._serialized_end = 11391 - _WITHWATERMARK._serialized_start = 11394 - _WITHWATERMARK._serialized_end = 11528 - _HINT._serialized_start = 11531 - _HINT._serialized_end = 11663 - _UNPIVOT._serialized_start = 11666 - _UNPIVOT._serialized_end = 11993 - _UNPIVOT_VALUES._serialized_start = 11923 - _UNPIVOT_VALUES._serialized_end = 11982 - _TOSCHEMA._serialized_start = 11995 - _TOSCHEMA._serialized_end = 12101 - _REPARTITIONBYEXPRESSION._serialized_start = 12104 - _REPARTITIONBYEXPRESSION._serialized_end = 12307 - _MAPPARTITIONS._serialized_start = 12310 - _MAPPARTITIONS._serialized_end = 12542 - _GROUPMAP._serialized_start = 12545 - _GROUPMAP._serialized_end = 13180 - _COGROUPMAP._serialized_start = 13183 - _COGROUPMAP._serialized_end = 13709 - _APPLYINPANDASWITHSTATE._serialized_start = 13712 - _APPLYINPANDASWITHSTATE._serialized_end = 14069 - _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_start = 14072 - _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_end = 14316 - _PYTHONUDTF._serialized_start = 14319 - _PYTHONUDTF._serialized_end = 14496 - 
_COMMONINLINEUSERDEFINEDDATASOURCE._serialized_start = 14499 - _COMMONINLINEUSERDEFINEDDATASOURCE._serialized_end = 14650 - _PYTHONDATASOURCE._serialized_start = 14652 - _PYTHONDATASOURCE._serialized_end = 14727 - _COLLECTMETRICS._serialized_start = 14730 - _COLLECTMETRICS._serialized_end = 14866 - _PARSE._serialized_start = 14869 - _PARSE._serialized_end = 15257 - _PARSE_OPTIONSENTRY._serialized_start = 4645 - _PARSE_OPTIONSENTRY._serialized_end = 4703 - _PARSE_PARSEFORMAT._serialized_start = 15158 - _PARSE_PARSEFORMAT._serialized_end = 15246 - _ASOFJOIN._serialized_start = 15260 - _ASOFJOIN._serialized_end = 15735 + _RELATIONCOMMON._serialized_start = 3640 + _RELATIONCOMMON._serialized_end = 3782 + _SQL._serialized_start = 3785 + _SQL._serialized_end = 4263 + _SQL_ARGSENTRY._serialized_start = 4079 + _SQL_ARGSENTRY._serialized_end = 4169 + _SQL_NAMEDARGUMENTSENTRY._serialized_start = 4171 + _SQL_NAMEDARGUMENTSENTRY._serialized_end = 4263 + _WITHRELATIONS._serialized_start = 4265 + _WITHRELATIONS._serialized_end = 4382 + _READ._serialized_start = 4385 + _READ._serialized_end = 5048 + _READ_NAMEDTABLE._serialized_start = 4563 + _READ_NAMEDTABLE._serialized_end = 4755 + _READ_NAMEDTABLE_OPTIONSENTRY._serialized_start = 4697 + _READ_NAMEDTABLE_OPTIONSENTRY._serialized_end = 4755 + _READ_DATASOURCE._serialized_start = 4758 + _READ_DATASOURCE._serialized_end = 5035 + _READ_DATASOURCE_OPTIONSENTRY._serialized_start = 4697 + _READ_DATASOURCE_OPTIONSENTRY._serialized_end = 4755 + _PROJECT._serialized_start = 5050 + _PROJECT._serialized_end = 5167 + _FILTER._serialized_start = 5169 + _FILTER._serialized_end = 5281 + _JOIN._serialized_start = 5284 + _JOIN._serialized_end = 5945 + _JOIN_JOINDATATYPE._serialized_start = 5623 + _JOIN_JOINDATATYPE._serialized_end = 5715 + _JOIN_JOINTYPE._serialized_start = 5718 + _JOIN_JOINTYPE._serialized_end = 5926 + _SETOPERATION._serialized_start = 5948 + _SETOPERATION._serialized_end = 6427 + _SETOPERATION_SETOPTYPE._serialized_start = 6264 + _SETOPERATION_SETOPTYPE._serialized_end = 6378 + _LIMIT._serialized_start = 6429 + _LIMIT._serialized_end = 6505 + _OFFSET._serialized_start = 6507 + _OFFSET._serialized_end = 6586 + _TAIL._serialized_start = 6588 + _TAIL._serialized_end = 6663 + _AGGREGATE._serialized_start = 6666 + _AGGREGATE._serialized_end = 7432 + _AGGREGATE_PIVOT._serialized_start = 7081 + _AGGREGATE_PIVOT._serialized_end = 7192 + _AGGREGATE_GROUPINGSETS._serialized_start = 7194 + _AGGREGATE_GROUPINGSETS._serialized_end = 7270 + _AGGREGATE_GROUPTYPE._serialized_start = 7273 + _AGGREGATE_GROUPTYPE._serialized_end = 7432 + _SORT._serialized_start = 7435 + _SORT._serialized_end = 7595 + _DROP._serialized_start = 7598 + _DROP._serialized_end = 7739 + _DEDUPLICATE._serialized_start = 7742 + _DEDUPLICATE._serialized_end = 7982 + _LOCALRELATION._serialized_start = 7984 + _LOCALRELATION._serialized_end = 8073 + _CACHEDLOCALRELATION._serialized_start = 8075 + _CACHEDLOCALRELATION._serialized_end = 8147 + _CACHEDREMOTERELATION._serialized_start = 8149 + _CACHEDREMOTERELATION._serialized_end = 8204 + _SAMPLE._serialized_start = 8207 + _SAMPLE._serialized_end = 8480 + _RANGE._serialized_start = 8483 + _RANGE._serialized_end = 8628 + _SUBQUERYALIAS._serialized_start = 8630 + _SUBQUERYALIAS._serialized_end = 8744 + _REPARTITION._serialized_start = 8747 + _REPARTITION._serialized_end = 8889 + _SHOWSTRING._serialized_start = 8892 + _SHOWSTRING._serialized_end = 9034 + _HTMLSTRING._serialized_start = 9036 + _HTMLSTRING._serialized_end = 9150 + 
_STATSUMMARY._serialized_start = 9152 + _STATSUMMARY._serialized_end = 9244 + _STATDESCRIBE._serialized_start = 9246 + _STATDESCRIBE._serialized_end = 9327 + _STATCROSSTAB._serialized_start = 9329 + _STATCROSSTAB._serialized_end = 9430 + _STATCOV._serialized_start = 9432 + _STATCOV._serialized_end = 9528 + _STATCORR._serialized_start = 9531 + _STATCORR._serialized_end = 9668 + _STATAPPROXQUANTILE._serialized_start = 9671 + _STATAPPROXQUANTILE._serialized_end = 9835 + _STATFREQITEMS._serialized_start = 9837 + _STATFREQITEMS._serialized_end = 9962 + _STATSAMPLEBY._serialized_start = 9965 + _STATSAMPLEBY._serialized_end = 10274 + _STATSAMPLEBY_FRACTION._serialized_start = 10166 + _STATSAMPLEBY_FRACTION._serialized_end = 10265 + _NAFILL._serialized_start = 10277 + _NAFILL._serialized_end = 10411 + _NADROP._serialized_start = 10414 + _NADROP._serialized_end = 10548 + _NAREPLACE._serialized_start = 10551 + _NAREPLACE._serialized_end = 10847 + _NAREPLACE_REPLACEMENT._serialized_start = 10706 + _NAREPLACE_REPLACEMENT._serialized_end = 10847 + _TODF._serialized_start = 10849 + _TODF._serialized_end = 10937 + _WITHCOLUMNSRENAMED._serialized_start = 10940 + _WITHCOLUMNSRENAMED._serialized_end = 11322 + _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 11184 + _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 11251 + _WITHCOLUMNSRENAMED_RENAME._serialized_start = 11253 + _WITHCOLUMNSRENAMED_RENAME._serialized_end = 11322 + _WITHCOLUMNS._serialized_start = 11324 + _WITHCOLUMNS._serialized_end = 11443 + _WITHWATERMARK._serialized_start = 11446 + _WITHWATERMARK._serialized_end = 11580 + _HINT._serialized_start = 11583 + _HINT._serialized_end = 11715 + _UNPIVOT._serialized_start = 11718 + _UNPIVOT._serialized_end = 12045 + _UNPIVOT_VALUES._serialized_start = 11975 + _UNPIVOT_VALUES._serialized_end = 12034 + _TOSCHEMA._serialized_start = 12047 + _TOSCHEMA._serialized_end = 12153 + _REPARTITIONBYEXPRESSION._serialized_start = 12156 + _REPARTITIONBYEXPRESSION._serialized_end = 12359 + _MAPPARTITIONS._serialized_start = 12362 + _MAPPARTITIONS._serialized_end = 12594 + _GROUPMAP._serialized_start = 12597 + _GROUPMAP._serialized_end = 13232 + _COGROUPMAP._serialized_start = 13235 + _COGROUPMAP._serialized_end = 13761 + _APPLYINPANDASWITHSTATE._serialized_start = 13764 + _APPLYINPANDASWITHSTATE._serialized_end = 14121 + _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_start = 14124 + _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_end = 14368 + _PYTHONUDTF._serialized_start = 14371 + _PYTHONUDTF._serialized_end = 14548 + _COMMONINLINEUSERDEFINEDDATASOURCE._serialized_start = 14551 + _COMMONINLINEUSERDEFINEDDATASOURCE._serialized_end = 14702 + _PYTHONDATASOURCE._serialized_start = 14704 + _PYTHONDATASOURCE._serialized_end = 14779 + _COLLECTMETRICS._serialized_start = 14782 + _COLLECTMETRICS._serialized_end = 14918 + _PARSE._serialized_start = 14921 + _PARSE._serialized_end = 15309 + _PARSE_OPTIONSENTRY._serialized_start = 4697 + _PARSE_OPTIONSENTRY._serialized_end = 4755 + _PARSE_PARSEFORMAT._serialized_start = 15210 + _PARSE_PARSEFORMAT._serialized_end = 15298 + _ASOFJOIN._serialized_start = 15312 + _ASOFJOIN._serialized_end = 15787 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi index 5dfb47da67a97..864803fd33084 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.pyi +++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi @@ -41,6 +41,7 @@ import 
google.protobuf.internal.containers import google.protobuf.internal.enum_type_wrapper import google.protobuf.message import pyspark.sql.connect.proto.catalog_pb2 +import pyspark.sql.connect.proto.common_pb2 import pyspark.sql.connect.proto.expressions_pb2 import pyspark.sql.connect.proto.types_pb2 import sys @@ -614,23 +615,38 @@ class RelationCommon(google.protobuf.message.Message): SOURCE_INFO_FIELD_NUMBER: builtins.int PLAN_ID_FIELD_NUMBER: builtins.int + ORIGIN_FIELD_NUMBER: builtins.int source_info: builtins.str """(Required) Shared relation metadata.""" plan_id: builtins.int """(Optional) A per-client globally unique id for a given connect plan.""" + @property + def origin(self) -> pyspark.sql.connect.proto.common_pb2.Origin: + """(Optional) Keep the information of the origin for this expression such as stacktrace.""" def __init__( self, *, source_info: builtins.str = ..., plan_id: builtins.int | None = ..., + origin: pyspark.sql.connect.proto.common_pb2.Origin | None = ..., ) -> None: ... def HasField( - self, field_name: typing_extensions.Literal["_plan_id", b"_plan_id", "plan_id", b"plan_id"] + self, + field_name: typing_extensions.Literal[ + "_plan_id", b"_plan_id", "origin", b"origin", "plan_id", b"plan_id" + ], ) -> builtins.bool: ... def ClearField( self, field_name: typing_extensions.Literal[ - "_plan_id", b"_plan_id", "plan_id", b"plan_id", "source_info", b"source_info" + "_plan_id", + b"_plan_id", + "origin", + b"origin", + "plan_id", + b"plan_id", + "source_info", + b"source_info", ], ) -> None: ... def WhichOneof( @@ -1865,7 +1881,10 @@ class Sample(google.protobuf.message.Message): with_replacement: builtins.bool """(Optional) Whether to sample with replacement.""" seed: builtins.int - """(Optional) The random seed.""" + """(Required) The random seed. + This field is required to avoid generating mutable dataframes (see SPARK-48184 for details), + however, still keep it 'optional' here for backward compatibility. + """ deterministic_order: builtins.bool """(Required) Explicitly sort the underlying plan to make the ordering deterministic or cache it. This flag is true when invoking `dataframe.randomSplit` to randomly splits DataFrame with the @@ -2545,7 +2564,10 @@ class StatSampleBy(google.protobuf.message.Message): If a stratum is not specified, we treat its fraction as zero. """ seed: builtins.int - """(Optional) The random seed.""" + """(Required) The random seed. + This field is required to avoid generating mutable dataframes (see SPARK-48184 for details), + however, still keep it 'optional' here for backward compatibility. 
+ """ def __init__( self, *, diff --git a/python/pyspark/sql/connect/proto/types_pb2.py b/python/pyspark/sql/connect/proto/types_pb2.py index 65e5860b5dc60..1022605fb160d 100644 --- a/python/pyspark/sql/connect/proto/types_pb2.py +++ b/python/pyspark/sql/connect/proto/types_pb2.py @@ -29,7 +29,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xec!\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01 \x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02 \x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03 \x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04 \x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05 \x01(\x0b\x32\x1d.spark.connect.DataType.ShortH\x00R\x05short\x12;\n\x07integer\x18\x06 \x01(\x0b\x32\x1f.spark.connect.DataType.IntegerH\x00R\x07integer\x12\x32\n\x04long\x18\x07 \x01(\x0b\x32\x1c.spark.connect.DataType.LongH\x00R\x04long\x12\x35\n\x05\x66loat\x18\x08 \x01(\x0b\x32\x1d.spark.connect.DataType.FloatH\x00R\x05\x66loat\x12\x38\n\x06\x64ouble\x18\t \x01(\x0b\x32\x1e.spark.connect.DataType.DoubleH\x00R\x06\x64ouble\x12;\n\x07\x64\x65\x63imal\x18\n \x01(\x0b\x32\x1f.spark.connect.DataType.DecimalH\x00R\x07\x64\x65\x63imal\x12\x38\n\x06string\x18\x0b \x01(\x0b\x32\x1e.spark.connect.DataType.StringH\x00R\x06string\x12\x32\n\x04\x63har\x18\x0c \x01(\x0b\x32\x1c.spark.connect.DataType.CharH\x00R\x04\x63har\x12<\n\x08var_char\x18\r \x01(\x0b\x32\x1f.spark.connect.DataType.VarCharH\x00R\x07varChar\x12\x32\n\x04\x64\x61te\x18\x0e \x01(\x0b\x32\x1c.spark.connect.DataType.DateH\x00R\x04\x64\x61te\x12\x41\n\ttimestamp\x18\x0f \x01(\x0b\x32!.spark.connect.DataType.TimestampH\x00R\ttimestamp\x12K\n\rtimestamp_ntz\x18\x10 \x01(\x0b\x32$.spark.connect.DataType.TimestampNTZH\x00R\x0ctimestampNtz\x12W\n\x11\x63\x61lendar_interval\x18\x11 \x01(\x0b\x32(.spark.connect.DataType.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12[\n\x13year_month_interval\x18\x12 \x01(\x0b\x32).spark.connect.DataType.YearMonthIntervalH\x00R\x11yearMonthInterval\x12U\n\x11\x64\x61y_time_interval\x18\x13 \x01(\x0b\x32'.spark.connect.DataType.DayTimeIntervalH\x00R\x0f\x64\x61yTimeInterval\x12\x35\n\x05\x61rray\x18\x14 \x01(\x0b\x32\x1d.spark.connect.DataType.ArrayH\x00R\x05\x61rray\x12\x38\n\x06struct\x18\x15 \x01(\x0b\x32\x1e.spark.connect.DataType.StructH\x00R\x06struct\x12/\n\x03map\x18\x16 \x01(\x0b\x32\x1b.spark.connect.DataType.MapH\x00R\x03map\x12;\n\x07variant\x18\x19 \x01(\x0b\x32\x1f.spark.connect.DataType.VariantH\x00R\x07variant\x12/\n\x03udt\x18\x17 \x01(\x0b\x32\x1b.spark.connect.DataType.UDTH\x00R\x03udt\x12>\n\x08unparsed\x18\x18 \x01(\x0b\x32 .spark.connect.DataType.UnparsedH\x00R\x08unparsed\x1a\x43\n\x07\x42oolean\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04\x42yte\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x41\n\x05Short\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x43\n\x07Integer\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04Long\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x41\n\x05\x46loat\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x42\n\x06\x44ouble\x12\x38\n\x18type_variation_reference\x18\x01 
\x01(\rR\x16typeVariationReference\x1a\x65\n\x06String\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x12!\n\x0c\x63ollation_id\x18\x02 \x01(\rR\x0b\x63ollationId\x1a\x42\n\x06\x42inary\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04NULL\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x45\n\tTimestamp\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04\x44\x61te\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1aH\n\x0cTimestampNTZ\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1aL\n\x10\x43\x61lendarInterval\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\xb3\x01\n\x11YearMonthInterval\x12$\n\x0bstart_field\x18\x01 \x01(\x05H\x00R\nstartField\x88\x01\x01\x12 \n\tend_field\x18\x02 \x01(\x05H\x01R\x08\x65ndField\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1a\xb1\x01\n\x0f\x44\x61yTimeInterval\x12$\n\x0bstart_field\x18\x01 \x01(\x05H\x00R\nstartField\x88\x01\x01\x12 \n\tend_field\x18\x02 \x01(\x05H\x01R\x08\x65ndField\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1aX\n\x04\x43har\x12\x16\n\x06length\x18\x01 \x01(\x05R\x06length\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a[\n\x07VarChar\x12\x16\n\x06length\x18\x01 \x01(\x05R\x06length\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a\x99\x01\n\x07\x44\x65\x63imal\x12\x19\n\x05scale\x18\x01 \x01(\x05H\x00R\x05scale\x88\x01\x01\x12!\n\tprecision\x18\x02 \x01(\x05H\x01R\tprecision\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x08\n\x06_scaleB\x0c\n\n_precision\x1a\xa1\x01\n\x0bStructField\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x34\n\tdata_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x08\x64\x61taType\x12\x1a\n\x08nullable\x18\x03 \x01(\x08R\x08nullable\x12\x1f\n\x08metadata\x18\x04 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x7f\n\x06Struct\x12;\n\x06\x66ields\x18\x01 \x03(\x0b\x32#.spark.connect.DataType.StructFieldR\x06\x66ields\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a\xa2\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12#\n\rcontains_null\x18\x02 \x01(\x08R\x0c\x63ontainsNull\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReference\x1a\xdb\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12.\n\x13value_contains_null\x18\x03 \x01(\x08R\x11valueContainsNull\x12\x38\n\x18type_variation_reference\x18\x04 \x01(\rR\x16typeVariationReference\x1a\x43\n\x07Variant\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x8f\x02\n\x03UDT\x12\x12\n\x04type\x18\x01 \x01(\tR\x04type\x12 \n\tjvm_class\x18\x02 \x01(\tH\x00R\x08jvmClass\x88\x01\x01\x12&\n\x0cpython_class\x18\x03 \x01(\tH\x01R\x0bpythonClass\x88\x01\x01\x12;\n\x17serialized_python_class\x18\x04 \x01(\tH\x02R\x15serializedPythonClass\x88\x01\x01\x12\x32\n\x08sql_type\x18\x05 
\x01(\x0b\x32\x17.spark.connect.DataTypeR\x07sqlTypeB\x0c\n\n_jvm_classB\x0f\n\r_python_classB\x1a\n\x18_serialized_python_class\x1a\x34\n\x08Unparsed\x12(\n\x10\x64\x61ta_type_string\x18\x01 \x01(\tR\x0e\x64\x61taTypeStringB\x06\n\x04kindB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3" + b"\n\x19spark/connect/types.proto\x12\rspark.connect\"\xe7!\n\x08\x44\x61taType\x12\x32\n\x04null\x18\x01 \x01(\x0b\x32\x1c.spark.connect.DataType.NULLH\x00R\x04null\x12\x38\n\x06\x62inary\x18\x02 \x01(\x0b\x32\x1e.spark.connect.DataType.BinaryH\x00R\x06\x62inary\x12;\n\x07\x62oolean\x18\x03 \x01(\x0b\x32\x1f.spark.connect.DataType.BooleanH\x00R\x07\x62oolean\x12\x32\n\x04\x62yte\x18\x04 \x01(\x0b\x32\x1c.spark.connect.DataType.ByteH\x00R\x04\x62yte\x12\x35\n\x05short\x18\x05 \x01(\x0b\x32\x1d.spark.connect.DataType.ShortH\x00R\x05short\x12;\n\x07integer\x18\x06 \x01(\x0b\x32\x1f.spark.connect.DataType.IntegerH\x00R\x07integer\x12\x32\n\x04long\x18\x07 \x01(\x0b\x32\x1c.spark.connect.DataType.LongH\x00R\x04long\x12\x35\n\x05\x66loat\x18\x08 \x01(\x0b\x32\x1d.spark.connect.DataType.FloatH\x00R\x05\x66loat\x12\x38\n\x06\x64ouble\x18\t \x01(\x0b\x32\x1e.spark.connect.DataType.DoubleH\x00R\x06\x64ouble\x12;\n\x07\x64\x65\x63imal\x18\n \x01(\x0b\x32\x1f.spark.connect.DataType.DecimalH\x00R\x07\x64\x65\x63imal\x12\x38\n\x06string\x18\x0b \x01(\x0b\x32\x1e.spark.connect.DataType.StringH\x00R\x06string\x12\x32\n\x04\x63har\x18\x0c \x01(\x0b\x32\x1c.spark.connect.DataType.CharH\x00R\x04\x63har\x12<\n\x08var_char\x18\r \x01(\x0b\x32\x1f.spark.connect.DataType.VarCharH\x00R\x07varChar\x12\x32\n\x04\x64\x61te\x18\x0e \x01(\x0b\x32\x1c.spark.connect.DataType.DateH\x00R\x04\x64\x61te\x12\x41\n\ttimestamp\x18\x0f \x01(\x0b\x32!.spark.connect.DataType.TimestampH\x00R\ttimestamp\x12K\n\rtimestamp_ntz\x18\x10 \x01(\x0b\x32$.spark.connect.DataType.TimestampNTZH\x00R\x0ctimestampNtz\x12W\n\x11\x63\x61lendar_interval\x18\x11 \x01(\x0b\x32(.spark.connect.DataType.CalendarIntervalH\x00R\x10\x63\x61lendarInterval\x12[\n\x13year_month_interval\x18\x12 \x01(\x0b\x32).spark.connect.DataType.YearMonthIntervalH\x00R\x11yearMonthInterval\x12U\n\x11\x64\x61y_time_interval\x18\x13 \x01(\x0b\x32'.spark.connect.DataType.DayTimeIntervalH\x00R\x0f\x64\x61yTimeInterval\x12\x35\n\x05\x61rray\x18\x14 \x01(\x0b\x32\x1d.spark.connect.DataType.ArrayH\x00R\x05\x61rray\x12\x38\n\x06struct\x18\x15 \x01(\x0b\x32\x1e.spark.connect.DataType.StructH\x00R\x06struct\x12/\n\x03map\x18\x16 \x01(\x0b\x32\x1b.spark.connect.DataType.MapH\x00R\x03map\x12;\n\x07variant\x18\x19 \x01(\x0b\x32\x1f.spark.connect.DataType.VariantH\x00R\x07variant\x12/\n\x03udt\x18\x17 \x01(\x0b\x32\x1b.spark.connect.DataType.UDTH\x00R\x03udt\x12>\n\x08unparsed\x18\x18 \x01(\x0b\x32 .spark.connect.DataType.UnparsedH\x00R\x08unparsed\x1a\x43\n\x07\x42oolean\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04\x42yte\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x41\n\x05Short\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x43\n\x07Integer\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04Long\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x41\n\x05\x46loat\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x42\n\x06\x44ouble\x12\x38\n\x18type_variation_reference\x18\x01 
\x01(\rR\x16typeVariationReference\x1a`\n\x06String\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x12\x1c\n\tcollation\x18\x02 \x01(\tR\tcollation\x1a\x42\n\x06\x42inary\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04NULL\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x45\n\tTimestamp\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a@\n\x04\x44\x61te\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1aH\n\x0cTimestampNTZ\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1aL\n\x10\x43\x61lendarInterval\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\xb3\x01\n\x11YearMonthInterval\x12$\n\x0bstart_field\x18\x01 \x01(\x05H\x00R\nstartField\x88\x01\x01\x12 \n\tend_field\x18\x02 \x01(\x05H\x01R\x08\x65ndField\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1a\xb1\x01\n\x0f\x44\x61yTimeInterval\x12$\n\x0bstart_field\x18\x01 \x01(\x05H\x00R\nstartField\x88\x01\x01\x12 \n\tend_field\x18\x02 \x01(\x05H\x01R\x08\x65ndField\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x0e\n\x0c_start_fieldB\x0c\n\n_end_field\x1aX\n\x04\x43har\x12\x16\n\x06length\x18\x01 \x01(\x05R\x06length\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a[\n\x07VarChar\x12\x16\n\x06length\x18\x01 \x01(\x05R\x06length\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a\x99\x01\n\x07\x44\x65\x63imal\x12\x19\n\x05scale\x18\x01 \x01(\x05H\x00R\x05scale\x88\x01\x01\x12!\n\tprecision\x18\x02 \x01(\x05H\x01R\tprecision\x88\x01\x01\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReferenceB\x08\n\x06_scaleB\x0c\n\n_precision\x1a\xa1\x01\n\x0bStructField\x12\x12\n\x04name\x18\x01 \x01(\tR\x04name\x12\x34\n\tdata_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x08\x64\x61taType\x12\x1a\n\x08nullable\x18\x03 \x01(\x08R\x08nullable\x12\x1f\n\x08metadata\x18\x04 \x01(\tH\x00R\x08metadata\x88\x01\x01\x42\x0b\n\t_metadata\x1a\x7f\n\x06Struct\x12;\n\x06\x66ields\x18\x01 \x03(\x0b\x32#.spark.connect.DataType.StructFieldR\x06\x66ields\x12\x38\n\x18type_variation_reference\x18\x02 \x01(\rR\x16typeVariationReference\x1a\xa2\x01\n\x05\x41rray\x12:\n\x0c\x65lement_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x0b\x65lementType\x12#\n\rcontains_null\x18\x02 \x01(\x08R\x0c\x63ontainsNull\x12\x38\n\x18type_variation_reference\x18\x03 \x01(\rR\x16typeVariationReference\x1a\xdb\x01\n\x03Map\x12\x32\n\x08key_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x07keyType\x12\x36\n\nvalue_type\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\tvalueType\x12.\n\x13value_contains_null\x18\x03 \x01(\x08R\x11valueContainsNull\x12\x38\n\x18type_variation_reference\x18\x04 \x01(\rR\x16typeVariationReference\x1a\x43\n\x07Variant\x12\x38\n\x18type_variation_reference\x18\x01 \x01(\rR\x16typeVariationReference\x1a\x8f\x02\n\x03UDT\x12\x12\n\x04type\x18\x01 \x01(\tR\x04type\x12 \n\tjvm_class\x18\x02 \x01(\tH\x00R\x08jvmClass\x88\x01\x01\x12&\n\x0cpython_class\x18\x03 \x01(\tH\x01R\x0bpythonClass\x88\x01\x01\x12;\n\x17serialized_python_class\x18\x04 \x01(\tH\x02R\x15serializedPythonClass\x88\x01\x01\x12\x32\n\x08sql_type\x18\x05 
\x01(\x0b\x32\x17.spark.connect.DataTypeR\x07sqlTypeB\x0c\n\n_jvm_classB\x0f\n\r_python_classB\x1a\n\x18_serialized_python_class\x1a\x34\n\x08Unparsed\x12(\n\x10\x64\x61ta_type_string\x18\x01 \x01(\tR\x0e\x64\x61taTypeStringB\x06\n\x04kindB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3" ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -42,7 +42,7 @@ b"\n\036org.apache.spark.connect.protoP\001Z\022internal/generated" ) _DATATYPE._serialized_start = 45 - _DATATYPE._serialized_end = 4377 + _DATATYPE._serialized_end = 4372 _DATATYPE_BOOLEAN._serialized_start = 1595 _DATATYPE_BOOLEAN._serialized_end = 1662 _DATATYPE_BYTE._serialized_start = 1664 @@ -58,41 +58,41 @@ _DATATYPE_DOUBLE._serialized_start = 1999 _DATATYPE_DOUBLE._serialized_end = 2065 _DATATYPE_STRING._serialized_start = 2067 - _DATATYPE_STRING._serialized_end = 2168 - _DATATYPE_BINARY._serialized_start = 2170 - _DATATYPE_BINARY._serialized_end = 2236 - _DATATYPE_NULL._serialized_start = 2238 - _DATATYPE_NULL._serialized_end = 2302 - _DATATYPE_TIMESTAMP._serialized_start = 2304 - _DATATYPE_TIMESTAMP._serialized_end = 2373 - _DATATYPE_DATE._serialized_start = 2375 - _DATATYPE_DATE._serialized_end = 2439 - _DATATYPE_TIMESTAMPNTZ._serialized_start = 2441 - _DATATYPE_TIMESTAMPNTZ._serialized_end = 2513 - _DATATYPE_CALENDARINTERVAL._serialized_start = 2515 - _DATATYPE_CALENDARINTERVAL._serialized_end = 2591 - _DATATYPE_YEARMONTHINTERVAL._serialized_start = 2594 - _DATATYPE_YEARMONTHINTERVAL._serialized_end = 2773 - _DATATYPE_DAYTIMEINTERVAL._serialized_start = 2776 - _DATATYPE_DAYTIMEINTERVAL._serialized_end = 2953 - _DATATYPE_CHAR._serialized_start = 2955 - _DATATYPE_CHAR._serialized_end = 3043 - _DATATYPE_VARCHAR._serialized_start = 3045 - _DATATYPE_VARCHAR._serialized_end = 3136 - _DATATYPE_DECIMAL._serialized_start = 3139 - _DATATYPE_DECIMAL._serialized_end = 3292 - _DATATYPE_STRUCTFIELD._serialized_start = 3295 - _DATATYPE_STRUCTFIELD._serialized_end = 3456 - _DATATYPE_STRUCT._serialized_start = 3458 - _DATATYPE_STRUCT._serialized_end = 3585 - _DATATYPE_ARRAY._serialized_start = 3588 - _DATATYPE_ARRAY._serialized_end = 3750 - _DATATYPE_MAP._serialized_start = 3753 - _DATATYPE_MAP._serialized_end = 3972 - _DATATYPE_VARIANT._serialized_start = 3974 - _DATATYPE_VARIANT._serialized_end = 4041 - _DATATYPE_UDT._serialized_start = 4044 - _DATATYPE_UDT._serialized_end = 4315 - _DATATYPE_UNPARSED._serialized_start = 4317 - _DATATYPE_UNPARSED._serialized_end = 4369 + _DATATYPE_STRING._serialized_end = 2163 + _DATATYPE_BINARY._serialized_start = 2165 + _DATATYPE_BINARY._serialized_end = 2231 + _DATATYPE_NULL._serialized_start = 2233 + _DATATYPE_NULL._serialized_end = 2297 + _DATATYPE_TIMESTAMP._serialized_start = 2299 + _DATATYPE_TIMESTAMP._serialized_end = 2368 + _DATATYPE_DATE._serialized_start = 2370 + _DATATYPE_DATE._serialized_end = 2434 + _DATATYPE_TIMESTAMPNTZ._serialized_start = 2436 + _DATATYPE_TIMESTAMPNTZ._serialized_end = 2508 + _DATATYPE_CALENDARINTERVAL._serialized_start = 2510 + _DATATYPE_CALENDARINTERVAL._serialized_end = 2586 + _DATATYPE_YEARMONTHINTERVAL._serialized_start = 2589 + _DATATYPE_YEARMONTHINTERVAL._serialized_end = 2768 + _DATATYPE_DAYTIMEINTERVAL._serialized_start = 2771 + _DATATYPE_DAYTIMEINTERVAL._serialized_end = 2948 + _DATATYPE_CHAR._serialized_start = 2950 + _DATATYPE_CHAR._serialized_end = 3038 + _DATATYPE_VARCHAR._serialized_start = 3040 + _DATATYPE_VARCHAR._serialized_end = 3131 + _DATATYPE_DECIMAL._serialized_start = 3134 + _DATATYPE_DECIMAL._serialized_end 
= 3287 + _DATATYPE_STRUCTFIELD._serialized_start = 3290 + _DATATYPE_STRUCTFIELD._serialized_end = 3451 + _DATATYPE_STRUCT._serialized_start = 3453 + _DATATYPE_STRUCT._serialized_end = 3580 + _DATATYPE_ARRAY._serialized_start = 3583 + _DATATYPE_ARRAY._serialized_end = 3745 + _DATATYPE_MAP._serialized_start = 3748 + _DATATYPE_MAP._serialized_end = 3967 + _DATATYPE_VARIANT._serialized_start = 3969 + _DATATYPE_VARIANT._serialized_end = 4036 + _DATATYPE_UDT._serialized_start = 4039 + _DATATYPE_UDT._serialized_end = 4310 + _DATATYPE_UNPARSED._serialized_start = 4312 + _DATATYPE_UNPARSED._serialized_end = 4364 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/types_pb2.pyi b/python/pyspark/sql/connect/proto/types_pb2.pyi index e6b34d3485c2f..b376211045377 100644 --- a/python/pyspark/sql/connect/proto/types_pb2.pyi +++ b/python/pyspark/sql/connect/proto/types_pb2.pyi @@ -178,22 +178,19 @@ class DataType(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor TYPE_VARIATION_REFERENCE_FIELD_NUMBER: builtins.int - COLLATION_ID_FIELD_NUMBER: builtins.int + COLLATION_FIELD_NUMBER: builtins.int type_variation_reference: builtins.int - collation_id: builtins.int + collation: builtins.str def __init__( self, *, type_variation_reference: builtins.int = ..., - collation_id: builtins.int = ..., + collation: builtins.str = ..., ) -> None: ... def ClearField( self, field_name: typing_extensions.Literal[ - "collation_id", - b"collation_id", - "type_variation_reference", - b"type_variation_reference", + "collation", b"collation", "type_variation_reference", b"type_variation_reference" ], ) -> None: ... diff --git a/python/pyspark/sql/connect/protobuf/functions.py b/python/pyspark/sql/connect/protobuf/functions.py index fcf1ed1ee02ee..07e9b4b8c6861 100644 --- a/python/pyspark/sql/connect/protobuf/functions.py +++ b/python/pyspark/sql/connect/protobuf/functions.py @@ -27,7 +27,7 @@ from pyspark.sql.protobuf import functions as PyProtobufFunctions -from pyspark.sql.connect.column import Column +from pyspark.sql.column import Column from pyspark.sql.connect.functions.builtin import _invoke_function, _to_col, _options_to_col, lit if TYPE_CHECKING: @@ -120,7 +120,6 @@ def _read_descriptor_set_file(filePath: str) -> bytes: def _test() -> None: import os import sys - from pyspark.util import is_remote_only from pyspark.testing.utils import search_jar protobuf_jar = search_jar("connector/protobuf", "spark-protobuf-assembly-", "spark-protobuf") @@ -142,12 +141,6 @@ def _test() -> None: import pyspark.sql.connect.protobuf.functions globs = pyspark.sql.connect.protobuf.functions.__dict__.copy() - - # TODO(SPARK-47763): Reeanble Protobuf function doctests - if is_remote_only(): - del pyspark.sql.connect.protobuf.functions.from_protobuf - del pyspark.sql.connect.protobuf.functions.to_protobuf - globs["spark"] = ( PySparkSession.builder.appName("sql.protobuf.functions tests") .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[2]")) diff --git a/python/pyspark/sql/connect/readwriter.py b/python/pyspark/sql/connect/readwriter.py index bf7dc4d369057..de62cf65b01ed 100644 --- a/python/pyspark/sql/connect/readwriter.py +++ b/python/pyspark/sql/connect/readwriter.py @@ -19,7 +19,7 @@ check_dependencies(__name__) from typing import Dict -from typing import Optional, Union, List, overload, Tuple, cast +from typing import Optional, Union, List, overload, Tuple, cast, Callable from typing import TYPE_CHECKING from pyspark.sql.connect.plan import Read, 
DataSource, LogicalPlan, WriteOperation, WriteOperationV2 @@ -37,6 +37,7 @@ from pyspark.sql.connect.dataframe import DataFrame from pyspark.sql.connect._typing import ColumnOrName, OptionalPrimitiveType from pyspark.sql.connect.session import SparkSession + from pyspark.sql.metrics import ExecutionInfo __all__ = ["DataFrameReader", "DataFrameWriter"] @@ -486,11 +487,18 @@ def _jreader(self) -> None: class DataFrameWriter(OptionUtils): - def __init__(self, plan: "LogicalPlan", session: "SparkSession"): + def __init__( + self, + plan: "LogicalPlan", + session: "SparkSession", + callback: Optional[Callable[["ExecutionInfo"], None]] = None, + ): self._df: "LogicalPlan" = plan self._spark: "SparkSession" = session self._write: "WriteOperation" = WriteOperation(self._df) + self._callback = callback if callback is not None else lambda _: None + def mode(self, saveMode: Optional[str]) -> "DataFrameWriter": # At the JVM side, the default value of mode is already set to "error". # So, if the given saveMode is None, we will not call JVM-side's mode method. @@ -649,9 +657,10 @@ def save( if format is not None: self.format(format) self._write.path = path - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) save.__doc__ = PySparkDataFrameWriter.save.__doc__ @@ -660,9 +669,10 @@ def insertInto(self, tableName: str, overwrite: Optional[bool] = None) -> None: self.mode("overwrite" if overwrite else "append") self._write.table_name = tableName self._write.table_save_method = "insert_into" - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) insertInto.__doc__ = PySparkDataFrameWriter.insertInto.__doc__ @@ -681,9 +691,10 @@ def saveAsTable( self.format(format) self._write.table_name = name self._write.table_save_method = "save_as_table" - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) saveAsTable.__doc__ = PySparkDataFrameWriter.saveAsTable.__doc__ @@ -845,11 +856,18 @@ def jdbc( class DataFrameWriterV2(OptionUtils): - def __init__(self, plan: "LogicalPlan", session: "SparkSession", table: str): + def __init__( + self, + plan: "LogicalPlan", + session: "SparkSession", + table: str, + callback: Optional[Callable[["ExecutionInfo"], None]] = None, + ): self._df: "LogicalPlan" = plan self._spark: "SparkSession" = session self._table_name: str = table self._write: "WriteOperationV2" = WriteOperationV2(self._df, self._table_name) + self._callback = callback if callback is not None else lambda _: None def using(self, provider: str) -> "DataFrameWriterV2": self._write.provider = provider @@ -884,50 +902,56 @@ def partitionedBy(self, col: "ColumnOrName", *cols: "ColumnOrName") -> "DataFram def create(self) -> None: self._write.mode = "create" - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) create.__doc__ = PySparkDataFrameWriterV2.create.__doc__ def replace(self) -> None: self._write.mode = "replace" - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) replace.__doc__ = 
PySparkDataFrameWriterV2.replace.__doc__ def createOrReplace(self) -> None: self._write.mode = "create_or_replace" - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) createOrReplace.__doc__ = PySparkDataFrameWriterV2.createOrReplace.__doc__ def append(self) -> None: self._write.mode = "append" - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) append.__doc__ = PySparkDataFrameWriterV2.append.__doc__ def overwrite(self, condition: "ColumnOrName") -> None: self._write.mode = "overwrite" self._write.overwrite_condition = F._to_col(condition) - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) overwrite.__doc__ = PySparkDataFrameWriterV2.overwrite.__doc__ def overwritePartitions(self) -> None: self._write.mode = "overwrite_partitions" - self._spark.client.execute_command( + _, _, ei = self._spark.client.execute_command( self._write.command(self._spark.client), self._write.observations ) + self._callback(ei) overwritePartitions.__doc__ = PySparkDataFrameWriterV2.overwritePartitions.__doc__ diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index 3be6c83cf13ba..8e277b3fc63aa 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -15,6 +15,7 @@ # limitations under the License. # from pyspark.sql.connect.utils import check_dependencies +from pyspark.sql.utils import is_timestamp_ntz_preferred check_dependencies(__name__) @@ -22,7 +23,7 @@ import os import warnings from collections.abc import Sized -from functools import reduce +import functools from threading import RLock from typing import ( Optional, @@ -48,10 +49,11 @@ ) import urllib +from pyspark.sql.connect.dataframe import DataFrame +from pyspark.sql.dataframe import DataFrame as ParentDataFrame from pyspark.loose_version import LooseVersion from pyspark.sql.connect.client import SparkConnectClient, DefaultChannelBuilder from pyspark.sql.connect.conf import RuntimeConf -from pyspark.sql.connect.dataframe import DataFrame from pyspark.sql.connect.plan import ( SQL, Range, @@ -72,7 +74,9 @@ to_arrow_schema, to_arrow_type, _deduplicate_field_names, + from_arrow_schema, from_arrow_type, + _check_arrow_table_timestamps_localize, ) from pyspark.sql.profiler import Profile from pyspark.sql.session import classproperty, SparkSession as PySparkSession @@ -107,7 +111,6 @@ from pyspark.sql.connect.shell.progress import ProgressHandler from pyspark.sql.connect.datasource import DataSourceRegistration - try: import memory_profiler # noqa: F401 @@ -238,7 +241,7 @@ def getOrCreate(self) -> "SparkSession": with SparkSession._lock: session = SparkSession.getActiveSession() if session is None: - session = SparkSession._default_session + session = SparkSession._get_default_session() if session is None: session = self.create() self._apply_options(session) @@ -285,9 +288,19 @@ def _set_default_and_active_session(cls, session: "SparkSession") -> None: if getattr(cls._active_session, "session", None) is None: cls._active_session.session = session + @classmethod + def _get_default_session(cls) -> Optional["SparkSession"]: + s = cls._default_session + if s is not None and not s.is_stopped: + return s + return None + 
@classmethod def getActiveSession(cls) -> Optional["SparkSession"]: - return getattr(cls._active_session, "session", None) + s = getattr(cls._active_session, "session", None) + if s is not None and not s.is_stopped: + return s + return None @classmethod def _getActiveSessionIfMatches(cls, session_id: str) -> "SparkSession": @@ -315,7 +328,7 @@ def _getActiveSessionIfMatches(cls, session_id: str) -> "SparkSession": def active(cls) -> "SparkSession": session = cls.getActiveSession() if session is None: - session = cls._default_session + session = cls._get_default_session() if session is None: raise PySparkRuntimeError( error_class="NO_ACTIVE_OR_DEFAULT_SESSION", @@ -325,7 +338,7 @@ def active(cls) -> "SparkSession": active.__doc__ = PySparkSession.active.__doc__ - def table(self, tableName: str) -> DataFrame: + def table(self, tableName: str) -> ParentDataFrame: if not isinstance(tableName, str): raise PySparkTypeError( error_class="NOT_STR", @@ -378,13 +391,15 @@ def _inferSchemaFromList( ( infer_dict_as_struct, infer_array_from_first_element, + infer_map_from_first_pair, prefer_timestamp_ntz, ) = self._client.get_configs( "spark.sql.pyspark.inferNestedDictAsStruct.enabled", "spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled", + "spark.sql.pyspark.legacy.inferMapTypeFromFirstPair.enabled", "spark.sql.timestampType", ) - return reduce( + return functools.reduce( _merge_type, ( _infer_schema( @@ -392,6 +407,7 @@ def _inferSchemaFromList( names, infer_dict_as_struct=(infer_dict_as_struct == "true"), infer_array_from_first_element=(infer_array_from_first_element == "true"), + infer_map_from_first_pair=(infer_map_from_first_pair == "true"), prefer_timestamp_ntz=(prefer_timestamp_ntz == "TIMESTAMP_NTZ"), ) for row in data @@ -400,9 +416,11 @@ def _inferSchemaFromList( def createDataFrame( self, - data: Union["pd.DataFrame", "np.ndarray", Iterable[Any]], + data: Union["pd.DataFrame", "np.ndarray", "pa.Table", Iterable[Any]], schema: Optional[Union[AtomicType, StructType, str, List[str], Tuple[str, ...]]] = None, - ) -> "DataFrame": + samplingRatio: Optional[float] = None, + verifySchema: Optional[bool] = None, + ) -> "ParentDataFrame": assert data is not None if isinstance(data, DataFrame): raise PySparkTypeError( @@ -410,6 +428,12 @@ def createDataFrame( message_parameters={"arg_name": "data", "arg_type": "DataFrame"}, ) + if samplingRatio is not None: + warnings.warn("'samplingRatio' is ignored. It is not supported with Spark Connect.") + + if verifySchema is not None: + warnings.warn("'verifySchema' is ignored. 
It is not supported with Spark Connect.") + _schema: Optional[Union[AtomicType, StructType]] = None _cols: Optional[List[str]] = None _num_cols: Optional[int] = None @@ -455,6 +479,7 @@ def createDataFrame( ) _table: Optional[pa.Table] = None + timezone: Optional[str] = None if isinstance(data, pd.DataFrame): # Logic was borrowed from `_create_from_pandas_with_arrow` in @@ -540,6 +565,28 @@ def createDataFrame( cast(StructType, _deduplicate_field_names(schema)).names ).cast(arrow_schema) + elif isinstance(data, pa.Table): + prefer_timestamp_ntz = is_timestamp_ntz_preferred() + + (timezone,) = self._client.get_configs("spark.sql.session.timeZone") + + # If no schema supplied by user then get the names of columns only + if schema is None: + _cols = data.column_names + if isinstance(schema, (list, tuple)) and cast(int, _num_cols) < len(data.columns): + assert isinstance(_cols, list) + _cols.extend([f"_{i + 1}" for i in range(cast(int, _num_cols), len(data.columns))]) + _num_cols = len(_cols) + + if not isinstance(schema, StructType): + schema = from_arrow_schema(data.schema, prefer_timestamp_ntz=prefer_timestamp_ntz) + + _table = ( + _check_arrow_table_timestamps_localize(data, schema, True, timezone) + .cast(to_arrow_schema(schema, error_on_duplicated_field_names_in_struct=True)) + .rename_columns(schema.names) + ) + elif isinstance(data, np.ndarray): if _cols is None: if data.ndim == 1 or data.shape[1] == 1: @@ -636,7 +683,7 @@ def createDataFrame( df = DataFrame(plan, self) if _cols is not None and len(_cols) > 0: - df = df.toDF(*_cols) + df = df.toDF(*_cols) # type: ignore[assignment] return df createDataFrame.__doc__ = PySparkSession.createDataFrame.__doc__ @@ -646,7 +693,7 @@ def sql( sqlQuery: str, args: Optional[Union[Dict[str, Any], List]] = None, **kwargs: Any, - ) -> "DataFrame": + ) -> "ParentDataFrame": _args = [] _named_args = {} if args is not None: @@ -673,9 +720,12 @@ def sql( _views.append(SubqueryAlias(df._plan, name)) cmd = SQL(sqlQuery, _args, _named_args, _views) - data, properties = self.client.execute_command(cmd.command(self._client)) + data, properties, ei = self.client.execute_command(cmd.command(self._client)) if "sql_command_result" in properties: - return DataFrame(CachedRelation(properties["sql_command_result"]), self) + df = DataFrame(CachedRelation(properties["sql_command_result"]), self) + # A command result contains the execution. + df._execution_info = ei + return df else: return DataFrame(cmd, self) @@ -687,7 +737,7 @@ def range( end: Optional[int] = None, step: int = 1, numPartitions: Optional[int] = None, - ) -> DataFrame: + ) -> ParentDataFrame: if end is None: actual_end = start start = 0 @@ -718,6 +768,9 @@ def catalog(self) -> "Catalog": def __del__(self) -> None: try: + # StreamingQueryManager has client states that needs to be cleaned up + if hasattr(self, "_sqm"): + self._sqm.close() # Try its best to close. 
self.client.close() except Exception: @@ -848,7 +901,7 @@ def dataSource(self) -> "DataSourceRegistration": dataSource.__doc__ = PySparkSession.dataSource.__doc__ - @property + @functools.cached_property def version(self) -> str: result = self._client._analyze(method="spark_version").spark_version assert result is not None @@ -896,13 +949,13 @@ def copyFromLocalToFs(self, local_path: str, dest_path: str) -> None: copyFromLocalToFs.__doc__ = PySparkSession.copyFromLocalToFs.__doc__ - def _create_remote_dataframe(self, remote_id: str) -> "DataFrame": + def _create_remote_dataframe(self, remote_id: str) -> "ParentDataFrame": """ In internal API to reference a runtime DataFrame on the server side. This is used in ForeachBatch() runner, where the remote DataFrame refers to the output of a micro batch. """ - return DataFrame(CachedRemoteRelation(remote_id), self) + return DataFrame(CachedRemoteRelation(remote_id, spark_session=self), self) @staticmethod def _start_connect_server(master: str, opts: Dict[str, Any]) -> None: diff --git a/python/pyspark/sql/connect/streaming/query.py b/python/pyspark/sql/connect/streaming/query.py index 0624f8943ac40..13458d650fa9f 100644 --- a/python/pyspark/sql/connect/streaming/query.py +++ b/python/pyspark/sql/connect/streaming/query.py @@ -181,7 +181,7 @@ def _execute_streaming_query_cmd( cmd.query_id.run_id = self._run_id exec_cmd = pb2.Command() exec_cmd.streaming_query_command.CopyFrom(cmd) - (_, properties) = self._session.client.execute_command(exec_cmd) + (_, properties, _) = self._session.client.execute_command(exec_cmd) return cast(pb2.StreamingQueryCommandResult, properties["streaming_query_command_result"]) @@ -190,6 +190,9 @@ def __init__(self, session: "SparkSession") -> None: self._session = session self._sqlb = StreamingQueryListenerBus(self) + def close(self) -> None: + self._sqlb.close() + @property def active(self) -> List[StreamingQuery]: cmd = pb2.StreamingQueryManagerCommand() @@ -257,7 +260,7 @@ def _execute_streaming_query_manager_cmd( ) -> pb2.StreamingQueryManagerCommandResult: exec_cmd = pb2.Command() exec_cmd.streaming_query_manager_command.CopyFrom(cmd) - (_, properties) = self._session.client.execute_command(exec_cmd) + (_, properties, _) = self._session.client.execute_command(exec_cmd) return cast( pb2.StreamingQueryManagerCommandResult, properties["streaming_query_manager_command_result"], @@ -276,6 +279,10 @@ def __init__(self, sqm: "StreamingQueryManager") -> None: self._execution_thread: Optional[Thread] = None self._lock = Lock() + def close(self) -> None: + for listener in self._listener_bus: + self.remove(listener) + def append(self, listener: StreamingQueryListener) -> None: """ Append a listener to the local listener bus. 
When the added listener is diff --git a/python/pyspark/sql/connect/streaming/readwriter.py b/python/pyspark/sql/connect/streaming/readwriter.py index 4973bb5b6cf73..9b11bf328b853 100644 --- a/python/pyspark/sql/connect/streaming/readwriter.py +++ b/python/pyspark/sql/connect/streaming/readwriter.py @@ -446,6 +446,11 @@ def partitionBy(self, *cols: str) -> "DataStreamWriter": # type: ignore[misc] partitionBy.__doc__ = PySparkDataStreamWriter.partitionBy.__doc__ def queryName(self, queryName: str) -> "DataStreamWriter": + if not queryName or type(queryName) != str or len(queryName.strip()) == 0: + raise PySparkValueError( + error_class="VALUE_NOT_NON_EMPTY_STR", + message_parameters={"arg_name": "queryName", "arg_value": str(queryName)}, + ) self._write_proto.query_name = queryName return self @@ -596,7 +601,7 @@ def _start_internal( self._write_proto.table_name = tableName cmd = self._write_stream.command(self._session.client) - (_, properties) = self._session.client.execute_command(cmd) + (_, properties, _) = self._session.client.execute_command(cmd) start_result = cast( pb2.WriteStreamOperationStartResult, properties["write_stream_operation_start_result"] @@ -605,7 +610,9 @@ def _start_internal( session=self._session, queryId=start_result.query_id.id, runId=start_result.query_id.run_id, - name=start_result.name, + # A Streaming Query cannot have empty string as name + # Spark throws error in that case, so this cast is safe + name=start_result.name if start_result.name != "" else None, ) if start_result.HasField("query_started_event_json"): diff --git a/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py b/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py index 92ed7a4aaff53..0c92de6372b6f 100644 --- a/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py +++ b/python/pyspark/sql/connect/streaming/worker/foreach_batch_worker.py @@ -62,13 +62,6 @@ def main(infile: IO, outfile: IO) -> None: assert spark_connect_session.session_id == session_id spark = spark_connect_session - # TODO(SPARK-44461): Enable Process Isolation - - func = worker.read_command(pickle_ser, infile) - write_int(0, outfile) # Indicate successful initialization - - outfile.flush() - log_name = "Streaming ForeachBatch worker" def process(df_id, batch_id): # type: ignore[no-untyped-def] @@ -78,16 +71,21 @@ def process(df_id, batch_id): # type: ignore[no-untyped-def] func(batch_df, batch_id) print(f"{log_name} Completed batch {batch_id} with DF id {df_id}") - while True: - df_ref_id = utf8_deserializer.loads(infile) - batch_id = read_long(infile) - # Handle errors inside Python worker. Write 0 to outfile if no errors and write -2 with - # traceback string if error occurs. - try: + try: + func = worker.read_command(pickle_ser, infile) + write_int(0, outfile) + outfile.flush() + + while True: + df_ref_id = utf8_deserializer.loads(infile) + batch_id = read_long(infile) + # Handle errors inside Python worker. Write 0 to outfile if no errors and write -2 with + # traceback string if error occurs. process(df_ref_id, int(batch_id)) write_int(0, outfile) - except BaseException as e: - handle_worker_exception(e, outfile) + outfile.flush() + except Exception as e: + handle_worker_exception(e, outfile) outfile.flush() @@ -98,4 +96,6 @@ def process(df_id, batch_id): # type: ignore[no-untyped-def] (sock_file, sock) = local_connect_and_auth(java_port, auth_secret) # There could be a long time between each micro batch. 
sock.settimeout(None) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/connect/streaming/worker/listener_worker.py b/python/pyspark/sql/connect/streaming/worker/listener_worker.py index d3efb5894fc02..a7a5066ca0d77 100644 --- a/python/pyspark/sql/connect/streaming/worker/listener_worker.py +++ b/python/pyspark/sql/connect/streaming/worker/listener_worker.py @@ -70,8 +70,6 @@ def main(infile: IO, outfile: IO) -> None: assert spark_connect_session.session_id == session_id spark = spark_connect_session - # TODO(SPARK-44461): Enable Process Isolation - listener = worker.read_command(pickle_ser, infile) write_int(0, outfile) # Indicate successful initialization @@ -112,4 +110,6 @@ def process(listener_event_str, listener_event_type): # type: ignore[no-untyped (sock_file, sock) = local_connect_and_auth(java_port, auth_secret) # There could be a long time between each listener event. sock.settimeout(None) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/connect/types.py b/python/pyspark/sql/connect/types.py index f058c6390612a..885ce62e7db6f 100644 --- a/python/pyspark/sql/connect/types.py +++ b/python/pyspark/sql/connect/types.py @@ -55,16 +55,6 @@ import pyspark.sql.connect.proto as pb2 -JVM_BYTE_MIN: int = -(1 << 7) -JVM_BYTE_MAX: int = (1 << 7) - 1 -JVM_SHORT_MIN: int = -(1 << 15) -JVM_SHORT_MAX: int = (1 << 15) - 1 -JVM_INT_MIN: int = -(1 << 31) -JVM_INT_MAX: int = (1 << 31) - 1 -JVM_LONG_MIN: int = -(1 << 63) -JVM_LONG_MAX: int = (1 << 63) - 1 - - class UnparsedDataType(DataType): """ Unparsed data type. @@ -139,7 +129,7 @@ def pyspark_types_to_proto_types(data_type: DataType) -> pb2.DataType: if isinstance(data_type, NullType): ret.null.CopyFrom(pb2.DataType.NULL()) elif isinstance(data_type, StringType): - ret.string.collation_id = data_type.collationId + ret.string.collation = data_type.collation elif isinstance(data_type, BooleanType): ret.boolean.CopyFrom(pb2.DataType.Boolean()) elif isinstance(data_type, BinaryType): @@ -239,7 +229,8 @@ def proto_schema_to_pyspark_data_type(schema: pb2.DataType) -> DataType: s = schema.decimal.scale if schema.decimal.HasField("scale") else 0 return DecimalType(precision=p, scale=s) elif schema.HasField("string"): - return StringType.fromCollationId(schema.string.collation_id) + collation = schema.string.collation if schema.string.collation != "" else "UTF8_BINARY" + return StringType(collation) elif schema.HasField("char"): return CharType(schema.char.length) elif schema.HasField("var_char"): diff --git a/python/pyspark/sql/connect/udf.py b/python/pyspark/sql/connect/udf.py index f3aa719b2bb64..f5daf3ff841fd 100644 --- a/python/pyspark/sql/connect/udf.py +++ b/python/pyspark/sql/connect/udf.py @@ -36,9 +36,11 @@ PythonUDF, ) from pyspark.sql.connect.column import Column -from pyspark.sql.connect.types import UnparsedDataType -from pyspark.sql.types import DataType, StringType -from pyspark.sql.udf import UDFRegistration as PySparkUDFRegistration +from pyspark.sql.types import DataType, StringType, _parse_datatype_string +from pyspark.sql.udf import ( + UDFRegistration as PySparkUDFRegistration, + UserDefinedFunction as PySparkUserDefinedFunction, +) from pyspark.errors import PySparkTypeError, PySparkRuntimeError if TYPE_CHECKING: @@ -148,20 +150,36 @@ def __init__( ) self.func = func - self.returnType: DataType = ( - UnparsedDataType(returnType) if isinstance(returnType, str) else returnType - ) + self._returnType = 
returnType + self._returnType_placeholder: Optional[DataType] = None self._name = name or ( func.__name__ if hasattr(func, "__name__") else func.__class__.__name__ ) self.evalType = evalType self.deterministic = deterministic + @property + def returnType(self) -> DataType: + # Make sure this is called after Connect Session is initialized. + # ``_parse_datatype_string`` accesses to Connect Server for parsing a DDL formatted string. + # TODO: PythonEvalType.SQL_BATCHED_UDF + if self._returnType_placeholder is None: + if isinstance(self._returnType, DataType): + self._returnType_placeholder = self._returnType + else: + self._returnType_placeholder = _parse_datatype_string(self._returnType) + + PySparkUserDefinedFunction._check_return_type(self._returnType_placeholder, self.evalType) + return self._returnType_placeholder + def _build_common_inline_user_defined_function( self, *args: "ColumnOrName", **kwargs: "ColumnOrName" ) -> CommonInlineUserDefinedFunction: def to_expr(col: "ColumnOrName") -> Expression: - return col._expr if isinstance(col, Column) else ColumnReference(col) + if isinstance(col, Column): + return col._expr + else: + return ColumnReference(col) # type: ignore[arg-type] arg_exprs: List[Expression] = [to_expr(arg) for arg in args] + [ NamedArgumentExpression(key, to_expr(value)) for key, value in kwargs.items() diff --git a/python/pyspark/sql/connect/udtf.py b/python/pyspark/sql/connect/udtf.py index 4ee39dc89b8e8..739289d72a3b1 100644 --- a/python/pyspark/sql/connect/udtf.py +++ b/python/pyspark/sql/connect/udtf.py @@ -141,7 +141,10 @@ def _build_common_inline_user_defined_table_function( self, *args: "ColumnOrName", **kwargs: "ColumnOrName" ) -> CommonInlineUserDefinedTableFunction: def to_expr(col: "ColumnOrName") -> Expression: - return col._expr if isinstance(col, Column) else ColumnReference(col) + if isinstance(col, Column): + return col._expr + else: + return ColumnReference(col) # type: ignore[arg-type] arg_exprs: List[Expression] = [to_expr(arg) for arg in args] + [ NamedArgumentExpression(key, to_expr(value)) for key, value in kwargs.items() diff --git a/python/pyspark/sql/connect/window.py b/python/pyspark/sql/connect/window.py index e30a5b7d7a9e2..cbca6886060cf 100644 --- a/python/pyspark/sql/connect/window.py +++ b/python/pyspark/sql/connect/window.py @@ -18,42 +18,37 @@ check_dependencies(__name__) -import sys -from typing import TYPE_CHECKING, Union, Sequence, List, Optional - -from pyspark.sql.connect.column import Column -from pyspark.sql.connect.expressions import ( - ColumnReference, - Expression, - SortOrder, -) -from pyspark.sql.connect.types import ( - JVM_LONG_MIN, - JVM_LONG_MAX, +from typing import TYPE_CHECKING, Union, Sequence, List, Optional, Tuple, cast, Iterable + +from pyspark.sql.column import Column +from pyspark.sql.window import ( + Window as ParentWindow, + WindowSpec as ParentWindowSpec, ) -from pyspark.sql.window import Window as PySparkWindow, WindowSpec as PySparkWindowSpec -from pyspark.errors import PySparkTypeError +from pyspark.sql.connect.expressions import Expression, SortOrder +from pyspark.sql.connect.functions import builtin as F if TYPE_CHECKING: - from pyspark.sql.connect._typing import ColumnOrName + from pyspark.sql.connect._typing import ColumnOrName, ColumnOrName_ __all__ = ["Window", "WindowSpec"] +def _to_cols(cols: Tuple[Union["ColumnOrName", List["ColumnOrName_"]], ...]) -> List[Column]: + if len(cols) == 1 and isinstance(cols[0], list): + cols = cols[0] # type: ignore[assignment] + return [F._to_col(c) for c in 
cast(Iterable["ColumnOrName"], cols)] + + class WindowFrame: def __init__(self, isRowFrame: bool, start: int, end: int) -> None: super().__init__() assert isinstance(isRowFrame, bool) - assert isinstance(start, int) - assert isinstance(end, int) - self._isRowFrame = isRowFrame - self._start = start - self._end = end def __repr__(self) -> str: @@ -63,7 +58,17 @@ def __repr__(self) -> str: return f"WindowFrame(RANGE_FRAME, {self._start}, {self._end})" -class WindowSpec: +class WindowSpec(ParentWindowSpec): + def __new__( + cls, + partitionSpec: Sequence[Expression], + orderSpec: Sequence[SortOrder], + frame: Optional[WindowFrame], + ) -> "WindowSpec": + self = object.__new__(cls) + self.__init__(partitionSpec, orderSpec, frame) # type: ignore[misc] + return self + def __init__( self, partitionSpec: Sequence[Expression], @@ -73,87 +78,27 @@ def __init__( assert isinstance(partitionSpec, list) and all( isinstance(p, Expression) for p in partitionSpec ) - assert isinstance(orderSpec, list) and all(isinstance(s, SortOrder) for s in orderSpec) - assert frame is None or isinstance(frame, WindowFrame) - self._partitionSpec = partitionSpec - self._orderSpec = orderSpec - self._frame = frame - def partitionBy(self, *cols: Union["ColumnOrName", List["ColumnOrName"]]) -> "WindowSpec": - _cols: List[ColumnOrName] = [] - for col in cols: - if isinstance(col, (str, Column)): - _cols.append(col) - elif isinstance(col, list): - for c in col: - if isinstance(c, (str, Column)): - _cols.append(c) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_LIST_OR_STR", - message_parameters={"arg_name": "cols", "arg_type": type(c).__name__}, - ) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_LIST_OR_STR", - message_parameters={"arg_name": "cols", "arg_type": type(col).__name__}, - ) - - newPartitionSpec: List[Expression] = [] - for c in _cols: - if isinstance(c, Column): - newPartitionSpec.append(c._expr) - else: - newPartitionSpec.append(ColumnReference(c)) - + def partitionBy(self, *cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> ParentWindowSpec: return WindowSpec( - partitionSpec=newPartitionSpec, + partitionSpec=[c._expr for c in _to_cols(cols)], # type: ignore[misc] orderSpec=self._orderSpec, frame=self._frame, ) - def orderBy(self, *cols: Union["ColumnOrName", List["ColumnOrName"]]) -> "WindowSpec": - _cols: List[ColumnOrName] = [] - for col in cols: - if isinstance(col, (str, Column)): - _cols.append(col) - elif isinstance(col, list): - for c in col: - if isinstance(c, (str, Column)): - _cols.append(c) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_LIST_OR_STR", - message_parameters={"arg_name": "cols", "arg_type": type(c).__name__}, - ) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_LIST_OR_STR", - message_parameters={"arg_name": "cols", "arg_type": type(col).__name__}, - ) - - newOrderSpec: List[SortOrder] = [] - for c in _cols: - if isinstance(c, Column): - if isinstance(c._expr, SortOrder): - newOrderSpec.append(c._expr) - else: - newOrderSpec.append(SortOrder(c._expr)) - else: - newOrderSpec.append(SortOrder(ColumnReference(c))) - + def orderBy(self, *cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> ParentWindowSpec: return WindowSpec( partitionSpec=self._partitionSpec, - orderSpec=newOrderSpec, + orderSpec=[cast(SortOrder, F._sort_col(c)._expr) for c in _to_cols(cols)], frame=self._frame, ) - def rowsBetween(self, start: int, end: int) -> "WindowSpec": + def rowsBetween(self, start: int, end: int) -> ParentWindowSpec: if start <= 
Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding if end >= Window._FOLLOWING_THRESHOLD: @@ -165,7 +110,7 @@ def rowsBetween(self, start: int, end: int) -> "WindowSpec": frame=WindowFrame(isRowFrame=True, start=start, end=end), ) - def rangeBetween(self, start: int, end: int) -> "WindowSpec": + def rangeBetween(self, start: int, end: int) -> ParentWindowSpec: if start <= Window._PRECEDING_THRESHOLD: start = Window.unboundedPreceding if end >= Window._FOLLOWING_THRESHOLD: @@ -190,57 +135,34 @@ def __repr__(self) -> str: return "WindowSpec(" + ", ".join(strs) + ")" -WindowSpec.rangeBetween.__doc__ = PySparkWindowSpec.rangeBetween.__doc__ -WindowSpec.rowsBetween.__doc__ = PySparkWindowSpec.rowsBetween.__doc__ -WindowSpec.orderBy.__doc__ = PySparkWindowSpec.orderBy.__doc__ -WindowSpec.partitionBy.__doc__ = PySparkWindowSpec.partitionBy.__doc__ -WindowSpec.__doc__ = PySparkWindowSpec.__doc__ - - -class Window: - _PRECEDING_THRESHOLD = max(-sys.maxsize, JVM_LONG_MIN) - _FOLLOWING_THRESHOLD = min(sys.maxsize, JVM_LONG_MAX) - - unboundedPreceding: int = JVM_LONG_MIN - - unboundedFollowing: int = JVM_LONG_MAX - - currentRow: int = 0 - +class Window(ParentWindow): _spec = WindowSpec(partitionSpec=[], orderSpec=[], frame=None) @staticmethod - def partitionBy(*cols: Union["ColumnOrName", List["ColumnOrName"]]) -> "WindowSpec": + def partitionBy(*cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> ParentWindowSpec: return Window._spec.partitionBy(*cols) @staticmethod - def orderBy(*cols: Union["ColumnOrName", List["ColumnOrName"]]) -> "WindowSpec": + def orderBy(*cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> ParentWindowSpec: return Window._spec.orderBy(*cols) @staticmethod - def rowsBetween(start: int, end: int) -> "WindowSpec": + def rowsBetween(start: int, end: int) -> ParentWindowSpec: return Window._spec.rowsBetween(start, end) @staticmethod - def rangeBetween(start: int, end: int) -> "WindowSpec": + def rangeBetween(start: int, end: int) -> ParentWindowSpec: return Window._spec.rangeBetween(start, end) -Window.orderBy.__doc__ = PySparkWindow.orderBy.__doc__ -Window.rowsBetween.__doc__ = PySparkWindow.rowsBetween.__doc__ -Window.rangeBetween.__doc__ = PySparkWindow.rangeBetween.__doc__ -Window.partitionBy.__doc__ = PySparkWindow.partitionBy.__doc__ -Window.__doc__ = PySparkWindow.__doc__ - - def _test() -> None: import os import sys import doctest from pyspark.sql import SparkSession as PySparkSession - import pyspark.sql.connect.window + import pyspark.sql.window - globs = pyspark.sql.connect.window.__dict__.copy() + globs = pyspark.sql.window.__dict__.copy() globs["spark"] = ( PySparkSession.builder.appName("sql.connect.window tests") .remote(os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local[4]")) @@ -248,7 +170,7 @@ def _test() -> None: ) (failure_count, test_count) = doctest.testmod( - pyspark.sql.connect.window, + pyspark.sql.window, globs=globs, optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index cbb0299e2195d..3fe47615b8761 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -46,6 +46,7 @@ if TYPE_CHECKING: from py4j.java_gateway import JavaObject + import pyarrow as pa from pyspark.core.rdd import RDD from pyspark.core.context import SparkContext from pyspark.sql._typing import ( @@ -343,14 +344,14 @@ def createDataFrame( @overload def createDataFrame( - self, data: "PandasDataFrameLike", samplingRatio: Optional[float] = ... 
+ self, data: Union["PandasDataFrameLike", "pa.Table"], samplingRatio: Optional[float] = ... ) -> DataFrame: ... @overload def createDataFrame( self, - data: "PandasDataFrameLike", + data: Union["PandasDataFrameLike", "pa.Table"], schema: Union[StructType, str], verifySchema: bool = ..., ) -> DataFrame: @@ -358,13 +359,14 @@ def createDataFrame( def createDataFrame( # type: ignore[misc] self, - data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike"], + data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike", "pa.Table"], schema: Optional[Union[AtomicType, StructType, str]] = None, samplingRatio: Optional[float] = None, verifySchema: bool = True, ) -> DataFrame: """ - Creates a :class:`DataFrame` from an :class:`RDD`, a list or a :class:`pandas.DataFrame`. + Creates a :class:`DataFrame` from an :class:`RDD`, a list, a :class:`pandas.DataFrame`, or + a :class:`pyarrow.Table`. When ``schema`` is a list of column names, the type of each column will be inferred from ``data``. @@ -393,12 +395,15 @@ def createDataFrame( # type: ignore[misc] .. versionchanged:: 2.1.0 Added verifySchema. + .. versionchanged:: 4.0.0 + Added support for :class:`pyarrow.Table`. + Parameters ---------- data : :class:`RDD` or iterable an RDD of any kind of SQL data representation (:class:`Row`, - :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, or - :class:`pandas.DataFrame`. + :class:`tuple`, ``int``, ``boolean``, etc.), or :class:`list`, + :class:`pandas.DataFrame`, or :class:`pyarrow.Table`. schema : :class:`pyspark.sql.types.DataType`, str or list, optional a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default is None. The data type string format equals to @@ -452,6 +457,12 @@ def createDataFrame( # type: ignore[misc] >>> sqlContext.createDataFrame(pandas.DataFrame([[1, 2]])).collect() # doctest: +SKIP [Row(0=1, 1=2)] + >>> sqlContext.createDataFrame(df.toArrow()).collect() # doctest: +SKIP + [Row(name='Alice', age=1)] + >>> table = pyarrow.table({'0': [1], '1': [2]}) # doctest: +SKIP + >>> sqlContext.createDataFrame(table).collect() # doctest: +SKIP + [Row(0=1, 1=2)] + >>> sqlContext.createDataFrame(rdd, "a: string, b: int").collect() [Row(a='Alice', b=1)] >>> rdd = rdd.map(lambda row: row[1]) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index a599d04705184..625678588bf9e 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -15,13 +15,8 @@ # limitations under the License. 
# -import json -import os -import sys -import random -import warnings -from collections.abc import Iterable -from functools import reduce +# mypy: disable-error-code="empty-body" + from typing import ( Any, Callable, @@ -31,45 +26,27 @@ Optional, Sequence, Tuple, - Type, Union, - cast, overload, TYPE_CHECKING, ) from pyspark import _NoValue from pyspark._globals import _NoValueType -from pyspark.errors import ( - PySparkTypeError, - PySparkValueError, - PySparkIndexError, - PySparkAttributeError, -) -from pyspark.util import ( - is_remote_only, - _load_from_socket, - _local_iterator_from_socket, -) -from pyspark.serializers import BatchedSerializer, CPickleSerializer, UTF8Deserializer +from pyspark.util import is_remote_only from pyspark.storagelevel import StorageLevel -from pyspark.traceback_utils import SCCallSiteSync -from pyspark.sql.column import Column, _to_seq, _to_list, _to_java_column +from pyspark.resource import ResourceProfile +from pyspark.sql.column import Column from pyspark.sql.readwriter import DataFrameWriter, DataFrameWriterV2 from pyspark.sql.streaming import DataStreamWriter -from pyspark.sql.types import ( - StructType, - Row, - _parse_datatype_json_string, -) -from pyspark.sql.utils import get_active_spark_context, toJArray -from pyspark.sql.pandas.conversion import PandasConversionMixin -from pyspark.sql.pandas.map_ops import PandasMapOpsMixin +from pyspark.sql.types import StructType, Row +from pyspark.sql.utils import dispatch_df_method if TYPE_CHECKING: from py4j.java_gateway import JavaObject - from pyspark.core.rdd import RDD + import pyarrow as pa from pyspark.core.context import SparkContext + from pyspark.core.rdd import RDD from pyspark._typing import PrimitiveType from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame from pyspark.sql._typing import ( @@ -82,12 +59,18 @@ from pyspark.sql.session import SparkSession from pyspark.sql.group import GroupedData from pyspark.sql.observation import Observation + from pyspark.sql.pandas._typing import ( + PandasMapIterFunction, + ArrowMapIterFunction, + DataFrameLike as PandasDataFrameLike, + ) + from pyspark.sql.metrics import ExecutionInfo __all__ = ["DataFrame", "DataFrameNaFunctions", "DataFrameStatFunctions"] -class DataFrame(PandasMapOpsMixin, PandasConversionMixin): +class DataFrame: """A distributed collection of data grouped into named columns. .. versionadded:: 1.3.0 @@ -125,12 +108,13 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): >>> people.filter(people.age > 30).join( ... department, people.deptId == department.id).groupBy( - ... department.name, "gender").agg({"salary": "avg", "age": "max"}).show() + ... department.name, "gender").agg( + ... {"salary": "avg", "age": "max"}).sort("max(age)").show() +-------+------+-----------+--------+ | name|gender|avg(salary)|max(age)| +-------+------+-----------+--------+ - | ML| F| 150.0| 60| |PySpark| M| 75.0| 50| + | ML| F| 150.0| 60| +-------+------+-----------+--------+ Notes @@ -139,49 +123,26 @@ class DataFrame(PandasMapOpsMixin, PandasConversionMixin): created via using the constructor. """ - def __init__( - self, + # HACK ALERT!! this is to reduce the backward compatibility concern, and returns + # Spark Classic DataFrame by default. This is NOT an API, and NOT supposed to + # be directly invoked. DO NOT use this constructor. 
+ _sql_ctx: Optional["SQLContext"] + _session: "SparkSession" + _sc: "SparkContext" + _jdf: "JavaObject" + is_cached: bool + _schema: Optional[StructType] + _lazy_rdd: Optional["RDD[Row]"] + _support_repr_html: bool + + def __new__( + cls, jdf: "JavaObject", sql_ctx: Union["SQLContext", "SparkSession"], - ): - from pyspark.sql.context import SQLContext - - self._sql_ctx: Optional["SQLContext"] = None - - if isinstance(sql_ctx, SQLContext): - assert not os.environ.get("SPARK_TESTING") # Sanity check for our internal usage. - assert isinstance(sql_ctx, SQLContext) - # We should remove this if-else branch in the future release, and rename - # sql_ctx to session in the constructor. This is an internal code path but - # was kept with a warning because it's used intensively by third-party libraries. - warnings.warn("DataFrame constructor is internal. Do not directly use it.") - self._sql_ctx = sql_ctx - session = sql_ctx.sparkSession - else: - session = sql_ctx - self._session: "SparkSession" = session - - self._sc: "SparkContext" = sql_ctx._sc - self._jdf: "JavaObject" = jdf - self.is_cached = False - # initialized lazily - self._schema: Optional[StructType] = None - self._lazy_rdd: Optional["RDD[Row]"] = None - # Check whether _repr_html is supported or not, we use it to avoid calling _jdf twice - # by __repr__ and _repr_html_ while eager evaluation opens. - self._support_repr_html = False - - @property - def sql_ctx(self) -> "SQLContext": - from pyspark.sql.context import SQLContext + ) -> "DataFrame": + from pyspark.sql.classic.dataframe import DataFrame - warnings.warn( - "DataFrame.sql_ctx is an internal property, and will be removed " - "in future releases. Use DataFrame.sparkSession instead." - ) - if self._sql_ctx is None: - self._sql_ctx = SQLContext._get_or_create(self._sc) - return self._sql_ctx + return DataFrame.__new__(DataFrame, jdf, sql_ctx) @property def sparkSession(self) -> "SparkSession": @@ -202,7 +163,7 @@ def sparkSession(self) -> "SparkSession": >>> type(df.sparkSession) """ - return self._session + ... if not is_remote_only(): @@ -222,14 +183,7 @@ def rdd(self) -> "RDD[Row]": >>> type(df.rdd) """ - from pyspark.core.rdd import RDD - - if self._lazy_rdd is None: - jrdd = self._jdf.javaToPython() - self._lazy_rdd = RDD( - jrdd, self.sparkSession._sc, BatchedSerializer(CPickleSerializer()) - ) - return self._lazy_rdd + ... @property def na(self) -> "DataFrameNaFunctions": @@ -259,7 +213,7 @@ def na(self) -> "DataFrameNaFunctions": | 1| 2| +---+---+ """ - return DataFrameNaFunctions(self) + ... @property def stat(self) -> "DataFrameStatFunctions": @@ -283,7 +237,7 @@ def stat(self) -> "DataFrameStatFunctions": >>> df.stat.corr("id", "c") 1.0 """ - return DataFrameStatFunctions(self) + ... if not is_remote_only(): @@ -309,11 +263,9 @@ def toJSON(self, use_unicode: bool = True) -> "RDD[str]": >>> df.toJSON().first() '{"age":2,"name":"Alice"}' """ - from pyspark.core.rdd import RDD - - rdd = self._jdf.toJSON() - return RDD(rdd.toJavaRDD(), self._sc, UTF8Deserializer(use_unicode)) + ... + @dispatch_df_method def registerTempTable(self, name: str) -> None: """Registers this :class:`DataFrame` as a temporary table using the given name. @@ -344,9 +296,9 @@ def registerTempTable(self, name: str) -> None: True """ - warnings.warn("Deprecated in 2.0, use createOrReplaceTempView instead.", FutureWarning) - self._jdf.createOrReplaceTempView(name) + ... + @dispatch_df_method def createTempView(self, name: str) -> None: """Creates a local temporary view with this :class:`DataFrame`. 
@@ -410,8 +362,9 @@ def createTempView(self, name: str) -> None: | 4|Jill| +---+----+ """ - self._jdf.createTempView(name) + ... + @dispatch_df_method def createOrReplaceTempView(self, name: str) -> None: """Creates or replaces a local temporary view with this :class:`DataFrame`. @@ -453,8 +406,9 @@ def createOrReplaceTempView(self, name: str) -> None: ... spark.catalog.dropTempView("people") True """ - self._jdf.createOrReplaceTempView(name) + ... + @dispatch_df_method def createGlobalTempView(self, name: str) -> None: """Creates a global temporary view with this :class:`DataFrame`. @@ -501,8 +455,9 @@ def createGlobalTempView(self, name: str) -> None: >>> spark.catalog.dropGlobalTempView("people") True """ - self._jdf.createGlobalTempView(name) + ... + @dispatch_df_method def createOrReplaceGlobalTempView(self, name: str) -> None: """Creates or replaces a global temporary view using the given name. @@ -537,7 +492,7 @@ def createOrReplaceGlobalTempView(self, name: str) -> None: >>> spark.catalog.dropGlobalTempView("people") True """ - self._jdf.createOrReplaceGlobalTempView(name) + ... @property def write(self) -> DataFrameWriter: @@ -566,7 +521,7 @@ def write(self) -> DataFrameWriter: >>> df.write.saveAsTable("tab2") >>> _ = spark.sql("DROP TABLE tab2") """ - return DataFrameWriter(self) + ... @property def writeStream(self) -> DataStreamWriter: @@ -602,7 +557,7 @@ def writeStream(self) -> DataStreamWriter: ... time.sleep(3) ... query.stop() """ - return DataStreamWriter(self) + ... @property def schema(self) -> StructType: @@ -646,18 +601,9 @@ def schema(self) -> StructType: StructType([StructField('value', StringType(), False)]) """ - if self._schema is None: - try: - self._schema = cast( - StructType, _parse_datatype_json_string(self._jdf.schema().json()) - ) - except Exception as e: - raise PySparkValueError( - error_class="CANNOT_PARSE_DATATYPE", - message_parameters={"error": str(e)}, - ) - return self._schema + ... + @dispatch_df_method def printSchema(self, level: Optional[int] = None) -> None: """Prints out the schema in the tree format. Optionally allows to specify how many levels to print if schema is nested. @@ -710,11 +656,9 @@ def printSchema(self, level: Optional[int] = None) -> None: |-- nonnullable: long (nullable = false) |-- nullable: void (nullable = true) """ - if level: - print(self._jdf.schema().treeString(level)) - else: - print(self._jdf.schema().treeString()) + ... + @dispatch_df_method def explain( self, extended: Optional[Union[bool, str]] = None, mode: Optional[str] = None ) -> None: @@ -782,56 +726,9 @@ def explain( ...Statistics... ... """ + ... 
- if extended is not None and mode is not None: - raise PySparkValueError( - error_class="CANNOT_SET_TOGETHER", - message_parameters={"arg_list": "extended and mode"}, - ) - - # For the no argument case: df.explain() - is_no_argument = extended is None and mode is None - - # For the cases below: - # explain(True) - # explain(extended=False) - is_extended_case = isinstance(extended, bool) and mode is None - - # For the case when extended is mode: - # df.explain("formatted") - is_extended_as_mode = isinstance(extended, str) and mode is None - - # For the mode specified: - # df.explain(mode="formatted") - is_mode_case = extended is None and isinstance(mode, str) - - if not (is_no_argument or is_extended_case or is_extended_as_mode or is_mode_case): - if (extended is not None) and (not isinstance(extended, (bool, str))): - raise PySparkTypeError( - error_class="NOT_BOOL_OR_STR", - message_parameters={ - "arg_name": "extended", - "arg_type": type(extended).__name__, - }, - ) - if (mode is not None) and (not isinstance(mode, str)): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "mode", "arg_type": type(mode).__name__}, - ) - - # Sets an explain mode depending on a given argument - if is_no_argument: - explain_mode = "simple" - elif is_extended_case: - explain_mode = "extended" if extended else "simple" - elif is_mode_case: - explain_mode = cast(str, mode) - elif is_extended_as_mode: - explain_mode = cast(str, extended) - assert self._sc._jvm is not None - print(self._sc._jvm.PythonSQLUtils.explainString(self._jdf.queryExecution(), explain_mode)) - + @dispatch_df_method def exceptAll(self, other: "DataFrame") -> "DataFrame": """Return a new :class:`DataFrame` containing rows in this :class:`DataFrame` but not in another :class:`DataFrame` while preserving duplicates. @@ -869,8 +766,9 @@ def exceptAll(self, other: "DataFrame") -> "DataFrame": +---+---+ """ - return DataFrame(self._jdf.exceptAll(other._jdf), self.sparkSession) + ... + @dispatch_df_method def isLocal(self) -> bool: """Returns ``True`` if the :func:`collect` and :func:`take` methods can be run locally (without any Spark executors). @@ -890,7 +788,7 @@ def isLocal(self) -> bool: >>> df.isLocal() True """ - return self._jdf.isLocal() + ... @property def isStreaming(self) -> bool: @@ -921,8 +819,9 @@ def isStreaming(self) -> bool: >>> df.isStreaming True """ - return self._jdf.isStreaming() + ... + @dispatch_df_method def isEmpty(self) -> bool: """ Checks if the :class:`DataFrame` is empty and returns a boolean value. @@ -972,8 +871,9 @@ def isEmpty(self) -> bool: >>> df_no_rows.isEmpty() True """ - return self._jdf.isEmpty() + ... + @dispatch_df_method def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False) -> None: """ Prints the first ``n`` rows of the DataFrame to the console. @@ -1063,73 +963,32 @@ def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = age | 19 name | This is a super l... 
""" - print(self._show_string(n, truncate, vertical)) - - def _show_string( - self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False - ) -> str: - if not isinstance(n, int) or isinstance(n, bool): - raise PySparkTypeError( - error_class="NOT_INT", - message_parameters={"arg_name": "n", "arg_type": type(n).__name__}, - ) - - if not isinstance(vertical, bool): - raise PySparkTypeError( - error_class="NOT_BOOL", - message_parameters={"arg_name": "vertical", "arg_type": type(vertical).__name__}, - ) - - if isinstance(truncate, bool) and truncate: - return self._jdf.showString(n, 20, vertical) - else: - try: - int_truncate = int(truncate) - except ValueError: - raise PySparkTypeError( - error_class="NOT_BOOL", - message_parameters={ - "arg_name": "truncate", - "arg_type": type(truncate).__name__, - }, - ) - - return self._jdf.showString(n, int_truncate, vertical) + ... + @dispatch_df_method def __repr__(self) -> str: - if not self._support_repr_html and self.sparkSession._jconf.isReplEagerEvalEnabled(): - vertical = False - return self._jdf.showString( - self.sparkSession._jconf.replEagerEvalMaxNumRows(), - self.sparkSession._jconf.replEagerEvalTruncate(), - vertical, - ) - else: - return "DataFrame[%s]" % (", ".join("%s: %s" % c for c in self.dtypes)) + ... + @dispatch_df_method def _repr_html_(self) -> Optional[str]: """Returns a :class:`DataFrame` with html code when you enabled eager evaluation by 'spark.sql.repl.eagerEval.enabled', this only called by REPL you are using support eager evaluation with HTML. """ - if not self._support_repr_html: - self._support_repr_html = True - if self.sparkSession._jconf.isReplEagerEvalEnabled(): - return self._jdf.htmlString( - self.sparkSession._jconf.replEagerEvalMaxNumRows(), - self.sparkSession._jconf.replEagerEvalTruncate(), - ) - else: - return None + ... def checkpoint(self, eager: bool = True) -> "DataFrame": - """Returns a checkpointed version of this :class:`DataFrame`. Checkpointing can be used to - truncate the logical plan of this :class:`DataFrame`, which is especially useful in - iterative algorithms where the plan may grow exponentially. It will be saved to files - inside the checkpoint directory set with :meth:`SparkContext.setCheckpointDir`. + """Returns a checkpointed version of this :class:`DataFrame`. Checkpointing can be + used to truncate the logical plan of this :class:`DataFrame`, which is especially + useful in iterative algorithms where the plan may grow exponentially. It will be + saved to files inside the checkpoint directory set with + :meth:`SparkContext.setCheckpointDir`, or `spark.checkpoint.dir` configuration. .. versionadded:: 2.1.0 + .. versionchanged:: 4.0.0 + Supports Spark Connect. + Parameters ---------- eager : bool, optional, default True @@ -1146,25 +1005,25 @@ def checkpoint(self, eager: bool = True) -> "DataFrame": Examples -------- - >>> import tempfile >>> df = spark.createDataFrame([ ... (14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"]) - >>> with tempfile.TemporaryDirectory(prefix="checkpoint") as d: - ... spark.sparkContext.setCheckpointDir("/tmp/bb") - ... df.checkpoint(False) + >>> df.checkpoint(False) # doctest: +SKIP DataFrame[age: bigint, name: string] """ - jdf = self._jdf.checkpoint(eager) - return DataFrame(jdf, self.sparkSession) + ... def localCheckpoint(self, eager: bool = True) -> "DataFrame": - """Returns a locally checkpointed version of this :class:`DataFrame`. 
Checkpointing can be - used to truncate the logical plan of this :class:`DataFrame`, which is especially useful in - iterative algorithms where the plan may grow exponentially. Local checkpoints are - stored in the executors using the caching subsystem and therefore they are not reliable. + """Returns a locally checkpointed version of this :class:`DataFrame`. Checkpointing can + be used to truncate the logical plan of this :class:`DataFrame`, which is especially + useful in iterative algorithms where the plan may grow exponentially. Local checkpoints + are stored in the executors using the caching subsystem and therefore they are not + reliable. .. versionadded:: 2.3.0 + .. versionchanged:: 4.0.0 + Supports Spark Connect. + Parameters ---------- eager : bool, optional, default True @@ -1186,9 +1045,9 @@ def localCheckpoint(self, eager: bool = True) -> "DataFrame": >>> df.localCheckpoint(False) DataFrame[age: bigint, name: string] """ - jdf = self._jdf.localCheckpoint(eager) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def withWatermark(self, eventTime: str, delayThreshold: str) -> "DataFrame": """Defines an event time watermark for this :class:`DataFrame`. A watermark tracks a point in time before which we assume no more late data is going to arrive. @@ -1252,22 +1111,9 @@ def withWatermark(self, eventTime: str, delayThreshold: str) -> "DataFrame": >>> time.sleep(3) >>> query.stop() """ - if not eventTime or type(eventTime) is not str: - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "eventTime", "arg_type": type(eventTime).__name__}, - ) - if not delayThreshold or type(delayThreshold) is not str: - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={ - "arg_name": "delayThreshold", - "arg_type": type(delayThreshold).__name__, - }, - ) - jdf = self._jdf.withWatermark(eventTime, delayThreshold) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def hint( self, name: str, *parameters: Union["PrimitiveType", "Column", List["PrimitiveType"]] ) -> "DataFrame": @@ -1308,67 +1154,9 @@ def hint( ... +- BroadcastHashJoin ... ... 
""" - if len(parameters) == 1 and isinstance(parameters[0], list): - parameters = parameters[0] # type: ignore[assignment] - - if not isinstance(name, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "name", "arg_type": type(name).__name__}, - ) - - allowed_types = (str, float, int, Column, list) - allowed_primitive_types = (str, float, int) - allowed_types_repr = ", ".join( - [t.__name__ for t in allowed_types[:-1]] - + ["list[" + t.__name__ + "]" for t in allowed_primitive_types] - ) - for p in parameters: - if not isinstance(p, allowed_types): - raise PySparkTypeError( - error_class="DISALLOWED_TYPE_FOR_CONTAINER", - message_parameters={ - "arg_name": "parameters", - "arg_type": type(parameters).__name__, - "allowed_types": allowed_types_repr, - "item_type": type(p).__name__, - }, - ) - if isinstance(p, list): - if not all(isinstance(e, allowed_primitive_types) for e in p): - raise PySparkTypeError( - error_class="DISALLOWED_TYPE_FOR_CONTAINER", - message_parameters={ - "arg_name": "parameters", - "arg_type": type(parameters).__name__, - "allowed_types": allowed_types_repr, - "item_type": type(p).__name__ + "[" + type(p[0]).__name__ + "]", - }, - ) - - def _converter(parameter: Union[str, list, float, int, Column]) -> Any: - if isinstance(parameter, Column): - return _to_java_column(parameter) - elif isinstance(parameter, list): - # for list input, we are assuming only one element type exist in the list. - # for empty list, we are converting it into an empty long[] in the JVM side. - gateway = self._sc._gateway - assert gateway is not None - jclass = gateway.jvm.long - if len(parameter) >= 1: - mapping = { - str: gateway.jvm.java.lang.String, - float: gateway.jvm.double, - int: gateway.jvm.long, - } - jclass = mapping[type(parameter[0])] - return toJArray(gateway, jclass, parameter) - else: - return parameter - - jdf = self._jdf.hint(name, self._jseq(parameters, _converter)) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def count(self) -> int: """Returns the number of rows in this :class:`DataFrame`. @@ -1392,8 +1180,9 @@ def count(self) -> int: >>> df.count() 3 """ - return int(self._jdf.count()) + ... + @dispatch_df_method def collect(self) -> List[Row]: """Returns all the records in the DataFrame as a list of :class:`Row`. @@ -1412,6 +1201,7 @@ def collect(self) -> List[Row]: DataFrame.take : Returns the first `n` rows. DataFrame.head : Returns the first `n` rows. DataFrame.toPandas : Returns the data as a pandas DataFrame. + DataFrame.toArrow : Returns the data as a PyArrow Table. Notes ----- @@ -1459,10 +1249,9 @@ def collect(self) -> List[Row]: >>> [row.asDict() for row in rows] [{'age': 14, 'name': 'Tom'}, {'age': 23, 'name': 'Alice'}, {'age': 16, 'name': 'Bob'}] """ - with SCCallSiteSync(self._sc): - sock_info = self._jdf.collectToPython() - return list(_load_from_socket(sock_info, BatchedSerializer(CPickleSerializer()))) + ... + @dispatch_df_method def toLocalIterator(self, prefetchPartitions: bool = False) -> Iterator[Row]: """ Returns an iterator that contains all of the rows in this :class:`DataFrame`. @@ -1495,10 +1284,9 @@ def toLocalIterator(self, prefetchPartitions: bool = False) -> Iterator[Row]: >>> list(df.toLocalIterator()) [Row(age=14, name='Tom'), Row(age=23, name='Alice'), Row(age=16, name='Bob')] """ - with SCCallSiteSync(self._sc): - sock_info = self._jdf.toPythonIterator(prefetchPartitions) - return _local_iterator_from_socket(sock_info, BatchedSerializer(CPickleSerializer())) + ... 
+ @dispatch_df_method def limit(self, num: int) -> "DataFrame": """Limits the result count to the number specified. @@ -1534,9 +1322,9 @@ def limit(self, num: int) -> "DataFrame": +---+----+ +---+----+ """ - jdf = self._jdf.limit(num) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def offset(self, num: int) -> "DataFrame": """Returns a new :class: `DataFrame` by skipping the first `n` rows. @@ -1572,9 +1360,9 @@ def offset(self, num: int) -> "DataFrame": +---+----+ +---+----+ """ - jdf = self._jdf.offset(num) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def take(self, num: int) -> List[Row]: """Returns the first ``num`` rows as a :class:`list` of :class:`Row`. @@ -1604,8 +1392,9 @@ def take(self, num: int) -> List[Row]: >>> df.take(2) [Row(age=14, name='Tom'), Row(age=23, name='Alice')] """ - return self.limit(num).collect() + ... + @dispatch_df_method def tail(self, num: int) -> List[Row]: """ Returns the last ``num`` rows as a :class:`list` of :class:`Row`. @@ -1637,10 +1426,9 @@ def tail(self, num: int) -> List[Row]: >>> df.tail(2) [Row(age=23, name='Alice'), Row(age=16, name='Bob')] """ - with SCCallSiteSync(self._sc): - sock_info = self._jdf.tailToPython(num) - return list(_load_from_socket(sock_info, BatchedSerializer(CPickleSerializer()))) + ... + @dispatch_df_method def foreach(self, f: Callable[[Row], None]) -> None: """Applies the ``f`` function to all :class:`Row` of this :class:`DataFrame`. @@ -1666,8 +1454,9 @@ def foreach(self, f: Callable[[Row], None]) -> None: ... >>> df.foreach(func) """ - self.rdd.foreach(f) + ... + @dispatch_df_method def foreachPartition(self, f: Callable[[Iterator[Row]], None]) -> None: """Applies the ``f`` function to each partition of this :class:`DataFrame`. @@ -1694,8 +1483,9 @@ def foreachPartition(self, f: Callable[[Iterator[Row]], None]) -> None: ... >>> df.foreachPartition(func) """ - self.rdd.foreachPartition(f) # type: ignore[arg-type] + ... + @dispatch_df_method def cache(self) -> "DataFrame": """Persists the :class:`DataFrame` with the default storage level (`MEMORY_AND_DISK_DESER`). @@ -1723,10 +1513,9 @@ def cache(self) -> "DataFrame": == Physical Plan == InMemoryTableScan ... """ - self.is_cached = True - self._jdf.cache() - return self + ... + @dispatch_df_method def persist( self, storageLevel: StorageLevel = (StorageLevel.MEMORY_AND_DISK_DESER), @@ -1771,10 +1560,7 @@ def persist( >>> df.persist(StorageLevel.DISK_ONLY) DataFrame[id: bigint] """ - self.is_cached = True - javaStorageLevel = self._sc._getJavaStorageLevel(storageLevel) - self._jdf.persist(javaStorageLevel) - return self + ... @property def storageLevel(self) -> StorageLevel: @@ -1802,16 +1588,9 @@ def storageLevel(self) -> StorageLevel: >>> df2.persist(StorageLevel.DISK_ONLY_2).storageLevel StorageLevel(True, False, False, False, 2) """ - java_storage_level = self._jdf.storageLevel() - storage_level = StorageLevel( - java_storage_level.useDisk(), - java_storage_level.useMemory(), - java_storage_level.useOffHeap(), - java_storage_level.deserialized(), - java_storage_level.replication(), - ) - return storage_level + ... + @dispatch_df_method def unpersist(self, blocking: bool = False) -> "DataFrame": """Marks the :class:`DataFrame` as non-persistent, and remove all blocks for it from memory and disk. 
@@ -1850,6 +1629,7 @@ def unpersist(self, blocking: bool = False) -> "DataFrame": self._jdf.unpersist(blocking) return self + @dispatch_df_method def coalesce(self, numPartitions: int) -> "DataFrame": """ Returns a new :class:`DataFrame` that has exactly `numPartitions` partitions. @@ -1915,7 +1695,8 @@ def repartition(self, numPartitions: int, *cols: "ColumnOrName") -> "DataFrame": def repartition(self, *cols: "ColumnOrName") -> "DataFrame": ... - def repartition( # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def repartition( self, numPartitions: Union[int, "ColumnOrName"], *cols: "ColumnOrName" ) -> "DataFrame": """ @@ -2020,25 +1801,7 @@ def repartition( # type: ignore[misc] | 2| +---------+ """ - if isinstance(numPartitions, int): - if len(cols) == 0: - return DataFrame(self._jdf.repartition(numPartitions), self.sparkSession) - else: - return DataFrame( - self._jdf.repartition(numPartitions, self._jcols(*cols)), - self.sparkSession, - ) - elif isinstance(numPartitions, (str, Column)): - cols = (numPartitions,) + cols - return DataFrame(self._jdf.repartition(self._jcols(*cols)), self.sparkSession) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_STR", - message_parameters={ - "arg_name": "numPartitions", - "arg_type": type(numPartitions).__name__, - }, - ) + ... @overload def repartitionByRange(self, numPartitions: int, *cols: "ColumnOrName") -> "DataFrame": @@ -2048,7 +1811,8 @@ def repartitionByRange(self, numPartitions: int, *cols: "ColumnOrName") -> "Data def repartitionByRange(self, *cols: "ColumnOrName") -> "DataFrame": ... - def repartitionByRange( # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def repartitionByRange( self, numPartitions: Union[int, "ColumnOrName"], *cols: "ColumnOrName" ) -> "DataFrame": """ @@ -2104,29 +1868,9 @@ def repartitionByRange( # type: ignore[misc] | 23|Alice| 1| +---+-----+--------------------+ """ - if isinstance(numPartitions, int): - if len(cols) == 0: - raise PySparkValueError( - error_class="CANNOT_BE_EMPTY", - message_parameters={"item": "partition-by expression"}, - ) - else: - return DataFrame( - self._jdf.repartitionByRange(numPartitions, self._jcols(*cols)), - self.sparkSession, - ) - elif isinstance(numPartitions, (str, Column)): - cols = (numPartitions,) + cols - return DataFrame(self._jdf.repartitionByRange(self._jcols(*cols)), self.sparkSession) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_INT_OR_STR", - message_parameters={ - "arg_name": "numPartitions", - "arg_type": type(numPartitions).__name__, - }, - ) + ... + @dispatch_df_method def distinct(self) -> "DataFrame": """Returns a new :class:`DataFrame` containing the distinct rows in this :class:`DataFrame`. @@ -2225,7 +1969,7 @@ def distinct(self) -> "DataFrame": | 23|Alice| F| +---+-----+------+ """ - return DataFrame(self._jdf.distinct(), self.sparkSession) + ... @overload def sample(self, fraction: float, seed: Optional[int] = ...) -> "DataFrame": @@ -2240,7 +1984,8 @@ def sample( ) -> "DataFrame": ... - def sample( # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def sample( self, withReplacement: Optional[Union[float, bool]] = None, fraction: Optional[Union[int, float]] = None, @@ -2290,47 +2035,9 @@ def sample( # type: ignore[misc] >>> df.sample(False, fraction=1.0).count() 10 """ + ... 
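The new `sample` stub above drops the classic argument normalization, which is removed just below: as its comments note, `withReplacement` may be passed explicitly, omitted with `fraction` given as a keyword, or omitted entirely with the fraction passed positionally (in which case the positional arguments are shifted). A short, self-contained illustration of those call shapes:

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()
    df = spark.range(100)

    # Call shapes the removed validation accepted, per its own comments:
    df.sample(0.5)                    # fraction only, positional
    df.sample(0.5, 3)                 # fraction + seed; arguments are shifted internally
    df.sample(fraction=0.5, seed=3)   # fraction as a keyword
    df.sample(True, 0.5, 3)           # withReplacement, fraction, seed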
- # For the cases below: - # sample(True, 0.5 [, seed]) - # sample(True, fraction=0.5 [, seed]) - # sample(withReplacement=False, fraction=0.5 [, seed]) - is_withReplacement_set = type(withReplacement) == bool and isinstance(fraction, float) - - # For the case below: - # sample(faction=0.5 [, seed]) - is_withReplacement_omitted_kwargs = withReplacement is None and isinstance(fraction, float) - - # For the case below: - # sample(0.5 [, seed]) - is_withReplacement_omitted_args = isinstance(withReplacement, float) - - if not ( - is_withReplacement_set - or is_withReplacement_omitted_kwargs - or is_withReplacement_omitted_args - ): - argtypes = [type(arg).__name__ for arg in [withReplacement, fraction, seed]] - raise PySparkTypeError( - error_class="NOT_BOOL_OR_FLOAT_OR_INT", - message_parameters={ - "arg_name": "withReplacement (optional), " - + "fraction (required) and seed (optional)", - "arg_type": ", ".join(argtypes), - }, - ) - - if is_withReplacement_omitted_args: - if fraction is not None: - seed = cast(int, fraction) - fraction = withReplacement - withReplacement = None - - seed = int(seed) if seed is not None else None - args = [arg for arg in [withReplacement, fraction, seed] if arg is not None] - jdf = self._jdf.sample(*args) - return DataFrame(jdf, self.sparkSession) - + @dispatch_df_method def sampleBy( self, col: "ColumnOrName", fractions: Dict[Any, float], seed: Optional[int] = None ) -> "DataFrame": @@ -2375,36 +2082,9 @@ def sampleBy( >>> dataset.sampleBy(col("key"), fractions={2: 1.0}, seed=0).count() 33 """ - if isinstance(col, str): - col = Column(col) - elif not isinstance(col, Column): - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_STR", - message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, - ) - if not isinstance(fractions, dict): - raise PySparkTypeError( - error_class="NOT_DICT", - message_parameters={"arg_name": "fractions", "arg_type": type(fractions).__name__}, - ) - for k, v in fractions.items(): - if not isinstance(k, (float, int, str)): - raise PySparkTypeError( - error_class="DISALLOWED_TYPE_FOR_CONTAINER", - message_parameters={ - "arg_name": "fractions", - "arg_type": type(fractions).__name__, - "allowed_types": "float, int, str", - "item_type": type(k).__name__, - }, - ) - fractions[k] = float(v) - col = col._jc - seed = seed if seed is not None else random.randint(0, sys.maxsize) - return DataFrame( - self._jdf.stat().sampleBy(col, self._jmap(fractions), seed), self.sparkSession - ) + ... + @dispatch_df_method def randomSplit(self, weights: List[float], seed: Optional[int] = None) -> List["DataFrame"]: """Randomly splits this :class:`DataFrame` with the provided weights. @@ -2442,17 +2122,7 @@ def randomSplit(self, weights: List[float], seed: Optional[int] = None) -> List[ >>> splits[1].count() 2 """ - for w in weights: - if w < 0.0: - raise PySparkValueError( - error_class="VALUE_NOT_POSITIVE", - message_parameters={"arg_name": "weights", "arg_value": str(w)}, - ) - seed = seed if seed is not None else random.randint(0, sys.maxsize) - df_array = self._jdf.randomSplit( - _to_list(self.sparkSession._sc, cast(List["ColumnOrName"], weights)), int(seed) - ) - return [DataFrame(df, self.sparkSession) for df in df_array] + ... @property def dtypes(self) -> List[Tuple[str, str]]: @@ -2475,7 +2145,7 @@ def dtypes(self) -> List[Tuple[str, str]]: >>> df.dtypes [('age', 'bigint'), ('name', 'string')] """ - return [(str(f.name), f.dataType.simpleString()) for f in self.schema.fields] + ... 
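Throughout this hunk the py4j-backed bodies (direct `self._jdf` calls, `SCCallSiteSync`, socket-based collection) are deleted and each public method becomes a bare `...` stub under `@dispatch_df_method`, so the shared `DataFrame` API can be served by either the classic or the Spark Connect implementation. The decorator itself is defined outside this diff; the sketch below is only a hypothetical stand-in to illustrate the dispatch idea, not the actual `pyspark.sql.utils` code:

    import functools

    def dispatch_df_method(f):
        # Hypothetical stand-in: forward a stubbed base-class method to whichever
        # concrete DataFrame subclass (classic or Connect) overrides it.
        @functools.wraps(f)
        def wrapper(self, *args, **kwargs):
            impl = getattr(type(self), f.__name__, None)
            if impl is None or getattr(impl, "__wrapped__", None) is f:
                # No concrete override on the runtime class; fail loudly.
                raise NotImplementedError(f"{f.__name__} has no concrete implementation")
            return impl(self, *args, **kwargs)
        return wrapper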
@property def columns(self) -> List[str]: @@ -2552,8 +2222,9 @@ def columns(self) -> List[str]: >>> df.columns == df2.columns False """ - return [f.name for f in self.schema.fields] + ... + @dispatch_df_method def colRegex(self, colName: str) -> Column: """ Selects column based on the column name specified as a regex and returns it @@ -2585,14 +2256,9 @@ def colRegex(self, colName: str) -> Column: | 3| +----+ """ - if not isinstance(colName, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "colName", "arg_type": type(colName).__name__}, - ) - jc = self._jdf.colRegex(colName) - return Column(jc) + ... + @dispatch_df_method def to(self, schema: StructType) -> "DataFrame": """ Returns a new :class:`DataFrame` where each row is reconciled to match the specified @@ -2648,10 +2314,9 @@ def to(self, schema: StructType) -> "DataFrame": | 1| a| +---+---+ """ - assert schema is not None - jschema = self._jdf.sparkSession().parseDataType(schema.json()) - return DataFrame(self._jdf.to(jschema), self.sparkSession) + ... + @dispatch_df_method def alias(self, alias: str) -> "DataFrame": """Returns a new :class:`DataFrame` with an alias set. @@ -2688,9 +2353,9 @@ def alias(self, alias: str) -> "DataFrame": |Alice|Alice| 23| +-----+-----+---+ """ - assert isinstance(alias, str), "alias should be a string" - return DataFrame(getattr(self._jdf, "as")(alias), self.sparkSession) + ... + @dispatch_df_method def crossJoin(self, other: "DataFrame") -> "DataFrame": """Returns the cartesian product with another :class:`DataFrame`. @@ -2728,10 +2393,9 @@ def crossJoin(self, other: "DataFrame") -> "DataFrame": | 16| Bob| 85| +---+-----+------+ """ + ... - jdf = self._jdf.crossJoin(other._jdf) - return DataFrame(jdf, self.sparkSession) - + @dispatch_df_method def join( self, other: "DataFrame", @@ -2862,7 +2526,7 @@ def join( Outer join on multiple columns - >>> df.join(df3, ["name", "age"], "outer").show() + >>> df.join(df3, ["name", "age"], "outer").sort("name", "age").show() +-----+----+------+ | name| age|height| +-----+----+------+ @@ -2911,30 +2575,10 @@ def join( |Alice| 2| +-----+---+ """ - - if on is not None and not isinstance(on, list): - on = [on] # type: ignore[assignment] - - if on is not None: - if isinstance(on[0], str): - on = self._jseq(cast(List[str], on)) - else: - assert isinstance(on[0], Column), "on should be Column or list of Column" - on = reduce(lambda x, y: x.__and__(y), cast(List[Column], on)) - on = on._jc - - if on is None and how is None: - jdf = self._jdf.join(other._jdf) - else: - if how is None: - how = "inner" - if on is None: - on = self._jseq([]) - assert isinstance(how, str), "how should be a string" - jdf = self._jdf.join(other._jdf, on, how) - return DataFrame(jdf, self.sparkSession) + ... # TODO(SPARK-22947): Fix the DataFrame API. 
+ @dispatch_df_method def _joinAsOf( self, other: "DataFrame", @@ -3018,44 +2662,9 @@ def _joinAsOf( [Row(a=1, left_val='a', right_val=1), Row(a=5, left_val='b', right_val=6)] """ - if isinstance(leftAsOfColumn, str): - leftAsOfColumn = self[leftAsOfColumn] - left_as_of_jcol = leftAsOfColumn._jc - if isinstance(rightAsOfColumn, str): - rightAsOfColumn = other[rightAsOfColumn] - right_as_of_jcol = rightAsOfColumn._jc - - if on is not None and not isinstance(on, list): - on = [on] # type: ignore[assignment] - - if on is not None: - if isinstance(on[0], str): - on = self._jseq(cast(List[str], on)) - else: - assert isinstance(on[0], Column), "on should be Column or list of Column" - on = reduce(lambda x, y: x.__and__(y), cast(List[Column], on)) - on = on._jc - - if how is None: - how = "inner" - assert isinstance(how, str), "how should be a string" - - if tolerance is not None: - assert isinstance(tolerance, Column), "tolerance should be Column" - tolerance = tolerance._jc - - jdf = self._jdf.joinAsOf( - other._jdf, - left_as_of_jcol, - right_as_of_jcol, - on, - how, - tolerance, - allowExactMatches, - direction, - ) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def sortWithinPartitions( self, *cols: Union[int, str, Column, List[Union[int, str, Column]]], @@ -3117,9 +2726,9 @@ def sortWithinPartitions( | 2|Alice| +---+-----+ """ - jdf = self._jdf.sortWithinPartitions(self._sort_cols(cols, kwargs)) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def sort( self, *cols: Union[int, str, Column, List[Union[int, str, Column]]], @@ -3277,98 +2886,11 @@ def sort( | 2|Alice| +---+-----+ """ - jdf = self._jdf.sort(self._sort_cols(cols, kwargs)) - return DataFrame(jdf, self.sparkSession) + ... orderBy = sort - def _jseq( - self, - cols: Sequence, - converter: Optional[Callable[..., Union["PrimitiveType", "JavaObject"]]] = None, - ) -> "JavaObject": - """Return a JVM Seq of Columns from a list of Column or names""" - return _to_seq(self.sparkSession._sc, cols, converter) - - def _jmap(self, jm: Dict) -> "JavaObject": - """Return a JVM Scala Map from a dict""" - return _to_scala_map(self.sparkSession._sc, jm) - - def _jcols(self, *cols: "ColumnOrName") -> "JavaObject": - """Return a JVM Seq of Columns from a list of Column or column names - - If `cols` has only one list in it, cols[0] will be used as the list. - """ - if len(cols) == 1 and isinstance(cols[0], list): - cols = cols[0] - return self._jseq(cols, _to_java_column) - - def _jcols_ordinal(self, *cols: "ColumnOrNameOrOrdinal") -> "JavaObject": - """Return a JVM Seq of Columns from a list of Column or column names or column ordinals. - - If `cols` has only one list in it, cols[0] will be used as the list. 
- """ - if len(cols) == 1 and isinstance(cols[0], list): - cols = cols[0] - - _cols = [] - for c in cols: - if isinstance(c, int) and not isinstance(c, bool): - if c < 1: - raise PySparkIndexError( - error_class="INDEX_NOT_POSITIVE", message_parameters={"index": str(c)} - ) - # ordinal is 1-based - _cols.append(self[c - 1]) - else: - _cols.append(c) # type: ignore[arg-type] - return self._jseq(_cols, _to_java_column) - - def _sort_cols( - self, - cols: Sequence[Union[int, str, Column, List[Union[int, str, Column]]]], - kwargs: Dict[str, Any], - ) -> "JavaObject": - """Return a JVM Seq of Columns that describes the sort order""" - if not cols: - raise PySparkValueError( - error_class="CANNOT_BE_EMPTY", - message_parameters={"item": "column"}, - ) - if len(cols) == 1 and isinstance(cols[0], list): - cols = cols[0] - - jcols = [] - for c in cols: - if isinstance(c, int) and not isinstance(c, bool): - # ordinal is 1-based - if c > 0: - _c = self[c - 1] - # negative ordinal means sort by desc - elif c < 0: - _c = self[-c - 1].desc() - else: - raise PySparkIndexError( - error_class="ZERO_INDEX", - message_parameters={}, - ) - else: - _c = c # type: ignore[assignment] - jcols.append(_to_java_column(cast("ColumnOrName", _c))) - - ascending = kwargs.get("ascending", True) - if isinstance(ascending, (bool, int)): - if not ascending: - jcols = [jc.desc() for jc in jcols] - elif isinstance(ascending, list): - jcols = [jc if asc else jc.desc() for asc, jc in zip(ascending, jcols)] - else: - raise PySparkTypeError( - error_class="NOT_BOOL_OR_LIST", - message_parameters={"arg_name": "ascending", "arg_type": type(ascending).__name__}, - ) - return self._jseq(jcols) - + @dispatch_df_method def describe(self, *cols: Union[str, List[str]]) -> "DataFrame": """Computes basic statistics for numeric and string columns. @@ -3430,11 +2952,9 @@ def describe(self, *cols: Union[str, List[str]]) -> "DataFrame": -------- DataFrame.summary """ - if len(cols) == 1 and isinstance(cols[0], list): - cols = cols[0] # type: ignore[assignment] - jdf = self._jdf.describe(self._jseq(cols)) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def summary(self, *statistics: str) -> "DataFrame": """Computes specified statistics for numeric and string columns. Available statistics are: - count @@ -3503,10 +3023,7 @@ def summary(self, *statistics: str) -> "DataFrame": -------- DataFrame.display """ - if len(statistics) == 1 and isinstance(statistics[0], list): - statistics = statistics[0] - jdf = self._jdf.summary(self._jseq(statistics)) - return DataFrame(jdf, self.sparkSession) + ... @overload def head(self) -> Optional[Row]: @@ -3516,6 +3033,7 @@ def head(self) -> Optional[Row]: def head(self, n: int) -> List[Row]: ... + @dispatch_df_method def head(self, n: Optional[int] = None) -> Union[Optional[Row], List[Row]]: """Returns the first ``n`` rows. @@ -3551,11 +3069,9 @@ def head(self, n: Optional[int] = None) -> Union[Optional[Row], List[Row]]: >>> df.head(0) [] """ - if n is None: - rs = self.head(1) - return rs[0] if rs else None - return self.take(n) + ... + @dispatch_df_method def first(self) -> Optional[Row]: """Returns the first row as a :class:`Row`. @@ -3576,7 +3092,7 @@ def first(self) -> Optional[Row]: >>> df.first() Row(age=2, name='Alice') """ - return self.head() + ... @overload def __getitem__(self, item: Union[int, str]) -> Column: @@ -3586,6 +3102,7 @@ def __getitem__(self, item: Union[int, str]) -> Column: def __getitem__(self, item: Union[Column, List, Tuple]) -> "DataFrame": ... 
+ @dispatch_df_method def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Column, "DataFrame"]: """Returns the column as a :class:`Column`. @@ -3658,22 +3175,9 @@ def __getitem__(self, item: Union[int, str, Column, List, Tuple]) -> Union[Colum | 5| Bob| +---+----+ """ - if isinstance(item, str): - jc = self._jdf.apply(item) - return Column(jc) - elif isinstance(item, Column): - return self.filter(item) - elif isinstance(item, (list, tuple)): - return self.select(*item) - elif isinstance(item, int): - jc = self._jdf.apply(self.columns[item]) - return Column(jc) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_FLOAT_OR_INT_OR_LIST_OR_STR", - message_parameters={"arg_name": "item", "arg_type": type(item).__name__}, - ) + ... + @dispatch_df_method def __getattr__(self, name: str) -> Column: """Returns the :class:`Column` denoted by ``name``. @@ -3707,13 +3211,9 @@ def __getattr__(self, name: str) -> Column: | 5| +---+ """ - if name not in self.columns: - raise PySparkAttributeError( - error_class="ATTRIBUTE_NOT_SUPPORTED", message_parameters={"attr_name": name} - ) - jc = self._jdf.apply(name) - return Column(jc) + ... + @dispatch_df_method def __dir__(self) -> List[str]: """ Examples @@ -3751,9 +3251,7 @@ def __dir__(self) -> List[str]: >>> [attr for attr in dir(df) if attr[0] == 'i'][:7] # Doesn't include 1 or name 1 ['i_like_pancakes', 'id', 'id2', 'inputFiles', 'intersect', 'intersectAll', 'isEmpty'] """ - attrs = set(super().__dir__()) - attrs.update(filter(lambda s: s.isidentifier(), self.columns)) - return sorted(attrs) + ... @overload def select(self, *cols: "ColumnOrName") -> "DataFrame": @@ -3763,7 +3261,8 @@ def select(self, *cols: "ColumnOrName") -> "DataFrame": def select(self, __cols: Union[List[Column], List[str]]) -> "DataFrame": ... - def select(self, *cols: "ColumnOrName") -> "DataFrame": # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def select(self, *cols: "ColumnOrName") -> "DataFrame": """Projects a set of expressions and returns a new :class:`DataFrame`. .. versionadded:: 1.3.0 @@ -3808,8 +3307,7 @@ def select(self, *cols: "ColumnOrName") -> "DataFrame": # type: ignore[misc] | Bob| 15| +-----+---+ """ - jdf = self._jdf.select(self._jcols(*cols)) - return DataFrame(jdf, self.sparkSession) + ... @overload def selectExpr(self, *expr: str) -> "DataFrame": @@ -3819,6 +3317,7 @@ def selectExpr(self, *expr: str) -> "DataFrame": def selectExpr(self, *expr: List[str]) -> "DataFrame": ... + @dispatch_df_method def selectExpr(self, *expr: Union[str, List[str]]) -> "DataFrame": """Projects a set of SQL expressions and returns a new :class:`DataFrame`. @@ -3846,11 +3345,9 @@ def selectExpr(self, *expr: Union[str, List[str]]) -> "DataFrame": | 10| 5| +---------+--------+ """ - if len(expr) == 1 and isinstance(expr[0], list): - expr = expr[0] # type: ignore[assignment] - jdf = self._jdf.selectExpr(self._jseq(expr)) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def filter(self, condition: "ColumnOrName") -> "DataFrame": """Filters rows using the given condition. 
@@ -4004,16 +3501,7 @@ def filter(self, condition: "ColumnOrName") -> "DataFrame": | 5| Bob|Physics| +---+-----+-------+ """ - if isinstance(condition, str): - jdf = self._jdf.filter(condition) - elif isinstance(condition, Column): - jdf = self._jdf.filter(condition._jc) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_STR", - message_parameters={"arg_name": "condition", "arg_type": type(condition).__name__}, - ) - return DataFrame(jdf, self.sparkSession) + ... @overload def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": @@ -4023,7 +3511,8 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": def groupBy(self, __cols: Union[List[Column], List[str], List[int]]) -> "GroupedData": ... - def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": """ Groups the :class:`DataFrame` by the specified columns so that aggregation can be performed on them. @@ -4122,10 +3611,7 @@ def groupBy(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ign | Bob| 5| 1| +-----+---+-----+ """ - jgd = self._jdf.groupBy(self._jcols_ordinal(*cols)) - from pyspark.sql.group import GroupedData - - return GroupedData(jgd, self) + ... @overload def rollup(self, *cols: "ColumnOrName") -> "GroupedData": @@ -4135,7 +3621,8 @@ def rollup(self, *cols: "ColumnOrName") -> "GroupedData": def rollup(self, __cols: Union[List[Column], List[str]]) -> "GroupedData": ... - def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": """ Create a multi-dimensional rollup for the current :class:`DataFrame` using the specified columns, allowing for aggregation on them. @@ -4207,10 +3694,7 @@ def rollup(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: igno | Bob| 5| 1| +-----+----+-----+ """ - jgd = self._jdf.rollup(self._jcols_ordinal(*cols)) - from pyspark.sql.group import GroupedData - - return GroupedData(jgd, self) + ... @overload def cube(self, *cols: "ColumnOrName") -> "GroupedData": @@ -4220,7 +3704,8 @@ def cube(self, *cols: "ColumnOrName") -> "GroupedData": def cube(self, __cols: Union[List[Column], List[str]]) -> "GroupedData": ... - def cube(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def cube(self, *cols: "ColumnOrName") -> "GroupedData": """ Create a multi-dimensional cube for the current :class:`DataFrame` using the specified columns, allowing aggregations to be performed on them. @@ -4297,11 +3782,9 @@ def cube(self, *cols: "ColumnOrName") -> "GroupedData": # type: ignore[misc] | Bob| 5| 1| +-----+----+-----+ """ - jgd = self._jdf.cube(self._jcols_ordinal(*cols)) - from pyspark.sql.group import GroupedData - - return GroupedData(jgd, self) + ... + @dispatch_df_method def groupingSets( self, groupingSets: Sequence[Sequence["ColumnOrName"]], *cols: "ColumnOrName" ) -> "GroupedData": @@ -4391,13 +3874,9 @@ def groupingSets( -------- GroupedData """ - from pyspark.sql.group import GroupedData - - jgrouping_sets = _to_seq(self._sc, [self._jcols(*inner) for inner in groupingSets]) - - jgd = self._jdf.groupingSets(jgrouping_sets, self._jcols(*cols)) - return GroupedData(jgd, self) + ... 
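`groupingSets` is likewise reduced to a stub; per the signature above it takes a sequence of grouping sets followed by the grouping columns and returns a `GroupedData`. A usage sketch based only on that signature (data and column names are invented, output omitted):

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as sf

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame(
        [("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2013, 48000)],
        ["course", "year", "earnings"],
    )

    # Roughly GROUP BY course, year GROUPING SETS ((course), (year)) in SQL.
    df.groupingSets([["course"], ["year"]], "course", "year").agg(
        sf.sum("earnings").alias("total")
    ).show()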
+ @dispatch_df_method def unpivot( self, ids: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]], @@ -4479,26 +3958,9 @@ def unpivot( -------- DataFrame.melt """ - assert ids is not None, "ids must not be None" - - def to_jcols( - cols: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]] - ) -> "JavaObject": - if isinstance(cols, list): - return self._jcols(*cols) - if isinstance(cols, tuple): - return self._jcols(*list(cols)) - return self._jcols(cols) - - jids = to_jcols(ids) - if values is None: - jdf = self._jdf.unpivotWithSeq(jids, variableColumnName, valueColumnName) - else: - jvals = to_jcols(values) - jdf = self._jdf.unpivotWithSeq(jids, jvals, variableColumnName, valueColumnName) - - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def melt( self, ids: Union["ColumnOrName", List["ColumnOrName"], Tuple["ColumnOrName", ...]], @@ -4542,8 +4004,9 @@ def melt( ----- Supports Spark Connect. """ - return self.unpivot(ids, values, variableColumnName, valueColumnName) + ... + @dispatch_df_method def agg(self, *exprs: Union[Column, Dict[str, str]]) -> "DataFrame": """Aggregate on the entire :class:`DataFrame` without groups (shorthand for ``df.groupBy().agg()``). @@ -4580,8 +4043,9 @@ def agg(self, *exprs: Union[Column, Dict[str, str]]) -> "DataFrame": | 2| +--------+ """ - return self.groupBy().agg(*exprs) # type: ignore[arg-type] + ... + @dispatch_df_method def observe( self, observation: Union["Observation", str], @@ -4651,6 +4115,7 @@ def observe( When ``observation`` is a string, streaming queries also work as below. >>> from pyspark.sql.streaming import StreamingQueryListener + >>> import time >>> class MyErrorListener(StreamingQueryListener): ... def onQueryStarted(self, event): ... pass @@ -4671,45 +4136,28 @@ def observe( ... def onQueryTerminated(self, event): ... pass ... - >>> spark.streams.addListener(MyErrorListener()) + >>> error_listener = MyErrorListener() + >>> spark.streams.addListener(error_listener) + >>> sdf = spark.readStream.format("rate").load().withColumn( + ... "error", col("value") + ... ) >>> # Observe row count (rc) and error row count (erc) in the streaming Dataset - ... observed_ds = df.observe( + ... observed_ds = sdf.observe( ... "my_event", ... count(lit(1)).alias("rc"), - ... count(col("error")).alias("erc")) # doctest: +SKIP - >>> observed_ds.writeStream.format("console").start() # doctest: +SKIP - """ - from pyspark.sql import Observation - - if len(exprs) == 0: - raise PySparkValueError( - error_class="CANNOT_BE_EMPTY", - message_parameters={"item": "exprs"}, - ) - if not all(isinstance(c, Column) for c in exprs): - raise PySparkTypeError( - error_class="NOT_LIST_OF_COLUMN", - message_parameters={"arg_name": "exprs"}, - ) - - if isinstance(observation, Observation): - return observation._on(self, *exprs) - elif isinstance(observation, str): - return DataFrame( - self._jdf.observe( - observation, exprs[0]._jc, _to_seq(self._sc, [c._jc for c in exprs[1:]]) - ), - self.sparkSession, - ) - else: - raise PySparkTypeError( - error_class="NOT_LIST_OF_COLUMN", - message_parameters={ - "arg_name": "observation", - "arg_type": type(observation).__name__, - }, - ) + ... count(col("error")).alias("erc")) + >>> try: + ... q = observed_ds.writeStream.format("console").start() + ... time.sleep(5) + ... + ... finally: + ... q.stop() + ... spark.streams.removeListener(error_listener) + ... + """ + ... 
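The streaming `observe` doctest above is reworked so it actually builds a rate-source stream, keeps a handle to the listener, and removes the listener in a `finally` block. The removed body also shows the other branch, where `observation` is an `Observation` object rather than a string; a minimal batch-side sketch of that path, assuming the standard `Observation` API:

    from pyspark.sql import Observation, SparkSession
    from pyspark.sql import functions as sf

    spark = SparkSession.builder.getOrCreate()
    df = spark.range(10)

    observation = Observation("my_metrics")
    observed = df.observe(
        observation,
        sf.count(sf.lit(1)).alias("rc"),
        sf.max("id").alias("max_id"),
    )
    observed.count()        # metrics become available once an action runs
    print(observation.get)  # {'rc': 10, 'max_id': 9}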
+ @dispatch_df_method def union(self, other: "DataFrame") -> "DataFrame": """Return a new :class:`DataFrame` containing the union of rows in this and another :class:`DataFrame`. @@ -4806,8 +4254,9 @@ def union(self, other: "DataFrame") -> "DataFrame": | 4| D| +---+-----+ """ - return DataFrame(self._jdf.union(other._jdf), self.sparkSession) + ... + @dispatch_df_method def unionAll(self, other: "DataFrame") -> "DataFrame": """Return a new :class:`DataFrame` containing the union of rows in this and another :class:`DataFrame`. @@ -4840,8 +4289,9 @@ def unionAll(self, other: "DataFrame") -> "DataFrame": -------- DataFrame.union """ - return self.union(other) + ... + @dispatch_df_method def unionByName(self, other: "DataFrame", allowMissingColumns: bool = False) -> "DataFrame": """Returns a new :class:`DataFrame` containing union of rows in this and another :class:`DataFrame`. @@ -4920,8 +4370,9 @@ def unionByName(self, other: "DataFrame", allowMissingColumns: bool = False) -> |NULL|NULL|NULL| 3| 4| 5| +----+----+----+----+----+----+ """ - return DataFrame(self._jdf.unionByName(other._jdf, allowMissingColumns), self.sparkSession) + ... + @dispatch_df_method def intersect(self, other: "DataFrame") -> "DataFrame": """Return a new :class:`DataFrame` containing rows only in both this :class:`DataFrame` and another :class:`DataFrame`. @@ -4986,8 +4437,9 @@ def intersect(self, other: "DataFrame") -> "DataFrame": | 1| 2| +---+---+ """ - return DataFrame(self._jdf.intersect(other._jdf), self.sparkSession) + ... + @dispatch_df_method def intersectAll(self, other: "DataFrame") -> "DataFrame": """Return a new :class:`DataFrame` containing rows in both this :class:`DataFrame` and another :class:`DataFrame` while preserving duplicates. @@ -5051,8 +4503,9 @@ def intersectAll(self, other: "DataFrame") -> "DataFrame": | 1| 2| +---+---+ """ - return DataFrame(self._jdf.intersectAll(other._jdf), self.sparkSession) + ... + @dispatch_df_method def subtract(self, other: "DataFrame") -> "DataFrame": """Return a new :class:`DataFrame` containing rows in this :class:`DataFrame` but not in another :class:`DataFrame`. @@ -5113,9 +4566,10 @@ def subtract(self, other: "DataFrame") -> "DataFrame": +---+---+ +---+---+ """ - return DataFrame(getattr(self._jdf, "except")(other._jdf), self.sparkSession) + ... - def dropDuplicates(self, subset: Optional[List[str]] = None) -> "DataFrame": + @dispatch_df_method + def dropDuplicates(self, *subset: Union[str, List[str]]) -> "DataFrame": """Return a new :class:`DataFrame` with duplicate rows removed, optionally only considering certain columns. @@ -5132,6 +4586,9 @@ def dropDuplicates(self, subset: Optional[List[str]] = None) -> "DataFrame": .. versionchanged:: 3.4.0 Supports Spark Connect. + .. versionchanged:: 4.0.0 + Supports variable-length argument + Parameters ---------- subset : list of column names, optional @@ -5163,26 +4620,17 @@ def dropDuplicates(self, subset: Optional[List[str]] = None) -> "DataFrame": Deduplicate values on 'name' and 'height' columns. 
- >>> df.dropDuplicates(['name', 'height']).show() + >>> df.dropDuplicates('name', 'height').show() +-----+---+------+ | name|age|height| +-----+---+------+ |Alice| 5| 80| +-----+---+------+ """ - if subset is not None and (not isinstance(subset, Iterable) or isinstance(subset, str)): - raise PySparkTypeError( - error_class="NOT_LIST_OR_TUPLE", - message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, - ) - - if subset is None: - jdf = self._jdf.dropDuplicates() - else: - jdf = self._jdf.dropDuplicates(self._jseq(subset)) - return DataFrame(jdf, self.sparkSession) + ... - def dropDuplicatesWithinWatermark(self, subset: Optional[List[str]] = None) -> "DataFrame": + @dispatch_df_method + def dropDuplicatesWithinWatermark(self, *subset: Union[str, List[str]]) -> "DataFrame": """Return a new :class:`DataFrame` with duplicate rows removed, optionally only considering certain columns, within watermark. @@ -5199,6 +4647,9 @@ def dropDuplicatesWithinWatermark(self, subset: Optional[List[str]] = None) -> " .. versionadded:: 3.5.0 + .. versionchanged:: 4.0.0 + Supports variable-length argument + Parameters ---------- subset : List of column names, optional @@ -5228,20 +4679,11 @@ def dropDuplicatesWithinWatermark(self, subset: Optional[List[str]] = None) -> " Deduplicate values on 'value' columns. - >>> df.dropDuplicatesWithinWatermark(['value']) # doctest: +SKIP + >>> df.dropDuplicatesWithinWatermark('value') # doctest: +SKIP """ - if subset is not None and (not isinstance(subset, Iterable) or isinstance(subset, str)): - raise PySparkTypeError( - error_class="NOT_LIST_OR_TUPLE", - message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, - ) - - if subset is None: - jdf = self._jdf.dropDuplicatesWithinWatermark() - else: - jdf = self._jdf.dropDuplicatesWithinWatermark(self._jseq(subset)) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def dropna( self, how: str = "any", @@ -5323,26 +4765,7 @@ def dropna( | 5| NULL| Bob| +---+------+-----+ """ - if how is not None and how not in ["any", "all"]: - raise PySparkValueError( - error_class="VALUE_NOT_ANY_OR_ALL", - message_parameters={"arg_name": "how", "arg_type": how}, - ) - - if subset is None: - subset = self.columns - elif isinstance(subset, str): - subset = [subset] - elif not isinstance(subset, (list, tuple)): - raise PySparkTypeError( - error_class="NOT_LIST_OR_STR_OR_TUPLE", - message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, - ) - - if thresh is None: - thresh = len(subset) if how == "any" else 1 - - return DataFrame(self._jdf.na().drop(thresh, self._jseq(subset)), self.sparkSession) + ... @overload def fillna( @@ -5356,6 +4779,7 @@ def fillna( def fillna(self, value: Dict[str, "LiteralType"]) -> "DataFrame": ... 
+ @dispatch_df_method def fillna( self, value: Union["LiteralType", Dict[str, "LiteralType"]], @@ -5445,32 +4869,7 @@ def fillna( |NULL| NULL|Spark|true| +----+------+-----+----+ """ - if not isinstance(value, (float, int, str, bool, dict)): - raise PySparkTypeError( - error_class="NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_STR", - message_parameters={"arg_name": "value", "arg_type": type(value).__name__}, - ) - - # Note that bool validates isinstance(int), but we don't want to - # convert bools to floats - - if not isinstance(value, bool) and isinstance(value, int): - value = float(value) - - if isinstance(value, dict): - return DataFrame(self._jdf.na().fill(value), self.sparkSession) - elif subset is None: - return DataFrame(self._jdf.na().fill(value), self.sparkSession) - else: - if isinstance(subset, str): - subset = [subset] - elif not isinstance(subset, (list, tuple)): - raise PySparkTypeError( - error_class="NOT_LIST_OR_TUPLE", - message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, - ) - - return DataFrame(self._jdf.na().fill(value, self._jseq(subset)), self.sparkSession) + ... @overload def replace( @@ -5507,7 +4906,8 @@ def replace( ) -> "DataFrame": ... - def replace( # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def replace( self, to_replace: Union[ "LiteralType", List["LiteralType"], Dict["LiteralType", "OptionalPrimitiveType"] @@ -5610,111 +5010,7 @@ def replace( # type: ignore[misc] |NULL| NULL| NULL| +----+------+-----+ """ - if value is _NoValue: - if isinstance(to_replace, dict): - value = None - else: - raise PySparkTypeError( - error_class="ARGUMENT_REQUIRED", - message_parameters={"arg_name": "value", "condition": "`to_replace` is dict"}, - ) - - # Helper functions - def all_of(types: Union[Type, Tuple[Type, ...]]) -> Callable[[Iterable], bool]: - """Given a type or tuple of types and a sequence of xs - check if each x is instance of type(s) - - >>> all_of(bool)([True, False]) - True - >>> all_of(str)(["a", 1]) - False - """ - - def all_of_(xs: Iterable) -> bool: - return all(isinstance(x, types) for x in xs) - - return all_of_ - - all_of_bool = all_of(bool) - all_of_str = all_of(str) - all_of_numeric = all_of((float, int)) - - # Validate input types - valid_types = (bool, float, int, str, list, tuple) - if not isinstance(to_replace, valid_types + (dict,)): - raise PySparkTypeError( - error_class="NOT_BOOL_OR_DICT_OR_FLOAT_OR_INT_OR_LIST_OR_STR_OR_TUPLE", - message_parameters={ - "arg_name": "to_replace", - "arg_type": type(to_replace).__name__, - }, - ) - - if ( - not isinstance(value, valid_types) - and value is not None - and not isinstance(to_replace, dict) - ): - raise PySparkTypeError( - error_class="NOT_BOOL_OR_FLOAT_OR_INT_OR_LIST_OR_NONE_OR_STR_OR_TUPLE", - message_parameters={ - "arg_name": "value", - "arg_type": type(value).__name__, - }, - ) - - if isinstance(to_replace, (list, tuple)) and isinstance(value, (list, tuple)): - if len(to_replace) != len(value): - raise PySparkValueError( - error_class="LENGTH_SHOULD_BE_THE_SAME", - message_parameters={ - "arg1": "to_replace", - "arg2": "value", - "arg1_length": str(len(to_replace)), - "arg2_length": str(len(value)), - }, - ) - - if not (subset is None or isinstance(subset, (list, tuple, str))): - raise PySparkTypeError( - error_class="NOT_LIST_OR_STR_OR_TUPLE", - message_parameters={"arg_name": "subset", "arg_type": type(subset).__name__}, - ) - - # Reshape input arguments if necessary - if isinstance(to_replace, (float, int, str)): - to_replace = [to_replace] - - if 
isinstance(to_replace, dict): - rep_dict = to_replace - if value is not None: - warnings.warn("to_replace is a dict and value is not None. value will be ignored.") - else: - if isinstance(value, (float, int, str)) or value is None: - value = [value for _ in range(len(to_replace))] - rep_dict = dict(zip(to_replace, cast("Iterable[Optional[Union[float, str]]]", value))) - - if isinstance(subset, str): - subset = [subset] - - # Verify we were not passed in mixed type generics. - if not any( - all_of_type(rep_dict.keys()) - and all_of_type(x for x in rep_dict.values() if x is not None) - for all_of_type in [all_of_bool, all_of_str, all_of_numeric] - ): - raise PySparkValueError( - error_class="MIXED_TYPE_REPLACEMENT", - message_parameters={}, - ) - - if subset is None: - return DataFrame(self._jdf.na().replace("*", rep_dict), self.sparkSession) - else: - return DataFrame( - self._jdf.na().replace(self._jseq(subset), self._jmap(rep_dict)), - self.sparkSession, - ) + ... @overload def approxQuantile( @@ -5734,6 +5030,7 @@ def approxQuantile( ) -> List[List[float]]: ... + @dispatch_df_method def approxQuantile( self, col: Union[str, List[str], Tuple[str]], @@ -5830,76 +5127,9 @@ def approxQuantile( >>> quantiles [1.0, 1.0, 5.0] """ + ... - if not isinstance(col, (str, list, tuple)): - raise PySparkTypeError( - error_class="NOT_LIST_OR_STR_OR_TUPLE", - message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, - ) - - isStr = isinstance(col, str) - - if isinstance(col, tuple): - col = list(col) - elif isStr: - col = [cast(str, col)] - - for c in col: - if not isinstance(c, str): - raise PySparkTypeError( - error_class="DISALLOWED_TYPE_FOR_CONTAINER", - message_parameters={ - "arg_name": "col", - "arg_type": type(col).__name__, - "allowed_types": "str", - "item_type": type(c).__name__, - }, - ) - col = _to_list(self._sc, cast(List["ColumnOrName"], col)) - - if not isinstance(probabilities, (list, tuple)): - raise PySparkTypeError( - error_class="NOT_LIST_OR_TUPLE", - message_parameters={ - "arg_name": "probabilities", - "arg_type": type(probabilities).__name__, - }, - ) - if isinstance(probabilities, tuple): - probabilities = list(probabilities) - for p in probabilities: - if not isinstance(p, (float, int)) or p < 0 or p > 1: - raise PySparkTypeError( - error_class="NOT_LIST_OF_FLOAT_OR_INT", - message_parameters={ - "arg_name": "probabilities", - "arg_type": type(p).__name__, - }, - ) - probabilities = _to_list(self._sc, cast(List["ColumnOrName"], probabilities)) - - if not isinstance(relativeError, (float, int)): - raise PySparkTypeError( - error_class="NOT_FLOAT_OR_INT", - message_parameters={ - "arg_name": "relativeError", - "arg_type": type(relativeError).__name__, - }, - ) - if relativeError < 0: - raise PySparkValueError( - error_class="NEGATIVE_VALUE", - message_parameters={ - "arg_name": "relativeError", - "arg_value": str(relativeError), - }, - ) - relativeError = float(relativeError) - - jaq = self._jdf.stat().approxQuantile(col, probabilities, relativeError) - jaq_list = [list(j) for j in jaq] - return jaq_list[0] if isStr else jaq_list - + @dispatch_df_method def corr(self, col1: str, col2: str, method: Optional[str] = None) -> float: """ Calculates the correlation of two columns of a :class:`DataFrame` as a double value. 
@@ -5935,25 +5165,9 @@ def corr(self, col1: str, col2: str, method: Optional[str] = None) -> float: 1.0 """ - if not isinstance(col1, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "col1", "arg_type": type(col1).__name__}, - ) - if not isinstance(col2, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "col2", "arg_type": type(col2).__name__}, - ) - if not method: - method = "pearson" - if not method == "pearson": - raise PySparkValueError( - error_class="VALUE_NOT_PEARSON", - message_parameters={"arg_name": "method", "arg_value": method}, - ) - return self._jdf.stat().corr(col1, col2, method) + ... + @dispatch_df_method def cov(self, col1: str, col2: str) -> float: """ Calculate the sample covariance for the given columns, specified by their names, as a @@ -5986,18 +5200,9 @@ def cov(self, col1: str, col2: str) -> float: 1.0 """ - if not isinstance(col1, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "col1", "arg_type": type(col1).__name__}, - ) - if not isinstance(col2, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "col2", "arg_type": type(col2).__name__}, - ) - return self._jdf.stat().cov(col1, col2) + ... + @dispatch_df_method def crosstab(self, col1: str, col2: str) -> "DataFrame": """ Computes a pair-wise frequency table of the given columns. Also known as a contingency @@ -6039,18 +5244,9 @@ def crosstab(self, col1: str, col2: str) -> "DataFrame": +-----+---+---+---+ """ - if not isinstance(col1, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "col1", "arg_type": type(col1).__name__}, - ) - if not isinstance(col2, str): - raise PySparkTypeError( - error_class="NOT_STR", - message_parameters={"arg_name": "col2", "arg_type": type(col2).__name__}, - ) - return DataFrame(self._jdf.stat().crosstab(col1, col2), self.sparkSession) + ... + @dispatch_df_method def freqItems( self, cols: Union[List[str], Tuple[str]], support: Optional[float] = None ) -> "DataFrame": @@ -6095,19 +5291,9 @@ def freqItems( | [4, 1, 3]| [8, 11, 10]| +------------+------------+ """ - if isinstance(cols, tuple): - cols = list(cols) - if not isinstance(cols, list): - raise PySparkTypeError( - error_class="NOT_LIST_OR_TUPLE", - message_parameters={"arg_name": "cols", "arg_type": type(cols).__name__}, - ) - if not support: - support = 0.01 - return DataFrame( - self._jdf.stat().freqItems(_to_seq(self._sc, cols), support), self.sparkSession - ) + ... + @dispatch_df_method def _ipython_key_completions_(self) -> List[str]: """Returns the names of columns in this :class:`DataFrame`. @@ -6122,8 +5308,9 @@ def _ipython_key_completions_(self) -> List[str]: >>> df._ipython_key_completions_() ['age 1', 'name?1'] """ - return self.columns + ... + @dispatch_df_method def withColumns(self, *colsMap: Dict[str, Column]) -> "DataFrame": """ Returns a new :class:`DataFrame` by adding multiple columns or replacing the @@ -6159,24 +5346,9 @@ def withColumns(self, *colsMap: Dict[str, Column]) -> "DataFrame": | 5| Bob| 7| 8| +---+-----+----+----+ """ - # Below code is to help enable kwargs in future. 
- assert len(colsMap) == 1 - colsMap = colsMap[0] # type: ignore[assignment] - - if not isinstance(colsMap, dict): - raise PySparkTypeError( - error_class="NOT_DICT", - message_parameters={"arg_name": "colsMap", "arg_type": type(colsMap).__name__}, - ) - - col_names = list(colsMap.keys()) - cols = list(colsMap.values()) - - return DataFrame( - self._jdf.withColumns(_to_seq(self._sc, col_names), self._jcols(*cols)), - self.sparkSession, - ) + ... + @dispatch_df_method def withColumn(self, colName: str, col: Column) -> "DataFrame": """ Returns a new :class:`DataFrame` by adding a column or replacing the @@ -6220,13 +5392,9 @@ def withColumn(self, colName: str, col: Column) -> "DataFrame": | 5| Bob| 7| +---+-----+----+ """ - if not isinstance(col, Column): - raise PySparkTypeError( - error_class="NOT_COLUMN", - message_parameters={"arg_name": "col", "arg_type": type(col).__name__}, - ) - return DataFrame(self._jdf.withColumn(colName, col._jc), self.sparkSession) + ... + @dispatch_df_method def withColumnRenamed(self, existing: str, new: str) -> "DataFrame": """ Returns a new :class:`DataFrame` by renaming an existing column. @@ -6287,8 +5455,9 @@ def withColumnRenamed(self, existing: str, new: str) -> "DataFrame": | 5| Bob| +----+-----+ """ - return DataFrame(self._jdf.withColumnRenamed(existing, new), self.sparkSession) + ... + @dispatch_df_method def withColumnsRenamed(self, colsMap: Dict[str, str]) -> "DataFrame": """ Returns a new :class:`DataFrame` by renaming multiple columns. @@ -6356,25 +5525,9 @@ def withColumnsRenamed(self, colsMap: Dict[str, str]) -> "DataFrame": | 5| Bob| +---+-----+ """ - if not isinstance(colsMap, dict): - raise PySparkTypeError( - error_class="NOT_DICT", - message_parameters={"arg_name": "colsMap", "arg_type": type(colsMap).__name__}, - ) - - col_names: List[str] = [] - new_col_names: List[str] = [] - for k, v in colsMap.items(): - col_names.append(k) - new_col_names.append(v) - - return DataFrame( - self._jdf.withColumnsRenamed( - _to_seq(self._sc, col_names), _to_seq(self._sc, new_col_names) - ), - self.sparkSession, - ) + ... + @dispatch_df_method def withMetadata(self, columnName: str, metadata: Dict[str, Any]) -> "DataFrame": """Returns a new :class:`DataFrame` by updating an existing column with metadata. @@ -6402,18 +5555,7 @@ def withMetadata(self, columnName: str, metadata: Dict[str, Any]) -> "DataFrame" >>> df_meta.schema['age'].metadata {'foo': 'bar'} """ - from py4j.java_gateway import JVMView - - if not isinstance(metadata, dict): - raise PySparkTypeError( - error_class="NOT_DICT", - message_parameters={"arg_name": "metadata", "arg_type": type(metadata).__name__}, - ) - sc = get_active_spark_context() - jmeta = cast(JVMView, sc._jvm).org.apache.spark.sql.types.Metadata.fromJson( - json.dumps(metadata) - ) - return DataFrame(self._jdf.withMetadata(columnName, jmeta), self.sparkSession) + ... @overload def drop(self, cols: "ColumnOrName") -> "DataFrame": @@ -6423,7 +5565,8 @@ def drop(self, cols: "ColumnOrName") -> "DataFrame": def drop(self, *cols: str) -> "DataFrame": ... - def drop(self, *cols: "ColumnOrName") -> "DataFrame": # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def drop(self, *cols: "ColumnOrName") -> "DataFrame": """ Returns a new :class:`DataFrame` without specified columns. This is a no-op if the schema doesn't contain the given column name(s). 
@@ -6553,29 +5696,9 @@ def drop(self, *cols: "ColumnOrName") -> "DataFrame": # type: ignore[misc] | 16| Bob| 1| +---+-----+-----+ """ - column_names: List[str] = [] - java_columns: List["JavaObject"] = [] - - for c in cols: - if isinstance(c, str): - column_names.append(c) - elif isinstance(c, Column): - java_columns.append(c._jc) - else: - raise PySparkTypeError( - error_class="NOT_COLUMN_OR_STR", - message_parameters={"arg_name": "col", "arg_type": type(c).__name__}, - ) - - jdf = self._jdf - if len(java_columns) > 0: - first_column, *remaining_columns = java_columns - jdf = jdf.drop(first_column, self._jseq(remaining_columns)) - if len(column_names) > 0: - jdf = jdf.drop(self._jseq(column_names)) - - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def toDF(self, *cols: str) -> "DataFrame": """Returns a new :class:`DataFrame` that with new specified column names @@ -6609,15 +5732,9 @@ def toDF(self, *cols: str) -> "DataFrame": | 16| Bob| +---+-----+ """ - for col in cols: - if not isinstance(col, str): - raise PySparkTypeError( - error_class="NOT_LIST_OF_STR", - message_parameters={"arg_name": "cols", "arg_type": type(col).__name__}, - ) - jdf = self._jdf.toDF(self._jseq(cols)) - return DataFrame(jdf, self.sparkSession) + ... + @dispatch_df_method def transform(self, func: Callable[..., "DataFrame"], *args: Any, **kwargs: Any) -> "DataFrame": """Returns a new :class:`DataFrame`. Concise syntax for chaining custom transformations. @@ -6673,12 +5790,9 @@ def transform(self, func: Callable[..., "DataFrame"], *args: Any, **kwargs: Any) | 13| 13.0| +---+-----+ """ - result = func(self, *args, **kwargs) - assert isinstance( - result, DataFrame - ), "Func returned an instance of type [%s], " "should have been DataFrame." % type(result) - return result + ... + @dispatch_df_method def sameSemantics(self, other: "DataFrame") -> bool: """ Returns `True` when the logical query plans inside both :class:`DataFrame`\\s are equal and @@ -6721,13 +5835,9 @@ def sameSemantics(self, other: "DataFrame") -> bool: >>> df1.withColumn("col1", df1.id * 2).sameSemantics(df2.withColumn("col0", df2.id * 2)) True """ - if not isinstance(other, DataFrame): - raise PySparkTypeError( - error_class="NOT_DATAFRAME", - message_parameters={"arg_name": "other", "arg_type": type(other).__name__}, - ) - return self._jdf.sameSemantics(other._jdf) + ... + @dispatch_df_method def semanticHash(self) -> int: """ Returns a hash code of the logical query plan against this :class:`DataFrame`. @@ -6756,8 +5866,9 @@ def semanticHash(self) -> int: >>> spark.range(10).selectExpr("id as col1").semanticHash() # doctest: +SKIP 1855039936 """ - return self._jdf.semanticHash() + ... + @dispatch_df_method def inputFiles(self) -> List[str]: """ Returns a best-effort snapshot of the files that compose this :class:`DataFrame`. @@ -6791,15 +5902,16 @@ def inputFiles(self) -> List[str]: ... len(df.inputFiles()) 1 """ - return list(self._jdf.inputFiles()) + ... + @dispatch_df_method def where(self, condition: "ColumnOrName") -> "DataFrame": """ :func:`where` is an alias for :func:`filter`. .. versionadded:: 1.3.0 """ - return self.filter(condition) + ... # Two aliases below were added for pandas compatibility many years ago. # There are too many differences compared to pandas and we cannot just @@ -6814,22 +5926,31 @@ def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": def groupby(self, __cols: Union[List[Column], List[str], List[int]]) -> "GroupedData": ... 
- def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def groupby(self, *cols: "ColumnOrNameOrOrdinal") -> "GroupedData": """ :func:`groupby` is an alias for :func:`groupBy`. .. versionadded:: 1.4.0 """ - return self.groupBy(*cols) + ... - def drop_duplicates(self, subset: Optional[List[str]] = None) -> "DataFrame": + @dispatch_df_method + def drop_duplicates(self, *subset: Union[str, List[str]]) -> "DataFrame": """ :func:`drop_duplicates` is an alias for :func:`dropDuplicates`. .. versionadded:: 1.4.0 + + .. versionchanged:: 3.4.0 + Supports Spark Connect + + .. versionchanged:: 4.0.0 + Supports variable-length argument """ - return self.dropDuplicates(subset) + ... + @dispatch_df_method def writeTo(self, table: str) -> DataFrameWriterV2: """ Create a write configuration builder for v2 sources. @@ -6862,8 +5983,9 @@ def writeTo(self, table: str) -> DataFrameWriterV2: ... "catalog.db.table" ... ).partitionedBy("col").createOrReplace() """ - return DataFrameWriterV2(self, table) + ... + @dispatch_df_method def pandas_api( self, index_col: Optional[Union[str, List[str]]] = None ) -> "PandasOnSparkDataFrame": @@ -6914,25 +6036,276 @@ def pandas_api( 23 Alice 16 Bob """ - from pyspark.pandas.namespace import _get_index_map - from pyspark.pandas.frame import DataFrame as PandasOnSparkDataFrame - from pyspark.pandas.internal import InternalFrame + ... - index_spark_columns, index_names = _get_index_map(self, index_col) - internal = InternalFrame( - spark_frame=self, - index_spark_columns=index_spark_columns, - index_names=index_names, # type: ignore[arg-type] - ) - return PandasOnSparkDataFrame(internal) + @dispatch_df_method + def mapInPandas( + self, + func: "PandasMapIterFunction", + schema: Union[StructType, str], + barrier: bool = False, + profile: Optional[ResourceProfile] = None, + ) -> "DataFrame": + """ + Maps an iterator of batches in the current :class:`DataFrame` using a Python native + function that is performed on pandas DataFrames both as input and output, + and returns the result as a :class:`DataFrame`. + This method applies the specified Python function to an iterator of + `pandas.DataFrame`\\s, each representing a batch of rows from the original DataFrame. + The returned iterator of `pandas.DataFrame`\\s are combined as a :class:`DataFrame`. + The size of the function's input and output can be different. Each `pandas.DataFrame` + size can be controlled by `spark.sql.execution.arrow.maxRecordsPerBatch`. -def _to_scala_map(sc: "SparkContext", jm: Dict) -> "JavaObject": - """ - Convert a dict into a JVM Map. - """ - assert sc._jvm is not None - return sc._jvm.PythonUtils.toScalaMap(jm) + .. versionadded:: 3.0.0 + + .. versionchanged:: 3.4.0 + Supports Spark Connect. + + Parameters + ---------- + func : function + a Python native function that takes an iterator of `pandas.DataFrame`\\s, and + outputs an iterator of `pandas.DataFrame`\\s. + schema : :class:`pyspark.sql.types.DataType` or str + the return type of the `func` in PySpark. The value can be either a + :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + barrier : bool, optional, default False + Use barrier mode execution, ensuring that all Python workers in the stage will be + launched concurrently. + + .. versionadded: 3.5.0 + + profile : :class:`pyspark.resource.ResourceProfile`. The optional ResourceProfile + to be used for mapInPandas. + + .. 
versionadded: 4.0.0 + + + Examples + -------- + >>> df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age")) + + Filter rows with id equal to 1: + + >>> def filter_func(iterator): + ... for pdf in iterator: + ... yield pdf[pdf.id == 1] + ... + >>> df.mapInPandas(filter_func, df.schema).show() # doctest: +SKIP + +---+---+ + | id|age| + +---+---+ + | 1| 21| + +---+---+ + + Compute the mean age for each id: + + >>> def mean_age(iterator): + ... for pdf in iterator: + ... yield pdf.groupby("id").mean().reset_index() + ... + >>> df.mapInPandas(mean_age, "id: bigint, age: double").show() # doctest: +SKIP + +---+----+ + | id| age| + +---+----+ + | 1|21.0| + | 2|30.0| + +---+----+ + + Add a new column with the double of the age: + + >>> def double_age(iterator): + ... for pdf in iterator: + ... pdf["double_age"] = pdf["age"] * 2 + ... yield pdf + ... + >>> df.mapInPandas( + ... double_age, "id: bigint, age: bigint, double_age: bigint").show() # doctest: +SKIP + +---+---+----------+ + | id|age|double_age| + +---+---+----------+ + | 1| 21| 42| + | 2| 30| 60| + +---+---+----------+ + + Set ``barrier`` to ``True`` to force the ``mapInPandas`` stage running in the + barrier mode, it ensures all Python workers in the stage will be + launched concurrently. + + >>> df.mapInPandas(filter_func, df.schema, barrier=True).show() # doctest: +SKIP + +---+---+ + | id|age| + +---+---+ + | 1| 21| + +---+---+ + + Notes + ----- + This API is experimental + + See Also + -------- + pyspark.sql.functions.pandas_udf + """ + ... + + @dispatch_df_method + def mapInArrow( + self, + func: "ArrowMapIterFunction", + schema: Union[StructType, str], + barrier: bool = False, + profile: Optional[ResourceProfile] = None, + ) -> "DataFrame": + """ + Maps an iterator of batches in the current :class:`DataFrame` using a Python native + function that is performed on `pyarrow.RecordBatch`\\s both as input and output, + and returns the result as a :class:`DataFrame`. + + This method applies the specified Python function to an iterator of + `pyarrow.RecordBatch`\\s, each representing a batch of rows from the original DataFrame. + The returned iterator of `pyarrow.RecordBatch`\\s are combined as a :class:`DataFrame`. + The size of the function's input and output can be different. Each `pyarrow.RecordBatch` + size can be controlled by `spark.sql.execution.arrow.maxRecordsPerBatch`. + + .. versionadded:: 3.3.0 + + Parameters + ---------- + func : function + a Python native function that takes an iterator of `pyarrow.RecordBatch`\\s, and + outputs an iterator of `pyarrow.RecordBatch`\\s. + schema : :class:`pyspark.sql.types.DataType` or str + the return type of the `func` in PySpark. The value can be either a + :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. + barrier : bool, optional, default False + Use barrier mode execution, ensuring that all Python workers in the stage will be + launched concurrently. + + .. versionadded: 3.5.0 + + profile : :class:`pyspark.resource.ResourceProfile`. The optional ResourceProfile + to be used for mapInArrow. + + .. versionadded: 4.0.0 + + Examples + -------- + >>> import pyarrow # doctest: +SKIP + >>> df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age")) + >>> def filter_func(iterator): + ... for batch in iterator: + ... pdf = batch.to_pandas() + ... 
yield pyarrow.RecordBatch.from_pandas(pdf[pdf.id == 1]) + >>> df.mapInArrow(filter_func, df.schema).show() # doctest: +SKIP + +---+---+ + | id|age| + +---+---+ + | 1| 21| + +---+---+ + + Set ``barrier`` to ``True`` to force the ``mapInArrow`` stage running in the + barrier mode, it ensures all Python workers in the stage will be + launched concurrently. + + >>> df.mapInArrow(filter_func, df.schema, barrier=True).show() # doctest: +SKIP + +---+---+ + | id|age| + +---+---+ + | 1| 21| + +---+---+ + + Notes + ----- + This API is unstable, and for developers. + + See Also + -------- + pyspark.sql.functions.pandas_udf + pyspark.sql.DataFrame.mapInPandas + """ + ... + + @dispatch_df_method + def toArrow(self) -> "pa.Table": + """ + Returns the contents of this :class:`DataFrame` as PyArrow ``pyarrow.Table``. + + This is only available if PyArrow is installed and available. + + .. versionadded:: 4.0.0 + + Notes + ----- + This method should only be used if the resulting PyArrow ``pyarrow.Table`` is + expected to be small, as all the data is loaded into the driver's memory. + + This API is a developer API. + + Examples + -------- + >>> df.toArrow() # doctest: +SKIP + pyarrow.Table + age: int64 + name: string + ---- + age: [[2,5]] + name: [["Alice","Bob"]] + """ + ... + + def toPandas(self) -> "PandasDataFrameLike": + """ + Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. + + This is only available if Pandas is installed and available. + + .. versionadded:: 1.3.0 + + .. versionchanged:: 3.4.0 + Supports Spark Connect. + + Notes + ----- + This method should only be used if the resulting Pandas ``pandas.DataFrame`` is + expected to be small, as all the data is loaded into the driver's memory. + + Usage with ``spark.sql.execution.arrow.pyspark.enabled=True`` is experimental. + + Examples + -------- + >>> df.toPandas() # doctest: +SKIP + age name + 0 2 Alice + 1 5 Bob + """ + ... + + @property + def executionInfo(self) -> Optional["ExecutionInfo"]: + """ + Returns a QueryExecution object after the query was executed. + + The queryExecution method allows to introspect information about the actual + query execution after the successful execution. Accessing this member before + the query execution will return None. + + If the same DataFrame is executed multiple times, the execution info will be + overwritten by the latest operation. + + .. versionadded:: 4.0.0 + + Returns + ------- + An instance of QueryExecution or None when the value is not set yet. + + Notes + ----- + This is an API dedicated to Spark Connect client only. With regular Spark Session, it throws + an exception. + """ + ... class DataFrameNaFunctions: @@ -6947,13 +6320,14 @@ class DataFrameNaFunctions: def __init__(self, df: DataFrame): self.df = df + @dispatch_df_method def drop( self, how: str = "any", thresh: Optional[int] = None, subset: Optional[Union[str, Tuple[str, ...], List[str]]] = None, ) -> DataFrame: - return self.df.dropna(how=how, thresh=thresh, subset=subset) + ... drop.__doc__ = DataFrame.dropna.__doc__ @@ -6965,12 +6339,13 @@ def fill(self, value: "LiteralType", subset: Optional[List[str]] = ...) -> DataF def fill(self, value: Dict[str, "LiteralType"]) -> DataFrame: ... + @dispatch_df_method def fill( self, value: Union["LiteralType", Dict[str, "LiteralType"]], subset: Optional[List[str]] = None, ) -> DataFrame: - return self.df.fillna(value=value, subset=subset) # type: ignore[arg-type] + ... fill.__doc__ = DataFrame.fillna.__doc__ @@ -7000,7 +6375,8 @@ def replace( ) -> DataFrame: ... 
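The `executionInfo` property added earlier in this hunk ships without a usage example in its docstring; a minimal sketch for a Spark Connect session follows (the query is made up for illustration, and on a classic session the property raises instead of returning a value).

df = spark.range(100).selectExpr("sum(id) AS total")
assert df.executionInfo is None   # nothing has been executed yet
df.collect()                      # run the query
info = df.executionInfo           # populated by the most recent execution; overwritten on re-execution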
- def replace( # type: ignore[misc] + @dispatch_df_method # type: ignore[misc] + def replace( self, to_replace: Union[List["LiteralType"], Dict["LiteralType", "OptionalPrimitiveType"]], value: Optional[ @@ -7008,7 +6384,7 @@ def replace( # type: ignore[misc] ] = _NoValue, subset: Optional[List[str]] = None, ) -> DataFrame: - return self.df.replace(to_replace, value, subset) # type: ignore[arg-type] + ... replace.__doc__ = DataFrame.replace.__doc__ @@ -7043,61 +6419,45 @@ def approxQuantile( ) -> List[List[float]]: ... + @dispatch_df_method def approxQuantile( self, col: Union[str, List[str], Tuple[str]], probabilities: Union[List[float], Tuple[float]], relativeError: float, ) -> Union[List[float], List[List[float]]]: - return self.df.approxQuantile(col, probabilities, relativeError) + ... approxQuantile.__doc__ = DataFrame.approxQuantile.__doc__ + @dispatch_df_method def corr(self, col1: str, col2: str, method: Optional[str] = None) -> float: - return self.df.corr(col1, col2, method) + ... corr.__doc__ = DataFrame.corr.__doc__ + @dispatch_df_method def cov(self, col1: str, col2: str) -> float: - return self.df.cov(col1, col2) + ... cov.__doc__ = DataFrame.cov.__doc__ + @dispatch_df_method def crosstab(self, col1: str, col2: str) -> DataFrame: - return self.df.crosstab(col1, col2) + ... crosstab.__doc__ = DataFrame.crosstab.__doc__ + @dispatch_df_method def freqItems(self, cols: List[str], support: Optional[float] = None) -> DataFrame: - return self.df.freqItems(cols, support) + ... freqItems.__doc__ = DataFrame.freqItems.__doc__ + @dispatch_df_method def sampleBy( self, col: str, fractions: Dict[Any, float], seed: Optional[int] = None ) -> DataFrame: - return self.df.sampleBy(col, fractions, seed) + ... sampleBy.__doc__ = DataFrame.sampleBy.__doc__ - - -def _test() -> None: - import doctest - from pyspark.sql import SparkSession - import pyspark.sql.dataframe - - globs = pyspark.sql.dataframe.__dict__.copy() - spark = SparkSession.builder.master("local[4]").appName("sql.dataframe tests").getOrCreate() - globs["spark"] = spark - (failure_count, test_count) = doctest.testmod( - pyspark.sql.dataframe, - globs=globs, - optionflags=doctest.ELLIPSIS | doctest.NORMALIZE_WHITESPACE | doctest.REPORT_NDIFF, - ) - spark.stop() - if failure_count: - sys.exit(-1) - - -if __name__ == "__main__": - _test() diff --git a/python/pyspark/sql/datasource.py b/python/pyspark/sql/datasource.py index c08b5b7af77fb..8ea36bb04fb68 100644 --- a/python/pyspark/sql/datasource.py +++ b/python/pyspark/sql/datasource.py @@ -16,7 +16,7 @@ # from abc import ABC, abstractmethod from collections import UserDict -from typing import Any, Dict, Iterator, List, Sequence, Tuple, Type, Union, TYPE_CHECKING +from typing import Any, Dict, Iterator, List, Optional, Sequence, Tuple, Type, Union, TYPE_CHECKING from pyspark.sql import Row from pyspark.sql.types import StructType @@ -30,9 +30,12 @@ "DataSource", "DataSourceReader", "DataSourceStreamReader", + "SimpleDataSourceStreamReader", "DataSourceWriter", + "DataSourceStreamWriter", "DataSourceRegistration", "InputPartition", + "SimpleDataSourceStreamReader", "WriterCommitMessage", ] @@ -183,11 +186,36 @@ def streamWriter(self, schema: StructType, overwrite: bool) -> "DataSourceStream message_parameters={"feature": "streamWriter"}, ) + def simpleStreamReader(self, schema: StructType) -> "SimpleDataSourceStreamReader": + """ + Returns a :class:`SimpleDataSourceStreamReader` instance for reading data. 
+ + One of simpleStreamReader() and streamReader() must be implemented for readable streaming + data source. Spark will check whether streamReader() is implemented, if yes, create a + DataSourceStreamReader to read data. simpleStreamReader() will only be invoked when + streamReader() is not implemented. + + Parameters + ---------- + schema : :class:`StructType` + The schema of the data to be read. + + Returns + ------- + reader : :class:`SimpleDataSourceStreamReader` + A reader instance for this data source. + """ + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "simpleStreamReader"}, + ) + def streamReader(self, schema: StructType) -> "DataSourceStreamReader": """ Returns a :class:`DataSourceStreamReader` instance for reading streaming data. - The implementation is required for readable streaming data sources. + One of simpleStreamReader() and streamReader() must be implemented for readable streaming + data source. Parameters ---------- @@ -305,7 +333,7 @@ def partitions(self) -> Sequence[InputPartition]: ) @abstractmethod - def read(self, partition: InputPartition) -> Union[Iterator[Tuple], Iterator[Row]]: + def read(self, partition: InputPartition) -> Iterator[Tuple]: """ Generates data for a given partition and returns an iterator of tuples or rows. @@ -396,8 +424,10 @@ def latestOffset(self) -> dict: def partitions(self, start: dict, end: dict) -> Sequence[InputPartition]: """ - Returns a list of InputPartition given the start and end offsets. Each InputPartition - represents a data split that can be processed by one Spark task. + Returns a list of InputPartition given the start and end offsets. Each InputPartition + represents a data split that can be processed by one Spark task. This may be called with + an empty offset range when start == end, in that case the method should return + an empty sequence of InputPartition. Parameters ---------- @@ -418,7 +448,7 @@ def partitions(self, start: dict, end: dict) -> Sequence[InputPartition]: ) @abstractmethod - def read(self, partition: InputPartition) -> Union[Iterator[Tuple], Iterator[Row]]: + def read(self, partition: InputPartition) -> Iterator[Tuple]: """ Generates data for a given partition and returns an iterator of tuples or rows. @@ -469,6 +499,102 @@ def stop(self) -> None: ... +class SimpleDataSourceStreamReader(ABC): + """ + A base class for simplified streaming data source readers. + Compared to :class:`DataSourceStreamReader`, :class:`SimpleDataSourceStreamReader` doesn't + require planning data partition. Also, the read api of :class:`SimpleDataSourceStreamReader` + allows reading data and planning the latest offset at the same time. + + Because :class:`SimpleDataSourceStreamReader` read records in Spark driver node to determine + end offset of each batch without partitioning, it is only supposed to be used in + lightweight use cases where input rate and batch size is small. + Use :class:`DataSourceStreamReader` when read throughput is high and can't be handled + by a single process. + + .. versionadded: 4.0.0 + """ + + def initialOffset(self) -> dict: + """ + Return the initial offset of the streaming data source. + A new streaming query starts reading data from the initial offset. + If Spark is restarting an existing query, it will restart from the check-pointed offset + rather than the initial one. + + Returns + ------- + dict + A dict or recursive dict whose key and value are primitive types, which includes + Integer, String and Boolean. 
+ + Examples + -------- + >>> def initialOffset(self): + ... return {"parititon-1": {"index": 3, "closed": True}, "partition-2": {"index": 5}} + """ + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "initialOffset"}, + ) + + def read(self, start: dict) -> Tuple[Iterator[Tuple], dict]: + """ + Read all available data from start offset and return the offset that next read attempt + starts from. + + Parameters + ---------- + start : dict + The start offset to start reading from. + + Returns + ------- + A :class:`Tuple` of an iterator of :class:`Tuple` and a dict\\s + The iterator contains all the available records after start offset. + The dict is the end offset of this read attempt and the start of next read attempt. + """ + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "read"}, + ) + + def readBetweenOffsets(self, start: dict, end: dict) -> Iterator[Tuple]: + """ + Read all available data from specific start offset and end offset. + This is invoked during failure recovery to re-read a batch deterministically. + + Parameters + ---------- + start : dict + The start offset to start reading from. + + end : dict + The offset where the reading stop. + + Returns + ------- + iterator of :class:`Tuple`\\s + All the records between start offset and end offset. + """ + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "readBetweenOffsets"}, + ) + + def commit(self, end: dict) -> None: + """ + Informs the source that Spark has completed processing all data for offsets less than or + equal to `end` and will only request offsets greater than `end` in the future. + + Parameters + ---------- + end : dict + The latest offset that the streaming query has processed for this source. + """ + ... + + class DataSourceWriter(ABC): """ A base class for data source writers. Data source writers are responsible for saving @@ -503,7 +629,7 @@ def write(self, iterator: Iterator[Row]) -> "WriterCommitMessage": """ ... - def commit(self, messages: List["WriterCommitMessage"]) -> None: + def commit(self, messages: List[Optional["WriterCommitMessage"]]) -> None: """ Commits this writing job with a list of commit messages. @@ -515,11 +641,11 @@ def commit(self, messages: List["WriterCommitMessage"]) -> None: Parameters ---------- messages : list of :class:`WriterCommitMessage`\\s - A list of commit messages. + A list of commit messages. If a write task fails, the commit message will be `None`. """ ... - def abort(self, messages: List["WriterCommitMessage"]) -> None: + def abort(self, messages: List[Optional["WriterCommitMessage"]]) -> None: """ Aborts this writing job due to task failures. @@ -531,7 +657,7 @@ def abort(self, messages: List["WriterCommitMessage"]) -> None: Parameters ---------- messages : list of :class:`WriterCommitMessage`\\s - A list of commit messages. + A list of commit messages. If a write task fails, the commit message will be `None`. """ ... @@ -568,7 +694,7 @@ def write(self, iterator: Iterator[Row]) -> "WriterCommitMessage": """ ... - def commit(self, messages: List["WriterCommitMessage"], batchId: int) -> None: + def commit(self, messages: List[Optional["WriterCommitMessage"]], batchId: int) -> None: """ Commits this microbatch with a list of commit messages. 
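To make the `SimpleDataSourceStreamReader` contract described above concrete, here is a minimal counter source that is not part of this patch: offsets are plain dicts, `read` returns the prefetched rows together with the start offset of the next read attempt, and `readBetweenOffsets` replays an already planned range deterministically during recovery.

from typing import Iterator, Tuple
from pyspark.sql.datasource import SimpleDataSourceStreamReader

class CounterStreamReader(SimpleDataSourceStreamReader):
    # Illustrative sketch only: emits consecutive integers, five per micro-batch.
    def initialOffset(self) -> dict:
        return {"offset": 0}

    def read(self, start: dict) -> Tuple[Iterator[Tuple], dict]:
        end = {"offset": start["offset"] + 5}
        rows = iter([(i,) for i in range(start["offset"], end["offset"])])
        return rows, end  # the data plus the start offset of the next read attempt

    def readBetweenOffsets(self, start: dict, end: dict) -> Iterator[Tuple]:
        # Deterministic replay of a previously planned range, used on failure recovery.
        return iter([(i,) for i in range(start["offset"], end["offset"])])

Likewise, since `commit` and `abort` now receive `List[Optional[WriterCommitMessage]]`, writer implementations should tolerate `None` entries from failed tasks; a hedged sketch of that handling (the class name and behavior are illustrative, not from the patch):

from typing import Iterator, List, Optional
from pyspark.sql import Row
from pyspark.sql.datasource import DataSourceWriter, WriterCommitMessage

class LoggingWriter(DataSourceWriter):
    def write(self, iterator: Iterator[Row]) -> WriterCommitMessage:
        return WriterCommitMessage()

    def commit(self, messages: List[Optional[WriterCommitMessage]]) -> None:
        # Entries for failed tasks arrive as None after this change; skip them.
        completed = [m for m in messages if m is not None]
        print(f"committing {len(completed)} of {len(messages)} task outputs")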
@@ -579,15 +705,15 @@ def commit(self, messages: List["WriterCommitMessage"], batchId: int) -> None: Parameters ---------- - messages : List[WriterCommitMessage] - A list of commit messages. + messages : list of :class:`WriterCommitMessage`\\s + A list of commit messages. If a write task fails, the commit message will be `None`. batchId: int An integer that uniquely identifies a batch of data being written. The integer increase by 1 with each microbatch processed. """ ... - def abort(self, messages: List["WriterCommitMessage"], batchId: int) -> None: + def abort(self, messages: List[Optional["WriterCommitMessage"]], batchId: int) -> None: """ Aborts this microbatch due to task failures. @@ -598,8 +724,8 @@ def abort(self, messages: List["WriterCommitMessage"], batchId: int) -> None: Parameters ---------- - messages : List[WriterCommitMessage] - A list of commit messages. + messages : list of :class:`WriterCommitMessage`\\s + A list of commit messages. If a write task fails, the commit message will be `None`. batchId: int An integer that uniquely identifies a batch of data being written. The integer increase by 1 with each microbatch processed. diff --git a/python/pyspark/sql/datasource_internal.py b/python/pyspark/sql/datasource_internal.py new file mode 100644 index 0000000000000..6df0be4192ec8 --- /dev/null +++ b/python/pyspark/sql/datasource_internal.py @@ -0,0 +1,146 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import json +import copy +from itertools import chain +from typing import Iterator, List, Optional, Sequence, Tuple + +from pyspark.sql.datasource import ( + DataSource, + DataSourceStreamReader, + InputPartition, + SimpleDataSourceStreamReader, +) +from pyspark.sql.types import StructType +from pyspark.errors import PySparkNotImplementedError + + +def _streamReader(datasource: DataSource, schema: StructType) -> "DataSourceStreamReader": + """ + Fallback to simpleStreamReader() method when streamReader() is not implemented. + This should be invoked whenever a DataSourceStreamReader needs to be created instead of + invoking datasource.streamReader() directly. 
+ """ + try: + return datasource.streamReader(schema=schema) + except PySparkNotImplementedError: + return _SimpleStreamReaderWrapper(datasource.simpleStreamReader(schema=schema)) + + + class SimpleInputPartition(InputPartition): + def __init__(self, start: dict, end: dict): + self.start = start + self.end = end + + + class PrefetchedCacheEntry: + def __init__(self, start: dict, end: dict, iterator: Iterator[Tuple]): + self.start = start + self.end = end + self.iterator = iterator + + + class _SimpleStreamReaderWrapper(DataSourceStreamReader): + """ + A private class that wraps :class:`SimpleDataSourceStreamReader` in a prefetch-and-cache pattern, + so that :class:`SimpleDataSourceStreamReader` can integrate with the streaming engine like an + ordinary :class:`DataSourceStreamReader`. + + current_offset tracks the latest progress of the record prefetching; it is initialized to + initialOffset() when the query starts for the first time, or to the end offset of + the last planned batch when the query restarts. + + When the streaming engine calls latestOffset(), the wrapper calls read() starting from + current_offset, prefetches and caches the data, then updates current_offset to be + the end offset of the new data. + + When the streaming engine calls planInputPartitions(start, end), the wrapper gets the prefetched + data from the cache and sends it to the JVM along with the input partitions. + + When the query restarts, batches in the write-ahead offset log that have not been committed will + be replayed by reading the data between the start and end offsets through readBetweenOffsets(start, end). + """ + + def __init__(self, simple_reader: SimpleDataSourceStreamReader): + self.simple_reader = simple_reader + self.initial_offset: Optional[dict] = None + self.current_offset: Optional[dict] = None + self.cache: List[PrefetchedCacheEntry] = [] + + def initialOffset(self) -> dict: + if self.initial_offset is None: + self.initial_offset = self.simple_reader.initialOffset() + return self.initial_offset + + def latestOffset(self) -> dict: + # When the query starts for the first time, use the initial offset as the start offset. + if self.current_offset is None: + self.current_offset = self.initialOffset() + (iter, end) = self.simple_reader.read(self.current_offset) + self.cache.append(PrefetchedCacheEntry(self.current_offset, end, iter)) + self.current_offset = end + return end + + def commit(self, end: dict) -> None: + if self.current_offset is None: + self.current_offset = end + + end_idx = -1 + for idx, entry in enumerate(self.cache): + if json.dumps(entry.end) == json.dumps(end): + end_idx = idx + break + if end_idx > 0: + # Drop prefetched data for batches that have been committed. + self.cache = self.cache[end_idx:] + self.simple_reader.commit(end) + + def partitions(self, start: dict, end: dict) -> Sequence["InputPartition"]: + # When the query restarts from a checkpoint, use the last committed offset as the start offset. + # This depends on the streaming engine calling planInputPartitions() of the last batch + # in the offset log when the query restarts. + if self.current_offset is None: + self.current_offset = end + if len(self.cache) > 0: + assert self.cache[-1].end == end + return [SimpleInputPartition(start, end)] + + def getCache(self, start: dict, end: dict) -> Iterator[Tuple]: + start_idx = -1 + end_idx = -1 + for idx, entry in enumerate(self.cache): + # There is no convenient way to compare 2 offsets. + # Serialize into json string before comparison.
+ if json.dumps(entry.start) == json.dumps(start): + start_idx = idx + if json.dumps(entry.end) == json.dumps(end): + end_idx = idx + break + if start_idx == -1 or end_idx == -1: + return None # type: ignore[return-value] + # Chain all the data iterator between start offset and end offset + # need to copy here to avoid exhausting the original data iterator. + entries = [copy.copy(entry.iterator) for entry in self.cache[start_idx : end_idx + 1]] + it = chain(*entries) + return it + + def read( + self, input_partition: SimpleInputPartition # type: ignore[override] + ) -> Iterator[Tuple]: + return self.simple_reader.readBetweenOffsets(input_partition.start, input_partition.end) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 58eb136da216c..2a302d1e51125 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -40,7 +40,7 @@ ) from pyspark.errors import PySparkTypeError, PySparkValueError -from pyspark.sql.column import Column, _to_java_column, _to_seq, _create_column_from_literal +from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame from pyspark.sql.types import ArrayType, DataType, StringType, StructType, _from_numpy_type @@ -86,7 +86,7 @@ def _get_jvm_function(name: str, sc: "SparkContext") -> Callable: Java gateway associated with sc. """ assert sc._jvm is not None - return getattr(sc._jvm.functions, name) + return getattr(getattr(sc._jvm, "org.apache.spark.sql.functions"), name) def _invoke_function(name: str, *args: Any) -> Column: @@ -106,6 +106,8 @@ def _invoke_function_over_columns(name: str, *cols: "ColumnOrName") -> Column: Invokes n-ary JVM function identified by name and wraps the result with :class:`~pyspark.sql.Column`. """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function(name, *(_to_java_column(col) for col in cols)) @@ -114,6 +116,8 @@ def _invoke_function_over_seq_of_columns(name: str, cols: "Iterable[ColumnOrName Invokes unary JVM function identified by name with and wraps the result with :class:`~pyspark.sql.Column`. """ + from pyspark.sql.classic.column import _to_java_column, _to_seq + sc = _get_active_spark_context() return _invoke_function(name, _to_seq(sc, cols, _to_java_column)) @@ -123,6 +127,7 @@ def _invoke_binary_math_function(name: str, col1: Any, col2: Any) -> Column: Invokes binary JVM math function identified by name and wraps the result with :class:`~pyspark.sql.Column`. """ + from pyspark.sql.classic.column import _to_java_column, _create_column_from_literal # For legacy reasons, the arguments here can be implicitly converted into column cols = [ @@ -633,7 +638,7 @@ def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column: | 4 months| +--------------------------------------------------+ - Example 3: Exception druing division, resulting in NULL when ANSI mode is on + Example 3: Exception during division, resulting in NULL when ANSI mode is on >>> import pyspark.sql.functions as sf >>> origin = spark.conf.get("spark.sql.ansi.enabled") @@ -652,6 +657,56 @@ def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column: return _invoke_function_over_columns("try_divide", left, right) +@_try_remote_functions +def try_remainder(left: "ColumnOrName", right: "ColumnOrName") -> Column: + """ + Returns the remainder after `dividend`/`divisor`. Its result is + always null if `divisor` is 0. + + .. 
versionadded:: 4.0.0 + + Parameters + ---------- + left : :class:`~pyspark.sql.Column` or str + dividend + right : :class:`~pyspark.sql.Column` or str + divisor + + Examples + -------- + Example 1: Integer divided by Integer. + + >>> import pyspark.sql.functions as sf + >>> spark.createDataFrame( + ... [(6000, 15), (3, 2), (1234, 0)], ["a", "b"] + ... ).select(sf.try_remainder("a", "b")).show() + +-------------------+ + |try_remainder(a, b)| + +-------------------+ + | 0| + | 1| + | NULL| + +-------------------+ + + Example 2: Exception during division, resulting in NULL when ANSI mode is on + + >>> import pyspark.sql.functions as sf + >>> origin = spark.conf.get("spark.sql.ansi.enabled") + >>> spark.conf.set("spark.sql.ansi.enabled", "true") + >>> try: + ... df = spark.range(1) + ... df.select(sf.try_remainder(df.id, sf.lit(0))).show() + ... finally: + ... spark.conf.set("spark.sql.ansi.enabled", origin) + +--------------------+ + |try_remainder(id, 0)| + +--------------------+ + | NULL| + +--------------------+ + """ + return _invoke_function_over_columns("try_remainder", left, right) + + @_try_remote_functions def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column: """ @@ -1005,6 +1060,8 @@ def mode(col: "ColumnOrName", deterministic: bool = False) -> Column: | -10| +---------------------------------------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("mode", _to_java_column(col), deterministic) @@ -1268,7 +1325,7 @@ def max_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([ ... ("Consult", "Eva", 6), ("Finance", "Frank", 5), - ... ("Finance", "George", 5), ("Consult", "Henry", 7)], + ... ("Finance", "George", 9), ("Consult", "Henry", 7)], ... schema=("department", "name", "years_in_dept")) >>> df.groupby("department").agg( ... sf.max_by("name", "years_in_dept") @@ -1349,7 +1406,7 @@ def min_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([ ... ("Consult", "Eva", 6), ("Finance", "Frank", 5), - ... ("Finance", "George", 5), ("Consult", "Henry", 7)], + ... ("Finance", "George", 9), ("Consult", "Henry", 7)], ... schema=("department", "name", "years_in_dept")) >>> df.groupby("department").agg( ... sf.min_by("name", "years_in_dept") @@ -1358,7 +1415,7 @@ def min_by(col: "ColumnOrName", ord: "ColumnOrName") -> Column: |department|min_by(name, years_in_dept)| +----------+---------------------------+ | Consult| Eva| - | Finance| George| + | Finance| Frank| +----------+---------------------------+ """ return _invoke_function_over_columns("min_by", col, ord) @@ -2061,7 +2118,7 @@ def ceil(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Col scale : :class:`~pyspark.sql.Column` or int, optional An optional parameter to control the rounding behavior. - .. versionadded:: 4.0.0 + .. versionadded:: 4.0.0 Returns ------- @@ -2114,7 +2171,7 @@ def ceiling(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> scale : :class:`~pyspark.sql.Column` or int An optional parameter to control the rounding behavior. - .. versionadded:: 4.0.0 + .. versionadded:: 4.0.0 Returns ------- @@ -2375,7 +2432,7 @@ def floor(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Co scale : :class:`~pyspark.sql.Column` or int, optional An optional parameter to control the rounding behavior. - .. versionadded:: 4.0.0 + .. 
versionadded:: 4.0.0 Returns @@ -5123,6 +5180,8 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C | 95546| 102065| +----------------+------------+ """ + from pyspark.sql.classic.column import _to_java_column + if rsd is None: return _invoke_function_over_columns("approx_count_distinct", col) else: @@ -5386,6 +5445,8 @@ def count_distinct(col: "ColumnOrName", *cols: "ColumnOrName") -> Column: | 2| +------------------------------+ """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + sc = _get_active_spark_context() return _invoke_function( "count_distinct", _to_java_column(col), _to_seq(sc, cols, _to_java_column) @@ -5413,8 +5474,8 @@ def first(col: "ColumnOrName", ignorenulls: bool = False) -> Column: ---------- col : :class:`~pyspark.sql.Column` or str column to fetch first value for. - ignorenulls : :class:`~pyspark.sql.Column` or str - if first value is null then look for first non-null value. + ignorenulls : bool + if first value is null then look for first non-null value. ``False``` by default. Returns ------- @@ -5443,6 +5504,8 @@ def first(col: "ColumnOrName", ignorenulls: bool = False) -> Column: | Bob| 5| +-----+----------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("first", _to_java_column(col), ignorenulls) @@ -5684,8 +5747,8 @@ def last(col: "ColumnOrName", ignorenulls: bool = False) -> Column: ---------- col : :class:`~pyspark.sql.Column` or str column to fetch last value for. - ignorenulls : :class:`~pyspark.sql.Column` or str - if last value is null then look for non-null value. + ignorenulls : bool + if last value is null then look for non-null value. ``False``` by default. Returns ------- @@ -5714,6 +5777,8 @@ def last(col: "ColumnOrName", ignorenulls: bool = False) -> Column: | Bob| 5| +-----+---------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("last", _to_java_column(col), ignorenulls) @@ -5847,6 +5912,8 @@ def percentile( | 2| 19.967859769284075| +---+--------------------+ """ + from pyspark.sql.classic.column import _to_seq, _create_column_from_literal, _to_java_column + sc = _get_active_spark_context() if isinstance(percentage, (list, tuple)): @@ -5924,6 +5991,8 @@ def percentile_approx( |-- key: long (nullable = true) |-- median: double (nullable = true) """ + from pyspark.sql.classic.column import _to_seq, _create_column_from_literal, _to_java_column + sc = _get_active_spark_context() if isinstance(percentage, (list, tuple)): @@ -5998,6 +6067,8 @@ def approx_percentile( |-- key: long (nullable = true) |-- approx_percentile(value, 0.5, 1000000): double (nullable = true) """ + from pyspark.sql.classic.column import _to_seq, _create_column_from_literal, _to_java_column + sc = _get_active_spark_context() if isinstance(percentage, (list, tuple)): @@ -6145,8 +6216,8 @@ def round(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> Co scale : :class:`~pyspark.sql.Column` or int, optional An optional parameter to control the rounding behavior. - .. versionchanged:: 4.0.0 - Support Column type. + .. versionchanged:: 4.0.0 + Support Column type. Returns ------- @@ -6200,8 +6271,8 @@ def bround(col: "ColumnOrName", scale: Optional[Union[Column, int]] = None) -> C scale : :class:`~pyspark.sql.Column` or int, optional An optional parameter to control the rounding behavior. - .. versionchanged:: 4.0.0 - Support Column type. + .. versionchanged:: 4.0.0 + Support Column type. 
Returns ------- @@ -6279,6 +6350,8 @@ def shiftleft(col: "ColumnOrName", numBits: int) -> Column: >>> spark.createDataFrame([(21,)], ['a']).select(shiftleft('a', 1).alias('r')).collect() [Row(r=42)] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("shiftleft", _to_java_column(col), numBits) @@ -6324,6 +6397,8 @@ def shiftright(col: "ColumnOrName", numBits: int) -> Column: >>> spark.createDataFrame([(42,)], ['a']).select(shiftright('a', 1).alias('r')).collect() [Row(r=21)] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("shiftright", _to_java_column(col), numBits) @@ -6370,6 +6445,8 @@ def shiftrightunsigned(col: "ColumnOrName", numBits: int) -> Column: >>> df.select(shiftrightunsigned('a', 1).alias('r')).collect() [Row(r=9223372036854775787)] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("shiftrightunsigned", _to_java_column(col), numBits) @@ -6684,6 +6761,8 @@ def log(arg1: Union["ColumnOrName", float], arg2: Optional["ColumnOrName"] = Non |1.3862943611198906| +------------------+ """ + from pyspark.sql.classic.column import _to_java_column + if arg2 is None: return _invoke_function_over_columns("log", cast("ColumnOrName", arg1)) else: @@ -6781,6 +6860,8 @@ def conv(col: "ColumnOrName", fromBase: int, toBase: int) -> Column: >>> df.select(conv(df.n, 2, 16).alias('hex')).collect() [Row(hex='15')] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("conv", _to_java_column(col), fromBase, toBase) @@ -6894,6 +6975,8 @@ def lag(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> | b| 8| -1| +---+---+-------------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("lag", _to_java_column(col), offset, default) @@ -6975,6 +7058,8 @@ def lead(col: "ColumnOrName", offset: int = 1, default: Optional[Any] = None) -> | b| 8| -1| +---+---+----------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("lead", _to_java_column(col), offset, default) @@ -7049,6 +7134,8 @@ def nth_value(col: "ColumnOrName", offset: int, ignoreNulls: Optional[bool] = Fa | b| 8| 8| +---+---+---------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("nth_value", _to_java_column(col), offset, ignoreNulls) @@ -7561,6 +7648,8 @@ def date_format(date: "ColumnOrName", format: str) -> Column: >>> df.select(date_format('dt', 'MM/dd/yyyy').alias('date')).collect() [Row(date='04/08/2015')] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("date_format", _to_java_column(date), format) @@ -8418,6 +8507,8 @@ def months_between(date1: "ColumnOrName", date2: "ColumnOrName", roundOff: bool >>> df.select(months_between(df.date1, df.date2, False).alias('months')).collect() [Row(months=3.9495967741935485)] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function( "months_between", _to_java_column(date1), _to_java_column(date2), roundOff ) @@ -8459,6 +8550,8 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column: >>> df.select(to_date(df.t, 'yyyy-MM-dd HH:mm:ss').alias('date')).collect() [Row(date=datetime.date(1997, 2, 28))] """ + from pyspark.sql.classic.column import _to_java_column + if format is None: return _invoke_function_over_columns("to_date", col) else: @@ -8595,6 +8688,8 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column: |1997-02-28 
10:30:00| +-------------------+ """ + from pyspark.sql.classic.column import _to_java_column + if format is None: return _invoke_function_over_columns("to_timestamp", col) else: @@ -8852,6 +8947,8 @@ def trunc(date: "ColumnOrName", format: str) -> Column: >>> df.select(trunc(df.d, 'mon').alias('month')).collect() [Row(month=datetime.date(1997, 2, 1))] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("trunc", _to_java_column(date), format) @@ -8889,6 +8986,8 @@ def date_trunc(format: str, timestamp: "ColumnOrName") -> Column: >>> df.select(date_trunc('mon', df.t).alias('month')).collect() [Row(month=datetime.datetime(1997, 2, 1, 0, 0))] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("date_trunc", format, _to_java_column(timestamp)) @@ -8922,6 +9021,8 @@ def next_day(date: "ColumnOrName", dayOfWeek: str) -> Column: >>> df.select(next_day(df.d, 'Sun').alias('date')).collect() [Row(date=datetime.date(2015, 8, 2))] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("next_day", _to_java_column(date), dayOfWeek) @@ -8951,6 +9052,8 @@ def last_day(date: "ColumnOrName") -> Column: >>> df.select(last_day(df.d).alias('date')).collect() [Row(date=datetime.date(1997, 2, 28))] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("last_day", _to_java_column(date)) @@ -8986,6 +9089,8 @@ def from_unixtime(timestamp: "ColumnOrName", format: str = "yyyy-MM-dd HH:mm:ss" [Row(ts='2015-04-08 00:00:00')] >>> spark.conf.unset("spark.sql.session.timeZone") """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("from_unixtime", _to_java_column(timestamp), format) @@ -9066,6 +9171,8 @@ def unix_timestamp( >>> spark.conf.unset("spark.sql.session.timeZone") """ + from pyspark.sql.classic.column import _to_java_column + if timestamp is None: return _invoke_function("unix_timestamp") return _invoke_function("unix_timestamp", _to_java_column(timestamp), format) @@ -9120,6 +9227,8 @@ def from_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: >>> df.select(from_utc_timestamp(df.ts, df.tz).alias('local_time')).collect() [Row(local_time=datetime.datetime(1997, 2, 28, 19, 30))] """ + from pyspark.sql.classic.column import _to_java_column + if isinstance(tz, Column): tz = _to_java_column(tz) return _invoke_function("from_utc_timestamp", _to_java_column(timestamp), tz) @@ -9174,6 +9283,8 @@ def to_utc_timestamp(timestamp: "ColumnOrName", tz: "ColumnOrName") -> Column: >>> df.select(to_utc_timestamp(df.ts, df.tz).alias('utc_time')).collect() [Row(utc_time=datetime.datetime(1997, 2, 28, 1, 30))] """ + from pyspark.sql.classic.column import _to_java_column + if isinstance(tz, Column): tz = _to_java_column(tz) return _invoke_function("to_utc_timestamp", _to_java_column(timestamp), tz) @@ -9290,6 +9401,129 @@ def timestamp_micros(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("timestamp_micros", col) +@_try_remote_functions +def timestamp_diff(unit: str, start: "ColumnOrName", end: "ColumnOrName") -> Column: + """ + Gets the difference between the timestamps in the specified units by truncating + the fraction part. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + unit : str + This indicates the units of the difference between the given timestamps. + Supported options are (case insensitive): "YEAR", "QUARTER", "MONTH", "WEEK", + "DAY", "HOUR", "MINUTE", "SECOND", "MILLISECOND" and "MICROSECOND". 
+ start : :class:`~pyspark.sql.Column` or str + A timestamp which the expression subtracts from `endTimestamp`. + end : :class:`~pyspark.sql.Column` or str + A timestamp from which the expression subtracts `startTimestamp`. + + Returns + ------- + :class:`~pyspark.sql.Column` + the difference between the timestamps. + + Examples + -------- + >>> import datetime + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame( + ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), datetime.datetime(2024, 4, 2, 9, 0, 7))], + ... ).toDF("start", "end") + >>> df.select(sf.timestamp_diff("year", "start", "end")).show() + +-------------------------------+ + |timestampdiff(year, start, end)| + +-------------------------------+ + | 8| + +-------------------------------+ + >>> df.select(sf.timestamp_diff("WEEK", "start", "end")).show() + +-------------------------------+ + |timestampdiff(WEEK, start, end)| + +-------------------------------+ + | 420| + +-------------------------------+ + >>> df.select(sf.timestamp_diff("day", "end", "start")).show() + +------------------------------+ + |timestampdiff(day, end, start)| + +------------------------------+ + | -2944| + +------------------------------+ + """ + from pyspark.sql.classic.column import _to_java_column + + return _invoke_function( + "timestamp_diff", + unit, + _to_java_column(start), + _to_java_column(end), + ) + + +@_try_remote_functions +def timestamp_add(unit: str, quantity: "ColumnOrName", ts: "ColumnOrName") -> Column: + """ + Adds the specified number of units of time + to the given timestamp. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + unit : str + This indicates the unit of time that is added to the given timestamp. + Supported options are (case insensitive): "YEAR", "QUARTER", "MONTH", "WEEK", + "DAY", "HOUR", "MINUTE", "SECOND", "MILLISECOND" and "MICROSECOND". + quantity : :class:`~pyspark.sql.Column` or str + The number of units of time that you want to add. + ts : :class:`~pyspark.sql.Column` or str + A timestamp to which the units are added. + + Returns + ------- + :class:`~pyspark.sql.Column` + the timestamp with the specified number of units added. + + Examples + -------- + >>> import datetime + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame( + ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 2), + ...
(datetime.datetime(2024, 4, 2, 9, 0, 7), 3)], ["ts", "quantity"]) + >>> df.select(sf.timestamp_add("year", "quantity", "ts")).show() + +--------------------------------+ + |timestampadd(year, quantity, ts)| + +--------------------------------+ + | 2018-03-11 09:00:07| + | 2027-04-02 09:00:07| + +--------------------------------+ + >>> df.select(sf.timestamp_add("WEEK", sf.lit(5), "ts")).show() + +-------------------------+ + |timestampadd(WEEK, 5, ts)| + +-------------------------+ + | 2016-04-15 09:00:07| + | 2024-05-07 09:00:07| + +-------------------------+ + >>> df.select(sf.timestamp_add("day", sf.lit(-5), "ts")).show() + +-------------------------+ + |timestampadd(day, -5, ts)| + +-------------------------+ + | 2016-03-06 09:00:07| + | 2024-03-28 09:00:07| + +-------------------------+ + """ + from pyspark.sql.classic.column import _to_java_column + + return _invoke_function( + "timestamp_add", + unit, + _to_java_column(quantity), + _to_java_column(ts), + ) + + @_try_remote_functions def window( timeColumn: "ColumnOrName", @@ -9367,6 +9601,7 @@ def window( |2016-03-11 09:00:05|2016-03-11 09:00:10| 1| +-------------------+-------------------+---+ """ + from pyspark.sql.classic.column import _to_java_column def check_string_field(field, fieldName): # type: ignore[no-untyped-def] if not field or type(field) is not str: @@ -9438,6 +9673,8 @@ def window_time( ... ).collect() [Row(end='2016-03-11 09:00:10', window_time='2016-03-11 09:00:09.999999', sum=1)] """ + from pyspark.sql.classic.column import _to_java_column + window_col = _to_java_column(windowColumn) return _invoke_function("window_time", window_col) @@ -9493,6 +9730,7 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) ... w.session_window.end.cast("string").alias("end"), "sum").collect() [Row(start='2016-03-11 09:00:07', end='2016-03-11 09:00:12', sum=1)] """ + from pyspark.sql.classic.column import _to_java_column def check_field(field: Union[Column, str], fieldName: str) -> None: if field is None or not isinstance(field, (str, Column)): @@ -9863,6 +10101,8 @@ def sha2(col: "ColumnOrName", numBits: int) -> Column: |Bob |cd9fb1e148ccd8442e5aa74904cc73bf6fb54d1d54d333bd596aa9bb4bb4e961| +-----+----------------------------------------------------------------+ """ + from pyspark.sql.classic.column import _to_java_column + if numBits not in [0, 224, 256, 384, 512]: raise PySparkValueError( error_class="VALUE_NOT_ALLOWED", @@ -10000,6 +10240,8 @@ def assert_true(col: "ColumnOrName", errMsg: Optional[Union[Column, str]] = None java.lang.RuntimeException: My error msg ... """ + from pyspark.sql.classic.column import _create_column_from_literal, _to_java_column + if errMsg is None: return _invoke_function_over_columns("assert_true", col) if not isinstance(errMsg, (str, Column)): @@ -10042,6 +10284,8 @@ def raise_error(errMsg: Union[Column, str]) -> Column: java.lang.RuntimeException: My error message ... 
""" + from pyspark.sql.classic.column import _create_column_from_literal, _to_java_column + if not isinstance(errMsg, (str, Column)): raise PySparkTypeError( error_class="NOT_COLUMN_OR_STR", @@ -10368,6 +10612,8 @@ def concat_ws(sep: str, *cols: "ColumnOrName") -> Column: >>> df.select(concat_ws('-', df.s, df.d).alias('s')).collect() [Row(s='abcd-123')] """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + sc = _get_active_spark_context() return _invoke_function("concat_ws", sep, _to_seq(sc, cols, _to_java_column)) @@ -10405,6 +10651,8 @@ def decode(col: "ColumnOrName", charset: str) -> Column: | abcd| +----------------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("decode", _to_java_column(col), charset) @@ -10441,6 +10689,8 @@ def encode(col: "ColumnOrName", charset: str) -> Column: | [61 62 63 64]| +----------------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("encode", _to_java_column(col), charset) @@ -10470,6 +10720,8 @@ def format_number(col: "ColumnOrName", d: int) -> Column: >>> spark.createDataFrame([(5,)], ['a']).select(format_number('a', 4).alias('v')).collect() [Row(v='5.0000')] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("format_number", _to_java_column(col), d) @@ -10501,6 +10753,8 @@ def format_string(format: str, *cols: "ColumnOrName") -> Column: >>> df.select(format_string('%d %s', df.a, df.b).alias('v')).collect() [Row(v='5 hello')] """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + sc = _get_active_spark_context() return _invoke_function("format_string", format, _to_seq(sc, cols, _to_java_column)) @@ -10539,6 +10793,8 @@ def instr(str: "ColumnOrName", substr: str) -> Column: >>> df.select(instr(df.s, 'b').alias('s')).collect() [Row(s=2)] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("instr", _to_java_column(str), substr) @@ -10585,6 +10841,8 @@ def overlay( >>> df.select(overlay("x", "y", 7, 2).alias("overlayed")).collect() [Row(overlayed='SPARK_COREL')] """ + from pyspark.sql.classic.column import _create_column_from_literal, _to_java_column + if not isinstance(pos, (int, str, Column)): raise PySparkTypeError( error_class="NOT_COLUMN_OR_INT_OR_STR", @@ -10657,7 +10915,9 @@ def sentences( @_try_remote_functions -def substring(str: "ColumnOrName", pos: int, len: int) -> Column: +def substring( + str: "ColumnOrName", pos: Union["ColumnOrName", int], len: Union["ColumnOrName", int] +) -> Column: """ Substring starts at `pos` and is of length `len` when str is String type or returns the slice of byte array that starts at `pos` in byte and is of length `len` @@ -10676,11 +10936,18 @@ def substring(str: "ColumnOrName", pos: int, len: int) -> Column: ---------- str : :class:`~pyspark.sql.Column` or str target column to work on. - pos : int + pos : :class:`~pyspark.sql.Column` or str or int starting position in str. - len : int + + .. versionchanged:: 4.0.0 + `pos` now accepts column and column name. + + len : :class:`~pyspark.sql.Column` or str or int length of chars. + .. versionchanged:: 4.0.0 + `len` now accepts column and column name. 
+ Returns ------- :class:`~pyspark.sql.Column` @@ -10691,8 +10958,17 @@ def substring(str: "ColumnOrName", pos: int, len: int) -> Column: >>> df = spark.createDataFrame([('abcd',)], ['s',]) >>> df.select(substring(df.s, 1, 2).alias('s')).collect() [Row(s='ab')] + >>> df = spark.createDataFrame([('Spark', 2, 3)], ['s', 'p', 'l']) + >>> df.select(substring(df.s, 2, df.l).alias('s')).collect() + [Row(s='par')] + >>> df.select(substring(df.s, df.p, 3).alias('s')).collect() + [Row(s='par')] + >>> df.select(substring(df.s, df.p, df.l).alias('s')).collect() + [Row(s='par')] """ - return _invoke_function("substring", _to_java_column(str), pos, len) + pos = lit(pos) if isinstance(pos, int) else pos + len = lit(len) if isinstance(len, int) else len + return _invoke_function_over_columns("substring", str, pos, len) @_try_remote_functions @@ -10730,6 +11006,8 @@ def substring_index(str: "ColumnOrName", delim: str, count: int) -> Column: >>> df.select(substring_index(df.s, '.', -3).alias('s')).collect() [Row(s='b.c.d')] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("substring_index", _to_java_column(str), delim, count) @@ -10770,6 +11048,8 @@ def levenshtein( >>> df0.select(levenshtein('l', 'r', 2).alias('d')).collect() [Row(d=-1)] """ + from pyspark.sql.classic.column import _to_java_column + if threshold is None: return _invoke_function_over_columns("levenshtein", left, right) else: @@ -10813,6 +11093,8 @@ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column: >>> df.select(locate('b', df.s, 1).alias('s')).collect() [Row(s=2)] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("locate", substr, _to_java_column(str), pos) @@ -10846,6 +11128,8 @@ def lpad(col: "ColumnOrName", len: int, pad: str) -> Column: >>> df.select(lpad(df.s, 6, '#').alias('s')).collect() [Row(s='##abcd')] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("lpad", _to_java_column(col), len, pad) @@ -10879,6 +11163,8 @@ def rpad(col: "ColumnOrName", len: int, pad: str) -> Column: >>> df.select(rpad(df.s, 6, '#').alias('s')).collect() [Row(s='abcd##')] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("rpad", _to_java_column(col), len, pad) @@ -10944,7 +11230,11 @@ def repeat(col: "ColumnOrName", n: Union["ColumnOrName", int]) -> Column: @_try_remote_functions -def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: +def split( + str: "ColumnOrName", + pattern: Union[Column, str], + limit: Union["ColumnOrName", int] = -1, +) -> Column: """ Splits str around matches of the given pattern. @@ -10957,10 +11247,10 @@ def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: ---------- str : :class:`~pyspark.sql.Column` or str a string expression to split - pattern : str + pattern : :class:`~pyspark.sql.Column` or str a string representing a regular expression. The regex string should be a Java regular expression. - limit : int, optional + limit : :class:`~pyspark.sql.Column` or str or int an integer which controls the number of times `pattern` is applied. * ``limit > 0``: The resulting array's length will not be more than `limit`, and the @@ -10972,6 +11262,11 @@ def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: .. versionchanged:: 3.0 `split` now takes an optional `limit` field. If not provided, default limit value is -1. + .. versionchanged:: 4.0.0 + `pattern` now accepts column. 
Does not accept column name since string type remain + accepted as a regular expression representation, for backwards compatibility. + In addition to int, `limit` now accepts column and column name. + Returns ------- :class:`~pyspark.sql.Column` @@ -10979,13 +11274,53 @@ def split(str: "ColumnOrName", pattern: str, limit: int = -1) -> Column: Examples -------- + >>> import pyspark.sql.functions as sf >>> df = spark.createDataFrame([('oneAtwoBthreeC',)], ['s',]) - >>> df.select(split(df.s, '[ABC]', 2).alias('s')).collect() - [Row(s=['one', 'twoBthreeC'])] - >>> df.select(split(df.s, '[ABC]', -1).alias('s')).collect() - [Row(s=['one', 'two', 'three', ''])] + >>> df.select(sf.split(df.s, '[ABC]', 2).alias('s')).show() + +-----------------+ + | s| + +-----------------+ + |[one, twoBthreeC]| + +-----------------+ + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('oneAtwoBthreeC',)], ['s',]) + >>> df.select(sf.split(df.s, '[ABC]', -1).alias('s')).show() + +-------------------+ + | s| + +-------------------+ + |[one, two, three, ]| + +-------------------+ + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... [('oneAtwoBthreeC', '[ABC]'), ('1A2B3C', '[1-9]+'), ('aa2bb3cc4', '[1-9]+')], + ... ['s', 'pattern'] + ... ) + >>> df.select(sf.split(df.s, df.pattern).alias('s')).show() + +-------------------+ + | s| + +-------------------+ + |[one, two, three, ]| + | [, A, B, C]| + | [aa, bb, cc, ]| + +-------------------+ + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... [('oneAtwoBthreeC', '[ABC]', 2), ('1A2B3C', '[1-9]+', -1)], + ... ['s', 'pattern', 'expected_parts'] + ... ) + >>> df.select(sf.split(df.s, df.pattern, df.expected_parts).alias('s')).show() + +-----------------+ + | s| + +-----------------+ + |[one, twoBthreeC]| + | [, A, B, C]| + +-----------------+ """ - return _invoke_function("split", _to_java_column(str), pattern, limit) + limit = lit(limit) if isinstance(limit, int) else limit + return _invoke_function_over_columns("split", str, lit(pattern), limit) @_try_remote_functions @@ -11193,6 +11528,8 @@ def regexp_extract(str: "ColumnOrName", pattern: str, idx: int) -> Column: >>> df.select(regexp_extract('str', '(a+)(b)?(c)', 2).alias('d')).collect() [Row(d='')] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("regexp_extract", _to_java_column(str), pattern, idx) @@ -11271,6 +11608,8 @@ def regexp_replace( >>> df.select(regexp_replace("str", col("pattern"), col("replacement")).alias('d')).collect() [Row(d='-----')] """ + from pyspark.sql.classic.column import _create_column_from_literal, _to_java_column + if isinstance(pattern, str): pattern_col = _create_column_from_literal(pattern) else: @@ -11619,6 +11958,8 @@ def translate(srcCol: "ColumnOrName", matching: str, replace: str) -> Column: ... 
.alias('r')).collect() [Row(r='1a2s3ae')] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("translate", _to_java_column(srcCol), matching, replace) @@ -12033,6 +12374,8 @@ def printf(format: "ColumnOrName", *cols: "ColumnOrName") -> Column: | aa123cc| +---------------+ """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + sc = _get_active_spark_context() return _invoke_function("printf", _to_java_column(format), _to_seq(sc, cols, _to_java_column)) @@ -12585,6 +12928,8 @@ def elt(*inputs: "ColumnOrName") -> Column: >>> df.select(elt(df.a, df.b, df.c).alias('r')).collect() [Row(r='scala')] """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + sc = _get_active_spark_context() return _invoke_function("elt", _to_seq(sc, inputs, _to_java_column)) @@ -12886,6 +13231,8 @@ def collate(col: "ColumnOrName", collation: str) -> Column: :class:`~pyspark.sql.Column` A new column of string type, where each value has the specified collation. """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("collate", _to_java_column(col), collation) @@ -13273,8 +13620,7 @@ def array_contains(col: "ColumnOrName", value: Any) -> Column: | true| +----------+ """ - value = value._jc if isinstance(value, Column) else value - return _invoke_function("array_contains", _to_java_column(col), value) + return _invoke_function_over_columns("array_contains", col, lit(value)) @_try_remote_functions @@ -13520,6 +13866,8 @@ def array_join( | NULL,NULL| +-------------------------+ """ + from pyspark.sql.classic.column import _to_java_column + _get_active_spark_context() if null_replacement is None: return _invoke_function("array_join", _to_java_column(col), delimiter) @@ -13634,7 +13982,10 @@ def array_position(col: "ColumnOrName", value: Any) -> Column: col : :class:`~pyspark.sql.Column` or str target column to work on. value : Any - value to look for. + value or a :class:`~pyspark.sql.Column` expression to look for. + + .. versionchanged:: 4.0.0 + `value` now also accepts a Column type. Returns ------- @@ -13699,8 +14050,20 @@ def array_position(col: "ColumnOrName", value: Any) -> Column: +-----------------------+ | 3| +-----------------------+ + + Example 6: Finding the position of a column's value in an array of integers + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([([10, 20, 30], 20)], ['data', 'col']) + >>> df.select(sf.array_position(df.data, df.col)).show() + +-------------------------+ + |array_position(data, col)| + +-------------------------+ + | 2| + +-------------------------+ + """ - return _invoke_function("array_position", _to_java_column(col), value) + return _invoke_function_over_columns("array_position", col, lit(value)) @_try_remote_functions @@ -13735,10 +14098,13 @@ def element_at(col: "ColumnOrName", extraction: Any) -> Column: Notes ----- The position is not zero based, but 1 based index. + If extraction is a string, :meth:`element_at` treats it as a literal string, + while :meth:`try_element_at` treats it as a column name. 
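Editor's sketch (not part of the patch): the note above about how a plain string extraction is interpreted is easiest to see side by side. This assumes an active SparkSession named `spark` on a build that includes this change.

```python
from pyspark.sql import functions as sf

df = spark.createDataFrame([({"a": 1.0, "b": 2.0}, "a")], ["data", "b"])
df.select(
    sf.element_at(df.data, "b").alias("literal_key"),       # 'b' is a literal map key -> 2.0
    sf.try_element_at(df.data, "b").alias("column_name"),    # 'b' names the column holding 'a' -> 1.0
).show()
```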
See Also -------- :meth:`get` + :meth:`try_element_at` Examples -------- @@ -13785,6 +14151,17 @@ def element_at(col: "ColumnOrName", extraction: Any) -> Column: +-------------------+ | NULL| +-------------------+ + + Example 5: Getting a value from a map using a literal string as the key + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([({"a": 1.0, "b": 2.0}, "a")], ['data', 'b']) + >>> df.select(sf.element_at(df.data, 'b')).show() + +-------------------+ + |element_at(data, b)| + +-------------------+ + | 2.0| + +-------------------+ """ return _invoke_function_over_columns("element_at", col, lit(extraction)) @@ -13809,6 +14186,17 @@ def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column: extraction : index to check for in array or key to check for in map + Notes + ----- + The position is not zero based, but 1 based index. + If extraction is a string, :meth:`try_element_at` treats it as a column name, + while :meth:`element_at` treats it as a literal string. + + See Also + -------- + :meth:`get` + :meth:`element_at` + Examples -------- Example 1: Getting the first element of an array @@ -13865,6 +14253,17 @@ def try_element_at(col: "ColumnOrName", extraction: "ColumnOrName") -> Column: +-----------------------+ | NULL| +-----------------------+ + + Example 6: Getting a value from a map using a column name as the key + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([({"a": 1.0, "b": 2.0}, "a")], ['data', 'b']) + >>> df.select(sf.try_element_at(df.data, 'b')).show() + +-----------------------+ + |try_element_at(data, b)| + +-----------------------+ + | 1.0| + +-----------------------+ """ return _invoke_function_over_columns("try_element_at", col, extraction) @@ -14065,7 +14464,10 @@ def array_remove(col: "ColumnOrName", element: Any) -> Column: col : :class:`~pyspark.sql.Column` or str name of column containing array element : - element to be removed from the array + element or a :class:`~pyspark.sql.Column` expression to be removed from the array + + .. versionchanged:: 4.0.0 + `element` now also accepts a Column type. Returns ------- @@ -14133,8 +14535,19 @@ def array_remove(col: "ColumnOrName", element: Any) -> Column: +---------------------+ | []| +---------------------+ + + Example 6: Removing a column's value from a simple array + + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([([1, 2, 3, 1, 1], 1)], ['data', 'col']) + >>> df.select(sf.array_remove(df.data, df.col)).show() + +-----------------------+ + |array_remove(data, col)| + +-----------------------+ + | [2, 3]| + +-----------------------+ """ - return _invoke_function("array_remove", _to_java_column(col), element) + return _invoke_function_over_columns("array_remove", col, lit(element)) @_try_remote_functions @@ -15277,6 +15690,8 @@ def get_json_object(col: "ColumnOrName", path: str) -> Column: ... 
get_json_object(df.jstring, '$.f2').alias("c1") ).collect() [Row(key='1', c0='value1', c1='value2'), Row(key='2', c0='value12', c1=None)] """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("get_json_object", _to_java_column(col), path) @@ -15308,6 +15723,8 @@ def json_tuple(col: "ColumnOrName", *fields: str) -> Column: >>> df.select(df.key, json_tuple(df.jstring, 'f1', 'f2')).collect() [Row(key='1', c0='value1', c1='value2'), Row(key='2', c0='value12', c1=None)] """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + if len(fields) == 0: raise PySparkValueError( error_class="CANNOT_BE_EMPTY", @@ -15415,6 +15832,7 @@ def from_json( |[1, 2, 3]| +---------+ """ + from pyspark.sql.classic.column import _to_java_column if isinstance(schema, DataType): schema = schema.json() @@ -15423,12 +15841,44 @@ def from_json( return _invoke_function("from_json", _to_java_column(col), schema, _options_to_str(options)) +@_try_remote_functions +def try_parse_json( + col: "ColumnOrName", +) -> Column: + """ + Parses a column containing a JSON string into a :class:`VariantType`. Returns None if a string + contains an invalid JSON value. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or str + a column or column name JSON formatted strings + + Returns + ------- + :class:`~pyspark.sql.Column` + a new column of VariantType. + + Examples + -------- + >>> df = spark.createDataFrame([ {'json': '''{ "a" : 1 }'''}, {'json': '''{a : 1}'''} ]) + >>> df.select(to_json(try_parse_json(df.json))).collect() + [Row(to_json(try_parse_json(json))='{"a":1}'), Row(to_json(try_parse_json(json))=None)] + """ + from pyspark.sql.classic.column import _to_java_column + + return _invoke_function("try_parse_json", _to_java_column(col)) + + @_try_remote_functions def parse_json( col: "ColumnOrName", ) -> Column: """ - Parses a column containing a JSON string into a :class:`VariantType`. + Parses a column containing a JSON string into a :class:`VariantType`. Throws exception if a + string represents an invalid JSON value. .. versionadded:: 4.0.0 @@ -15448,10 +15898,170 @@ def parse_json( >>> df.select(to_json(parse_json(df.json))).collect() [Row(to_json(parse_json(json))='{"a":1}')] """ + from pyspark.sql.classic.column import _to_java_column return _invoke_function("parse_json", _to_java_column(col)) +@_try_remote_functions +def is_variant_null(v: "ColumnOrName") -> Column: + """ + Check if a variant value is a variant null. Returns true if and only if the input is a variant + null and false otherwise (including in the case of SQL NULL). + + .. versionadded:: 4.0.0 + + Parameters + ---------- + v : :class:`~pyspark.sql.Column` or str + a variant column or column name + + Returns + ------- + :class:`~pyspark.sql.Column` + a boolean column indicating whether the variant value is a variant null + + Examples + -------- + >>> df = spark.createDataFrame([ {'json': '''{ "a" : 1 }'''} ]) + >>> df.select(is_variant_null(parse_json(df.json)).alias("r")).collect() + [Row(r=False)] + """ + from pyspark.sql.classic.column import _to_java_column + + return _invoke_function("is_variant_null", _to_java_column(v)) + + +@_try_remote_functions +def variant_get(v: "ColumnOrName", path: str, targetType: str) -> Column: + """ + Extracts a sub-variant from `v` according to `path`, and then cast the sub-variant to + `targetType`. Returns null if the path does not exist. Throws an exception if the cast fails. + + .. 
versionadded:: 4.0.0 + + Parameters + ---------- + v : :class:`~pyspark.sql.Column` or str + a variant column or column name + path : str + the extraction path. A valid path should start with `$` and is followed by zero or more + segments like `[123]`, `.name`, `['name']`, or `["name"]`. + targetType : str + the target data type to cast into, in a DDL-formatted string + + Returns + ------- + :class:`~pyspark.sql.Column` + a column of `targetType` representing the extracted result + + Examples + -------- + >>> df = spark.createDataFrame([ {'json': '''{ "a" : 1 }'''} ]) + >>> df.select(variant_get(parse_json(df.json), "$.a", "int").alias("r")).collect() + [Row(r=1)] + >>> df.select(variant_get(parse_json(df.json), "$.b", "int").alias("r")).collect() + [Row(r=None)] + """ + from pyspark.sql.classic.column import _to_java_column + + return _invoke_function("variant_get", _to_java_column(v), path, targetType) + + +@_try_remote_functions +def try_variant_get(v: "ColumnOrName", path: str, targetType: str) -> Column: + """ + Extracts a sub-variant from `v` according to `path`, and then cast the sub-variant to + `targetType`. Returns null if the path does not exist or the cast fails. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + v : :class:`~pyspark.sql.Column` or str + a variant column or column name + path : str + the extraction path. A valid path should start with `$` and is followed by zero or more + segments like `[123]`, `.name`, `['name']`, or `["name"]`. + targetType : str + the target data type to cast into, in a DDL-formatted string + + Returns + ------- + :class:`~pyspark.sql.Column` + a column of `targetType` representing the extracted result + + Examples + -------- + >>> df = spark.createDataFrame([ {'json': '''{ "a" : 1 }'''} ]) + >>> df.select(try_variant_get(parse_json(df.json), "$.a", "int").alias("r")).collect() + [Row(r=1)] + >>> df.select(try_variant_get(parse_json(df.json), "$.b", "int").alias("r")).collect() + [Row(r=None)] + >>> df.select(try_variant_get(parse_json(df.json), "$.a", "binary").alias("r")).collect() + [Row(r=None)] + """ + from pyspark.sql.classic.column import _to_java_column + + return _invoke_function("try_variant_get", _to_java_column(v), path, targetType) + + +@_try_remote_functions +def schema_of_variant(v: "ColumnOrName") -> Column: + """ + Returns schema in the SQL format of a variant. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + v : :class:`~pyspark.sql.Column` or str + a variant column or column name + + Returns + ------- + :class:`~pyspark.sql.Column` + a string column representing the variant schema + + Examples + -------- + >>> df = spark.createDataFrame([ {'json': '''{ "a" : 1 }'''} ]) + >>> df.select(schema_of_variant(parse_json(df.json)).alias("r")).collect() + [Row(r='STRUCT')] + """ + from pyspark.sql.classic.column import _to_java_column + + return _invoke_function("schema_of_variant", _to_java_column(v)) + + +@_try_remote_functions +def schema_of_variant_agg(v: "ColumnOrName") -> Column: + """ + Returns the merged schema in the SQL format of a variant column. + + .. 
versionadded:: 4.0.0 + + Parameters + ---------- + v : :class:`~pyspark.sql.Column` or str + a variant column or column name + + Returns + ------- + :class:`~pyspark.sql.Column` + a string column representing the variant schema + + Examples + -------- + >>> df = spark.createDataFrame([ {'json': '''{ "a" : 1 }'''} ]) + >>> df.select(schema_of_variant_agg(parse_json(df.json)).alias("r")).collect() + [Row(r='STRUCT')] + """ + from pyspark.sql.classic.column import _to_java_column + + return _invoke_function("schema_of_variant_agg", _to_java_column(v)) + + @_try_remote_functions def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Column: """ @@ -15542,6 +16152,7 @@ def to_json(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Co |["Alice","Bob"]| +---------------+ """ + from pyspark.sql.classic.column import _to_java_column return _invoke_function("to_json", _to_java_column(col), _options_to_str(options)) @@ -15584,6 +16195,8 @@ def schema_of_json(json: Union[Column, str], options: Optional[Dict[str, str]] = >>> df.select(schema.alias("json")).collect() [Row(json='STRUCT')] """ + from pyspark.sql.classic.column import _create_column_from_literal, _to_java_column + if isinstance(json, str): col = _create_column_from_literal(json) elif isinstance(json, Column): @@ -15721,6 +16334,7 @@ def from_xml( >>> df.select(sf.from_xml(df.value, schema).alias("xml")).collect() [Row(xml=Row(a=[1, 2]))] """ + from pyspark.sql.classic.column import _to_java_column if isinstance(schema, StructType): schema = schema.json() @@ -15799,6 +16413,8 @@ def schema_of_xml(xml: Union[Column, str], options: Optional[Dict[str, str]] = N ... ).collect() [Row(xml='STRUCT>>')] """ + from pyspark.sql.classic.column import _create_column_from_literal, _to_java_column + if isinstance(xml, str): col = _create_column_from_literal(xml) elif isinstance(xml, Column): @@ -15844,6 +16460,7 @@ def to_xml(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Col >>> df.select(to_xml(df.value, {'rowTag':'person'}).alias("xml")).collect() [Row(xml='\\n 2\\n Alice\\n')] """ + from pyspark.sql.classic.column import _to_java_column return _invoke_function("to_xml", _to_java_column(col), _options_to_str(options)) @@ -15920,6 +16537,8 @@ def schema_of_csv(csv: Union[Column, str], options: Optional[Dict[str, str]] = N |STRUCT<_c0: INT, _c1: STRING, _c2: BOOLEAN>| +-------------------------------------------+ """ + from pyspark.sql.classic.column import _create_column_from_literal, _to_java_column + if isinstance(csv, str): col = _create_column_from_literal(csv) elif isinstance(csv, Column): @@ -16020,6 +16639,7 @@ def to_csv(col: "ColumnOrName", options: Optional[Dict[str, str]] = None) -> Col | 2,Alice,true| +-------------+ """ + from pyspark.sql.classic.column import _to_java_column return _invoke_function("to_csv", _to_java_column(col), _options_to_str(options)) @@ -16427,6 +17047,8 @@ def sort_array(col: "ColumnOrName", asc: bool = True) -> Column: | [NULL, NULL, NULL]| +----------------------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("sort_array", _to_java_column(col), asc) @@ -16689,7 +17311,10 @@ def map_contains_key(col: "ColumnOrName", value: Any) -> Column: col : :class:`~pyspark.sql.Column` or str The name of the column or an expression that represents the map. value : - A literal value. + A literal value, or a :class:`~pyspark.sql.Column` expression. + + .. versionchanged:: 4.0.0 + `value` now also accepts a Column type. 
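Editor's sketch (not part of the patch): the new variant helpers compose naturally. This assumes an active SparkSession `spark` on a build containing these functions; the column values are made up for illustration.

```python
from pyspark.sql import functions as sf

df = spark.createDataFrame([{"json": '{"a": 1, "b": "hello"}'}])
v = sf.parse_json(df.json)

df.select(
    sf.is_variant_null(v).alias("is_null"),               # False: the value is a real object
    sf.schema_of_variant(v).alias("schema"),               # the inferred schema as a string
    sf.variant_get(v, "$.a", "int").alias("a"),            # 1
    sf.try_variant_get(v, "$.a", "binary").alias("bad"),   # None: the cast fails, so null
).show(truncate=False)
```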
Returns ------- @@ -16719,8 +17344,19 @@ def map_contains_key(col: "ColumnOrName", value: Any) -> Column: +--------------------------+ | false| +--------------------------+ + + Example 3: Check for key using a column + + >>> from pyspark.sql import functions as sf + >>> df = spark.sql("SELECT map(1, 'a', 2, 'b') as data, 1 as key") + >>> df.select(sf.map_contains_key("data", sf.col("key"))).show() + +---------------------------+ + |map_contains_key(data, key)| + +---------------------------+ + | true| + +---------------------------+ """ - return _invoke_function("map_contains_key", _to_java_column(col), value) + return _invoke_function_over_columns("map_contains_key", col, lit(value)) @_try_remote_functions @@ -17450,6 +18086,7 @@ def from_csv( | {1, 2, 3}| +---------------+ """ + from pyspark.sql.classic.column import _create_column_from_literal, _to_java_column _get_active_spark_context() if isinstance(schema, str): @@ -17478,6 +18115,7 @@ def _unresolved_named_lambda_variable(*name_parts: Any) -> Column: name_parts : str """ from py4j.java_gateway import JVMView + from pyspark.sql.classic.column import _to_seq sc = _get_active_spark_context() name_parts_seq = _to_seq(sc, name_parts) @@ -17528,6 +18166,7 @@ def _create_lambda(f: Callable) -> Callable: - (Column, Column, Column) -> Column: ... """ from py4j.java_gateway import JVMView + from pyspark.sql.classic.column import _to_seq parameters = _get_lambda_parameters(f) @@ -17573,6 +18212,7 @@ def _invoke_higher_order_function( :return: a Column """ from py4j.java_gateway import JVMView + from pyspark.sql.classic.column import _to_java_column sc = _get_active_spark_context() expressions = cast(JVMView, sc._jvm).org.apache.spark.sql.catalyst.expressions @@ -17835,7 +18475,7 @@ def aggregate( initial value. Name of column or expression merge : function a binary function ``(acc: Column, x: Column) -> Column...`` returning expression - of the same type as ``zero`` + of the same type as ``initialValue`` finish : function, optional an optional unary function ``(x: Column) -> Column: ...`` used to convert accumulated value. @@ -19211,6 +19851,8 @@ def call_udf(udfName: str, *cols: "ColumnOrName") -> Column: | cc| +-----------+ """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + sc = _get_active_spark_context() return _invoke_function("call_udf", udfName, _to_seq(sc, cols, _to_java_column)) @@ -19280,6 +19922,8 @@ def call_function(funcName: str, *cols: "ColumnOrName") -> Column: | 102.0| +------------------------------------+ """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + sc = _get_active_spark_context() return _invoke_function("call_function", funcName, _to_seq(sc, cols, _to_java_column)) @@ -19295,11 +19939,16 @@ def unwrap_udt(col: "ColumnOrName") -> Column: ----- Supports Spark Connect. """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("unwrap_udt", _to_java_column(col)) @_try_remote_functions -def hll_sketch_agg(col: "ColumnOrName", lgConfigK: Optional[Union[int, Column]] = None) -> Column: +def hll_sketch_agg( + col: "ColumnOrName", + lgConfigK: Optional[Union[int, Column]] = None, +) -> Column: """ Aggregate function: returns the updatable binary representation of the Datasketches HllSketch configured with lgConfigK arg. 
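Editor's sketch (not part of the patch): the corrected `aggregate` wording above, that `merge` must return the same type as `initialValue`, is clearest in a small example. Assumes an active SparkSession `spark`.

```python
from pyspark.sql import functions as sf

df = spark.createDataFrame([(1, [20.0, 4.0, 2.0, 6.0, 10.0])], ("id", "values"))
df.select(
    # initialValue is a DOUBLE literal, so the merge lambda must also return a DOUBLE
    sf.aggregate("values", sf.lit(0.0), lambda acc, x: acc + x).alias("sum")
).show()
```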
@@ -19308,8 +19957,8 @@ def hll_sketch_agg(col: "ColumnOrName", lgConfigK: Optional[Union[int, Column]] Parameters ---------- - col : :class:`~pyspark.sql.Column` or str or int - lgConfigK : int, optional + col : :class:`~pyspark.sql.Column` or str + lgConfigK : :class:`~pyspark.sql.Column` or int, optional The log-base-2 of K, where K is the number of buckets or slots for the HllSketch Returns @@ -19348,13 +19997,13 @@ def hll_sketch_agg(col: "ColumnOrName", lgConfigK: Optional[Union[int, Column]] if lgConfigK is None: return _invoke_function_over_columns("hll_sketch_agg", col) else: - _lgConfigK = lit(lgConfigK) if isinstance(lgConfigK, int) else lgConfigK - return _invoke_function_over_columns("hll_sketch_agg", col, _lgConfigK) + return _invoke_function_over_columns("hll_sketch_agg", col, lit(lgConfigK)) @_try_remote_functions def hll_union_agg( - col: "ColumnOrName", allowDifferentLgConfigK: Optional[Union[bool, Column]] = None + col: "ColumnOrName", + allowDifferentLgConfigK: Optional[Union[bool, Column]] = None, ) -> Column: """ Aggregate function: returns the updatable binary representation of the Datasketches @@ -19366,8 +20015,8 @@ def hll_union_agg( Parameters ---------- - col : :class:`~pyspark.sql.Column` or str or bool - allowDifferentLgConfigK : bool, optional + col : :class:`~pyspark.sql.Column` or str + allowDifferentLgConfigK : :class:`~pyspark.sql.Column` or bool, optional Allow sketches with different lgConfigK values to be merged (defaults to false). Returns @@ -19412,12 +20061,7 @@ def hll_union_agg( if allowDifferentLgConfigK is None: return _invoke_function_over_columns("hll_union_agg", col) else: - _allowDifferentLgConfigK = ( - lit(allowDifferentLgConfigK) - if isinstance(allowDifferentLgConfigK, bool) - else allowDifferentLgConfigK - ) - return _invoke_function_over_columns("hll_union_agg", col, _allowDifferentLgConfigK) + return _invoke_function_over_columns("hll_union_agg", col, lit(allowDifferentLgConfigK)) @_try_remote_functions @@ -19448,6 +20092,8 @@ def hll_sketch_estimate(col: "ColumnOrName") -> Column: | 3| +------------+ """ + from pyspark.sql.classic.column import _to_java_column + return _invoke_function("hll_sketch_estimate", _to_java_column(col)) @@ -19486,6 +20132,8 @@ def hll_union( | 6| +------------+ """ + from pyspark.sql.classic.column import _to_java_column + if allowDifferentLgConfigK is not None: return _invoke_function( "hll_union", _to_java_column(col1), _to_java_column(col2), allowDifferentLgConfigK diff --git a/python/pyspark/sql/functions/partitioning.py b/python/pyspark/sql/functions/partitioning.py index 59c293577b08a..e89901cdbe540 100644 --- a/python/pyspark/sql/functions/partitioning.py +++ b/python/pyspark/sql/functions/partitioning.py @@ -26,7 +26,7 @@ ) from pyspark.errors import PySparkTypeError -from pyspark.sql.column import Column, _to_java_column, _create_column_from_literal +from pyspark.sql.column import Column from pyspark.sql.functions.builtin import _invoke_function_over_columns, _invoke_function from pyspark.sql.utils import ( try_partitioning_remote_functions as _try_partitioning_remote_functions, @@ -204,6 +204,8 @@ def bucket(numBuckets: Union[Column, int], col: "ColumnOrName") -> Column: method of the `DataFrameWriterV2`. 
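Editor's sketch (not part of the patch): an end-to-end use of the Datasketches HLL aggregates touched above, assuming an active SparkSession `spark`. The sample data is made up.

```python
from pyspark.sql import functions as sf

df = spark.createDataFrame([(1,), (2,), (2,), (3,)], ["value"])

# Build a sketch (lgConfigK may be an int or a Column) and estimate the distinct count.
df.agg(
    sf.hll_sketch_estimate(sf.hll_sketch_agg("value", 12)).alias("distinct_approx")
).show()
```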
""" + from pyspark.sql.classic.column import _to_java_column, _create_column_from_literal + if not isinstance(numBuckets, (int, Column)): raise PySparkTypeError( error_class="NOT_COLUMN_OR_INT", diff --git a/python/pyspark/sql/group.py b/python/pyspark/sql/group.py index 15934c24b9d4a..ac4ac02a36b16 100644 --- a/python/pyspark/sql/group.py +++ b/python/pyspark/sql/group.py @@ -19,7 +19,7 @@ from typing import Callable, List, Optional, TYPE_CHECKING, overload, Dict, Union, cast, Tuple -from pyspark.sql.column import Column, _to_seq +from pyspark.sql.column import Column from pyspark.sql.session import SparkSession from pyspark.sql.dataframe import DataFrame from pyspark.sql.pandas.group_ops import PandasGroupedOpsMixin @@ -44,6 +44,8 @@ def _api(self: "GroupedData") -> DataFrame: def df_varargs_api(f: Callable[..., DataFrame]) -> Callable[..., DataFrame]: def _api(self: "GroupedData", *cols: str) -> DataFrame: + from pyspark.sql.classic.column import _to_seq + name = f.__name__ jdf = getattr(self._jgd, name)(_to_seq(self.session._sc, cols)) return DataFrame(jdf, self.session) @@ -175,6 +177,8 @@ def agg(self, *exprs: Union[Column, Dict[str, str]]) -> DataFrame: | Bob| 5| +-----+------------+ """ + from pyspark.sql.classic.column import _to_seq + assert exprs, "exprs should not be empty" if len(exprs) == 1 and isinstance(exprs[0], dict): jdf = self._jgd.agg(exprs[0]) @@ -489,7 +493,8 @@ def pivot(self, pivot_col: str, values: Optional[List["LiteralType"]] = None) -> Compute the sum of earnings for each year by course with each course as a separate column - >>> df1.groupBy("year").pivot("course", ["dotNET", "Java"]).sum("earnings").show() + >>> df1.groupBy("year").pivot( + ... "course", ["dotNET", "Java"]).sum("earnings").sort("year").show() +----+------+-----+ |year|dotNET| Java| +----+------+-----+ @@ -499,14 +504,15 @@ def pivot(self, pivot_col: str, values: Optional[List["LiteralType"]] = None) -> Or without specifying column values (less efficient) - >>> df1.groupBy("year").pivot("course").sum("earnings").show() + >>> df1.groupBy("year").pivot("course").sum("earnings").sort("year").show() +----+-----+------+ |year| Java|dotNET| +----+-----+------+ |2012|20000| 15000| |2013|30000| 48000| +----+-----+------+ - >>> df2.groupBy("sales.year").pivot("sales.course").sum("sales.earnings").show() + >>> df2.groupBy( + ... "sales.year").pivot("sales.course").sum("sales.earnings").sort("year").show() ... # doctest: +SKIP +----+-----+------+ |year| Java|dotNET| diff --git a/python/pyspark/sql/metrics.py b/python/pyspark/sql/metrics.py new file mode 100644 index 0000000000000..6664582952014 --- /dev/null +++ b/python/pyspark/sql/metrics.py @@ -0,0 +1,287 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import abc +import dataclasses +from typing import Optional, List, Tuple, Dict, Any, Union, TYPE_CHECKING, Sequence + +from pyspark.errors import PySparkValueError + +if TYPE_CHECKING: + from pyspark.testing.connectutils import have_graphviz + + if have_graphviz: + import graphviz # type: ignore + + +class ObservedMetrics(abc.ABC): + @property + @abc.abstractmethod + def name(self) -> str: + ... + + @property + @abc.abstractmethod + def pairs(self) -> Dict[str, Any]: + ... + + @property + @abc.abstractmethod + def keys(self) -> List[str]: + ... + + +class MetricValue: + """The metric values is the Python representation of a plan metric value from the JVM. + However, it does not have any reference to the original value.""" + + def __init__(self, name: str, value: Union[int, float], type: str): + self._name = name + self._type = type + self._value = value + + def __repr__(self) -> str: + return f"<{self._name}={self._value} ({self._type})>" + + @property + def name(self) -> str: + return self._name + + @property + def value(self) -> Union[int, float]: + return self._value + + @property + def metric_type(self) -> str: + return self._type + + +class PlanMetrics: + """Represents a particular plan node and the associated metrics of this node.""" + + def __init__(self, name: str, id: int, parent: int, metrics: List[MetricValue]): + self._name = name + self._id = id + self._parent_id = parent + self._metrics = metrics + + def __repr__(self) -> str: + return f"Plan({self._name}: {self._id}->{self._parent_id})={self._metrics}" + + @property + def name(self) -> str: + return self._name + + @property + def plan_id(self) -> int: + return self._id + + @property + def parent_plan_id(self) -> int: + return self._parent_id + + @property + def metrics(self) -> List[MetricValue]: + return self._metrics + + +class CollectedMetrics: + @dataclasses.dataclass + class Node: + id: int + name: str = dataclasses.field(default="") + metrics: List[MetricValue] = dataclasses.field(default_factory=list) + children: List[int] = dataclasses.field(default_factory=list) + + def text(self, current: "Node", graph: Dict[int, "Node"], prefix: str = "") -> str: + """ + Converts the current node and its children into a textual representation. This is used + to provide a usable output for the command line or other text-based interfaces. However, + it is recommended to use the Graphviz representation for a more visual representation. + + Parameters + ---------- + current: Node + Current node in the graph. + graph: dict + A dictionary representing the full graph mapping from node ID (int) to the node itself. + The node is an instance of :class:`CollectedMetrics:Node`. + prefix: str + String prefix used for generating the output buffer. + + Returns + ------- + The full string representation of the current node as root. 
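Editor's sketch (not part of the patch): how the classes in this new module fit together, using hand-made node names and metric values purely for illustration.

```python
from pyspark.sql.metrics import MetricValue, PlanMetrics, CollectedMetrics

# Two hand-made plan nodes: a scan (id 2) feeding a project (id 1).
scan = PlanMetrics("Scan parquet", 2, 1, [MetricValue("numOutputRows", 100, "sum")])
project = PlanMetrics("Project", 1, 0, [MetricValue("numOutputRows", 100, "sum")])

# Renders the node graph as an indented text tree.
print(CollectedMetrics([scan, project]).toText())
```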
+ """ + base_metrics = set(["numPartitions", "peakMemory", "numOutputRows", "spillSize"]) + + # Format the metrics of this node: + metric_buffer = [] + for m in current.metrics: + if m.name in base_metrics: + metric_buffer.append(f"{m.name}: {m.value} ({m.metric_type})") + + buffer = f"{prefix}+- {current.name}({','.join(metric_buffer)})\n" + for i, child in enumerate(current.children): + c = graph[child] + new_prefix = prefix + " " if i == len(c.children) - 1 else prefix + if current.id != c.id: + buffer += self.text(c, graph, new_prefix) + return buffer + + def __init__(self, metrics: List[PlanMetrics]): + # Sort the input list + self._metrics = sorted(metrics, key=lambda x: x._parent_id, reverse=False) + + def extract_graph(self) -> Tuple[int, Dict[int, "CollectedMetrics.Node"]]: + """ + Builds the graph of the query plan. The graph is represented as a dictionary where the key + is the node ID and the value is the node itself. The root node is the node that has no + parent. + + Returns + ------- + The root node ID and the graph of all nodes. + """ + all_nodes: Dict[int, CollectedMetrics.Node] = {} + + for m in self._metrics: + # Add yourself to the list if you have to. + if m.plan_id not in all_nodes: + all_nodes[m.plan_id] = CollectedMetrics.Node(m.plan_id, m.name, m.metrics) + else: + all_nodes[m.plan_id].name = m.name + all_nodes[m.plan_id].metrics = m.metrics + + # Now check for the parent of this node if it's in + if m.parent_plan_id not in all_nodes: + all_nodes[m.parent_plan_id] = CollectedMetrics.Node(m.parent_plan_id) + + all_nodes[m.parent_plan_id].children.append(m.plan_id) + + # Next step is to find all the root nodes. Root nodes are never used in children. + # So we start with all node ids as candidates. + candidates = set(all_nodes.keys()) + for k, v in all_nodes.items(): + for c in v.children: + if c in candidates and c != k: + candidates.remove(c) + + assert len(candidates) == 1, f"Expected 1 root node, found {len(candidates)}" + return candidates.pop(), all_nodes + + def toText(self) -> str: + """ + Converts the execution graph from a graph into a textual representation + that can be read at the command line for example. + + Returns + ------- + A string representation of the collected metrics. + """ + root, graph = self.extract_graph() + return self.text(graph[root], graph) + + def toDot(self, filename: Optional[str] = None, out_format: str = "png") -> "graphviz.Digraph": + """ + Converts the collected metrics into a dot representation. Since the graphviz Digraph + implementation provides the ability to render the result graph directory in a + notebook, we return the graph object directly. + + If the graphviz package is not available, a PACKAGE_NOT_INSTALLED error is raised. + + Parameters + ---------- + filename : str, optional + The filename to save the graph to given an output format. The path can be + relative or absolute. + + out_format : str + The output format of the graph. The default is 'png'. + + Returns + ------- + An instance of the graphviz.Digraph object. + """ + try: + import graphviz + + dot = graphviz.Digraph( + comment="Query Plan", + node_attr={ + "shape": "box", + "font-size": "10pt", + }, + ) + + root, graph = self.extract_graph() + for k, v in graph.items(): + # Build table rows for the metrics + rows = "\n".join( + [ + ( + f'{x.name}' + f'{x.value} ({x.metric_type})' + ) + for x in v.metrics + ] + ) + + dot.node( + str(k), + """< + + + + + {} +
      + {} +
      Metrics
      >""".format( + v.name, rows + ), + ) + for c in v.children: + dot.edge(str(k), str(c)) + + if filename: + dot.render(filename, format=out_format, cleanup=True) + return dot + + except ImportError: + raise PySparkValueError( + error_class="PACKAGE_NOT_INSTALLED", + message_parameters={"package_name": "graphviz", "minimum_version": "0.20"}, + ) + + +class ExecutionInfo: + """The query execution class allows users to inspect the query execution of this particular + data frame. This value is only set in the data frame if it was executed.""" + + def __init__( + self, metrics: Optional[list[PlanMetrics]], obs: Optional[Sequence[ObservedMetrics]] + ): + self._metrics = CollectedMetrics(metrics) if metrics else None + self._observations = obs if obs else [] + + @property + def metrics(self) -> Optional[CollectedMetrics]: + return self._metrics + + @property + def flows(self) -> List[Tuple[str, Dict[str, Any]]]: + return [(f.name, f.pairs) for f in self._observations] diff --git a/python/pyspark/sql/observation.py b/python/pyspark/sql/observation.py index 1dae5086e3dd7..4ef4c78ba3c33 100644 --- a/python/pyspark/sql/observation.py +++ b/python/pyspark/sql/observation.py @@ -18,7 +18,6 @@ from typing import Any, Dict, Optional, TYPE_CHECKING from pyspark.errors import PySparkTypeError, PySparkValueError, PySparkAssertionError -from pyspark.sql import column from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame from pyspark.sql.utils import is_remote @@ -116,6 +115,8 @@ def _on(self, df: DataFrame, *exprs: Column) -> DataFrame: :class:`DataFrame` the observed :class:`DataFrame`. """ + from pyspark.sql.classic.column import _to_seq + if self._jo is not None: raise PySparkAssertionError(error_class="REUSE_OBSERVATION", message_parameters={}) @@ -124,7 +125,9 @@ def _on(self, df: DataFrame, *exprs: Column) -> DataFrame: cls = self._jvm.org.apache.spark.sql.Observation self._jo = cls(self._name) if self._name is not None else cls() observed_df = self._jo.on( - df._jdf, exprs[0]._jc, column._to_seq(df._sc, [c._jc for c in exprs[1:]]) + df._jdf, + exprs[0]._jc, + _to_seq(df._sc, [c._jc for c in exprs[1:]]), ) return DataFrame(observed_df, df.sparkSession) diff --git a/python/pyspark/sql/pandas/conversion.py b/python/pyspark/sql/pandas/conversion.py index 4f137c7004c1c..9da15caac8025 100644 --- a/python/pyspark/sql/pandas/conversion.py +++ b/python/pyspark/sql/pandas/conversion.py @@ -56,35 +56,11 @@ class PandasConversionMixin: """ - Mix-in for the conversion from Spark to pandas. Currently, only :class:`DataFrame` - can use this class. + Mix-in for the conversion from Spark to pandas and PyArrow. Currently, only + :class:`DataFrame` can use this class. """ def toPandas(self) -> "PandasDataFrameLike": - """ - Returns the contents of this :class:`DataFrame` as Pandas ``pandas.DataFrame``. - - This is only available if Pandas is installed and available. - - .. versionadded:: 1.3.0 - - .. versionchanged:: 3.4.0 - Supports Spark Connect. - - Notes - ----- - This method should only be used if the resulting Pandas ``pandas.DataFrame`` is - expected to be small, as all the data is loaded into the driver's memory. - - Usage with ``spark.sql.execution.arrow.pyspark.enabled=True`` is experimental. 
- - Examples - -------- - >>> df.toPandas() # doctest: +SKIP - age name - 0 2 Alice - 1 5 Bob - """ from pyspark.sql.dataframe import DataFrame assert isinstance(self, DataFrame) @@ -249,15 +225,48 @@ def toPandas(self) -> "PandasDataFrameLike": else: return pdf - def _collect_as_arrow(self, split_batches: bool = False) -> List["pa.RecordBatch"]: + def toArrow(self) -> "pa.Table": + from pyspark.sql.dataframe import DataFrame + + assert isinstance(self, DataFrame) + + jconf = self.sparkSession._jconf + + from pyspark.sql.pandas.types import to_arrow_schema + from pyspark.sql.pandas.utils import require_minimum_pyarrow_version + + require_minimum_pyarrow_version() + schema = to_arrow_schema(self.schema, error_on_duplicated_field_names_in_struct=True) + + import pyarrow as pa + + self_destruct = jconf.arrowPySparkSelfDestructEnabled() + batches = self._collect_as_arrow( + split_batches=self_destruct, empty_list_if_zero_records=False + ) + table = pa.Table.from_batches(batches).cast(schema) + # Ensure only the table has a reference to the batches, so that + # self_destruct (if enabled) is effective + del batches + return table + + def _collect_as_arrow( + self, + split_batches: bool = False, + empty_list_if_zero_records: bool = True, + ) -> List["pa.RecordBatch"]: """ - Returns all records as a list of ArrowRecordBatches, pyarrow must be installed + Returns all records as a list of Arrow RecordBatches. PyArrow must be installed and available on driver and worker Python environments. This is an experimental feature. :param split_batches: split batches such that each column is in its own allocation, so that the selfDestruct optimization is effective; default False. + :param empty_list_if_zero_records: If True (the default), returns an empty list if the + result has 0 records. Otherwise, returns a list of length 1 containing an empty + Arrow RecordBatch which includes the schema. + .. note:: Experimental. """ from pyspark.sql.dataframe import DataFrame @@ -306,14 +315,22 @@ def _collect_as_arrow(self, split_batches: bool = False) -> List["pa.RecordBatch batches = results[:-1] batch_order = results[-1] - # Re-order the batch list using the correct order - return [batches[i] for i in batch_order] + if len(batches) or empty_list_if_zero_records: + # Re-order the batch list using the correct order + return [batches[i] for i in batch_order] + else: + from pyspark.sql.pandas.types import to_arrow_schema + import pyarrow as pa + + schema = to_arrow_schema(self.schema) + empty_arrays = [pa.array([], type=field.type) for field in schema] + return [pa.RecordBatch.from_arrays(empty_arrays, schema=schema)] class SparkConversionMixin: """ - Min-in for the conversion from pandas to Spark. Currently, only :class:`SparkSession` - can use this class. + Min-in for the conversion from pandas and PyArrow to Spark. Currently, only + :class:`SparkSession` can use this class. """ _jsparkSession: "JavaObject" @@ -324,6 +341,12 @@ def createDataFrame( ) -> "DataFrame": ... + @overload + def createDataFrame( + self, data: "pa.Table", samplingRatio: Optional[float] = ... + ) -> "DataFrame": + ... + @overload def createDataFrame( self, @@ -333,9 +356,18 @@ def createDataFrame( ) -> "DataFrame": ... + @overload + def createDataFrame( + self, + data: "pa.Table", + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> "DataFrame": + ... 
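Editor's sketch (not part of the patch): a round trip through the new PyArrow overloads, `createDataFrame` accepting a `pyarrow.Table` and `DataFrame.toArrow`. Assumes an active SparkSession `spark` and an installed pyarrow.

```python
import pyarrow as pa

table = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})

df = spark.createDataFrame(table)         # schema is taken from the Arrow schema
result = df.filter(df.id > 1).toArrow()   # collect back to the driver as a pyarrow.Table

print(result.column("name").to_pylist())  # ['b', 'c']
```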
+ def createDataFrame( # type: ignore[misc] self, - data: "PandasDataFrameLike", + data: Union["PandasDataFrameLike", "pa.Table"], schema: Optional[Union[StructType, List[str]]] = None, samplingRatio: Optional[float] = None, verifySchema: bool = True, @@ -344,12 +376,29 @@ def createDataFrame( # type: ignore[misc] assert isinstance(self, SparkSession) + timezone = self._jconf.sessionLocalTimeZone() + + if type(data).__name__ == "Table": + # `data` is a PyArrow Table + from pyspark.sql.pandas.utils import require_minimum_pyarrow_version + + require_minimum_pyarrow_version() + + import pyarrow as pa + + assert isinstance(data, pa.Table) + + # If no schema supplied by user then get the names of columns only + if schema is None: + schema = data.schema.names + + return self._create_from_arrow_table(data, schema, timezone) + + # `data` is a PandasDataFrameLike object from pyspark.sql.pandas.utils import require_minimum_pandas_version require_minimum_pandas_version() - timezone = self._jconf.sessionLocalTimeZone() - # If no schema supplied by user then get the names of columns only if schema is None: schema = [str(x) if not isinstance(x, str) else x for x in data.columns] @@ -695,6 +744,75 @@ def create_iter_server(): df._schema = schema return df + def _create_from_arrow_table( + self, table: "pa.Table", schema: Union[StructType, List[str]], timezone: str + ) -> "DataFrame": + """ + Create a DataFrame from a given pyarrow.Table by slicing it into partitions then + sending to the JVM to parallelize. + """ + from pyspark.sql import SparkSession + from pyspark.sql.dataframe import DataFrame + + assert isinstance(self, SparkSession) + + from pyspark.sql.pandas.serializers import ArrowStreamSerializer + from pyspark.sql.pandas.types import ( + from_arrow_type, + from_arrow_schema, + to_arrow_schema, + _check_arrow_table_timestamps_localize, + ) + from pyspark.sql.pandas.utils import require_minimum_pyarrow_version + + require_minimum_pyarrow_version() + + prefer_timestamp_ntz = is_timestamp_ntz_preferred() + + # Create the Spark schema from list of names passed in with Arrow types + if isinstance(schema, (list, tuple)): + table = table.rename_columns(schema) + arrow_schema = table.schema + struct = StructType() + for name, field in zip(schema, arrow_schema): + struct.add( + name, + from_arrow_type(field.type, prefer_timestamp_ntz), + nullable=field.nullable, + ) + schema = struct + + if not isinstance(schema, StructType): + schema = from_arrow_schema(table.schema, prefer_timestamp_ntz=prefer_timestamp_ntz) + + table = _check_arrow_table_timestamps_localize(table, schema, True, timezone).cast( + to_arrow_schema(schema, error_on_duplicated_field_names_in_struct=True) + ) + + # Chunk the Arrow Table into RecordBatches + chunk_size = self._jconf.arrowMaxRecordsPerBatch() + arrow_data = table.to_batches(max_chunksize=chunk_size) + + jsparkSession = self._jsparkSession + + ser = ArrowStreamSerializer() + + @no_type_check + def reader_func(temp_filename): + return self._jvm.PythonSQLUtils.readArrowStreamFromFile(temp_filename) + + @no_type_check + def create_iter_server(): + return self._jvm.ArrowIteratorServer() + + # Create Spark DataFrame from Arrow stream file, using one batch per partition + jiter = self._sc._serialize_to_jvm(arrow_data, ser, reader_func, create_iter_server) + assert self._jvm is not None + jdf = self._jvm.PythonSQLUtils.toDataFrame(jiter, schema.json(), jsparkSession) + df = DataFrame(jdf, self) + df._schema = schema + return df + def _test() -> None: import doctest diff --git 
a/python/pyspark/sql/pandas/functions.py b/python/pyspark/sql/pandas/functions.py index 62d365a3b2a1d..020105bb064ae 100644 --- a/python/pyspark/sql/pandas/functions.py +++ b/python/pyspark/sql/pandas/functions.py @@ -431,7 +431,8 @@ def calculate(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: return _create_pandas_udf(f=f, returnType=return_type, evalType=eval_type) -def _create_pandas_udf(f, returnType, evalType): +# validate the pandas udf and return the adjusted eval type +def _validate_pandas_udf(f, evalType) -> int: argspec = getfullargspec(f) # pandas UDF by type hints. @@ -528,6 +529,12 @@ def _create_pandas_udf(f, returnType, evalType): }, ) + return evalType + + +def _create_pandas_udf(f, returnType, evalType): + evalType = _validate_pandas_udf(f, evalType) + if is_remote(): from pyspark.sql.connect.udf import _create_udf as _create_connect_udf diff --git a/python/pyspark/sql/pandas/group_ops.py b/python/pyspark/sql/pandas/group_ops.py index d5b214e2f7d5b..3d1c50d949028 100644 --- a/python/pyspark/sql/pandas/group_ops.py +++ b/python/pyspark/sql/pandas/group_ops.py @@ -18,7 +18,7 @@ from typing import List, Union, TYPE_CHECKING, cast import warnings -from pyspark.errors import PySparkValueError +from pyspark.errors import PySparkTypeError from pyspark.util import PythonEvalType from pyspark.sql.column import Column from pyspark.sql.dataframe import DataFrame @@ -100,11 +100,9 @@ def apply(self, udf: "GroupedMapPandasUserDefinedFunction") -> DataFrame: != PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF ) ): - raise PySparkValueError( - error_class="INVALID_PANDAS_UDF", - message_parameters={ - "detail": "the udf argument must be a pandas_udf of type GROUPED_MAP." - }, + raise PySparkTypeError( + error_class="INVALID_UDF_EVAL_TYPE", + message_parameters={"eval_type": "SQL_GROUPED_MAP_PANDAS_UDF"}, ) warnings.warn( diff --git a/python/pyspark/sql/pandas/map_ops.py b/python/pyspark/sql/pandas/map_ops.py index 8c2795a8fbe42..b02fe018b688e 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -30,7 +30,7 @@ class PandasMapOpsMixin: """ - Min-in for pandas map operations. Currently, only :class:`DataFrame` + Mix-in for pandas map operations. Currently, only :class:`DataFrame` can use this class. """ @@ -41,109 +41,6 @@ def mapInPandas( barrier: bool = False, profile: Optional[ResourceProfile] = None, ) -> "DataFrame": - """ - Maps an iterator of batches in the current :class:`DataFrame` using a Python native - function that takes and outputs a pandas DataFrame, and returns the result as a - :class:`DataFrame`. - - The function should take an iterator of `pandas.DataFrame`\\s and return - another iterator of `pandas.DataFrame`\\s. All columns are passed - together as an iterator of `pandas.DataFrame`\\s to the function and the - returned iterator of `pandas.DataFrame`\\s are combined as a :class:`DataFrame`. - Each `pandas.DataFrame` size can be controlled by - `spark.sql.execution.arrow.maxRecordsPerBatch`. The size of the function's input and - output can be different. - - .. versionadded:: 3.0.0 - - .. versionchanged:: 3.4.0 - Supports Spark Connect. - - Parameters - ---------- - func : function - a Python native function that takes an iterator of `pandas.DataFrame`\\s, and - outputs an iterator of `pandas.DataFrame`\\s. - schema : :class:`pyspark.sql.types.DataType` or str - the return type of the `func` in PySpark. The value can be either a - :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. 
- barrier : bool, optional, default False - Use barrier mode execution. - - .. versionadded: 3.5.0 - - profile : :class:`pyspark.resource.ResourceProfile`. The optional ResourceProfile - to be used for mapInPandas. - - .. versionadded: 4.0.0 - - - Examples - -------- - >>> df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age")) - - Filter rows with id equal to 1: - - >>> def filter_func(iterator): - ... for pdf in iterator: - ... yield pdf[pdf.id == 1] - ... - >>> df.mapInPandas(filter_func, df.schema).show() # doctest: +SKIP - +---+---+ - | id|age| - +---+---+ - | 1| 21| - +---+---+ - - Compute the mean age for each id: - - >>> def mean_age(iterator): - ... for pdf in iterator: - ... yield pdf.groupby("id").mean().reset_index() - ... - >>> df.mapInPandas(mean_age, "id: bigint, age: double").show() # doctest: +SKIP - +---+----+ - | id| age| - +---+----+ - | 1|21.0| - | 2|30.0| - +---+----+ - - Add a new column with the double of the age: - - >>> def double_age(iterator): - ... for pdf in iterator: - ... pdf["double_age"] = pdf["age"] * 2 - ... yield pdf - ... - >>> df.mapInPandas( - ... double_age, "id: bigint, age: bigint, double_age: bigint").show() # doctest: +SKIP - +---+---+----------+ - | id|age|double_age| - +---+---+----------+ - | 1| 21| 42| - | 2| 30| 60| - +---+---+----------+ - - Set ``barrier`` to ``True`` to force the ``mapInPandas`` stage running in the - barrier mode, it ensures all Python workers in the stage will be - launched concurrently. - - >>> df.mapInPandas(filter_func, df.schema, barrier=True).show() # doctest: +SKIP - +---+---+ - | id|age| - +---+---+ - | 1| 21| - +---+---+ - - Notes - ----- - This API is experimental - - See Also - -------- - pyspark.sql.functions.pandas_udf - """ from pyspark.sql import DataFrame from pyspark.sql.pandas.functions import pandas_udf @@ -166,74 +63,6 @@ def mapInArrow( barrier: bool = False, profile: Optional[ResourceProfile] = None, ) -> "DataFrame": - """ - Maps an iterator of batches in the current :class:`DataFrame` using a Python native - function that takes and outputs a PyArrow's `RecordBatch`, and returns the result as a - :class:`DataFrame`. - - The function should take an iterator of `pyarrow.RecordBatch`\\s and return - another iterator of `pyarrow.RecordBatch`\\s. All columns are passed - together as an iterator of `pyarrow.RecordBatch`\\s to the function and the - returned iterator of `pyarrow.RecordBatch`\\s are combined as a :class:`DataFrame`. - Each `pyarrow.RecordBatch` size can be controlled by - `spark.sql.execution.arrow.maxRecordsPerBatch`. The size of the function's input and - output can be different. - - .. versionadded:: 3.3.0 - - Parameters - ---------- - func : function - a Python native function that takes an iterator of `pyarrow.RecordBatch`\\s, and - outputs an iterator of `pyarrow.RecordBatch`\\s. - schema : :class:`pyspark.sql.types.DataType` or str - the return type of the `func` in PySpark. The value can be either a - :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. - barrier : bool, optional, default False - Use barrier mode execution. - - .. versionadded: 3.5.0 - - profile : :class:`pyspark.resource.ResourceProfile`. The optional ResourceProfile - to be used for mapInArrow. - - .. versionadded: 4.0.0 - - Examples - -------- - >>> import pyarrow # doctest: +SKIP - >>> df = spark.createDataFrame([(1, 21), (2, 30)], ("id", "age")) - >>> def filter_func(iterator): - ... for batch in iterator: - ... pdf = batch.to_pandas() - ... 
yield pyarrow.RecordBatch.from_pandas(pdf[pdf.id == 1]) - >>> df.mapInArrow(filter_func, df.schema).show() # doctest: +SKIP - +---+---+ - | id|age| - +---+---+ - | 1| 21| - +---+---+ - - Set ``barrier`` to ``True`` to force the ``mapInArrow`` stage running in the - barrier mode, it ensures all Python workers in the stage will be - launched concurrently. - - >>> df.mapInArrow(filter_func, df.schema, barrier=True).show() # doctest: +SKIP - +---+---+ - | id|age| - +---+---+ - | 1| 21| - +---+---+ - - Notes - ----- - This API is unstable, and for developers. - - See Also - -------- - pyspark.sql.functions.pandas_udf - pyspark.sql.DataFrame.mapInPandas - """ from pyspark.sql import DataFrame from pyspark.sql.pandas.functions import pandas_udf diff --git a/python/pyspark/sql/pandas/types.py b/python/pyspark/sql/pandas/types.py index 559512bd00c1c..27c77c9d2d7f1 100644 --- a/python/pyspark/sql/pandas/types.py +++ b/python/pyspark/sql/pandas/types.py @@ -52,6 +52,7 @@ _create_row, ) from pyspark.errors import PySparkTypeError, UnsupportedOperationException, PySparkValueError +from pyspark.loose_version import LooseVersion if TYPE_CHECKING: import pandas as pd @@ -60,8 +61,32 @@ from pyspark.sql.pandas._typing import SeriesLike as PandasSeriesLike -def to_arrow_type(dt: DataType) -> "pa.DataType": - """Convert Spark data type to pyarrow type""" +def to_arrow_type( + dt: DataType, + error_on_duplicated_field_names_in_struct: bool = False, + timestamp_utc: bool = True, +) -> "pa.DataType": + """ + Convert Spark data type to PyArrow type + + Parameters + ---------- + dt : :class:`DataType` + The Spark data type. + error_on_duplicated_field_names_in_struct: bool, default False + Whether to raise an exception when there are duplicated field names in a + :class:`pyspark.sql.types.StructType`. (default ``False``) + timestamp_utc : bool, default True + If ``True`` (the default), :class:`TimestampType` is converted to a timezone-aware + :class:`pyarrow.TimestampType` with UTC as the timezone. If ``False``, + :class:`TimestampType` is converted to a timezone-naive :class:`pyarrow.TimestampType`. + The JVM expects timezone-aware timestamps to be in UTC. Always keep this set to ``True`` + except in special cases, such as when this function is used in a test. 
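Editor's sketch (not part of the patch): the two new keyword arguments on `to_arrow_type`, run on the driver only (no session needed). The struct fields are made up.

```python
from pyspark.sql.pandas.types import to_arrow_type
from pyspark.sql.types import StructType, StructField, TimestampType
from pyspark.errors import UnsupportedOperationException

st = StructType([StructField("ts", TimestampType())])
print(to_arrow_type(st))                       # struct<ts: timestamp[us, tz=UTC]>
print(to_arrow_type(st, timestamp_utc=False))  # struct<ts: timestamp[us]>, timezone-naive

dup = StructType([StructField("a", TimestampType()), StructField("a", TimestampType())])
try:
    to_arrow_type(dup, error_on_duplicated_field_names_in_struct=True)
except UnsupportedOperationException as e:
    print("duplicated struct field names rejected:", e)
```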
+ + Returns + ------- + :class:`pyarrow.DataType` + """ import pyarrow as pa if type(dt) == BooleanType: @@ -86,30 +111,58 @@ def to_arrow_type(dt: DataType) -> "pa.DataType": arrow_type = pa.binary() elif type(dt) == DateType: arrow_type = pa.date32() - elif type(dt) == TimestampType: + elif type(dt) == TimestampType and timestamp_utc: # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read arrow_type = pa.timestamp("us", tz="UTC") + elif type(dt) == TimestampType: + arrow_type = pa.timestamp("us", tz=None) elif type(dt) == TimestampNTZType: arrow_type = pa.timestamp("us", tz=None) elif type(dt) == DayTimeIntervalType: arrow_type = pa.duration("us") elif type(dt) == ArrayType: - field = pa.field("element", to_arrow_type(dt.elementType), nullable=dt.containsNull) + field = pa.field( + "element", + to_arrow_type(dt.elementType, error_on_duplicated_field_names_in_struct, timestamp_utc), + nullable=dt.containsNull, + ) arrow_type = pa.list_(field) elif type(dt) == MapType: - key_field = pa.field("key", to_arrow_type(dt.keyType), nullable=False) - value_field = pa.field("value", to_arrow_type(dt.valueType), nullable=dt.valueContainsNull) + key_field = pa.field( + "key", + to_arrow_type(dt.keyType, error_on_duplicated_field_names_in_struct, timestamp_utc), + nullable=False, + ) + value_field = pa.field( + "value", + to_arrow_type(dt.valueType, error_on_duplicated_field_names_in_struct, timestamp_utc), + nullable=dt.valueContainsNull, + ) arrow_type = pa.map_(key_field, value_field) elif type(dt) == StructType: + field_names = dt.names + if error_on_duplicated_field_names_in_struct and len(set(field_names)) != len(field_names): + raise UnsupportedOperationException( + error_class="DUPLICATED_FIELD_NAME_IN_ARROW_STRUCT", + message_parameters={"field_names": str(field_names)}, + ) fields = [ - pa.field(field.name, to_arrow_type(field.dataType), nullable=field.nullable) + pa.field( + field.name, + to_arrow_type( + field.dataType, error_on_duplicated_field_names_in_struct, timestamp_utc + ), + nullable=field.nullable, + ) for field in dt ] arrow_type = pa.struct(fields) elif type(dt) == NullType: arrow_type = pa.null() elif isinstance(dt, UserDefinedType): - arrow_type = to_arrow_type(dt.sqlType()) + arrow_type = to_arrow_type( + dt.sqlType(), error_on_duplicated_field_names_in_struct, timestamp_utc + ) elif type(dt) == VariantType: fields = [ pa.field("value", pa.binary(), nullable=False), @@ -124,12 +177,40 @@ def to_arrow_type(dt: DataType) -> "pa.DataType": return arrow_type -def to_arrow_schema(schema: StructType) -> "pa.Schema": - """Convert a schema from Spark to Arrow""" +def to_arrow_schema( + schema: StructType, + error_on_duplicated_field_names_in_struct: bool = False, + timestamp_utc: bool = True, +) -> "pa.Schema": + """ + Convert a schema from Spark to Arrow + + Parameters + ---------- + schema : :class:`StructType` + The Spark schema. + error_on_duplicated_field_names_in_struct: bool, default False + Whether to raise an exception when there are duplicated field names in an inner + :class:`pyspark.sql.types.StructType`. (default ``False``) + timestamp_utc : bool, default True + If ``True`` (the default), :class:`TimestampType` is converted to a timezone-aware + :class:`pyarrow.TimestampType` with UTC as the timezone. If ``False``, + :class:`TimestampType` is converted to a timezone-naive :class:`pyarrow.TimestampType`. + The JVM expects timezone-aware timestamps to be in UTC. 
Always keep this set to ``True`` + except in special cases, such as when this function is used in a test + + Returns + ------- + :class:`pyarrow.Schema` + """ import pyarrow as pa fields = [ - pa.field(field.name, to_arrow_type(field.dataType), nullable=field.nullable) + pa.field( + field.name, + to_arrow_type(field.dataType, error_on_duplicated_field_names_in_struct, timestamp_utc), + nullable=field.nullable, + ) for field in schema ] return pa.schema(fields) @@ -162,6 +243,8 @@ def from_arrow_type(at: "pa.DataType", prefer_timestamp_ntz: bool = False) -> Da spark_type = StringType() elif types.is_binary(at): spark_type = BinaryType() + elif types.is_fixed_size_binary(at): + spark_type = BinaryType() elif types.is_large_binary(at): spark_type = BinaryType() elif types.is_date32(at): @@ -174,6 +257,18 @@ def from_arrow_type(at: "pa.DataType", prefer_timestamp_ntz: bool = False) -> Da spark_type = DayTimeIntervalType() elif types.is_list(at): spark_type = ArrayType(from_arrow_type(at.value_type, prefer_timestamp_ntz)) + elif types.is_fixed_size_list(at): + import pyarrow as pa + + if LooseVersion(pa.__version__) < LooseVersion("14.0.0"): + # PyArrow versions before 14.0.0 do not support casting FixedSizeListArray to ListArray + raise PySparkTypeError( + error_class="UNSUPPORTED_DATA_TYPE_FOR_ARROW_CONVERSION", + message_parameters={"data_type": str(at)}, + ) + spark_type = ArrayType(from_arrow_type(at.value_type, prefer_timestamp_ntz)) + elif types.is_large_list(at): + spark_type = ArrayType(from_arrow_type(at.value_type, prefer_timestamp_ntz)) elif types.is_map(at): spark_type = MapType( from_arrow_type(at.key_type, prefer_timestamp_ntz), @@ -232,6 +327,162 @@ def _get_local_timezone() -> str: return os.environ.get("TZ", "dateutil/:") +def _check_arrow_array_timestamps_localize( + a: Union["pa.Array", "pa.ChunkedArray"], + dt: DataType, + truncate: bool = True, + timezone: Optional[str] = None, +) -> Union["pa.Array", "pa.ChunkedArray"]: + """ + Convert Arrow timestamps to timezone-naive in the specified timezone if the specified Spark + data type is TimestampType, and optionally truncate nanosecond timestamps to microseconds. + + This function works on Arrow Arrays and ChunkedArrays, and it recurses to convert nested + timestamps. + + Parameters + ---------- + a : :class:`pyarrow.Array` or :class:`pyarrow.ChunkedArray` + dt : :class:`DataType` + The Spark data type corresponding to the Arrow Array to be converted. + truncate : bool, default True + Whether to truncate nanosecond timestamps to microseconds. (default ``True``) + timezone : str, optional + The timezone to convert from. If there is a timestamp type, it's required. + + Returns + ------- + :class:`pyarrow.Array` or :class:`pyarrow.ChunkedArray` + """ + import pyarrow.types as types + import pyarrow as pa + import pyarrow.compute as pc + + if isinstance(a, pa.ChunkedArray) and (types.is_nested(a.type) or types.is_dictionary(a.type)): + return pa.chunked_array( + [ + _check_arrow_array_timestamps_localize(chunk, dt, truncate, timezone) + for chunk in a.iterchunks() + ] + ) + + if types.is_timestamp(a.type) and truncate and a.type.unit == "ns": + a = pc.floor_temporal(a, unit="microsecond") + + if types.is_timestamp(a.type) and a.type.tz is None and type(dt) == TimestampType: + assert timezone is not None + + # Only localize timestamps that will become Spark TimestampType columns. + # Do not localize timestamps that will become Spark TimestampNTZType columns. 
+ return pc.assume_timezone(a, timezone) + if types.is_list(a.type): + # Return the ListArray as-is if it contains no nested fields or timestamps + if not types.is_nested(a.type.value_type) and not types.is_timestamp(a.type.value_type): + return a + + at: ArrayType = cast(ArrayType, dt) + return pa.ListArray.from_arrays( + a.offsets, + _check_arrow_array_timestamps_localize(a.values, at.elementType, truncate, timezone), + mask=a.is_null() if a.null_count else None, + ) + if types.is_map(a.type): + # Return the MapArray as-is if it contains no nested fields or timestamps + if ( + not types.is_nested(a.type.key_type) + and not types.is_nested(a.type.item_type) + and not types.is_timestamp(a.type.key_type) + and not types.is_timestamp(a.type.item_type) + ): + return a + + mt: MapType = cast(MapType, dt) + + params = { + "offsets": a.offsets, + "keys": _check_arrow_array_timestamps_localize(a.keys, mt.keyType, truncate, timezone), + "items": _check_arrow_array_timestamps_localize( + a.items, mt.valueType, truncate, timezone + ), + } + # SPARK-48302: PyArrow added support for mask argument to pa.MapArray.from_arrays in + # version 17.0.0 + if a.null_count and LooseVersion(pa.__version__) >= LooseVersion("17.0.0"): + params["mask"] = a.is_null() + + return pa.MapArray.from_arrays(**params) + if types.is_struct(a.type): + # Return the StructArray as-is if it contains no nested fields or timestamps + if all( + [ + not types.is_nested(a.type.field(i).type) + and not types.is_timestamp(a.type.field(i).type) + for i in range(a.type.num_fields) + ] + ): + return a + + st: StructType = cast(StructType, dt) + assert len(a.type) == len(st.fields) + + return pa.StructArray.from_arrays( + [ + _check_arrow_array_timestamps_localize( + a.field(i), st.fields[i].dataType, truncate, timezone + ) + for i in range(len(a.type)) + ], + [a.type[i].name for i in range(len(a.type))], + mask=a.is_null() if a.null_count else None, + ) + if types.is_dictionary(a.type): + return pa.DictionaryArray.from_arrays( + a.indices, + _check_arrow_array_timestamps_localize(a.dictionary, dt, truncate, timezone), + ) + return a + + +def _check_arrow_table_timestamps_localize( + table: "pa.Table", schema: StructType, truncate: bool = True, timezone: Optional[str] = None +) -> "pa.Table": + """ + Convert timestamps in a PyArrow Table to timezone-naive in the specified timezone if the + corresponding Spark data type is TimestampType in the specified Spark schema is TimestampType, + and optionally truncate nanosecond timestamps to microseconds. + + Parameters + ---------- + table : :class:`pyarrow.Table` + schema : :class:`StructType` + The Spark schema corresponding to the schema of the Arrow Table. + truncate : bool, default True + Whether to truncate nanosecond timestamps to microseconds. (default ``True``) + timezone : str, optional + The timezone to convert from. If there is a timestamp type, it's required. 
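
The localization helper above leans on two pyarrow compute kernels: `floor_temporal` to drop sub-microsecond precision and `assume_timezone` to attach a timezone to naive timestamps without shifting the stored instant. A small standalone sketch, assuming pyarrow is installed:

    import pyarrow as pa
    import pyarrow.compute as pc

    # Timezone-naive nanosecond timestamps (integers are nanoseconds since the epoch).
    a = pa.array([1_000_000_123, 2_000_000_456], type=pa.timestamp("ns"))

    # Truncate to whole microseconds; the unit stays "ns", only the values are floored.
    a = pc.floor_temporal(a, unit="microsecond")

    # Attach a timezone to the naive values; the underlying instants are unchanged.
    localized = pc.assume_timezone(a, "America/Los_Angeles")
    print(localized.type)  # timestamp[ns, tz=America/Los_Angeles]
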
+ + Returns + ------- + :class:`pyarrow.Table` + """ + import pyarrow.types as types + import pyarrow as pa + + # Return the table as-is if it contains no nested fields or timestamps + if all([not types.is_nested(at) and not types.is_timestamp(at) for at in table.schema.types]): + return table + + assert len(table.schema) == len(schema.fields) + + return pa.Table.from_arrays( + [ + _check_arrow_array_timestamps_localize(a, f.dataType, truncate, timezone) + for a, f in zip(table.columns, schema.fields) + ], + schema=table.schema, + ) + + def _check_series_localize_timestamps(s: "PandasSeriesLike", timezone: str) -> "PandasSeriesLike": """ Convert timezone aware timestamps to timezone-naive in the specified timezone or local timezone. diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py index 654b73e3b93ca..fafc3186410c3 100644 --- a/python/pyspark/sql/pandas/utils.py +++ b/python/pyspark/sql/pandas/utils.py @@ -22,7 +22,7 @@ def require_minimum_pandas_version() -> None: """Raise ImportError if minimum version of Pandas is not installed""" # TODO(HyukjinKwon): Relocate and deduplicate the version specification. - minimum_pandas_version = "1.4.4" + minimum_pandas_version = "2.0.0" try: import pandas diff --git a/python/pyspark/sql/protobuf/functions.py b/python/pyspark/sql/protobuf/functions.py index 63871e4375718..1e75874e75f9a 100644 --- a/python/pyspark/sql/protobuf/functions.py +++ b/python/pyspark/sql/protobuf/functions.py @@ -22,7 +22,7 @@ from typing import Dict, Optional, TYPE_CHECKING, cast -from pyspark.sql.column import Column, _to_java_column +from pyspark.sql.column import Column from pyspark.sql.utils import get_active_spark_context, try_remote_protobuf_functions from pyspark.util import _print_missing_jar @@ -139,6 +139,7 @@ def from_protobuf( +------------------+ """ from py4j.java_gateway import JVMView + from pyspark.sql.classic.column import _to_java_column sc = get_active_spark_context() try: @@ -260,6 +261,7 @@ def to_protobuf( +----------------------------+ """ from py4j.java_gateway import JVMView + from pyspark.sql.classic.column import _to_java_column sc = get_active_spark_context() try: diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 26fe8c5e6fa2f..0ee235c8bf026 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -18,7 +18,7 @@ from typing import cast, overload, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING, Union from pyspark.util import is_remote_only -from pyspark.sql.column import _to_seq, _to_java_column, Column +from pyspark.sql.column import Column from pyspark.sql.types import StructType from pyspark.sql import utils from pyspark.sql.utils import to_str @@ -619,6 +619,8 @@ def parquet(self, *paths: str, **options: "OptionalPrimitiveType") -> "DataFrame | Tom| 20| NULL| +-----+----+------+ """ + from pyspark.sql.classic.column import _to_seq + mergeSchema = options.get("mergeSchema", None) pathGlobFilter = options.get("pathGlobFilter", None) modifiedBefore = options.get("modifiedBefore", None) @@ -1042,6 +1044,8 @@ def orc( |100|Hyukjin Kwon| +---+------------+ """ + from pyspark.sql.classic.column import _to_seq + self._set_opts( mergeSchema=mergeSchema, pathGlobFilter=pathGlobFilter, @@ -1440,6 +1444,8 @@ def partitionBy(self, *cols: Union[str, List[str]]) -> "DataFrameWriter": |100| +---+ """ + from pyspark.sql.classic.column import _to_seq + if len(cols) == 1 and isinstance(cols[0], (list, tuple)): cols = cols[0] # type: ignore[assignment] 
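
The recurring change in `functions.py` and `readwriter.py` above is moving the `_to_java_column` / `_to_seq` imports from module level into the functions that need them, so `pyspark.sql.classic.column` (and py4j) is only pulled in when a classic, JVM-backed code path actually runs. A sketch of the pattern with a hypothetical wrapper name:

    from pyspark.sql.column import Column  # safe to import at module level

    def partition_columns_to_java(sc, cols):
        # Hypothetical helper, for illustration only. The classic-only import is
        # deferred so that merely importing this module does not require py4j
        # in a Connect-only environment.
        from pyspark.sql.classic.column import _to_seq, _to_java_column

        return _to_seq(sc, [_to_java_column(c) for c in cols])
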
self._jwrite = self._jwrite.partitionBy( @@ -1503,6 +1509,8 @@ def bucketBy( +---+------------+ >>> _ = spark.sql("DROP TABLE bucketed_table") """ + from pyspark.sql.classic.column import _to_seq + if not isinstance(numBuckets, int): raise PySparkTypeError( error_class="NOT_INT", @@ -1594,6 +1602,8 @@ def sortBy( +---+------------+ >>> _ = spark.sql("DROP TABLE sorted_bucketed_table") """ + from pyspark.sql.classic.column import _to_seq + if isinstance(col, (list, tuple)): if cols: raise PySparkValueError( @@ -2380,6 +2390,8 @@ def partitionedBy(self, col: Column, *cols: Column) -> "DataFrameWriterV2": .. versionadded: 3.1.0 """ + from pyspark.sql.classic.column import _to_seq, _to_java_column + col = _to_java_column(col) cols = _to_seq(self._spark._sc, [_to_java_column(c) for c in cols]) self._jwriter.partitionedBy(col, cols) @@ -2435,6 +2447,8 @@ def overwrite(self, condition: Column) -> None: .. versionadded: 3.1.0 """ + from pyspark.sql.classic.column import _to_java_column + condition = _to_java_column(condition) self._jwriter.overwrite(condition) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 1098c41a3f4c8..d6fb4b60d90a9 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -40,7 +40,6 @@ from pyspark.conf import SparkConf from pyspark.util import is_remote_only -from pyspark.sql.column import _to_java_column from pyspark.sql.conf import RuntimeConfig from pyspark.sql.dataframe import DataFrame from pyspark.sql.functions import lit @@ -68,6 +67,7 @@ if TYPE_CHECKING: from py4j.java_gateway import JavaObject + import pyarrow as pa from pyspark.core.context import SparkContext from pyspark.core.rdd import RDD from pyspark.sql._typing import AtomicValue, RowLike, OptionalPrimitiveType @@ -143,6 +143,9 @@ def toDF(self, schema=None, sampleRatio=None): # # @classmethod + @property is also affected by a bug in Python's docstring which was backported # to Python 3.9.6 (https://github.com/python/cpython/pull/28838) +# +# Python 3.9 with MyPy complains about @classmethod + @property combination. We should fix +# it together with MyPy. class classproperty(property): """Same as Python's @property decorator, but for class attributes. 
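
For context on the comment above about combining `@classmethod` with `@property`: a `classproperty` descriptor sidesteps that combination entirely. This is a minimal standalone sketch of such a descriptor, not the pyspark implementation:

    class classproperty:
        """Property-like descriptor evaluated against the class, not an instance."""

        def __init__(self, fget):
            self.fget = fget

        def __get__(self, instance, owner):
            return self.fget(owner)

    class Example:
        @classproperty
        def label(cls):
            return f"label for {cls.__name__}"

    print(Example.label)  # "label for Example" -- no instance required
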
@@ -1040,6 +1043,7 @@ def _inferSchemaFromList( ) infer_dict_as_struct = self._jconf.inferDictAsStruct() infer_array_from_first_element = self._jconf.legacyInferArrayTypeFromFirstElement() + infer_map_from_first_pair = self._jconf.legacyInferMapStructTypeFromFirstItem() prefer_timestamp_ntz = is_timestamp_ntz_preferred() schema = reduce( _merge_type, @@ -1049,6 +1053,7 @@ def _inferSchemaFromList( names, infer_dict_as_struct=infer_dict_as_struct, infer_array_from_first_element=infer_array_from_first_element, + infer_map_from_first_pair=infer_map_from_first_pair, prefer_timestamp_ntz=prefer_timestamp_ntz, ) for row in data @@ -1091,6 +1096,7 @@ def _inferSchema( infer_dict_as_struct = self._jconf.inferDictAsStruct() infer_array_from_first_element = self._jconf.legacyInferArrayTypeFromFirstElement() + infer_map_from_first_pair = self._jconf.legacyInferMapStructTypeFromFirstItem() prefer_timestamp_ntz = is_timestamp_ntz_preferred() if samplingRatio is None: schema = _infer_schema( @@ -1108,6 +1114,7 @@ def _inferSchema( names=names, infer_dict_as_struct=infer_dict_as_struct, infer_array_from_first_element=infer_array_from_first_element, + infer_map_from_first_pair=infer_map_from_first_pair, prefer_timestamp_ntz=prefer_timestamp_ntz, ), ) @@ -1127,6 +1134,7 @@ def _inferSchema( names, infer_dict_as_struct=infer_dict_as_struct, infer_array_from_first_element=infer_array_from_first_element, + infer_map_from_first_pair=infer_map_from_first_pair, prefer_timestamp_ntz=prefer_timestamp_ntz, ) ).reduce(_merge_type) @@ -1249,7 +1257,7 @@ def _getActiveSessionOrCreate(**static_conf: Any) -> "SparkSession": spark = builder.getOrCreate() return spark - @overload + @overload # type: ignore[override] def createDataFrame( self, data: Iterable["RowLike"], @@ -1311,6 +1319,10 @@ def createDataFrame( ) -> DataFrame: ... + @overload + def createDataFrame(self, data: "pa.Table", samplingRatio: Optional[float] = ...) -> DataFrame: + ... + @overload def createDataFrame( self, @@ -1320,28 +1332,40 @@ def createDataFrame( ) -> DataFrame: ... + @overload + def createDataFrame( + self, + data: "pa.Table", + schema: Union[StructType, str], + verifySchema: bool = ..., + ) -> DataFrame: + ... + def createDataFrame( # type: ignore[misc] self, - data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike", "ArrayLike"], + data: Union["RDD[Any]", Iterable[Any], "PandasDataFrameLike", "ArrayLike", "pa.Table"], schema: Optional[Union[AtomicType, StructType, str]] = None, samplingRatio: Optional[float] = None, verifySchema: bool = True, ) -> DataFrame: """ - Creates a :class:`DataFrame` from an :class:`RDD`, a list, a :class:`pandas.DataFrame` - or a :class:`numpy.ndarray`. + Creates a :class:`DataFrame` from an :class:`RDD`, a list, a :class:`pandas.DataFrame`, + a :class:`numpy.ndarray`, or a :class:`pyarrow.Table`. .. versionadded:: 2.0.0 .. versionchanged:: 3.4.0 Supports Spark Connect. + .. versionchanged:: 4.0.0 + Supports :class:`pyarrow.Table`. + Parameters ---------- data : :class:`RDD` or iterable an RDD of any kind of SQL data representation (:class:`Row`, :class:`tuple`, ``int``, ``boolean``, ``dict``, etc.), or :class:`list`, - :class:`pandas.DataFrame` or :class:`numpy.ndarray`. + :class:`pandas.DataFrame`, :class:`numpy.ndarray`, or :class:`pyarrow.Table`. schema : :class:`pyspark.sql.types.DataType`, str or list, optional a :class:`pyspark.sql.types.DataType` or a datatype string or a list of column names, default is None. 
The data type string format equals to @@ -1363,12 +1387,14 @@ def createDataFrame( # type: ignore[misc] later. samplingRatio : float, optional the sample ratio of rows used for inferring. The first few rows will be used - if ``samplingRatio`` is ``None``. + if ``samplingRatio`` is ``None``. This option is effective only when the input is + :class:`RDD`. verifySchema : bool, optional verify data types of every row against schema. Enabled by default. - When the input is :class:`pandas.DataFrame` and - `spark.sql.execution.arrow.pyspark.enabled` is enabled, this option is not - effective. It follows Arrow type coercion. + When the input is :class:`pyarrow.Table` or when the input class is + :class:`pandas.DataFrame` and `spark.sql.execution.arrow.pyspark.enabled` is enabled, + this option is not effective. It follows Arrow type coercion. This option is not + supported with Spark Connect. .. versionadded:: 2.1.0 @@ -1468,6 +1494,22 @@ def createDataFrame( # type: ignore[misc] +---+---+ | 1| 2| +---+---+ + + Create a DataFrame from a PyArrow Table. + + >>> spark.createDataFrame(df.toArrow()).show() # doctest: +SKIP + +-----+---+ + | name|age| + +-----+---+ + |Alice| 1| + +-----+---+ + >>> table = pyarrow.table({'0': [1], '1': [2]}) # doctest: +SKIP + >>> spark.createDataFrame(table).collect() # doctest: +SKIP + +---+---+ + | 0| 1| + +---+---+ + | 1| 2| + +---+---+ """ SparkSession._activeSession = self assert self._jvm is not None @@ -1498,6 +1540,13 @@ def createDataFrame( # type: ignore[misc] except Exception: has_numpy = False + try: + import pyarrow as pa + + has_pyarrow = True + except Exception: + has_pyarrow = False + if has_numpy and isinstance(data, np.ndarray): # `data` of numpy.ndarray type will be converted to a pandas DataFrame, # so pandas is required. @@ -1531,6 +1580,11 @@ def createDataFrame( # type: ignore[misc] return super(SparkSession, self).createDataFrame( # type: ignore[call-overload] data, schema, samplingRatio, verifySchema ) + if has_pyarrow and isinstance(data, pa.Table): + # Create a DataFrame from PyArrow Table. + return super(SparkSession, self).createDataFrame( # type: ignore[call-overload] + data, schema, samplingRatio, verifySchema + ) return self._create_dataframe( data, schema, samplingRatio, verifySchema # type: ignore[arg-type] ) @@ -1633,7 +1687,7 @@ def sql( Notes ----- In Spark Classic, a temporary view referenced in `spark.sql` is resolved immediately, - while in Spark Connect it is lazily evaluated. + while in Spark Connect it is lazily analyzed. So in Spark Connect if a view is dropped, modified or replaced after `spark.sql`, the execution may fail or generate different results. @@ -1724,6 +1778,7 @@ def sql( | 3| 6| 1| +---+---+---+ """ + from pyspark.sql.classic.column import _to_java_column formatter = SQLStringFormatter(self) if len(kwargs) > 0: @@ -1766,7 +1821,7 @@ def table(self, tableName: str) -> DataFrame: Notes ----- In Spark Classic, a temporary view referenced in `spark.table` is resolved immediately, - while in Spark Connect it is lazily evaluated. + while in Spark Connect it is lazily analyzed. So in Spark Connect if a view is dropped, modified or replaced after `spark.table`, the execution may fail or generate different results. 
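
On the "lazily analyzed" wording above: the practical difference shows up when a temporary view is dropped between building and executing a query. A hedged illustration, assuming a running `spark` session:

    spark.range(3).createOrReplaceTempView("v")
    df = spark.sql("SELECT * FROM v")  # Classic analyzes `v` here; Connect defers it

    spark.catalog.dropTempView("v")

    # In Spark Classic this still works because `v` was resolved eagerly.
    # Under Spark Connect, analysis happens at execution time, so the call below
    # may now fail with a table-or-view-not-found style error.
    # df.collect()
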
diff --git a/python/pyspark/sql/streaming/listener.py b/python/pyspark/sql/streaming/listener.py index c1c9dce047319..2aa63cdb91ab6 100644 --- a/python/pyspark/sql/streaming/listener.py +++ b/python/pyspark/sql/streaming/listener.py @@ -64,16 +64,19 @@ class StreamingQueryListener(ABC): """ def _set_spark_session( - self, spark: "SparkSession" # type: ignore[name-defined] # noqa: F821 + self, session: "SparkSession" # type: ignore[name-defined] # noqa: F821 ) -> None: - self._sparkSession = spark + if self.spark is None: + self.spark = session @property def spark(self) -> Optional["SparkSession"]: # type: ignore[name-defined] # noqa: F821 - if hasattr(self, "_sparkSession"): - return self._sparkSession - else: - return None + return getattr(self, "_sparkSession", None) + + @spark.setter + def spark(self, session: "SparkSession") -> None: # type: ignore[name-defined] # noqa: F821 + # For backward compatibility + self._sparkSession = session def _init_listener_id(self) -> None: self._id = str(uuid.uuid4()) diff --git a/python/pyspark/sql/streaming/python_streaming_source_runner.py b/python/pyspark/sql/streaming/python_streaming_source_runner.py index 76f9048e3edbe..754ecff61b973 100644 --- a/python/pyspark/sql/streaming/python_streaming_source_runner.py +++ b/python/pyspark/sql/streaming/python_streaming_source_runner.py @@ -18,7 +18,7 @@ import os import sys import json -from typing import IO +from typing import IO, Iterator, Tuple from pyspark.accumulators import _accumulatorRegistry from pyspark.errors import IllegalArgumentException, PySparkAssertionError, PySparkRuntimeError @@ -29,10 +29,13 @@ SpecialLengths, ) from pyspark.sql.datasource import DataSource, DataSourceStreamReader +from pyspark.sql.datasource_internal import _SimpleStreamReaderWrapper, _streamReader +from pyspark.sql.pandas.serializers import ArrowStreamSerializer from pyspark.sql.types import ( _parse_datatype_json_string, StructType, ) +from pyspark.sql.worker.plan_data_source_read import records_to_arrow_batches from pyspark.util import handle_worker_exception, local_connect_and_auth from pyspark.worker_util import ( check_python_version, @@ -49,6 +52,10 @@ PARTITIONS_FUNC_ID = 886 COMMIT_FUNC_ID = 887 +PREFETCHED_RECORDS_NOT_FOUND = 0 +NON_EMPTY_PYARROW_RECORD_BATCHES = 1 +EMPTY_PYARROW_RECORD_BATCHES = 2 + def initial_offset_func(reader: DataSourceStreamReader, outfile: IO) -> None: offset = reader.initialOffset() @@ -60,7 +67,14 @@ def latest_offset_func(reader: DataSourceStreamReader, outfile: IO) -> None: write_with_length(json.dumps(offset).encode("utf-8"), outfile) -def partitions_func(reader: DataSourceStreamReader, infile: IO, outfile: IO) -> None: +def partitions_func( + reader: DataSourceStreamReader, + data_source: DataSource, + schema: StructType, + max_arrow_batch_size: int, + infile: IO, + outfile: IO, +) -> None: start_offset = json.loads(utf8_deserializer.loads(infile)) end_offset = json.loads(utf8_deserializer.loads(infile)) partitions = reader.partitions(start_offset, end_offset) @@ -68,6 +82,14 @@ def partitions_func(reader: DataSourceStreamReader, infile: IO, outfile: IO) -> write_int(len(partitions), outfile) for partition in partitions: pickleSer._write_with_length(partition, outfile) + if isinstance(reader, _SimpleStreamReaderWrapper): + it = reader.getCache(start_offset, end_offset) + if it is None: + write_int(PREFETCHED_RECORDS_NOT_FOUND, outfile) + else: + send_batch_func(it, outfile, schema, max_arrow_batch_size, data_source) + else: + write_int(PREFETCHED_RECORDS_NOT_FOUND, outfile) def 
commit_func(reader: DataSourceStreamReader, infile: IO, outfile: IO) -> None: @@ -76,6 +98,23 @@ def commit_func(reader: DataSourceStreamReader, infile: IO, outfile: IO) -> None write_int(0, outfile) +def send_batch_func( + rows: Iterator[Tuple], + outfile: IO, + schema: StructType, + max_arrow_batch_size: int, + data_source: DataSource, +) -> None: + batches = list(records_to_arrow_batches(rows, max_arrow_batch_size, schema, data_source)) + if len(batches) != 0: + write_int(NON_EMPTY_PYARROW_RECORD_BATCHES, outfile) + write_int(SpecialLengths.START_ARROW_STREAM, outfile) + serializer = ArrowStreamSerializer() + serializer.dump_stream(batches, outfile) + else: + write_int(EMPTY_PYARROW_RECORD_BATCHES, outfile) + + def main(infile: IO, outfile: IO) -> None: try: check_python_version(infile) @@ -91,7 +130,7 @@ def main(infile: IO, outfile: IO) -> None: if not isinstance(data_source, DataSource): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "a Python data source instance of type 'DataSource'", "actual": f"'{type(data_source).__name__}'", @@ -103,16 +142,22 @@ def main(infile: IO, outfile: IO) -> None: schema = _parse_datatype_json_string(schema_json) if not isinstance(schema, StructType): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "an output schema of type 'StructType'", "actual": f"'{type(schema).__name__}'", }, ) + max_arrow_batch_size = read_int(infile) + assert max_arrow_batch_size > 0, ( + "The maximum arrow batch size should be greater than 0, but got " + f"'{max_arrow_batch_size}'" + ) + # Instantiate data source reader. try: - reader = data_source.streamReader(schema=schema) + reader = _streamReader(data_source, schema) # Initialization succeed. write_int(0, outfile) outfile.flush() @@ -125,7 +170,9 @@ def main(infile: IO, outfile: IO) -> None: elif func_id == LATEST_OFFSET_FUNC_ID: latest_offset_func(reader, outfile) elif func_id == PARTITIONS_FUNC_ID: - partitions_func(reader, infile, outfile) + partitions_func( + reader, data_source, schema, max_arrow_batch_size, infile, outfile + ) elif func_id == COMMIT_FUNC_ID: commit_func(reader, infile, outfile) else: @@ -162,5 +209,9 @@ def main(infile: IO, outfile: IO) -> None: # Read information about how to connect back to the JVM from the environment. java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] - (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + (sock_file, sock) = local_connect_and_auth(java_port, auth_secret) + # Prevent the socket from timeout error when query trigger interval is large. + sock.settimeout(None) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/streaming/query.py b/python/pyspark/sql/streaming/query.py index bcab8a104f1d9..d3d58da3562b6 100644 --- a/python/pyspark/sql/streaming/query.py +++ b/python/pyspark/sql/streaming/query.py @@ -114,7 +114,7 @@ def runId(self) -> str: @property def name(self) -> str: """ - Returns the user-specified name of the query, or null if not specified. + Returns the user-specified name of the query, or None if not specified. This name can be specified in the `org.apache.spark.sql.streaming.DataStreamWriter` as `dataframe.writeStream.queryName("query").start()`. This name, if set, must be unique across all active queries. 
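
The three new constants in `python_streaming_source_runner.py` form a small reply convention: after the pickled partitions, the worker writes one integer telling the JVM whether prefetched Arrow data follows. A condensed sketch of that convention, with stand-ins for pyspark's internal framing helpers (the real code uses `write_int`, `SpecialLengths.START_ARROW_STREAM`, and `ArrowStreamSerializer`):

    PREFETCHED_RECORDS_NOT_FOUND = 0      # no cache for this offset range; JVM plans a normal read
    NON_EMPTY_PYARROW_RECORD_BATCHES = 1  # flag, then stream-start marker, then an Arrow IPC stream
    EMPTY_PYARROW_RECORD_BATCHES = 2      # cache hit, but the range contained no rows

    def reply_prefetched(batches, outfile, write_int, dump_arrow_stream):
        # `write_int` and `dump_arrow_stream` are injected stand-ins for the
        # internal helpers used in the actual worker code above.
        if batches:
            write_int(NON_EMPTY_PYARROW_RECORD_BATCHES, outfile)
            dump_arrow_stream(batches, outfile)
        else:
            write_int(EMPTY_PYARROW_RECORD_BATCHES, outfile)
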
@@ -127,14 +127,14 @@ def name(self) -> str: Returns ------- str - The user-specified name of the query, or null if not specified. + The user-specified name of the query, or None if not specified. Examples -------- >>> sdf = spark.readStream.format("rate").load() >>> sq = sdf.writeStream.format('memory').queryName('this_query').start() - Get the user-specified name of the query, or null if not specified. + Get the user-specified name of the query, or None if not specified. >>> sq.name 'this_query' diff --git a/python/pyspark/sql/streaming/readwriter.py b/python/pyspark/sql/streaming/readwriter.py index 58901f34cfc9b..b202a499e8b08 100644 --- a/python/pyspark/sql/streaming/readwriter.py +++ b/python/pyspark/sql/streaming/readwriter.py @@ -19,7 +19,6 @@ from collections.abc import Iterator from typing import cast, overload, Any, Callable, List, Optional, TYPE_CHECKING, Union -from pyspark.sql.column import _to_seq from pyspark.sql.readwriter import OptionUtils, to_str from pyspark.sql.streaming.query import StreamingQuery from pyspark.sql.types import Row, StructType @@ -554,8 +553,8 @@ def text( Parameters ---------- - path : str or list - string, or list of strings, for input path(s). + path : str + string for input path. Other Parameters ---------------- @@ -642,8 +641,8 @@ def csv( Parameters ---------- - path : str or list - string, or list of strings, for input path(s). + path : str + string for input path. schema : :class:`pyspark.sql.types.StructType` or str, optional an optional :class:`pyspark.sql.types.StructType` for the input schema or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``). @@ -1117,6 +1116,8 @@ def partitionBy(self, *cols: str) -> "DataStreamWriter": # type: ignore[misc] +...---------+-----+ ... """ + from pyspark.sql.classic.column import _to_seq + if len(cols) == 1 and isinstance(cols[0], (list, tuple)): cols = cols[0] self._jwrite = self._jwrite.partitionBy(_to_seq(self._spark._sc, cols)) diff --git a/python/pyspark/sql/tests/connect/client/test_artifact.py b/python/pyspark/sql/tests/connect/client/test_artifact.py index f4f49ab251266..c886ff36d776f 100644 --- a/python/pyspark/sql/tests/connect/client/test_artifact.py +++ b/python/pyspark/sql/tests/connect/client/test_artifact.py @@ -25,7 +25,7 @@ from pyspark.sql import SparkSession from pyspark.testing.connectutils import ReusedConnectTestCase, should_test_connect from pyspark.testing.utils import SPARK_HOME -from pyspark.sql.functions import udf +from pyspark.sql.functions import udf, assert_true, lit if should_test_connect: from pyspark.sql.connect.client.artifact import ArtifactManager @@ -46,7 +46,7 @@ def func(x): return my_pyfile.my_func() spark_session.addArtifacts(pyfile_path, pyfile=True) - self.assertEqual(spark_session.range(1).select(func("id")).first()[0], 10) + spark_session.range(1).select(assert_true(func("id") == lit(10))).show() def test_add_pyfile(self): self.check_add_pyfile(self.spark) @@ -94,7 +94,7 @@ def func(x): return my_zipfile.my_func() spark_session.addArtifacts(f"{package_path}.zip", pyfile=True) - self.assertEqual(spark_session.range(1).select(func("id")).first()[0], 5) + spark_session.range(1).select(assert_true(func("id") == lit(5))).show() def test_add_zipped_package(self): self.check_add_zipped_package(self.spark) @@ -130,7 +130,7 @@ def func(x): ) as my_file: return my_file.read().strip() - self.assertEqual(spark_session.range(1).select(func("id")).first()[0], "hello world!") + spark_session.range(1).select(assert_true(func("id") == lit("hello world!"))).show() 
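
The artifact test changes above replace `first()[0]` equality checks with `assert_true`, which moves the verification onto the executors: the job itself fails if the UDF result is wrong, so nothing has to be collected back to the driver. A small sketch of the pattern, assuming a running `spark` session:

    from pyspark.sql.functions import udf, assert_true, lit

    @udf("int")
    def plus_nine(x):
        return x + 9

    # Fails the job on the executor side if any row violates the predicate;
    # show() merely forces execution.
    spark.range(1).select(assert_true(plus_nine("id") == lit(9))).show()
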
def test_add_archive(self): self.check_add_archive(self.spark) @@ -160,7 +160,7 @@ def func(x): with open(os.path.join(root, "my_file.txt"), "r") as my_file: return my_file.read().strip() - self.assertEqual(spark_session.range(1).select(func("id")).first()[0], "Hello world!!") + spark_session.range(1).select(assert_true(func("id") == lit("Hello world!!"))).show() def test_add_file(self): self.check_add_file(self.spark) @@ -425,33 +425,3 @@ def test_add_not_existing_artifact(self): self.artifact_manager.add_artifacts( os.path.join(d, "not_existing"), file=True, pyfile=False, archive=False ) - - -@unittest.skipIf(is_remote_only(), "Requires local cluster to run") -class LocalClusterArtifactTests(ReusedConnectTestCase, ArtifactTestsMixin): - @classmethod - def conf(cls): - return ( - super().conf().set("spark.driver.memory", "512M").set("spark.executor.memory", "512M") - ) - - @classmethod - def root(cls): - # In local cluster, we can mimic the production usage. - return "." - - @classmethod - def master(cls): - return "local-cluster[2,2,512]" - - -if __name__ == "__main__": - from pyspark.sql.tests.connect.client.test_artifact import * # noqa: F401 - - try: - import xmlrunner # type: ignore - - testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) - except ImportError: - testRunner = None - unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/tests/connect/client/test_artifact_localcluster.py b/python/pyspark/sql/tests/connect/client/test_artifact_localcluster.py new file mode 100644 index 0000000000000..83584b83333ee --- /dev/null +++ b/python/pyspark/sql/tests/connect/client/test_artifact_localcluster.py @@ -0,0 +1,50 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import unittest +import os + +from pyspark.sql.tests.connect.client.test_artifact import ArtifactTestsMixin +from pyspark.testing.connectutils import ReusedConnectTestCase + + +class LocalClusterArtifactTests(ReusedConnectTestCase, ArtifactTestsMixin): + @classmethod + def conf(cls): + return ( + super().conf().set("spark.driver.memory", "512M").set("spark.executor.memory", "512M") + ) + + @classmethod + def root(cls): + # In local cluster, we can mimic the production usage. + return "." 
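
The new `test_artifact_localcluster.py` runs the same artifact tests against a `local-cluster` master, where executors are separate JVMs and artifacts genuinely have to be shipped. To the best of my knowledge the master string format is `local-cluster[numWorkers,coresPerWorker,memoryPerWorkerMB]`; a hedged, illustrative sketch of building such a session (memory values mirror the test configuration):

    from pyspark.sql import SparkSession

    spark = (
        SparkSession.builder
        .master("local-cluster[2,2,512]")
        .config("spark.driver.memory", "512M")
        .config("spark.executor.memory", "512M")
        .getOrCreate()
    )
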
+ + @classmethod + def master(cls): + return os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local-cluster[2,2,512]") + + +if __name__ == "__main__": + from pyspark.sql.tests.connect.client.test_artifact_localcluster import * # noqa: F401 + + try: + import xmlrunner # type: ignore + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/tests/connect/client/test_client.py b/python/pyspark/sql/tests/connect/client/test_client.py index b96fc44d50a7e..196c9eb5d81d8 100644 --- a/python/pyspark/sql/tests/connect/client/test_client.py +++ b/python/pyspark/sql/tests/connect/client/test_client.py @@ -18,13 +18,14 @@ import unittest import uuid from collections.abc import Generator -from typing import Optional, Any +from typing import Optional, Any, Union from pyspark.testing.connectutils import should_test_connect, connect_requirement_message from pyspark.testing.utils import eventually if should_test_connect: import grpc + from google.rpc import status_pb2 import pandas as pd import pyarrow as pa from pyspark.sql.connect.client import SparkConnectClient, DefaultChannelBuilder @@ -33,7 +34,7 @@ DefaultPolicy, ) from pyspark.sql.connect.client.reattach import ExecutePlanResponseReattachableIterator - from pyspark.errors import RetriesExceeded + from pyspark.errors import PySparkRuntimeError, RetriesExceeded import pyspark.sql.connect.proto as proto class TestPolicy(DefaultPolicy): @@ -50,9 +51,17 @@ def __init__(self): class TestException(grpc.RpcError, grpc.Call): """Exception mock to test retryable exceptions.""" - def __init__(self, msg, code=grpc.StatusCode.INTERNAL): + def __init__( + self, + msg, + code=grpc.StatusCode.INTERNAL, + trailing_status: Union[status_pb2.Status, None] = None, + ): self.msg = msg self._code = code + self._trailer: dict[str, Any] = {} + if trailing_status is not None: + self._trailer["grpc-status-details-bin"] = trailing_status.SerializeToString() def code(self): return self._code @@ -60,8 +69,11 @@ def code(self): def __str__(self): return self.msg + def details(self): + return self.msg + def trailing_metadata(self): - return () + return None if not self._trailer else self._trailer.items() class ResponseGenerator(Generator): """This class is used to generate values that are returned by the streaming @@ -340,6 +352,71 @@ def check(): eventually(timeout=1, catch_assertions=True)(check)() + def test_not_found_recovers(self): + """SPARK-48056: Assert that the client recovers from session or operation not + found error if no partial responses were previously received. 
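
The reworked `TestException` mock mirrors how gRPC carries rich error details: a serialized `google.rpc.Status` message stored under the `grpc-status-details-bin` trailing-metadata key, which the Connect client inspects to recognize errors such as `INVALID_HANDLE.SESSION_NOT_FOUND`. A standalone sketch of that encoding:

    from google.rpc import status_pb2

    status = status_pb2.Status(code=14, message="INVALID_HANDLE.SESSION_NOT_FOUND")
    trailer = {"grpc-status-details-bin": status.SerializeToString()}

    # A client recovers the structured status by deserializing the trailer value.
    decoded = status_pb2.Status.FromString(trailer["grpc-status-details-bin"])
    print(decoded.code, decoded.message)
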
+ """ + + def not_found_recovers(error_code: str): + def not_found(): + raise TestException( + error_code, + grpc.StatusCode.UNAVAILABLE, + trailing_status=status_pb2.Status(code=14, message=error_code, details=""), + ) + + stub = self._stub_with([not_found, self.finished]) + ite = ExecutePlanResponseReattachableIterator(self.request, stub, self.retrying, []) + + for _ in ite: + pass + + def checks(): + self.assertEqual(2, stub.execute_calls) + self.assertEqual(0, stub.attach_calls) + self.assertEqual(0, stub.release_calls) + self.assertEqual(0, stub.release_until_calls) + + eventually(timeout=1, catch_assertions=True)(checks)() + + parameters = ["INVALID_HANDLE.SESSION_NOT_FOUND", "INVALID_HANDLE.OPERATION_NOT_FOUND"] + for b in parameters: + not_found_recovers(b) + + def test_not_found_fails(self): + """SPARK-48056: Assert that the client fails from session or operation not found error + if a partial response was previously received. + """ + + def not_found_fails(error_code: str): + def not_found(): + raise TestException( + error_code, + grpc.StatusCode.UNAVAILABLE, + trailing_status=status_pb2.Status(code=14, message=error_code, details=""), + ) + + stub = self._stub_with([self.response], [not_found]) + + with self.assertRaises(PySparkRuntimeError) as e: + ite = ExecutePlanResponseReattachableIterator(self.request, stub, self.retrying, []) + for _ in ite: + pass + + self.assertTrue("RESPONSE_ALREADY_RECEIVED" in e.exception.getMessage()) + + def checks(): + self.assertEqual(1, stub.execute_calls) + self.assertEqual(1, stub.attach_calls) + self.assertEqual(0, stub.release_calls) + self.assertEqual(0, stub.release_until_calls) + + eventually(timeout=1, catch_assertions=True)(checks)() + + parameters = ["INVALID_HANDLE.SESSION_NOT_FOUND", "INVALID_HANDLE.OPERATION_NOT_FOUND"] + for b in parameters: + not_found_fails(b) + if __name__ == "__main__": from pyspark.sql.tests.connect.client.test_client import * # noqa: F401 diff --git a/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py b/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py index 4598cbbdca4e1..d79bfef2426a4 100644 --- a/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py +++ b/python/pyspark/sql/tests/connect/streaming/test_parity_foreach_batch.py @@ -20,6 +20,7 @@ from pyspark.sql.tests.streaming.test_streaming_foreach_batch import StreamingTestsForeachBatchMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.errors import PySparkPicklingError +from pyspark.errors.exceptions.connect import SparkConnectGrpcException class StreamingForeachBatchParityTests(StreamingTestsForeachBatchMixin, ReusedConnectTestCase): @@ -66,6 +67,41 @@ def func(df, _): q = df.writeStream.foreachBatch(func).start() q.processAllAvailable() + def test_worker_initialization_error(self): + class SerializableButNotDeserializable: + @staticmethod + def _reduce_function(): + raise ValueError("Cannot unpickle this object") + + def __reduce__(self): + # Return a static method that cannot be called during unpickling + return self._reduce_function, () + + # Create an instance of the class + obj = SerializableButNotDeserializable() + + df = ( + self.spark.readStream.format("rate") + .option("rowsPerSecond", "10") + .option("numPartitions", "1") + .load() + ) + + obj = SerializableButNotDeserializable() + + def fcn(df, _): + print(obj) + + # Assert that an exception occurs during the initialization + with self.assertRaises(SparkConnectGrpcException) as error: + 
df.select("value").writeStream.foreachBatch(fcn).start() + + # Assert that the error message contains the expected string + self.assertIn( + "Streaming Runner initialization failed", + str(error.exception), + ) + def test_accessing_spark_session(self): spark = self.spark diff --git a/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py b/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py index be8c30c28ce0e..14edfa4003b23 100644 --- a/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py +++ b/python/pyspark/sql/tests/connect/streaming/test_parity_listener.py @@ -23,50 +23,49 @@ from pyspark.sql.streaming.listener import StreamingQueryListener from pyspark.sql.functions import count, lit from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.testing.utils import eventually # Listeners that has spark commands in callback handler functions -# V1: Initial interface of StreamingQueryListener containing methods `onQueryStarted`, -# `onQueryProgress`, `onQueryTerminated`. It is prior to Spark 3.5. -class TestListenerSparkV1(StreamingQueryListener): +class TestListenerSpark(StreamingQueryListener): def onQueryStarted(self, event): e = pyspark.cloudpickle.dumps(event) df = self.spark.createDataFrame(data=[(e,)]) - df.write.mode("append").saveAsTable("listener_start_events_v1") + df.write.mode("append").saveAsTable("listener_start_events") def onQueryProgress(self, event): e = pyspark.cloudpickle.dumps(event) df = self.spark.createDataFrame(data=[(e,)]) - df.write.mode("append").saveAsTable("listener_progress_events_v1") + df.write.mode("append").saveAsTable("listener_progress_events") + + def onQueryIdle(self, event): + pass def onQueryTerminated(self, event): e = pyspark.cloudpickle.dumps(event) df = self.spark.createDataFrame(data=[(e,)]) - df.write.mode("append").saveAsTable("listener_terminated_events_v1") + df.write.mode("append").saveAsTable("listener_terminated_events") -# V2: The interface after the method `onQueryIdle` is added. It is Spark 3.5+. -class TestListenerSparkV2(StreamingQueryListener): +# V1: Initial interface of StreamingQueryListener containing methods `onQueryStarted`, +# `onQueryProgress`, `onQueryTerminated`. It is prior to Spark 3.5. 
+class TestListenerLocalV1(StreamingQueryListener): + def __init__(self): + self.start = [] + self.progress = [] + self.terminated = [] + def onQueryStarted(self, event): - e = pyspark.cloudpickle.dumps(event) - df = self.spark.createDataFrame(data=[(e,)]) - df.write.mode("append").saveAsTable("listener_start_events_v2") + self.start.append(event) def onQueryProgress(self, event): - e = pyspark.cloudpickle.dumps(event) - df = self.spark.createDataFrame(data=[(e,)]) - df.write.mode("append").saveAsTable("listener_progress_events_v2") - - def onQueryIdle(self, event): - pass + self.progress.append(event) def onQueryTerminated(self, event): - e = pyspark.cloudpickle.dumps(event) - df = self.spark.createDataFrame(data=[(e,)]) - df.write.mode("append").saveAsTable("listener_terminated_events_v2") + self.terminated.append(event) -class TestListenerLocal(StreamingQueryListener): +class TestListenerLocalV2(StreamingQueryListener): def __init__(self): self.start = [] self.progress = [] @@ -87,19 +86,29 @@ def onQueryTerminated(self, event): class StreamingListenerParityTests(StreamingListenerTestsMixin, ReusedConnectTestCase): def test_listener_management(self): - listener1 = TestListenerLocal() - listener2 = TestListenerLocal() + listener1 = TestListenerLocalV1() + listener2 = TestListenerLocalV2() try: self.spark.streams.addListener(listener1) self.spark.streams.addListener(listener2) - q = self.spark.readStream.format("rate").load().writeStream.format("noop").start() + q = ( + self.spark.readStream.format("rate") + .load() + .writeStream.format("noop") + .queryName("test_local") + .start() + ) # Both listeners should have listener events already because onQueryStarted # is always called before DataStreamWriter.start() returns self.assertEqual(len(listener1.start), 1) self.assertEqual(len(listener2.start), 1) + self.check_start_event(listener1.start[0]) + self.check_start_event(listener2.start[0]) + while q.lastProgress is None: + q.awaitTermination(0.5) # removeListener is a blocking call, resources are cleaned up by the time it returns self.spark.streams.removeListener(listener1) self.spark.streams.removeListener(listener2) @@ -109,12 +118,13 @@ def test_listener_management(self): q.stop() # need to wait a while before QueryTerminatedEvent reaches client - time.sleep(15) + while len(listener1.terminated) == 0: + time.sleep(1) + self.assertEqual(len(listener1.terminated), 1) - self.check_start_event(listener1.start[0]) for event in listener1.progress: - self.check_progress_event(event) + self.check_progress_event(event, is_stateful=False) self.check_terminated_event(listener1.terminated[0]) finally: @@ -125,7 +135,7 @@ def test_listener_management(self): def test_slow_query(self): try: - listener = TestListenerLocal() + listener = TestListenerLocalV2() self.spark.streams.addListener(listener) slow_query = ( @@ -151,8 +161,12 @@ def test_slow_query(self): self.assertTrue(slow_query.id in [str(e.progress.id) for e in listener.progress]) self.assertTrue(fast_query.id in [str(e.progress.id) for e in listener.progress]) - self.assertTrue(slow_query.id in [str(e.id) for e in listener.terminated]) - self.assertTrue(fast_query.id in [str(e.id) for e in listener.terminated]) + eventually(timeout=20, catch_assertions=True)( + lambda: self.assertTrue(slow_query.id in [str(e.id) for e in listener.terminated]) + )() + eventually(timeout=20, catch_assertions=True)( + lambda: self.assertTrue(fast_query.id in [str(e.id) for e in listener.terminated]) + )() finally: for listener in 
self.spark.streams._sqlb._listener_bus: @@ -177,7 +191,7 @@ def onQueryTerminated(self, e): raise Exception("I'm so sorry!") try: - listener_good = TestListenerLocal() + listener_good = TestListenerLocalV2() listener_bad = UselessListener() self.spark.streams.addListener(listener_good) self.spark.streams.addListener(listener_bad) @@ -200,8 +214,14 @@ def onQueryTerminated(self, e): q.stop() def test_listener_events_spark_command(self): - def verify(test_listener, table_postfix): - try: + test_listener = TestListenerSpark() + + try: + with self.table( + "listener_start_events", + "listener_progress_events", + "listener_terminated_events", + ): self.spark.streams.addListener(test_listener) # This ensures the read socket on the server won't crash (i.e. because of timeout) @@ -214,56 +234,42 @@ def verify(test_listener, table_postfix): q = ( df_stateful.writeStream.format("noop") .queryName("test") - .outputMode("complete") + .outputMode("update") + .trigger(processingTime="5 seconds") .start() ) self.assertTrue(q.isActive) # ensure at least one batch is ran while q.lastProgress is None or q.lastProgress["batchId"] == 0: - q.awaitTermination(5) + q.awaitTermination(0.5) q.stop() self.assertFalse(q.isActive) - # Sleep to make sure listener_terminated_events is written successfully - time.sleep(60) - - start_table_name = "listener_start_events" + table_postfix - progress_tbl_name = "listener_progress_events" + table_postfix - terminated_tbl_name = "listener_terminated_events" + table_postfix + time.sleep( + 60 + ) # Sleep to make sure listener_terminated_events is written successfully start_event = pyspark.cloudpickle.loads( - self.spark.read.table(start_table_name).collect()[0][0] + self.spark.read.table("listener_start_events").collect()[0][0] ) progress_event = pyspark.cloudpickle.loads( - self.spark.read.table(progress_tbl_name).collect()[0][0] + self.spark.read.table("listener_progress_events").collect()[0][0] ) terminated_event = pyspark.cloudpickle.loads( - self.spark.read.table(terminated_tbl_name).collect()[0][0] + self.spark.read.table("listener_terminated_events").collect()[0][0] ) self.check_start_event(start_event) - self.check_progress_event(progress_event) + self.check_progress_event(progress_event, is_stateful=True) self.check_terminated_event(terminated_event) - finally: - self.spark.streams.removeListener(test_listener) - - # Remove again to verify this won't throw any error - self.spark.streams.removeListener(test_listener) - - with self.table( - "listener_start_events_v1", - "listener_progress_events_v1", - "listener_terminated_events_v1", - "listener_start_events_v2", - "listener_progress_events_v2", - "listener_terminated_events_v2", - ): - verify(TestListenerSparkV1(), "_v1") - verify(TestListenerSparkV2(), "_v2") + finally: + self.spark.streams.removeListener(test_listener) + # Remove again to verify this won't throw any error + self.spark.streams.removeListener(test_listener) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py b/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py index 1c2c04f2da54f..9795e96bfa690 100644 --- a/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py +++ b/python/pyspark/sql/tests/connect/streaming/test_parity_streaming.py @@ -21,10 +21,7 @@ class StreamingParityTests(StreamingTestsMixin, ReusedConnectTestCase): def _assert_exception_tree_contains_msg(self, exception, msg): - self.assertTrue( - msg in exception._message, - "Exception tree doesn't contain 
the expected message: %s" % msg, - ) + self.assertIn(msg, exception._message) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index 0d84764f53602..598c76a5b25fe 100755 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -16,9 +16,12 @@ # import os +import gc import unittest import shutil import tempfile +import io +from contextlib import redirect_stdout from pyspark.util import is_remote_only from pyspark.errors import PySparkTypeError, PySparkValueError @@ -33,6 +36,7 @@ ArrayType, Row, ) +from pyspark.testing.utils import eventually from pyspark.testing.sqlutils import SQLTestUtils from pyspark.testing.connectutils import ( should_test_connect, @@ -350,6 +354,24 @@ def test_simple_explain_string(self): result = df._explain_string() self.assertGreater(len(result), 0) + def _check_print_schema(self, query: str): + with io.StringIO() as buf, redirect_stdout(buf): + self.spark.sql(query).printSchema() + print1 = buf.getvalue() + with io.StringIO() as buf, redirect_stdout(buf): + self.connect.sql(query).printSchema() + print2 = buf.getvalue() + self.assertEqual(print1, print2, query) + + for level in [-1, 0, 1, 2, 3, 4]: + with io.StringIO() as buf, redirect_stdout(buf): + self.spark.sql(query).printSchema(level) + print1 = buf.getvalue() + with io.StringIO() as buf, redirect_stdout(buf): + self.connect.sql(query).printSchema(level) + print2 = buf.getvalue() + self.assertEqual(print1, print2, query) + def test_schema(self): schema = self.connect.read.table(self.tbl_name).schema self.assertEqual( @@ -371,6 +393,7 @@ def test_schema(self): self.spark.sql(query).schema, self.connect.sql(query).schema, ) + self._check_print_schema(query) # test TimestampType, DateType query = """ @@ -384,6 +407,7 @@ def test_schema(self): self.spark.sql(query).schema, self.connect.sql(query).schema, ) + self._check_print_schema(query) # test DayTimeIntervalType query = """ SELECT INTERVAL '100 10:30' DAY TO MINUTE AS interval """ @@ -391,6 +415,7 @@ def test_schema(self): self.spark.sql(query).schema, self.connect.sql(query).schema, ) + self._check_print_schema(query) # test MapType query = """ @@ -404,6 +429,7 @@ def test_schema(self): self.spark.sql(query).schema, self.connect.sql(query).schema, ) + self._check_print_schema(query) # test ArrayType query = """ @@ -417,6 +443,7 @@ def test_schema(self): self.spark.sql(query).schema, self.connect.sql(query).schema, ) + self._check_print_schema(query) # test StructType query = """ @@ -430,6 +457,7 @@ def test_schema(self): self.spark.sql(query).schema, self.connect.sql(query).schema, ) + self._check_print_schema(query) def test_to(self): # SPARK-41464: test DataFrame.to() @@ -540,7 +568,7 @@ def test_toDF(self): def test_print_schema(self): # SPARK-41216: Test print schema - tree_str = self.connect.sql("SELECT 1 AS X, 2 AS Y")._tree_string() + tree_str = self.connect.sql("SELECT 1 AS X, 2 AS Y").schema.treeString() # root # |-- X: integer (nullable = false) # |-- Y: integer (nullable = false) @@ -629,6 +657,18 @@ def test_deduplicate(self): self.assert_eq( df.dropDuplicates(["name"]).toPandas(), df2.dropDuplicates(["name"]).toPandas() ) + self.assert_eq( + df.drop_duplicates(["name"]).toPandas(), df2.drop_duplicates(["name"]).toPandas() + ) + self.assert_eq( + df.dropDuplicates(["name", "id"]).toPandas(), + df2.dropDuplicates(["name", "id"]).toPandas(), + ) + self.assert_eq( + 
df.drop_duplicates(["name", "id"]).toPandas(), + df2.drop_duplicates(["name", "id"]).toPandas(), + ) + self.assert_eq(df.dropDuplicates("name").toPandas(), df2.dropDuplicates("name").toPandas()) def test_drop(self): # SPARK-41169: test drop @@ -1359,6 +1399,92 @@ def test_verify_col_name(self): self.assertTrue(verify_col_name("`m```.`s.s`.`v`", cdf.schema)) +class SparkConnectGCTests(SparkConnectSQLTestCase): + @classmethod + def setUpClass(cls): + cls.origin = os.getenv("USER", None) + os.environ["USER"] = "SparkConnectGCTests" + super(SparkConnectGCTests, cls).setUpClass() + + @classmethod + def tearDownClass(cls): + super(SparkConnectGCTests, cls).tearDownClass() + if cls.origin is not None: + os.environ["USER"] = cls.origin + else: + del os.environ["USER"] + + def test_garbage_collection_checkpoint(self): + # SPARK-48258: Make sure garbage-collecting DataFrame remove the paired state + # in Spark Connect server + df = self.connect.range(10).localCheckpoint() + self.assertIsNotNone(df._plan._relation_id) + cached_remote_relation_id = df._plan._relation_id + + jvm = self.spark._jvm + session_holder = getattr( + getattr( + jvm.org.apache.spark.sql.connect.service, + "SparkConnectService$", + ), + "MODULE$", + ).getOrCreateIsolatedSession(self.connect.client._user_id, self.connect.client._session_id) + + # Check the state exists. + self.assertIsNotNone( + session_holder.dataFrameCache().getOrDefault(cached_remote_relation_id, None) + ) + + del df + gc.collect() + + def condition(): + # Check the state was removed up on garbage-collection. + self.assertIsNone( + session_holder.dataFrameCache().getOrDefault(cached_remote_relation_id, None) + ) + + eventually(catch_assertions=True)(condition)() + + def test_garbage_collection_derived_checkpoint(self): + # SPARK-48258: Should keep the cached remote relation when derived DataFrames exist + df = self.connect.range(10).localCheckpoint() + self.assertIsNotNone(df._plan._relation_id) + derived = df.repartition(10) + cached_remote_relation_id = df._plan._relation_id + + jvm = self.spark._jvm + session_holder = getattr( + getattr( + jvm.org.apache.spark.sql.connect.service, + "SparkConnectService$", + ), + "MODULE$", + ).getOrCreateIsolatedSession(self.connect.client._user_id, self.connect.client._session_id) + + # Check the state exists. 
+ self.assertIsNotNone( + session_holder.dataFrameCache().getOrDefault(cached_remote_relation_id, None) + ) + + del df + gc.collect() + + def condition(): + self.assertIsNone( + session_holder.dataFrameCache().getOrDefault(cached_remote_relation_id, None) + ) + + # Should not remove the cache + with self.assertRaises(AssertionError): + eventually(catch_assertions=True, timeout=5)(condition)() + + del derived + gc.collect() + + eventually(catch_assertions=True)(condition)() + + if __name__ == "__main__": from pyspark.sql.tests.connect.test_connect_basic import * # noqa: F401 diff --git a/python/pyspark/sql/tests/connect/test_connect_column.py b/python/pyspark/sql/tests/connect/test_connect_column.py index 5a1cccc6e1720..fbfb4486446ff 100644 --- a/python/pyspark/sql/tests/connect/test_connect_column.py +++ b/python/pyspark/sql/tests/connect/test_connect_column.py @@ -51,7 +51,7 @@ from pyspark.sql.connect import functions as CF from pyspark.sql.connect.column import Column from pyspark.sql.connect.expressions import DistributedSequenceID, LiteralExpression - from pyspark.sql.connect.types import ( + from pyspark.util import ( JVM_BYTE_MIN, JVM_BYTE_MAX, JVM_SHORT_MIN, @@ -65,7 +65,7 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase): def compare_by_show(self, df1, df2, n: int = 20, truncate: int = 20): - from pyspark.sql.dataframe import DataFrame as SDF + from pyspark.sql.classic.dataframe import DataFrame as SDF from pyspark.sql.connect.dataframe import DataFrame as CDF assert isinstance(df1, (SDF, CDF)) @@ -772,8 +772,8 @@ def test_column_accessor(self): sdf.select(sdf.z[0], sdf.z[1], sdf["z"][2]).toPandas(), ) self.assert_eq( - cdf.select(CF.col("z")[0], cdf.z[10], CF.col("z")[-10]).toPandas(), - sdf.select(SF.col("z")[0], sdf.z[10], SF.col("z")[-10]).toPandas(), + cdf.select(CF.col("z")[0], CF.get(cdf.z, 10), CF.get(CF.col("z"), -10)).toPandas(), + sdf.select(SF.col("z")[0], SF.get(sdf.z, 10), SF.get(SF.col("z"), -10)).toPandas(), ) self.assert_eq( cdf.select(cdf.z.getItem(0), cdf.z.getItem(1), cdf["z"].getField(2)).toPandas(), @@ -824,8 +824,12 @@ def test_column_arithmetic_ops(self): ) self.assert_eq( - cdf.select(cdf.a % cdf["b"], cdf["a"] % 2, 12 % cdf.c).toPandas(), - sdf.select(sdf.a % sdf["b"], sdf["a"] % 2, 12 % sdf.c).toPandas(), + cdf.select( + cdf.a % cdf["b"], cdf["a"] % 2, CF.try_remainder(CF.lit(12), cdf.c) + ).toPandas(), + sdf.select( + sdf.a % sdf["b"], sdf["a"] % 2, SF.try_remainder(SF.lit(12), sdf.c) + ).toPandas(), ) self.assert_eq( @@ -1020,15 +1024,33 @@ def test_distributed_sequence_id(self): expected.collect(), ) + def test_lambda_str_representation(self): + from pyspark.sql.connect.expressions import UnresolvedNamedLambdaVariable + + # forcely clear the internal increasing id, + # otherwise the string representation varies with this id + UnresolvedNamedLambdaVariable._nextVarNameId = 0 + + c = CF.array_sort( + "data", + lambda x, y: CF.when(x.isNull() | y.isNull(), CF.lit(0)).otherwise( + CF.length(y) - CF.length(x) + ), + ) + + self.assertEqual( + str(c), + ( + """Column<'array_sort(data, LambdaFunction(CASE WHEN or(isNull(x_0), """ + """isNull(y_1)) THEN 0 ELSE -(length(y_1), length(x_0)) END, x_0, y_1))'>""" + ), + ) + if __name__ == "__main__": - import os import unittest from pyspark.sql.tests.connect.test_connect_column import * # noqa: F401 - # TODO(SPARK-41794): Enable ANSI mode in this file. 
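
The column-test changes above swap `z[10]` / `z[-10]` and `12 % c` for `get` and `try_remainder`. As I understand it, the reason is that with ANSI SQL mode enabled the bracket and `%` forms can raise on out-of-range indices or division by zero, while the `get` / `try_*` variants return NULL instead, keeping Classic and Connect results comparable. A hedged sketch, assuming a running `spark` session on a Spark version that ships `try_remainder`:

    from pyspark.sql import functions as F

    df = spark.createDataFrame([([1, 2, 3], 0)], ["z", "c"])
    df.select(
        F.get("z", 10).alias("oob"),                 # NULL: index past the end of the array
        F.try_remainder(F.lit(12), "c").alias("r"),  # NULL: remainder by zero, no error
    ).show()
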
- os.environ["SPARK_ANSI_SQL_MODE"] = "false" - try: import xmlrunner diff --git a/python/pyspark/sql/tests/connect/test_connect_dataframe_property.py b/python/pyspark/sql/tests/connect/test_connect_dataframe_property.py new file mode 100644 index 0000000000000..c712e5d6efcb6 --- /dev/null +++ b/python/pyspark/sql/tests/connect/test_connect_dataframe_property.py @@ -0,0 +1,442 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import unittest + +from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType, DoubleType +from pyspark.sql.utils import is_remote + +from pyspark.sql import functions as SF +from pyspark.sql.connect import functions as CF + +from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase +from pyspark.testing.sqlutils import ( + have_pandas, + have_pyarrow, + pandas_requirement_message, + pyarrow_requirement_message, +) + +if have_pyarrow: + import pyarrow as pa + import pyarrow.compute as pc + +if have_pandas: + import pandas as pd + + +class SparkConnectDataFramePropertyTests(SparkConnectSQLTestCase): + def test_cached_property_is_copied(self): + schema = StructType( + [ + StructField("id", IntegerType(), True), + StructField("name", StringType(), True), + StructField("age", IntegerType(), True), + StructField("city", StringType(), True), + ] + ) + # Create some dummy data + data = [ + (1, "Alice", 30, "New York"), + (2, "Bob", 25, "San Francisco"), + (3, "Cathy", 29, "Los Angeles"), + (4, "David", 35, "Chicago"), + ] + df = self.spark.createDataFrame(data, schema) + df_columns = df.columns + assert len(df.columns) == 4 + for col in ["id", "name"]: + df_columns.remove(col) + assert len(df.columns) == 4 + + def test_cached_schema_to(self): + cdf = self.connect.read.table(self.tbl_name) + sdf = self.spark.read.table(self.tbl_name) + + schema = StructType( + [ + StructField("id", IntegerType(), True), + StructField("name", StringType(), True), + ] + ) + + cdf1 = cdf.to(schema) + self.assertEqual(cdf1._cached_schema, schema) + + sdf1 = sdf.to(schema) + + self.assertEqual(cdf1.schema, sdf1.schema) + self.assertEqual(cdf1.collect(), sdf1.collect()) + + @unittest.skipIf( + not have_pandas or not have_pyarrow, + pandas_requirement_message or pyarrow_requirement_message, + ) + def test_cached_schema_map_in_pandas(self): + data = [(1, "foo"), (2, None), (3, "bar"), (4, "bar")] + cdf = self.connect.createDataFrame(data, "a int, b string") + sdf = self.spark.createDataFrame(data, "a int, b string") + + def func(iterator): + for pdf in iterator: + assert isinstance(pdf, pd.DataFrame) + assert [d.name for d in list(pdf.dtypes)] == ["int32", "object"] + yield pdf + + schema = StructType( + [ + StructField("a", IntegerType(), True), + StructField("b", StringType(), True), + ] + ) + + with 
self.temp_env({"SPARK_CONNECT_MODE_ENABLED": "1"}): + self.assertTrue(is_remote()) + cdf1 = cdf.mapInPandas(func, schema) + self.assertEqual(cdf1._cached_schema, schema) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": None}): + # 'mapInPandas' depends on the method 'pandas_udf', which is dispatched + # based on 'is_remote'. However, in SparkConnectSQLTestCase, the remote + # mode is always on, so 'sdf.mapInPandas' fails with incorrect dispatch. + # Using this temp env to properly invoke mapInPandas in PySpark Classic. + self.assertFalse(is_remote()) + sdf1 = sdf.mapInPandas(func, schema) + + self.assertEqual(cdf1.schema, sdf1.schema) + self.assertEqual(cdf1.collect(), sdf1.collect()) + + @unittest.skipIf( + not have_pandas or not have_pyarrow, + pandas_requirement_message or pyarrow_requirement_message, + ) + def test_cached_schema_map_in_arrow(self): + data = [(1, "foo"), (2, None), (3, "bar"), (4, "bar")] + cdf = self.connect.createDataFrame(data, "a int, b string") + sdf = self.spark.createDataFrame(data, "a int, b string") + + def func(iterator): + for batch in iterator: + assert isinstance(batch, pa.RecordBatch) + assert batch.schema.types == [pa.int32(), pa.string()] + yield batch + + schema = StructType( + [ + StructField("a", IntegerType(), True), + StructField("b", StringType(), True), + ] + ) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": "1"}): + self.assertTrue(is_remote()) + cdf1 = cdf.mapInArrow(func, schema) + self.assertEqual(cdf1._cached_schema, schema) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": None}): + self.assertFalse(is_remote()) + sdf1 = sdf.mapInArrow(func, schema) + + self.assertEqual(cdf1.schema, sdf1.schema) + self.assertEqual(cdf1.collect(), sdf1.collect()) + + @unittest.skipIf( + not have_pandas or not have_pyarrow, + pandas_requirement_message or pyarrow_requirement_message, + ) + def test_cached_schema_group_apply_in_pandas(self): + data = [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)] + cdf = self.connect.createDataFrame(data, ("id", "v")) + sdf = self.spark.createDataFrame(data, ("id", "v")) + + def normalize(pdf): + v = pdf.v + return pdf.assign(v=(v - v.mean()) / v.std()) + + schema = StructType( + [ + StructField("id", LongType(), True), + StructField("v", DoubleType(), True), + ] + ) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": "1"}): + self.assertTrue(is_remote()) + cdf1 = cdf.groupby("id").applyInPandas(normalize, schema) + self.assertEqual(cdf1._cached_schema, schema) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": None}): + self.assertFalse(is_remote()) + sdf1 = sdf.groupby("id").applyInPandas(normalize, schema) + + self.assertEqual(cdf1.schema, sdf1.schema) + self.assertEqual(cdf1.collect(), sdf1.collect()) + + @unittest.skipIf( + not have_pandas or not have_pyarrow, + pandas_requirement_message or pyarrow_requirement_message, + ) + def test_cached_schema_group_apply_in_arrow(self): + data = [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)] + cdf = self.connect.createDataFrame(data, ("id", "v")) + sdf = self.spark.createDataFrame(data, ("id", "v")) + + def normalize(table): + v = table.column("v") + norm = pc.divide(pc.subtract(v, pc.mean(v)), pc.stddev(v, ddof=1)) + return table.set_column(1, "v", norm) + + schema = StructType( + [ + StructField("id", LongType(), True), + StructField("v", DoubleType(), True), + ] + ) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": "1"}): + self.assertTrue(is_remote()) + cdf1 = cdf.groupby("id").applyInArrow(normalize, schema) + 
self.assertEqual(cdf1._cached_schema, schema) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": None}): + self.assertFalse(is_remote()) + sdf1 = sdf.groupby("id").applyInArrow(normalize, schema) + + self.assertEqual(cdf1.schema, sdf1.schema) + self.assertEqual(cdf1.collect(), sdf1.collect()) + + @unittest.skipIf( + not have_pandas or not have_pyarrow, + pandas_requirement_message or pyarrow_requirement_message, + ) + def test_cached_schema_cogroup_apply_in_pandas(self): + data1 = [(20000101, 1, 1.0), (20000101, 2, 2.0), (20000102, 1, 3.0), (20000102, 2, 4.0)] + data2 = [(20000101, 1, "x"), (20000101, 2, "y")] + + cdf1 = self.connect.createDataFrame(data1, ("time", "id", "v1")) + sdf1 = self.spark.createDataFrame(data1, ("time", "id", "v1")) + cdf2 = self.connect.createDataFrame(data2, ("time", "id", "v2")) + sdf2 = self.spark.createDataFrame(data2, ("time", "id", "v2")) + + def asof_join(left, right): + return pd.merge_asof(left, right, on="time", by="id") + + schema = StructType( + [ + StructField("time", IntegerType(), True), + StructField("id", IntegerType(), True), + StructField("v1", DoubleType(), True), + StructField("v2", StringType(), True), + ] + ) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": "1"}): + self.assertTrue(is_remote()) + cdf3 = cdf1.groupby("id").cogroup(cdf2.groupby("id")).applyInPandas(asof_join, schema) + self.assertEqual(cdf3._cached_schema, schema) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": None}): + self.assertFalse(is_remote()) + sdf3 = sdf1.groupby("id").cogroup(sdf2.groupby("id")).applyInPandas(asof_join, schema) + + self.assertEqual(cdf3.schema, sdf3.schema) + self.assertEqual(cdf3.collect(), sdf3.collect()) + + @unittest.skipIf( + not have_pandas or not have_pyarrow, + pandas_requirement_message or pyarrow_requirement_message, + ) + def test_cached_schema_cogroup_apply_in_arrow(self): + data1 = [(1, 1.0), (2, 2.0), (1, 3.0), (2, 4.0)] + data2 = [(1, "x"), (2, "y")] + + cdf1 = self.connect.createDataFrame(data1, ("id", "v1")) + sdf1 = self.spark.createDataFrame(data1, ("id", "v1")) + cdf2 = self.connect.createDataFrame(data2, ("id", "v2")) + sdf2 = self.spark.createDataFrame(data2, ("id", "v2")) + + def summarize(left, right): + return pa.Table.from_pydict( + { + "left": [left.num_rows], + "right": [right.num_rows], + } + ) + + schema = StructType( + [ + StructField("left", LongType(), True), + StructField("right", LongType(), True), + ] + ) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": "1"}): + self.assertTrue(is_remote()) + cdf3 = cdf1.groupby("id").cogroup(cdf2.groupby("id")).applyInArrow(summarize, schema) + self.assertEqual(cdf3._cached_schema, schema) + + with self.temp_env({"SPARK_CONNECT_MODE_ENABLED": None}): + self.assertFalse(is_remote()) + sdf3 = sdf1.groupby("id").cogroup(sdf2.groupby("id")).applyInArrow(summarize, schema) + + self.assertEqual(cdf3.schema, sdf3.schema) + self.assertEqual(cdf3.collect(), sdf3.collect()) + + def test_cached_schema_set_op(self): + data1 = [(1, 2, 3)] + data2 = [(6, 2, 5)] + data3 = [(6, 2, 5.0)] + + cdf1 = self.connect.createDataFrame(data1, ["a", "b", "c"]) + sdf1 = self.spark.createDataFrame(data1, ["a", "b", "c"]) + cdf2 = self.connect.createDataFrame(data2, ["a", "b", "c"]) + sdf2 = self.spark.createDataFrame(data2, ["a", "b", "c"]) + cdf3 = self.connect.createDataFrame(data3, ["a", "b", "c"]) + sdf3 = self.spark.createDataFrame(data3, ["a", "b", "c"]) + + # schema not yet cached + self.assertTrue(cdf1._cached_schema is None) + self.assertTrue(cdf2._cached_schema is None) + 
self.assertTrue(cdf3._cached_schema is None) + + # no cached schema in result dataframe + self.assertTrue(cdf1.union(cdf1)._cached_schema is None) + self.assertTrue(cdf1.union(cdf2)._cached_schema is None) + self.assertTrue(cdf1.union(cdf3)._cached_schema is None) + + self.assertTrue(cdf1.unionAll(cdf1)._cached_schema is None) + self.assertTrue(cdf1.unionAll(cdf2)._cached_schema is None) + self.assertTrue(cdf1.unionAll(cdf3)._cached_schema is None) + + self.assertTrue(cdf1.unionByName(cdf1)._cached_schema is None) + self.assertTrue(cdf1.unionByName(cdf2)._cached_schema is None) + self.assertTrue(cdf1.unionByName(cdf3)._cached_schema is None) + + self.assertTrue(cdf1.subtract(cdf1)._cached_schema is None) + self.assertTrue(cdf1.subtract(cdf2)._cached_schema is None) + self.assertTrue(cdf1.subtract(cdf3)._cached_schema is None) + + self.assertTrue(cdf1.exceptAll(cdf1)._cached_schema is None) + self.assertTrue(cdf1.exceptAll(cdf2)._cached_schema is None) + self.assertTrue(cdf1.exceptAll(cdf3)._cached_schema is None) + + self.assertTrue(cdf1.intersect(cdf1)._cached_schema is None) + self.assertTrue(cdf1.intersect(cdf2)._cached_schema is None) + self.assertTrue(cdf1.intersect(cdf3)._cached_schema is None) + + self.assertTrue(cdf1.intersectAll(cdf1)._cached_schema is None) + self.assertTrue(cdf1.intersectAll(cdf2)._cached_schema is None) + self.assertTrue(cdf1.intersectAll(cdf3)._cached_schema is None) + + # trigger analysis of cdf1.schema + self.assertEqual(cdf1.schema, sdf1.schema) + self.assertTrue(cdf1._cached_schema is not None) + + self.assertEqual(cdf1.union(cdf1)._cached_schema, cdf1._cached_schema) + # cannot infer when cdf2 doesn't cache schema + self.assertTrue(cdf1.union(cdf2)._cached_schema is None) + # cannot infer when cdf3 doesn't cache schema + self.assertTrue(cdf1.union(cdf3)._cached_schema is None) + + # trigger analysis of cdf2.schema, cdf3.schema + self.assertEqual(cdf2.schema, sdf2.schema) + self.assertEqual(cdf3.schema, sdf3.schema) + + # now all the schemas are cached + self.assertTrue(cdf1._cached_schema is not None) + self.assertTrue(cdf2._cached_schema is not None) + self.assertTrue(cdf3._cached_schema is not None) + + self.assertEqual(cdf1.union(cdf1)._cached_schema, cdf1._cached_schema) + self.assertEqual(cdf1.union(cdf2)._cached_schema, cdf1._cached_schema) + # cannot infer when schemas mismatch + self.assertTrue(cdf1.union(cdf3)._cached_schema is None) + + self.assertEqual(cdf1.unionAll(cdf1)._cached_schema, cdf1._cached_schema) + self.assertEqual(cdf1.unionAll(cdf2)._cached_schema, cdf1._cached_schema) + # cannot infer when schemas mismatch + self.assertTrue(cdf1.unionAll(cdf3)._cached_schema is None) + + self.assertEqual(cdf1.unionByName(cdf1)._cached_schema, cdf1._cached_schema) + self.assertEqual(cdf1.unionByName(cdf2)._cached_schema, cdf1._cached_schema) + # cannot infer when schemas mismatch + self.assertTrue(cdf1.unionByName(cdf3)._cached_schema is None) + + self.assertEqual(cdf1.subtract(cdf1)._cached_schema, cdf1._cached_schema) + self.assertEqual(cdf1.subtract(cdf2)._cached_schema, cdf1._cached_schema) + # cannot infer when schemas mismatch + self.assertTrue(cdf1.subtract(cdf3)._cached_schema is None) + + self.assertEqual(cdf1.exceptAll(cdf1)._cached_schema, cdf1._cached_schema) + self.assertEqual(cdf1.exceptAll(cdf2)._cached_schema, cdf1._cached_schema) + # cannot infer when schemas mismatch + self.assertTrue(cdf1.exceptAll(cdf3)._cached_schema is None) + + self.assertEqual(cdf1.intersect(cdf1)._cached_schema, cdf1._cached_schema) + 
self.assertEqual(cdf1.intersect(cdf2)._cached_schema, cdf1._cached_schema) + # cannot infer when schemas mismatch + self.assertTrue(cdf1.intersect(cdf3)._cached_schema is None) + + self.assertEqual(cdf1.intersectAll(cdf1)._cached_schema, cdf1._cached_schema) + self.assertEqual(cdf1.intersectAll(cdf2)._cached_schema, cdf1._cached_schema) + # cannot infer when schemas mismatch + self.assertTrue(cdf1.intersectAll(cdf3)._cached_schema is None) + + def test_cached_schema_in_chain_op(self): + data = [(1, 1.0), (2, 2.0), (1, 3.0), (2, 4.0)] + + cdf = self.connect.createDataFrame(data, ("id", "v1")) + sdf = self.spark.createDataFrame(data, ("id", "v1")) + + cdf1 = cdf.withColumn("v2", CF.lit(1)) + sdf1 = sdf.withColumn("v2", SF.lit(1)) + + self.assertTrue(cdf1._cached_schema is None) + # trigger analysis of cdf1.schema + self.assertEqual(cdf1.schema, sdf1.schema) + self.assertTrue(cdf1._cached_schema is not None) + + cdf2 = cdf1.where(cdf1.v2 > 0) + sdf2 = sdf1.where(sdf1.v2 > 0) + self.assertEqual(cdf1._cached_schema, cdf2._cached_schema) + + cdf3 = cdf2.repartition(10) + sdf3 = sdf2.repartition(10) + self.assertEqual(cdf1._cached_schema, cdf3._cached_schema) + + cdf4 = cdf3.distinct() + sdf4 = sdf3.distinct() + self.assertEqual(cdf1._cached_schema, cdf4._cached_schema) + + cdf5 = cdf4.sample(fraction=0.5) + sdf5 = sdf4.sample(fraction=0.5) + self.assertEqual(cdf1._cached_schema, cdf5._cached_schema) + + self.assertEqual(cdf5.schema, sdf5.schema) + + +if __name__ == "__main__": + from pyspark.sql.tests.connect.test_connect_dataframe_property import * # noqa: F401 + + try: + import xmlrunner + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/tests/connect/test_connect_error.py b/python/pyspark/sql/tests/connect/test_connect_error.py index 1297e62bb96f7..d5d9f9a221847 100644 --- a/python/pyspark/sql/tests/connect/test_connect_error.py +++ b/python/pyspark/sql/tests/connect/test_connect_error.py @@ -21,6 +21,7 @@ from pyspark.errors.exceptions.base import SessionNotSameException from pyspark.sql.types import Row from pyspark.testing.connectutils import should_test_connect +from pyspark.errors import PySparkTypeError from pyspark.errors.exceptions.connect import AnalysisException from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase @@ -158,12 +159,10 @@ def test_different_spark_session_join_or_union(self): def test_unsupported_functions(self): # SPARK-41225: Disable unsupported functions. df = self.connect.read.table(self.tbl_name) - for f in ( - "checkpoint", - "localCheckpoint", - ): - with self.assertRaises(NotImplementedError): - getattr(df, f)() + with self.assertRaises(NotImplementedError): + df.toJSON() + with self.assertRaises(NotImplementedError): + df.rdd def test_unsupported_jvm_attribute(self): # Unsupported jvm attributes for Spark session. 
@@ -216,6 +215,21 @@ def test_column_cannot_be_constructed_from_string(self): with self.assertRaises(TypeError): Column("col") + def test_select_none(self): + with self.assertRaises(PySparkTypeError) as e1: + self.connect.range(1).select(None) + + self.check_error( + exception=e1.exception, + error_class="NOT_LIST_OF_COLUMN_OR_STR", + message_parameters={"arg_name": "columns"}, + ) + + def test_ym_interval_in_collect(self): + # YearMonthIntervalType is not supported in python side arrow conversion + with self.assertRaises(PySparkTypeError): + self.connect.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval").first() + if __name__ == "__main__": from pyspark.sql.tests.connect.test_connect_error import * # noqa: F401 diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py index 581fde3e62933..0f0abfd4b8567 100644 --- a/python/pyspark/sql/tests/connect/test_connect_function.py +++ b/python/pyspark/sql/tests/connect/test_connect_function.py @@ -21,7 +21,14 @@ from pyspark.util import is_remote_only from pyspark.errors import PySparkTypeError, PySparkValueError from pyspark.sql import SparkSession as PySparkSession -from pyspark.sql.types import StringType, StructType, StructField, ArrayType, IntegerType +from pyspark.sql.types import ( + _drop_metadata, + StringType, + StructType, + StructField, + ArrayType, + IntegerType, +) from pyspark.testing import assertDataFrameEqual from pyspark.testing.pandasutils import PandasOnSparkTestUtils from pyspark.testing.connectutils import ReusedConnectTestCase, should_test_connect @@ -32,10 +39,8 @@ from pyspark.sql.connect.column import Column from pyspark.sql import functions as SF from pyspark.sql.window import Window as SW - from pyspark.sql.dataframe import DataFrame as SDF from pyspark.sql.connect import functions as CF from pyspark.sql.connect.window import Window as CW - from pyspark.sql.connect.dataframe import DataFrame as CDF @unittest.skipIf(is_remote_only(), "Requires JVM access") @@ -60,6 +65,9 @@ def tearDownClass(cls): del os.environ["PYSPARK_NO_NAMESPACE_SHARE"] def compare_by_show(self, df1, df2, n: int = 20, truncate: int = 20): + from pyspark.sql.classic.dataframe import DataFrame as SDF + from pyspark.sql.connect.dataframe import DataFrame as CDF + assert isinstance(df1, (SDF, CDF)) if isinstance(df1, SDF): str1 = df1._jdf.showString(n, truncate, False) @@ -1667,7 +1675,7 @@ def test_nested_lambda_function(self): ) # TODO: 'cdf.schema' has an extra metadata '{'__autoGeneratedAlias': 'true'}' - # self.assertEqual(cdf.schema, sdf.schema) + self.assertEqual(_drop_metadata(cdf.schema), _drop_metadata(sdf.schema)) self.assertEqual(cdf.collect(), sdf.collect()) def test_csv_functions(self): @@ -2029,7 +2037,6 @@ def test_string_functions_one_arg(self): (CF.sentences, SF.sentences), (CF.initcap, SF.initcap), (CF.soundex, SF.soundex), - (CF.bin, SF.bin), (CF.hex, SF.hex), (CF.unhex, SF.unhex), (CF.length, SF.length), @@ -2042,6 +2049,19 @@ def test_string_functions_one_arg(self): sdf.select(sfunc("a"), sfunc(sdf.b)).toPandas(), ) + query = """ + SELECT * FROM VALUES + (' 1 ', '2 ', NULL), (' 3', NULL, '4') + AS tab(a, b, c) + """ + cdf = self.connect.sql(query) + sdf = self.spark.sql(query) + + self.assert_eq( + cdf.select(CF.bin(cdf.a), CF.bin(cdf.b)).toPandas(), + sdf.select(SF.bin(sdf.a), SF.bin(sdf.b)).toPandas(), + ) + def test_string_functions_multi_args(self): query = """ SELECT * FROM VALUES @@ -2148,15 +2168,15 @@ def test_string_functions_multi_args(self): 
def test_date_ts_functions(self): query = """ SELECT * FROM VALUES - ('1997/02/28 10:30:00', '2023/03/01 06:00:00', 'JST', 1428476400, 2020, 12, 6), - ('2000/01/01 04:30:05', '2020/05/01 12:15:00', 'PST', 1403892395, 2022, 12, 6) + ('1997-02-28 10:30:00', '2023-03-01 06:00:00', 'JST', 1428476400, 2020, 12, 6), + ('2000-01-01 04:30:05', '2020-05-01 12:15:00', 'PST', 1403892395, 2022, 12, 6) AS tab(ts1, ts2, tz, seconds, Y, M, D) """ # +-------------------+-------------------+---+----------+----+---+---+ # | ts1| ts2| tz| seconds| Y| M| D| # +-------------------+-------------------+---+----------+----+---+---+ - # |1997/02/28 10:30:00|2023/03/01 06:00:00|JST|1428476400|2020| 12| 6| - # |2000/01/01 04:30:05|2020/05/01 12:15:00|PST|1403892395|2022| 12| 6| + # |1997-02-28 10:30:00|2023-03-01 06:00:00|JST|1428476400|2020| 12| 6| + # |2000-01-01 04:30:05|2020-05-01 12:15:00|PST|1403892395|2022| 12| 6| # +-------------------+-------------------+---+----------+----+---+---+ cdf = self.connect.sql(query) @@ -2212,14 +2232,14 @@ def test_date_ts_functions(self): (CF.to_date, SF.to_date), ]: self.assert_eq( - cdf.select(cfunc(cdf.ts1, format="yyyy-MM-dd")).toPandas(), - sdf.select(sfunc(sdf.ts1, format="yyyy-MM-dd")).toPandas(), + cdf.select(cfunc(cdf.ts1, format="yyyy-MM-dd HH:mm:ss")).toPandas(), + sdf.select(sfunc(sdf.ts1, format="yyyy-MM-dd HH:mm:ss")).toPandas(), ) self.compare_by_show( # [left]: datetime64[ns, America/Los_Angeles] # [right]: datetime64[ns] - cdf.select(CF.to_timestamp(cdf.ts1, format="yyyy-MM-dd")), - sdf.select(SF.to_timestamp(sdf.ts1, format="yyyy-MM-dd")), + cdf.select(CF.to_timestamp(cdf.ts1, format="yyyy-MM-dd HH:mm:ss")), + sdf.select(SF.to_timestamp(sdf.ts1, format="yyyy-MM-dd HH:mm:ss")), ) # With tz parameter @@ -2589,9 +2609,6 @@ def test_non_deterministic_with_seed(self): import os from pyspark.sql.tests.connect.test_connect_function import * # noqa: F401 - # TODO(SPARK-41547): Enable ANSI mode in this file. 
- os.environ["SPARK_ANSI_SQL_MODE"] = "false" - try: import xmlrunner # type: ignore diff --git a/python/pyspark/sql/tests/connect/test_connect_plan.py b/python/pyspark/sql/tests/connect/test_connect_plan.py index 3a221cacedb27..47e3fb5a96023 100644 --- a/python/pyspark/sql/tests/connect/test_connect_plan.py +++ b/python/pyspark/sql/tests/connect/test_connect_plan.py @@ -333,6 +333,11 @@ def test_observe(self): from pyspark.sql.connect.observation import Observation class MockDF(DataFrame): + def __new__(cls, df: DataFrame) -> "DataFrame": + self = object.__new__(cls) + self.__init__(df) # type: ignore[misc] + return self + def __init__(self, df: DataFrame): super().__init__(df._plan, df._session) @@ -438,7 +443,7 @@ def test_sample(self): self.assertEqual(plan.root.sample.lower_bound, 0.0) self.assertEqual(plan.root.sample.upper_bound, 0.3) self.assertEqual(plan.root.sample.with_replacement, False) - self.assertEqual(plan.root.sample.HasField("seed"), False) + self.assertEqual(plan.root.sample.HasField("seed"), True) self.assertEqual(plan.root.sample.deterministic_order, False) plan = ( @@ -548,13 +553,25 @@ def test_deduplicate(self): self.assertEqual(deduplicate_on_all_columns_plan.root.deduplicate.all_columns_as_keys, True) self.assertEqual(len(deduplicate_on_all_columns_plan.root.deduplicate.column_names), 0) - deduplicate_on_subset_columns_plan = df.dropDuplicates(["name", "height"])._plan.to_proto( - self.connect + deduplicate_on_subset_columns_plan_list_arg = df.dropDuplicates( + ["name", "height"] + )._plan.to_proto(self.connect) + self.assertEqual( + deduplicate_on_subset_columns_plan_list_arg.root.deduplicate.all_columns_as_keys, False + ) + self.assertEqual( + len(deduplicate_on_subset_columns_plan_list_arg.root.deduplicate.column_names), 2 + ) + + deduplicate_on_subset_columns_plan_var_arg = df.dropDuplicates( + "name", "height" + )._plan.to_proto(self.connect) + self.assertEqual( + deduplicate_on_subset_columns_plan_var_arg.root.deduplicate.all_columns_as_keys, False ) self.assertEqual( - deduplicate_on_subset_columns_plan.root.deduplicate.all_columns_as_keys, False + len(deduplicate_on_subset_columns_plan_var_arg.root.deduplicate.column_names), 2 ) - self.assertEqual(len(deduplicate_on_subset_columns_plan.root.deduplicate.column_names), 2) def test_relation_alias(self): df = self.connect.readTable(table_name=self.tbl_name) diff --git a/python/pyspark/sql/tests/connect/test_connect_session.py b/python/pyspark/sql/tests/connect/test_connect_session.py index 1caf3525cfbbc..1dd5cde0dff50 100644 --- a/python/pyspark/sql/tests/connect/test_connect_session.py +++ b/python/pyspark/sql/tests/connect/test_connect_session.py @@ -242,6 +242,34 @@ def toChannel(self): session = RemoteSparkSession.builder.channelBuilder(CustomChannelBuilder()).create() session.sql("select 1 + 1") + def test_reset_when_server_and_client_sessionids_mismatch(self): + session = RemoteSparkSession.builder.remote("sc://localhost").getOrCreate() + # run a simple query so the session id is synchronized. + session.range(3).collect() + + # trigger a mismatch between client session id and server session id. 
+        session._client._session_id = str(uuid.uuid4())
+        with self.assertRaises(SparkConnectException):
+            session.range(3).collect()
+
+        # assert that getOrCreate() generates a new session
+        session = RemoteSparkSession.builder.remote("sc://localhost").getOrCreate()
+        session.range(3).collect()
+
+    def test_reset_when_server_session_id_mismatch(self):
+        session = RemoteSparkSession.builder.remote("sc://localhost").getOrCreate()
+        # run a simple query so the session id is synchronized.
+        session.range(3).collect()
+
+        # trigger a mismatch
+        session._client._server_session_id = str(uuid.uuid4())
+        with self.assertRaises(SparkConnectException):
+            session.range(3).collect()
+
+        # assert that getOrCreate() generates a new session
+        session = RemoteSparkSession.builder.remote("sc://localhost").getOrCreate()
+        session.range(3).collect()
+

 class SparkConnectSessionWithOptionsTest(unittest.TestCase):
     def setUp(self) -> None:
diff --git a/python/pyspark/sql/tests/connect/test_df_debug.py b/python/pyspark/sql/tests/connect/test_df_debug.py
new file mode 100644
index 0000000000000..8a4ec68fda844
--- /dev/null
+++ b/python/pyspark/sql/tests/connect/test_df_debug.py
@@ -0,0 +1,86 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import unittest
+
+from pyspark.testing.connectutils import (
+    should_test_connect,
+    have_graphviz,
+    graphviz_requirement_message,
+)
+from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase
+
+if should_test_connect:
+    from pyspark.sql.connect.dataframe import DataFrame
+
+
+class SparkConnectDataFrameDebug(SparkConnectSQLTestCase):
+    def test_df_debug_basics(self):
+        df: DataFrame = self.connect.range(100).repartition(10).groupBy("id").count()
+        x = df.collect()  # noqa: F841
+        ei = df.executionInfo
+
+        root, graph = ei.metrics.extract_graph()
+        self.assertIn(root, graph, "The root must be rooted in the graph")
+
+    def test_df_query_execution_empty_before_execution(self):
+        df: DataFrame = self.connect.range(100).repartition(10).groupBy("id").count()
+        ei = df.executionInfo
+        self.assertIsNone(ei, "The query execution must be None before the action is executed")
+
+    def test_df_query_execution_with_writes(self):
+        df: DataFrame = self.connect.range(100).repartition(10).groupBy("id").count()
+        df.write.save("/tmp/test_df_query_execution_with_writes", format="json", mode="overwrite")
+        ei = df.executionInfo
+        self.assertIsNotNone(
+            ei, "The query execution must not be None after the write action is executed"
+        )
+
+    def test_query_execution_text_format(self):
+        df: DataFrame = self.connect.range(100).repartition(10).groupBy("id").count()
+        df.collect()
+        self.assertIn("HashAggregate", df.executionInfo.metrics.toText())
+
+        # Different execution mode.
+ df: DataFrame = self.connect.range(100).repartition(10).groupBy("id").count() + df.toPandas() + self.assertIn("HashAggregate", df.executionInfo.metrics.toText()) + + @unittest.skipIf(not have_graphviz, graphviz_requirement_message) + def test_df_query_execution_metrics_to_dot(self): + df: DataFrame = self.connect.range(100).repartition(10).groupBy("id").count() + x = df.collect() # noqa: F841 + ei = df.executionInfo + + dot = ei.metrics.toDot() + source = dot.source + self.assertIsNotNone(dot, "The dot representation must not be None") + self.assertGreater(len(source), 0, "The dot representation must not be empty") + self.assertIn("digraph", source, "The dot representation must contain the digraph keyword") + self.assertIn("Metrics", source, "The dot representation must contain the Metrics keyword") + + +if __name__ == "__main__": + from pyspark.sql.tests.connect.test_df_debug import * # noqa: F401 + + try: + import xmlrunner # type: ignore + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow.py b/python/pyspark/sql/tests/connect/test_parity_arrow.py index 93d0b6cf0f5f5..885b3001b1db1 100644 --- a/python/pyspark/sql/tests/connect/test_parity_arrow.py +++ b/python/pyspark/sql/tests/connect/test_parity_arrow.py @@ -16,7 +16,6 @@ # import unittest -import sys from pyspark.sql.tests.test_arrow import ArrowTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase @@ -24,10 +23,6 @@ class ArrowParityTests(ArrowTestsMixin, ReusedConnectTestCase, PandasOnSparkTestUtils): - @unittest.skip("Spark Connect does not support Spark Context but the test depends on that.") - def test_createDataFrame_empty_partition(self): - super().test_createDataFrame_empty_partition() - @unittest.skip("Spark Connect does not support fallback.") def test_createDataFrame_fallback_disabled(self): super().test_createDataFrame_fallback_disabled() @@ -36,8 +31,11 @@ def test_createDataFrame_fallback_disabled(self): def test_createDataFrame_fallback_enabled(self): super().test_createDataFrame_fallback_enabled() - def test_createDataFrame_with_map_type(self): - self.check_createDataFrame_with_map_type(True) + def test_createDataFrame_pandas_with_map_type(self): + self.check_createDataFrame_pandas_with_map_type(True) + + def test_createDataFrame_pandas_with_struct_type(self): + self.check_createDataFrame_pandas_with_struct_type(True) def test_createDataFrame_with_ndarray(self): self.check_createDataFrame_with_ndarray(True) @@ -74,6 +72,9 @@ def test_create_data_frame_to_pandas_timestamp_ntz(self): def test_create_data_frame_to_pandas_day_time_internal(self): self.check_create_data_frame_to_pandas_day_time_internal(True) + def test_createDataFrame_pandas_respect_session_timezone(self): + self.check_createDataFrame_pandas_respect_session_timezone(True) + def test_toPandas_respect_session_timezone(self): self.check_toPandas_respect_session_timezone(True) @@ -94,11 +95,11 @@ def test_toPandas_with_map_type(self): def test_toPandas_with_map_type_nulls(self): self.check_toPandas_with_map_type_nulls(True) - def test_createDataFrame_with_array_type(self): - self.check_createDataFrame_with_array_type(True) + def test_createDataFrame_pandas_with_array_type(self): + self.check_createDataFrame_pandas_with_array_type(True) - def test_createDataFrame_with_int_col_names(self): - self.check_createDataFrame_with_int_col_names(True) + def 
test_createDataFrame_pandas_with_int_col_names(self): + self.check_createDataFrame_pandas_with_int_col_names(True) def test_timestamp_nat(self): self.check_timestamp_nat(True) @@ -109,19 +110,21 @@ def test_toPandas_error(self): def test_toPandas_duplicate_field_names(self): self.check_toPandas_duplicate_field_names(True) - def test_createDataFrame_duplicate_field_names(self): - self.check_createDataFrame_duplicate_field_names(True) + def test_createDataFrame_pandas_duplicate_field_names(self): + self.check_createDataFrame_pandas_duplicate_field_names(True) + + def test_toPandas_empty_rows(self): + self.check_toPandas_empty_rows(True) def test_toPandas_empty_columns(self): self.check_toPandas_empty_columns(True) - def test_createDataFrame_nested_timestamp(self): - self.check_createDataFrame_nested_timestamp(True) + def test_createDataFrame_pandas_nested_timestamp(self): + self.check_createDataFrame_pandas_nested_timestamp(True) def test_toPandas_nested_timestamp(self): self.check_toPandas_nested_timestamp(True) - @unittest.skipIf(sys.version_info < (3, 9), "zoneinfo is available from Python 3.9+") def test_toPandas_timestmap_tzinfo(self): self.check_toPandas_timestmap_tzinfo(True) diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py b/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py index f5bd99fa22cfb..732008eb05a35 100644 --- a/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py +++ b/python/pyspark/sql/tests/connect/test_parity_arrow_python_udf.py @@ -15,10 +15,6 @@ # limitations under the License. # -import unittest - -from pyspark.errors import AnalysisException, PythonException -from pyspark.sql.functions import udf from pyspark.sql.tests.connect.test_parity_udf import UDFParityTests from pyspark.sql.tests.test_arrow_python_udf import PythonUDFArrowTestsMixin @@ -36,32 +32,6 @@ def tearDownClass(cls): finally: super(ArrowPythonUDFParityTests, cls).tearDownClass() - def test_named_arguments_negative(self): - @udf("int") - def test_udf(a, b): - return a + b - - self.spark.udf.register("test_udf", test_udf) - - with self.assertRaisesRegex( - AnalysisException, - "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.DOUBLE_NAMED_ARGUMENT_REFERENCE", - ): - self.spark.sql("SELECT test_udf(a => id, a => id * 10) FROM range(2)").show() - - with self.assertRaisesRegex(AnalysisException, "UNEXPECTED_POSITIONAL_ARGUMENT"): - self.spark.sql("SELECT test_udf(a => id, id * 10) FROM range(2)").show() - - with self.assertRaises(PythonException): - self.spark.sql("SELECT test_udf(c => 'x') FROM range(2)").show() - - with self.assertRaises(PythonException): - self.spark.sql("SELECT test_udf(id, a => id * 10) FROM range(2)").show() - - @unittest.skip("Spark Connect does not validate return type in client.") - def test_err_return_type(self): - super.test_err_return_type() - if __name__ == "__main__": import unittest diff --git a/python/pyspark/sql/tests/connect/test_parity_column.py b/python/pyspark/sql/tests/connect/test_parity_column.py index d02fb289b7d8d..a109d2ba3b58f 100644 --- a/python/pyspark/sql/tests/connect/test_parity_column.py +++ b/python/pyspark/sql/tests/connect/test_parity_column.py @@ -17,16 +17,6 @@ import unittest -from pyspark.testing.connectutils import should_test_connect - -if should_test_connect: - from pyspark import sql - from pyspark.sql.connect.column import Column - - # This is a hack to make the Column instance comparison works in `ColumnTestsMixin`. - # e.g., `isinstance(col, pyspark.sql.Column)`. 
- sql.Column = Column - from pyspark.sql.tests.test_column import ColumnTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase diff --git a/python/pyspark/sql/tests/connect/test_parity_dataframe.py b/python/pyspark/sql/tests/connect/test_parity_dataframe.py index 6210d4ec72fec..343f485553a98 100644 --- a/python/pyspark/sql/tests/connect/test_parity_dataframe.py +++ b/python/pyspark/sql/tests/connect/test_parity_dataframe.py @@ -30,10 +30,6 @@ def test_help_command(self): def test_toDF_with_schema_string(self): super().test_toDF_with_schema_string() - @unittest.skip("Spark Connect does not support DataFrameQueryContext currently.") - def test_dataframe_error_context(self): - super().test_dataframe_error_context() - if __name__ == "__main__": import unittest diff --git a/python/pyspark/sql/tests/connect/test_parity_dataframe_query_context.py b/python/pyspark/sql/tests/connect/test_parity_dataframe_query_context.py new file mode 100644 index 0000000000000..59107363571ee --- /dev/null +++ b/python/pyspark/sql/tests/connect/test_parity_dataframe_query_context.py @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import unittest + +from pyspark.sql.tests.test_dataframe_query_context import DataFrameQueryContextTestsMixin +from pyspark.testing.connectutils import ReusedConnectTestCase + + +class DataFrameQueryContextParityTests(DataFrameQueryContextTestsMixin, ReusedConnectTestCase): + pass + + +if __name__ == "__main__": + import unittest + from pyspark.sql.tests.connect.test_parity_dataframe_query_context import * # noqa: F401 + + try: + import xmlrunner # type: ignore[import] + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/tests/connect/test_parity_functions.py b/python/pyspark/sql/tests/connect/test_parity_functions.py index 4fa1cf31b3b68..0a77c5531082a 100644 --- a/python/pyspark/sql/tests/connect/test_parity_functions.py +++ b/python/pyspark/sql/tests/connect/test_parity_functions.py @@ -18,17 +18,10 @@ import unittest from pyspark.sql.tests.test_functions import FunctionsTestsMixin -from pyspark.testing.connectutils import should_test_connect, ReusedConnectTestCase - -if should_test_connect: - from pyspark.errors.exceptions.connect import SparkConnectException - from pyspark.sql.connect.column import Column +from pyspark.testing.connectutils import ReusedConnectTestCase class FunctionsParityTests(FunctionsTestsMixin, ReusedConnectTestCase): - def test_assert_true(self): - self.check_assert_true(SparkConnectException) - @unittest.skip("Spark Connect does not support Spark Context but the test depends on that.") def test_basic_functions(self): super().test_basic_functions() @@ -41,15 +34,8 @@ def test_function_parity(self): def test_input_file_name_reset_for_rdd(self): super().test_input_file_name_reset_for_rdd() - def test_raise_error(self): - self.check_raise_error(SparkConnectException) - - def test_sorting_functions_with_column(self): - self.check_sorting_functions_with_column(Column) - if __name__ == "__main__": - import unittest from pyspark.sql.tests.connect.test_parity_functions import * # noqa: F401 try: diff --git a/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py b/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py index 513e49a144e50..c6ef9810c6840 100644 --- a/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py +++ b/python/pyspark/sql/tests/connect/test_parity_memory_profiler.py @@ -27,6 +27,20 @@ def setUp(self) -> None: super().setUp() self.spark._profiler_collector._value = None + +class MemoryProfilerWithoutPlanCacheParityTests(MemoryProfilerParityTests): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.spark.conf.set("spark.connect.session.planCache.enabled", False) + + @classmethod + def tearDownClass(cls): + try: + cls.spark.conf.unset("spark.connect.session.planCache.enabled") + finally: + super().tearDownClass() + def test_memory_profiler_udf_multiple_actions(self): def action(df): df.collect() @@ -35,6 +49,7 @@ def action(df): with self.sql_conf({"spark.sql.pyspark.udf.profiler": "memory"}): _do_computation(self.spark, action=action) + # Without the plan cache, UDF ID will be different for each action self.assertEqual(6, len(self.profile_results), str(list(self.profile_results))) for id in self.profile_results: diff --git a/python/pyspark/sql/tests/connect/test_parity_observation.py b/python/pyspark/sql/tests/connect/test_parity_observation.py index a7b0009357b60..e16053d5a082a 100644 --- a/python/pyspark/sql/tests/connect/test_parity_observation.py 
+++ b/python/pyspark/sql/tests/connect/test_parity_observation.py @@ -25,10 +25,7 @@ class DataFrameObservationParityTests( DataFrameObservationTestsMixin, ReusedConnectTestCase, ): - # TODO(SPARK-41625): Support Structured Streaming - @unittest.skip("Fails in Spark Connect, should enable.") - def test_observe_str(self): - super().test_observe_str() + pass if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py b/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py index 41e756546318d..00d71bda2d938 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/connect/test_parity_pandas_cogrouped_map.py @@ -20,18 +20,11 @@ from pyspark.testing.connectutils import ReusedConnectTestCase -class CogroupedApplyInPandasTests(CogroupedApplyInPandasTestsMixin, ReusedConnectTestCase): - @unittest.skip("Fails in Spark Connect, should enable.") - def test_wrong_args(self): - self.check_wrong_args() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_apply_in_pandas_returning_incompatible_type(self): - super().test_apply_in_pandas_returning_incompatible_type() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_wrong_return_type(self): - super().test_wrong_return_type() +class CogroupedApplyInPandasTests( + CogroupedApplyInPandasTestsMixin, + ReusedConnectTestCase, +): + pass if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py b/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py index f0e7eeb606cab..8c76313c5c96b 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map.py @@ -26,26 +26,6 @@ class GroupedApplyInPandasTests(GroupedApplyInPandasTestsMixin, ReusedConnectTes def test_supported_types(self): super().test_supported_types() - @unittest.skip("Fails in Spark Connect, should enable.") - def test_wrong_return_type(self): - super().test_wrong_return_type() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_wrong_args(self): - super().test_wrong_args() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_unsupported_types(self): - super().test_unsupported_types() - - @unittest.skip("Fails in Spark Connect, should enable.") - def test_apply_in_pandas_returning_incompatible_type(self): - super().test_apply_in_pandas_returning_incompatible_type() - - @unittest.skip("Spark Connect doesn't support RDD but the test depends on it.") - def test_grouped_with_empty_partition(self): - super().test_grouped_with_empty_partition() - if __name__ == "__main__": from pyspark.sql.tests.connect.test_parity_pandas_grouped_map import * # noqa: F401 diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py b/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py index dc3bdf28f81c8..67d42a7c86138 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py +++ b/python/pyspark/sql/tests/connect/test_parity_pandas_grouped_map_with_state.py @@ -25,29 +25,7 @@ class GroupedApplyInPandasWithStateTests( GroupedApplyInPandasWithStateTestsMixin, ReusedConnectTestCase ): - @unittest.skip("foreachBatch will be supported in SPARK-42944.") - def test_apply_in_pandas_with_state_basic(self): - super().test_apply_in_pandas_with_state_basic() - - @unittest.skip("foreachBatch will be supported 
in SPARK-42944.") - def test_apply_in_pandas_with_state_basic_no_state(self): - super().test_apply_in_pandas_with_state_basic() - - @unittest.skip("foreachBatch will be supported in SPARK-42944.") - def test_apply_in_pandas_with_state_basic_no_state_no_data(self): - super().test_apply_in_pandas_with_state_basic() - - @unittest.skip("foreachBatch will be supported in SPARK-42944.") - def test_apply_in_pandas_with_state_basic_more_data(self): - super().test_apply_in_pandas_with_state_basic() - - @unittest.skip("foreachBatch will be supported in SPARK-42944.") - def test_apply_in_pandas_with_state_basic_fewer_data(self): - super().test_apply_in_pandas_with_state_basic() - - @unittest.skip("foreachBatch will be supported in SPARK-42944.") - def test_apply_in_pandas_with_state_basic_with_null(self): - super().test_apply_in_pandas_with_state_basic() + pass if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py b/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py index b5433b38dee5f..364e41716474b 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py +++ b/python/pyspark/sql/tests/connect/test_parity_pandas_udf.py @@ -14,49 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from pyspark.sql.functions import pandas_udf, PandasUDFType -from pyspark.sql.tests.pandas.test_pandas_udf import PandasUDFTestsMixin -from pyspark.testing.connectutils import should_test_connect, ReusedConnectTestCase -if should_test_connect: - from pyspark.sql.connect.types import UnparsedDataType +from pyspark.sql.tests.pandas.test_pandas_udf import PandasUDFTestsMixin +from pyspark.testing.connectutils import ReusedConnectTestCase class PandasUDFParityTests(PandasUDFTestsMixin, ReusedConnectTestCase): - def test_udf_wrong_arg(self): - self.check_udf_wrong_arg() - - def test_pandas_udf_decorator_with_return_type_string(self): - @pandas_udf("v double", PandasUDFType.GROUPED_MAP) - def foo(x): - return x - - self.assertEqual(foo.returnType, UnparsedDataType("v double")) - self.assertEqual(foo.evalType, PandasUDFType.GROUPED_MAP) - - @pandas_udf(returnType="double", functionType=PandasUDFType.SCALAR) - def foo(x): - return x - - self.assertEqual(foo.returnType, UnparsedDataType("double")) - self.assertEqual(foo.evalType, PandasUDFType.SCALAR) - - def test_pandas_udf_basic_with_return_type_string(self): - udf = pandas_udf(lambda x: x, "double", PandasUDFType.SCALAR) - self.assertEqual(udf.returnType, UnparsedDataType("double")) - self.assertEqual(udf.evalType, PandasUDFType.SCALAR) - - udf = pandas_udf(lambda x: x, "v double", PandasUDFType.GROUPED_MAP) - self.assertEqual(udf.returnType, UnparsedDataType("v double")) - self.assertEqual(udf.evalType, PandasUDFType.GROUPED_MAP) - - udf = pandas_udf(lambda x: x, "v double", functionType=PandasUDFType.GROUPED_MAP) - self.assertEqual(udf.returnType, UnparsedDataType("v double")) - self.assertEqual(udf.evalType, PandasUDFType.GROUPED_MAP) - - udf = pandas_udf(lambda x: x, returnType="v double", functionType=PandasUDFType.GROUPED_MAP) - self.assertEqual(udf.returnType, UnparsedDataType("v double")) - self.assertEqual(udf.evalType, PandasUDFType.GROUPED_MAP) + pass if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py index 6a3f8ab2569b7..fdb81bffbce12 100644 --- 
a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/connect/test_parity_pandas_udf_grouped_agg.py @@ -20,19 +20,11 @@ from pyspark.testing.connectutils import ReusedConnectTestCase -class PandasUDFGroupedAggParityTests(GroupedAggPandasUDFTestsMixin, ReusedConnectTestCase): - # TODO(SPARK-43727): Parity returnType check in Spark Connect - @unittest.skip("Fails in Spark Connect, should enable.") - def test_unsupported_types(self): - super().test_unsupported_types() - - @unittest.skip("Spark Connect doesn't support RDD but the test depends on it.") - def test_grouped_with_empty_partition(self): - super().test_grouped_with_empty_partition() - - @unittest.skip("Spark Connect does not support convert UNPARSED to catalyst types.") - def test_manual(self): - super().test_manual() +class PandasUDFGroupedAggParityTests( + GroupedAggPandasUDFTestsMixin, + ReusedConnectTestCase, +): + pass if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py b/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py index b42bfaf0f58db..451f0f68d6ee5 100644 --- a/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/connect/test_parity_pandas_udf_scalar.py @@ -15,30 +15,12 @@ # limitations under the License. # import unittest -from pyspark.sql.connect.column import Column from pyspark.sql.tests.pandas.test_pandas_udf_scalar import ScalarPandasUDFTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase class PandasUDFScalarParityTests(ScalarPandasUDFTestsMixin, ReusedConnectTestCase): - def test_nondeterministic_vectorized_udf_in_aggregate(self): - self.check_nondeterministic_analysis_exception() - - @unittest.skip("Spark Connect doesn't support RDD but the test depends on it.") - def test_vectorized_udf_empty_partition(self): - super().test_vectorized_udf_empty_partition() - - @unittest.skip("Spark Connect doesn't support RDD but the test depends on it.") - def test_vectorized_udf_struct_with_empty_partition(self): - super().test_vectorized_udf_struct_with_empty_partition() - - # TODO(SPARK-43727): Parity returnType check in Spark Connect - @unittest.skip("Fails in Spark Connect, should enable.") - def test_vectorized_udf_wrong_return_type(self): - super().test_vectorized_udf_wrong_return_type() - - def test_mixed_udf_and_sql(self): - self._test_mixed_udf_and_sql(Column) + pass if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_parity_types.py b/python/pyspark/sql/tests/connect/test_parity_types.py index 82a677574b455..6d06611def6af 100644 --- a/python/pyspark/sql/tests/connect/test_parity_types.py +++ b/python/pyspark/sql/tests/connect/test_parity_types.py @@ -32,24 +32,28 @@ def test_apply_schema_to_dict_and_rows(self): @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") def test_apply_schema_to_row(self): - super().test_apply_schema_to_dict_and_rows() + super().test_apply_schema_to_row() @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") def test_create_dataframe_schema_mismatch(self): super().test_create_dataframe_schema_mismatch() @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") - def test_infer_array_element_type_empty(self): - super().test_infer_array_element_type_empty() - - @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") - def test_infer_array_element_type_with_struct(self): - 
super().test_infer_array_element_type_with_struct() + def test_infer_array_element_type_empty_rdd(self): + super().test_infer_array_element_type_empty_rdd() @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") def test_infer_array_merge_element_types_with_rdd(self): super().test_infer_array_merge_element_types_with_rdd() + @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") + def test_infer_map_pair_type_empty_rdd(self): + super().test_infer_map_pair_type_empty_rdd() + + @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") + def test_infer_map_merge_pair_types_with_rdd(self): + super().test_infer_map_merge_pair_types_with_rdd() + @unittest.skip("Spark Connect does not support RDD but the tests depend on them.") def test_infer_binary_type(self): super().test_infer_binary_type() @@ -86,9 +90,17 @@ def test_rdd_with_udt(self): def test_udt(self): super().test_udt() - @unittest.skip("Does not test anything related to Spark Connect") - def test_parse_datatype_string(self): - super().test_parse_datatype_string() + @unittest.skip("Requires JVM access.") + def test_schema_with_collations_json_ser_de(self): + super().test_schema_with_collations_json_ser_de() + + @unittest.skip("This test is dedicated for PySpark Classic.") + def test_ym_interval_in_collect(self): + super().test_ym_interval_in_collect() + + @unittest.skip("This test is dedicated for PySpark Classic.") + def test_cal_interval_in_collect(self): + super().test_cal_interval_in_collect() if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_parity_udf.py b/python/pyspark/sql/tests/connect/test_parity_udf.py index 17d7ae0eb9fc7..5507f8e9f289b 100644 --- a/python/pyspark/sql/tests/connect/test_parity_udf.py +++ b/python/pyspark/sql/tests/connect/test_parity_udf.py @@ -44,10 +44,6 @@ def test_udf_with_input_file_name_for_hadooprdd(self): def test_same_accumulator_in_udfs(self): super().test_same_accumulator_in_udfs() - @unittest.skip("Spark Connect does not support spark.conf but the test depends on it.") - def test_udf_timestamp_ntz(self): - super().test_udf_timestamp_ntz() - @unittest.skip("Spark Connect does not support broadcast but the test depends on it.") def test_broadcast_in_udf(self): super().test_broadcast_in_udf() diff --git a/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py b/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py index dfa56ff0bb888..a1789a50896db 100644 --- a/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py +++ b/python/pyspark/sql/tests/connect/test_parity_udf_profiler.py @@ -27,6 +27,20 @@ def setUp(self) -> None: super().setUp() self.spark._profiler_collector._value = None + +class UDFProfilerWithoutPlanCacheParityTests(UDFProfilerParityTests): + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.spark.conf.set("spark.connect.session.planCache.enabled", False) + + @classmethod + def tearDownClass(cls): + try: + cls.spark.conf.unset("spark.connect.session.planCache.enabled") + finally: + super().tearDownClass() + def test_perf_profiler_udf_multiple_actions(self): def action(df): df.collect() @@ -35,6 +49,7 @@ def action(df): with self.sql_conf({"spark.sql.pyspark.udf.profiler": "perf"}): _do_computation(self.spark, action=action) + # Without the plan cache, UDF ID will be different for each action self.assertEqual(6, len(self.profile_results), str(list(self.profile_results))) for id in self.profile_results: diff --git 
a/python/pyspark/sql/tests/connect/test_parity_udtf.py b/python/pyspark/sql/tests/connect/test_parity_udtf.py index 5071b69060a1d..2ea6ef8cc389d 100644 --- a/python/pyspark/sql/tests/connect/test_parity_udtf.py +++ b/python/pyspark/sql/tests/connect/test_parity_udtf.py @@ -25,7 +25,6 @@ sql.udtf.UserDefinedTableFunction = UserDefinedTableFunction from pyspark.sql.connect.functions import lit, udtf -from pyspark.util import is_remote_only from pyspark.sql.tests.test_udtf import BaseUDTFTestsMixin, UDTFArrowTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase from pyspark.errors.exceptions.connect import SparkConnectGrpcException, PythonException @@ -68,13 +67,11 @@ def test_udtf_with_analyze_using_broadcast(self): def test_udtf_with_analyze_using_accumulator(self): super().test_udtf_with_analyze_using_accumulator() - @unittest.skipIf(is_remote_only(), "pyspark-connect does not have SparkFiles") def test_udtf_with_analyze_using_archive(self): - super().test_udtf_with_analyze_using_archive() + super().check_udtf_with_analyze_using_archive(".") - @unittest.skipIf(is_remote_only(), "pyspark-connect does not have SparkFiles") def test_udtf_with_analyze_using_file(self): - super().test_udtf_with_analyze_using_file() + super().check_udtf_with_analyze_using_file(".") @unittest.skip("pyspark-connect can serialize SparkSession, but fails on executor") def test_udtf_access_spark_session(self): diff --git a/python/pyspark/sql/tests/connect/test_resources.py b/python/pyspark/sql/tests/connect/test_resources.py index 931acd9298043..94d71b54ff057 100644 --- a/python/pyspark/sql/tests/connect/test_resources.py +++ b/python/pyspark/sql/tests/connect/test_resources.py @@ -15,19 +15,16 @@ # limitations under the License. # import unittest +import os -from pyspark.util import is_remote_only from pyspark.testing.connectutils import ReusedConnectTestCase +from pyspark.sql.tests.test_resources import ResourceProfileTestsMixin -# TODO(SPARK-47757): Reeanble ResourceProfileTests for pyspark-connect -if not is_remote_only(): - from pyspark.sql.tests.test_resources import ResourceProfileTestsMixin - - class ResourceProfileTests(ResourceProfileTestsMixin, ReusedConnectTestCase): - @classmethod - def master(cls): - return "local-cluster[1, 4, 1024]" +class ResourceProfileTests(ResourceProfileTestsMixin, ReusedConnectTestCase): + @classmethod + def master(cls): + return os.environ.get("SPARK_CONNECT_TESTING_REMOTE", "local-cluster[1, 4, 1024]") if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/connect/test_session.py b/python/pyspark/sql/tests/connect/test_session.py index 5184b9f061712..6f0e4aaad3f89 100644 --- a/python/pyspark/sql/tests/connect/test_session.py +++ b/python/pyspark/sql/tests/connect/test_session.py @@ -77,6 +77,34 @@ def test_session_create_sets_active_session(self): self.assertIs(session, session2) session.stop() + def test_active_session_expires_when_client_closes(self): + s1 = RemoteSparkSession.builder.remote("sc://other").getOrCreate() + s2 = RemoteSparkSession.getActiveSession() + + self.assertIs(s1, s2) + + # We don't call close() to avoid executing ExecutePlanResponseReattachableIterator + s1._client._closed = True + + self.assertIsNone(RemoteSparkSession.getActiveSession()) + s3 = RemoteSparkSession.builder.remote("sc://other").getOrCreate() + + self.assertIsNot(s1, s3) + + def test_default_session_expires_when_client_closes(self): + s1 = RemoteSparkSession.builder.remote("sc://other").getOrCreate() + s2 = RemoteSparkSession.getDefaultSession() + + 
self.assertIs(s1, s2) + + # We don't call close() to avoid executing ExecutePlanResponseReattachableIterator + s1._client._closed = True + + self.assertIsNone(RemoteSparkSession.getDefaultSession()) + s3 = RemoteSparkSession.builder.remote("sc://other").getOrCreate() + + self.assertIsNot(s1, s3) + class JobCancellationTests(ReusedConnectTestCase): def test_tags(self): @@ -91,6 +119,34 @@ def test_tags(self): self.assertEqual(self.spark.getTags(), set()) self.spark.clearTags() + def test_tags_multithread(self): + output1 = None + output2 = None + + def tag1(): + nonlocal output1 + + self.spark.addTag("tag1") + output1 = self.spark.getTags() + + def tag2(): + nonlocal output2 + + self.spark.addTag("tag2") + output2 = self.spark.getTags() + + t1 = threading.Thread(target=tag1) + t1.start() + t1.join() + t2 = threading.Thread(target=tag2) + t2.start() + t2.join() + + self.assertIsNotNone(output1) + self.assertEquals(output1, {"tag1"}) + self.assertIsNotNone(output2) + self.assertEquals(output2, {"tag2"}) + def test_interrupt_tag(self): thread_ids = range(4) self.check_job_cancellation( diff --git a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py index 0e7d0e7ef7df8..b1060ef48156a 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py @@ -165,7 +165,7 @@ def check_apply_in_pandas_not_returning_pandas_dataframe(self): fn=lambda lft, rgt: lft.size + rgt.size, error_class=PythonException, error_message_regex="Return type of the user-defined function " - "should be pandas.DataFrame, but is int.", + "should be pandas.DataFrame, but is int", ) def test_apply_in_pandas_returning_column_names(self): diff --git a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py index 0396006e2b362..a26d6d02a2bcd 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map.py @@ -52,7 +52,7 @@ MapType, YearMonthIntervalType, ) -from pyspark.errors import PythonException, PySparkTypeError +from pyspark.errors import PythonException, PySparkTypeError, PySparkValueError from pyspark.testing.sqlutils import ( ReusedSQLTestCase, have_pandas, @@ -421,22 +421,43 @@ def test_wrong_args(self): def check_wrong_args(self): df = self.data - with self.assertRaisesRegex(ValueError, "Invalid function"): + with self.assertRaisesRegex(PySparkTypeError, "INVALID_UDF_EVAL_TYPE"): df.groupby("id").apply(lambda x: x) - with self.assertRaisesRegex(ValueError, "Invalid function"): + with self.assertRaisesRegex(PySparkTypeError, "INVALID_UDF_EVAL_TYPE"): df.groupby("id").apply(udf(lambda x: x, DoubleType())) - with self.assertRaisesRegex(ValueError, "Invalid function"): + with self.assertRaisesRegex(PySparkTypeError, "INVALID_UDF_EVAL_TYPE"): df.groupby("id").apply(sum(df.v)) - with self.assertRaisesRegex(ValueError, "Invalid function"): + with self.assertRaisesRegex(PySparkTypeError, "INVALID_UDF_EVAL_TYPE"): df.groupby("id").apply(df.v + 1) - with self.assertRaisesRegex(ValueError, "Invalid function"): + with self.assertRaisesRegex(PySparkTypeError, "INVALID_UDF_EVAL_TYPE"): + df.groupby("id").apply(pandas_udf(lambda x, y: x, DoubleType())) + with self.assertRaisesRegex(PySparkTypeError, "INVALID_UDF_EVAL_TYPE"): + df.groupby("id").apply(pandas_udf(lambda x, y: x, DoubleType(), PandasUDFType.SCALAR)) + + with 
self.assertRaisesRegex(PySparkValueError, "INVALID_PANDAS_UDF"): df.groupby("id").apply( pandas_udf(lambda: 1, StructType([StructField("d", DoubleType())])) ) - with self.assertRaisesRegex(ValueError, "Invalid function"): - df.groupby("id").apply(pandas_udf(lambda x, y: x, DoubleType())) - with self.assertRaisesRegex(ValueError, "Invalid function.*GROUPED_MAP"): - df.groupby("id").apply(pandas_udf(lambda x, y: x, DoubleType(), PandasUDFType.SCALAR)) + + def test_wrong_args_in_apply_func(self): + df1 = self.spark.range(11) + df2 = self.spark.range(22) + + with self.assertRaisesRegex(PySparkValueError, "INVALID_PANDAS_UDF"): + df1.groupby("id").applyInPandas(lambda: 1, StructType([StructField("d", DoubleType())])) + + with self.assertRaisesRegex(PySparkValueError, "INVALID_PANDAS_UDF"): + df1.groupby("id").applyInArrow(lambda: 1, StructType([StructField("d", DoubleType())])) + + with self.assertRaisesRegex(PySparkValueError, "INVALID_PANDAS_UDF"): + df1.groupby("id").cogroup(df2.groupby("id")).applyInPandas( + lambda: 1, StructType([StructField("d", DoubleType())]) + ) + + with self.assertRaisesRegex(PySparkValueError, "INVALID_PANDAS_UDF"): + df1.groupby("id").cogroup(df2.groupby("id")).applyInArrow( + lambda: 1, StructType([StructField("d", DoubleType())]) + ) def test_unsupported_types(self): with self.quiet(): @@ -679,13 +700,13 @@ def test_grouped_with_empty_partition(self): data = [Row(id=1, x=2), Row(id=1, x=3), Row(id=2, x=4)] expected = [Row(id=1, x=5), Row(id=1, x=5), Row(id=2, x=4)] num_parts = len(data) + 1 - df = self.spark.createDataFrame(self.sc.parallelize(data, numSlices=num_parts)) + df = self.spark.createDataFrame(data).repartition(num_parts) f = pandas_udf( lambda pdf: pdf.assign(x=pdf["x"].sum()), "id long, x int", PandasUDFType.GROUPED_MAP ) - result = df.groupBy("id").apply(f).collect() + result = df.groupBy("id").apply(f).sort("id").collect() self.assertEqual(result, expected) def test_grouped_over_window(self): diff --git a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py index 12ee9319d2cbe..47f7d672cc8c2 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_grouped_map_with_state.py @@ -95,6 +95,7 @@ def prepare_test_resource(): self.assertEqual(q.name, "this_query") self.assertTrue(q.isActive) q.processAllAvailable() + self.assertTrue(q.exception() is None) def test_apply_in_pandas_with_state_basic(self): def func(key, pdf_iter, state): @@ -109,10 +110,10 @@ def func(key, pdf_iter, state): yield pd.DataFrame({"key": [key[0]], "countAsString": [str(total_len)]}) def check_results(batch_df, _): - self.assertEqual( - set(batch_df.sort("key").collect()), - {Row(key="hello", countAsString="1"), Row(key="this", countAsString="1")}, - ) + assert set(batch_df.sort("key").collect()) == { + Row(key="hello", countAsString="1"), + Row(key="this", countAsString="1"), + } self._test_apply_in_pandas_with_state_basic(func, check_results) @@ -123,14 +124,11 @@ def func(key, pdf_iter, state): yield pd.DataFrame({"key": [key[0], "foo"], "countAsString": ["100", "222"]}) def check_results(batch_df, _): - self.assertEqual( - set(batch_df.sort("key").collect()), - { - Row(key="hello", countAsString="100"), - Row(key="this", countAsString="100"), - Row(key="foo", countAsString="222"), - }, - ) + assert set(batch_df.sort("key").collect()) == { + Row(key="hello", countAsString="100"), + Row(key="this", 
countAsString="100"), + Row(key="foo", countAsString="222"), + } self._test_apply_in_pandas_with_state_basic(func, check_results) @@ -141,7 +139,7 @@ def func(key, pdf_iter, state): yield pd.DataFrame({"key": [], "countAsString": []}) def check_results(batch_df, _): - self.assertTrue(len(set(batch_df.sort("key").collect())) == 0) + assert len(set(batch_df.sort("key").collect())) == 0 self._test_apply_in_pandas_with_state_basic(func, check_results) @@ -156,16 +154,13 @@ def func(key, pdf_iter, state): ) def check_results(batch_df, _): - self.assertEqual( - set(batch_df.sort("key").collect()), - { - Row(key="hello", countAsString="1"), - Row(key="foo", countAsString="666"), - Row(key="hello_2", countAsString="2"), - Row(key="this", countAsString="1"), - Row(key="this_2", countAsString="2"), - }, - ) + assert set(batch_df.sort("key").collect()) == { + Row(key="hello", countAsString="1"), + Row(key="foo", countAsString="666"), + Row(key="hello_2", countAsString="2"), + Row(key="this", countAsString="1"), + Row(key="this_2", countAsString="2"), + } self._test_apply_in_pandas_with_state_basic(func, check_results) @@ -177,7 +172,7 @@ def func(key, pdf_iter, state): yield pd.DataFrame({"key": [], "countAsString": []}) def check_results(batch_df, _): - self.assertTrue(len(set(batch_df.sort("key").collect())) == 0) + assert len(set(batch_df.sort("key").collect())) == 0 self._test_apply_in_pandas_with_state_basic(func, check_results) @@ -194,10 +189,7 @@ def func(key, pdf_iter, state): yield pd.DataFrame({"key": [None], "countAsString": [str(total_len)]}) def check_results(batch_df, _): - self.assertEqual( - set(batch_df.sort("key").collect()), - {Row(key=None, countAsString="1")}, - ) + assert set(batch_df.sort("key").collect()) == {Row(key=None, countAsString="1")} self._test_apply_in_pandas_with_state_basic(func, check_results) diff --git a/python/pyspark/sql/tests/pandas/test_pandas_map.py b/python/pyspark/sql/tests/pandas/test_pandas_map.py index 37e52d4344fb8..692f9705411e0 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_map.py @@ -151,14 +151,14 @@ def bad_iter_elem(_): with self.assertRaisesRegex( PythonException, "Return type of the user-defined function should be iterator of pandas.DataFrame, " - "but is int.", + "but is int", ): (self.spark.range(10, numPartitions=3).mapInPandas(no_iter, "a int").count()) with self.assertRaisesRegex( PythonException, "Return type of the user-defined function should be iterator of pandas.DataFrame, " - "but is iterator of int.", + "but is iterator of int", ): (self.spark.range(10, numPartitions=3).mapInPandas(bad_iter_elem, "a int").count()) diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py index a7cf45e3bcbe0..70fa31fd515bb 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_grouped_agg.py @@ -538,11 +538,11 @@ def test_grouped_with_empty_partition(self): data = [Row(id=1, x=2), Row(id=1, x=3), Row(id=2, x=4)] expected = [Row(id=1, sum=5), Row(id=2, x=4)] num_parts = len(data) + 1 - df = self.spark.createDataFrame(self.sc.parallelize(data, numSlices=num_parts)) + df = self.spark.createDataFrame(data).repartition(num_parts) f = pandas_udf(lambda x: x.sum(), "int", PandasUDFType.GROUPED_AGG) - result = df.groupBy("id").agg(f(df["x"]).alias("sum")).collect() + result = 
df.groupBy("id").agg(f(df["x"]).alias("sum")).sort("id").collect() self.assertEqual(result, expected) def test_grouped_without_group_by_clause(self): diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py index ec413d048d8ec..38bc633cd1ed1 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_scalar.py @@ -764,15 +764,17 @@ def iter_identity(x): self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_empty_partition(self): - df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2)) + df = self.spark.createDataFrame([Row(id=1)]).repartition(2) for udf_type in [PandasUDFType.SCALAR, PandasUDFType.SCALAR_ITER]: f = pandas_udf(lambda x: x, LongType(), udf_type) res = df.select(f(col("id"))) self.assertEqual(df.collect(), res.collect()) def test_vectorized_udf_struct_with_empty_partition(self): - df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)], 2)).withColumn( - "name", lit("John Doe") + df = ( + self.spark.createDataFrame([Row(id=1)]) + .repartition(2) + .withColumn("name", lit("John Doe")) ) @pandas_udf("first string, last string") @@ -1334,7 +1336,7 @@ def f1(x): return x + 1 def f2(x): - assert type(x) == col_type + assert isinstance(x, col_type) return x + 10 @pandas_udf("int") diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py index bfb874ffe5340..7a0fccc225725 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# -import sys import unittest from inspect import signature from typing import Union, Iterator, Tuple, cast, get_type_hints @@ -114,7 +113,6 @@ def func(iter: Iterator[Tuple[Union[pd.DataFrame, pd.Series], ...]]) -> Iterator infer_eval_type(signature(func), get_type_hints(func)), PandasUDFType.SCALAR_ITER ) - @unittest.skipIf(sys.version_info < (3, 9), "Type hinting generics require Python 3.9.") def test_type_annotation_tuple_generics(self): def func(iter: Iterator[tuple[pd.DataFrame, pd.Series]]) -> Iterator[pd.DataFrame]: pass diff --git a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py index 9b6751564c40e..442e1c61a0ba8 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_udf_typehints_with_future_annotations.py @@ -16,7 +16,6 @@ # from __future__ import annotations -import sys import unittest from inspect import signature from typing import Union, Iterator, Tuple, cast, get_type_hints @@ -308,10 +307,6 @@ def pandas_plus_one(iter: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]: expected = df.selectExpr("id + 1 as id") assert_frame_equal(expected.toPandas(), actual.toPandas()) - @unittest.skipIf( - sys.version_info < (3, 9), - "string annotations with future annotations do not work under Python<3.9", - ) def test_string_type_annotation(self): def func(col: "pd.Series") -> "pd.Series": pass diff --git a/python/pyspark/sql/tests/streaming/test_streaming.py b/python/pyspark/sql/tests/streaming/test_streaming.py index abfacdbbf059b..e284d052d9ae2 100644 --- a/python/pyspark/sql/tests/streaming/test_streaming.py +++ b/python/pyspark/sql/tests/streaming/test_streaming.py @@ -22,8 +22,9 @@ from pyspark.sql import Row from pyspark.sql.functions import lit -from pyspark.sql.types import StructType, StructField, IntegerType, StringType +from pyspark.sql.types import StructType, StructField, IntegerType, StringType, TimestampType from pyspark.testing.sqlutils import ReusedSQLTestCase +from pyspark.errors import PySparkValueError class StreamingTestsMixin: @@ -58,6 +59,26 @@ def test_streaming_query_functions_basic(self): finally: query.stop() + def test_streaming_query_name_edge_case(self): + # Query name should be None when not specified + q1 = self.spark.readStream.format("rate").load().writeStream.format("noop").start() + self.assertEqual(q1.name, None) + + # Cannot set query name to be an empty string + error_thrown = False + try: + ( + self.spark.readStream.format("rate") + .load() + .writeStream.format("noop") + .queryName("") + .start() + ) + except PySparkValueError: + error_thrown = True + + self.assertTrue(error_thrown) + def test_stream_trigger(self): df = self.spark.readStream.format("text").load("python/test_support/sql/streaming") @@ -263,36 +284,37 @@ def test_stream_await_termination(self): shutil.rmtree(tmpPath) def test_stream_exception(self): - sdf = self.spark.readStream.format("text").load("python/test_support/sql/streaming") - sq = sdf.writeStream.format("memory").queryName("query_explain").start() - try: - sq.processAllAvailable() - self.assertEqual(sq.exception(), None) - finally: - sq.stop() - - from pyspark.sql.functions import col, udf - from pyspark.errors import StreamingQueryException - - bad_udf = udf(lambda x: 1 / 0) - sq = ( - sdf.select(bad_udf(col("value"))) - .writeStream.format("memory") - .queryName("this_query") - .start() - ) - try: - # 
Process some data to fail the query - sq.processAllAvailable() - self.fail("bad udf should fail the query") - except StreamingQueryException as e: - # This is expected - self._assert_exception_tree_contains_msg(e, "ZeroDivisionError") - finally: - exception = sq.exception() - sq.stop() - self.assertIsInstance(exception, StreamingQueryException) - self._assert_exception_tree_contains_msg(exception, "ZeroDivisionError") + with self.sql_conf({"spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled": True}): + sdf = self.spark.readStream.format("text").load("python/test_support/sql/streaming") + sq = sdf.writeStream.format("memory").queryName("query_explain").start() + try: + sq.processAllAvailable() + self.assertEqual(sq.exception(), None) + finally: + sq.stop() + + from pyspark.sql.functions import col, udf + from pyspark.errors import StreamingQueryException + + bad_udf = udf(lambda x: 1 / 0) + sq = ( + sdf.select(bad_udf(col("value"))) + .writeStream.format("memory") + .queryName("this_query") + .start() + ) + try: + # Process some data to fail the query + sq.processAllAvailable() + self.fail("bad udf should fail the query") + except StreamingQueryException as e: + # This is expected + self._assert_exception_tree_contains_msg(e, "ZeroDivisionError") + finally: + exception = sq.exception() + sq.stop() + self.assertIsInstance(exception, StreamingQueryException) + self._assert_exception_tree_contains_msg(exception, "ZeroDivisionError") def test_query_manager_no_recreation(self): # SPARK-46873: There should not be a new StreamingQueryManager created every time @@ -391,6 +413,30 @@ def test_streaming_with_temporary_view(self): set([Row(value="view_a"), Row(value="view_b"), Row(value="view_c")]), set(result) ) + def test_streaming_drop_duplicate_within_watermark(self): + """ + This verifies dropDuplicatesWithinWatermark works with a streaming dataframe. 
+ """ + user_schema = StructType().add("time", TimestampType()).add("id", "integer") + df = ( + self.spark.readStream.option("sep", ";") + .schema(user_schema) + .csv("python/test_support/sql/streaming/time") + ) + q1 = ( + df.withWatermark("time", "2 seconds") + .dropDuplicatesWithinWatermark(["id"]) + .writeStream.outputMode("update") + .format("memory") + .queryName("test_streaming_drop_duplicates_within_wm") + .start() + ) + self.assertTrue(q1.isActive) + q1.processAllAvailable() + q1.stop() + result = self.spark.sql("SELECT * FROM test_streaming_drop_duplicates_within_wm").collect() + self.assertTrue(len(result) >= 6 and len(result) <= 9) + class StreamingTests(StreamingTestsMixin, ReusedSQLTestCase): def _assert_exception_tree_contains_msg(self, exception, msg): diff --git a/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py b/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py index ef286115a303f..de8f30baebca5 100644 --- a/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py +++ b/python/pyspark/sql/tests/streaming/test_streaming_foreach_batch.py @@ -29,17 +29,18 @@ def test_streaming_foreach_batch(self): q = None def collectBatch(batch_df, batch_id): - batch_df.createOrReplaceGlobalTempView("test_view") + batch_df.write.format("parquet").saveAsTable("test_table") try: df = self.spark.readStream.format("text").load("python/test_support/sql/streaming") q = df.writeStream.foreachBatch(collectBatch).start() q.processAllAvailable() - collected = self.spark.sql("select * from global_temp.test_view").collect() + collected = self.spark.sql("select * from test_table").collect() self.assertTrue(len(collected), 2) finally: if q: q.stop() + self.spark.sql("DROP TABLE IF EXISTS test_table") def test_streaming_foreach_batch_tempview(self): q = None @@ -50,18 +51,19 @@ def collectBatch(batch_df, batch_id): # clone the session which is no longer same with the session used to start the # streaming query assert len(batch_df.sparkSession.sql("SELECT * FROM updates").collect()) == 2 - # Write to a global view verify on the repl/client side. - batch_df.createOrReplaceGlobalTempView("temp_view") + # Write a table to verify on the repl/client side. 
+ batch_df.write.format("parquet").saveAsTable("test_table") try: df = self.spark.readStream.format("text").load("python/test_support/sql/streaming") q = df.writeStream.foreachBatch(collectBatch).start() q.processAllAvailable() - collected = self.spark.sql("SELECT * FROM global_temp.temp_view").collect() + collected = self.spark.sql("SELECT * FROM test_table").collect() self.assertTrue(len(collected[0]), 2) finally: if q: q.stop() + self.spark.sql("DROP TABLE IF EXISTS test_table") def test_streaming_foreach_batch_propagates_python_errors(self): from pyspark.errors import StreamingQueryException diff --git a/python/pyspark/sql/tests/streaming/test_streaming_listener.py b/python/pyspark/sql/tests/streaming/test_streaming_listener.py index 243ad2dca0747..762fc335b56ad 100644 --- a/python/pyspark/sql/tests/streaming/test_streaming_listener.py +++ b/python/pyspark/sql/tests/streaming/test_streaming_listener.py @@ -30,6 +30,7 @@ StateOperatorProgress, StreamingQueryProgress, ) +from pyspark.sql.functions import count, col, lit from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -39,16 +40,16 @@ def check_start_event(self, event): self.assertTrue(isinstance(event, QueryStartedEvent)) self.assertTrue(isinstance(event.id, uuid.UUID)) self.assertTrue(isinstance(event.runId, uuid.UUID)) - self.assertTrue(event.name is None or event.name == "test") + self.assertTrue(event.name is None or event.name.startswith("test")) try: datetime.strptime(event.timestamp, "%Y-%m-%dT%H:%M:%S.%fZ") except ValueError: self.fail("'%s' is not in ISO 8601 format.") - def check_progress_event(self, event): + def check_progress_event(self, event, is_stateful): """Check QueryProgressEvent""" self.assertTrue(isinstance(event, QueryProgressEvent)) - self.check_streaming_query_progress(event.progress) + self.check_streaming_query_progress(event.progress, is_stateful) def check_terminated_event(self, event, exception=None, error_class=None): """Check QueryTerminatedEvent""" @@ -65,12 +66,12 @@ def check_terminated_event(self, event, exception=None, error_class=None): else: self.assertEqual(event.errorClassOnException, None) - def check_streaming_query_progress(self, progress): + def check_streaming_query_progress(self, progress, is_stateful): """Check StreamingQueryProgress""" self.assertTrue(isinstance(progress, StreamingQueryProgress)) self.assertTrue(isinstance(progress.id, uuid.UUID)) self.assertTrue(isinstance(progress.runId, uuid.UUID)) - self.assertEqual(progress.name, "test") + self.assertTrue(progress.name.startswith("test")) try: json.loads(progress.json) except Exception: @@ -108,9 +109,10 @@ def check_streaming_query_progress(self, progress): self.assertTrue(all(map(lambda v: isinstance(v, str), progress.eventTime.values()))) self.assertTrue(isinstance(progress.stateOperators, list)) - self.assertTrue(len(progress.stateOperators) >= 1) - for so in progress.stateOperators: - self.check_state_operator_progress(so) + if is_stateful: + self.assertTrue(len(progress.stateOperators) >= 1) + for so in progress.stateOperators: + self.check_state_operator_progress(so) self.assertTrue(isinstance(progress.sources, list)) self.assertTrue(len(progress.sources) >= 1) @@ -192,6 +194,53 @@ def check_sink_progress(self, progress): self.assertTrue(isinstance(progress.numOutputRows, int)) self.assertTrue(isinstance(progress.metrics, dict)) + # This is a generic test that works for both classic Spark and Spark Connect + def test_listener_observed_metrics(self): + class MyErrorListener(StreamingQueryListener): + def __init__(self): + 
self.num_rows = -1 + self.num_error_rows = -1 + + def onQueryStarted(self, event): + pass + + def onQueryProgress(self, event): + row = event.progress.observedMetrics.get("my_event") + # Save observed metrics for later verification + self.num_rows = row["rc"] + self.num_error_rows = row["erc"] + + def onQueryIdle(self, event): + pass + + def onQueryTerminated(self, event): + pass + + try: + error_listener = MyErrorListener() + self.spark.streams.addListener(error_listener) + + sdf = self.spark.readStream.format("rate").load().withColumn("error", col("value")) + + # Observe row count (rc) and error row count (erc) in the streaming Dataset + observed_ds = sdf.observe( + "my_event", count(lit(1)).alias("rc"), count(col("error")).alias("erc") + ) + + q = observed_ds.writeStream.format("console").start() + + while q.lastProgress is None or q.lastProgress["batchId"] == 0: + q.awaitTermination(0.5) + + time.sleep(5) + + self.assertTrue(error_listener.num_rows > 0) + self.assertTrue(error_listener.num_error_rows > 0) + + finally: + q.stop() + self.spark.streams.removeListener(error_listener) + class StreamingListenerTests(StreamingListenerTestsMixin, ReusedSQLTestCase): def test_number_of_public_methods(self): @@ -313,7 +362,7 @@ def verify(test_listener): self.spark.sparkContext._jsc.sc().listenerBus().waitUntilEmpty() self.check_start_event(start_event) - self.check_progress_event(progress_event) + self.check_progress_event(progress_event, True) self.check_terminated_event(terminated_event) # Check query terminated with exception @@ -470,7 +519,7 @@ def test_streaming_query_progress_fromJson(self): """ progress = StreamingQueryProgress.fromJson(json.loads(progress_json)) - self.check_streaming_query_progress(progress) + self.check_streaming_query_progress(progress, True) # checks for progress self.assertEqual(progress.id, uuid.UUID("00000000-0000-0001-0000-000000000001")) @@ -543,6 +592,23 @@ def test_streaming_query_progress_fromJson(self): self.assertEqual(sink.numOutputRows, -1) self.assertEqual(sink.metrics, {}) + def test_spark_property_in_listener(self): + # SPARK-48560: Make StreamingQueryListener.spark settable + class TestListener(StreamingQueryListener): + def __init__(self, session): + self.spark = session + + def onQueryStarted(self, event): + pass + + def onQueryProgress(self, event): + pass + + def onQueryTerminated(self, event): + pass + + self.assertEqual(TestListener(self.spark).spark, self.spark) + if __name__ == "__main__": import unittest diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py index 5235e021bae9a..c1a69c404086b 100644 --- a/python/pyspark/sql/tests/test_arrow.py +++ b/python/pyspark/sql/tests/test_arrow.py @@ -23,7 +23,6 @@ import unittest from typing import cast from collections import namedtuple -import sys from pyspark import SparkConf from pyspark.sql import Row, SparkSession @@ -56,6 +55,9 @@ ExamplePointUDT, ) from pyspark.errors import ArithmeticException, PySparkTypeError, UnsupportedOperationException +from pyspark.loose_version import LooseVersion +from pyspark.util import is_remote_only +from pyspark.loose_version import LooseVersion if have_pandas: import pandas as pd @@ -160,6 +162,45 @@ def setUpClass(cls): ] cls.data = [tuple(list(d) + [None]) for d in cls.data_wo_null] + cls.schema_nested_timestamp = ( + StructType() + .add("ts", TimestampType()) + .add("ts_ntz", TimestampNTZType()) + .add( + "struct", StructType().add("ts", TimestampType()).add("ts_ntz", TimestampNTZType()) + ) + .add("array", 
ArrayType(TimestampType())) + .add("array_ntz", ArrayType(TimestampNTZType())) + .add("map", MapType(StringType(), TimestampType())) + .add("map_ntz", MapType(StringType(), TimestampNTZType())) + ) + cls.data_nested_timestamp = [ + Row( + datetime(2023, 1, 1, 0, 0, 0), + datetime(2023, 1, 1, 0, 0, 0), + Row( + datetime(2023, 1, 1, 0, 0, 0), + datetime(2023, 1, 1, 0, 0, 0), + ), + [datetime(2023, 1, 1, 0, 0, 0)], + [datetime(2023, 1, 1, 0, 0, 0)], + dict(ts=datetime(2023, 1, 1, 0, 0, 0)), + dict(ts_ntz=datetime(2023, 1, 1, 0, 0, 0)), + ) + ] + cls.data_nested_timestamp_expected_ny = Row( + ts=datetime(2022, 12, 31, 21, 0, 0), + ts_ntz=datetime(2023, 1, 1, 0, 0, 0), + struct=Row( + ts=datetime(2022, 12, 31, 21, 0, 0), + ts_ntz=datetime(2023, 1, 1, 0, 0, 0), + ), + array=[datetime(2022, 12, 31, 21, 0, 0)], + array_ntz=[datetime(2023, 1, 1, 0, 0, 0)], + map=dict(ts=datetime(2022, 12, 31, 21, 0, 0)), + map_ntz=dict(ts_ntz=datetime(2023, 1, 1, 0, 0, 0)), + ) + @classmethod def tearDownClass(cls): del os.environ["TZ"] @@ -179,6 +220,27 @@ def create_pandas_data_frame(self): data_dict["4_float_t"] = np.float32(data_dict["4_float_t"]) return pd.DataFrame(data=data_dict) + def create_arrow_table(self): + import pyarrow as pa + + data_dict = {} + for j, name in enumerate(self.schema.names): + data_dict[name] = [self.data[i][j] for i in range(len(self.data))] + t = pa.Table.from_pydict(data_dict) + # convert these to Arrow types + new_schema = t.schema.set( + t.schema.get_field_index("2_int_t"), pa.field("2_int_t", pa.int32()) + ) + new_schema = new_schema.set( + new_schema.get_field_index("4_float_t"), pa.field("4_float_t", pa.float32()) + ) + new_schema = new_schema.set( + new_schema.get_field_index("6_decimal_t"), + pa.field("6_decimal_t", pa.decimal128(38, 18)), + ) + t = t.cast(new_schema) + return t + @property def create_np_arrs(self): import numpy as np @@ -286,6 +348,17 @@ def check_create_data_frame_to_pandas_timestamp_ntz(self, arrow_enabled): pdf = df.toPandas() assert_frame_equal(origin, pdf) + def test_create_data_frame_to_arrow_timestamp_ntz(self): + with self.sql_conf({"spark.sql.session.timeZone": "America/Los_Angeles"}): + origin = pa.table({"a": [datetime.datetime(2012, 2, 2, 2, 2, 2)]}) + df = self.spark.createDataFrame( + origin, schema=StructType([StructField("a", TimestampNTZType(), True)]) + ) + df.selectExpr("assert_true('2012-02-02 02:02:02' == CAST(a AS STRING))").collect() + + t = df.toArrow() + self.assertTrue(origin.equals(t)) + def test_create_data_frame_to_pandas_day_time_internal(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): @@ -303,6 +376,16 @@ def check_create_data_frame_to_pandas_day_time_internal(self, arrow_enabled): pdf = df.toPandas() assert_frame_equal(origin, pdf) + def test_create_data_frame_to_arrow_day_time_internal(self): + origin = pa.table({"a": [datetime.timedelta(microseconds=123)]}) + df = self.spark.createDataFrame(origin) + df.select( + assert_true(lit("INTERVAL '0 00:00:00.000123' DAY TO SECOND") == df.a.cast("string")) + ).collect() + + t = df.toArrow() + self.assertTrue(origin.equals(t)) + def test_toPandas_respect_session_timezone(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): @@ -333,12 +416,52 @@ def check_toPandas_respect_session_timezone(self, arrow_enabled): ) assert_frame_equal(pdf_ny, pdf_la_corrected) + def test_toArrow_keep_utc_timezone(self): + df = self.spark.createDataFrame(self.data, schema=self.schema) + + timezone = "America/Los_Angeles" + 
with self.sql_conf({"spark.sql.session.timeZone": timezone}): + t_la = df.toArrow() + + timezone = "America/New_York" + with self.sql_conf({"spark.sql.session.timeZone": timezone}): + t_ny = df.toArrow() + + self.assertTrue(t_ny.equals(t_la)) + self.assertEqual(t_la["8_timestamp_t"].type.tz, "UTC") + self.assertEqual(t_ny["8_timestamp_t"].type.tz, "UTC") + def test_pandas_round_trip(self): pdf = self.create_pandas_data_frame() df = self.spark.createDataFrame(self.data, schema=self.schema) pdf_arrow = df.toPandas() assert_frame_equal(pdf_arrow, pdf) + def test_arrow_round_trip(self): + import pyarrow.compute as pc + + t_in = self.create_arrow_table() + + # Convert timezone-naive local timestamp column in input table to UTC + # to enable comparison to UTC timestamp column in output table + timezone = self.spark.conf.get("spark.sql.session.timeZone") + t_in = t_in.set_column( + t_in.schema.get_field_index("8_timestamp_t"), + "8_timestamp_t", + pc.assume_timezone(t_in["8_timestamp_t"], timezone), + ) + t_in = t_in.cast( + t_in.schema.set( + t_in.schema.get_field_index("8_timestamp_t"), + pa.field("8_timestamp_t", pa.timestamp("us", tz="UTC")), + ) + ) + + df = self.spark.createDataFrame(self.data, schema=self.schema) + t_out = df.toArrow() + + self.assertTrue(t_out.equals(t_in)) + def test_pandas_self_destruct(self): import pyarrow as pa @@ -402,6 +525,13 @@ def raise_exception(): with self.assertRaisesRegex(Exception, "My error"): df.toPandas() + def test_createDataFrame_arrow_pandas(self): + table = self.create_arrow_table() + pdf = self.create_pandas_data_frame() + df_arrow = self.spark.createDataFrame(table) + df_pandas = self.spark.createDataFrame(pdf) + self.assertEqual(df_arrow.collect(), df_pandas.collect()) + def _createDataFrame_toggle(self, data, schema=None): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": False}): df_no_arrow = self.spark.createDataFrame(data, schema=schema) @@ -415,12 +545,12 @@ def test_createDataFrame_toggle(self): df_no_arrow, df_arrow = self._createDataFrame_toggle(pdf, schema=self.schema) self.assertEqual(df_no_arrow.collect(), df_arrow.collect()) - def test_createDataFrame_respect_session_timezone(self): + def test_createDataFrame_pandas_respect_session_timezone(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): - self.check_createDataFrame_respect_session_timezone(arrow_enabled) + self.check_createDataFrame_pandas_respect_session_timezone(arrow_enabled) - def check_createDataFrame_respect_session_timezone(self, arrow_enabled): + def check_createDataFrame_pandas_respect_session_timezone(self, arrow_enabled): from datetime import timedelta pdf = self.create_pandas_data_frame() @@ -450,18 +580,46 @@ def check_createDataFrame_respect_session_timezone(self, arrow_enabled): ] self.assertEqual(result_ny, result_la_corrected) - def test_createDataFrame_with_schema(self): + def test_createDataFrame_arrow_respect_session_timezone(self): + from datetime import timedelta + + t = self.create_arrow_table() + timezone = "America/Los_Angeles" + with self.sql_conf({"spark.sql.session.timeZone": timezone}): + df_la = self.spark.createDataFrame(t, schema=self.schema) + result_la = df_la.collect() + + timezone = "America/New_York" + with self.sql_conf({"spark.sql.session.timeZone": timezone}): + df_ny = self.spark.createDataFrame(t, schema=self.schema) + result_ny = df_ny.collect() + + self.assertNotEqual(result_ny, result_la) + + # Correct result_la by adjusting 3 hours difference between Los Angeles and New York + 
result_la_corrected = [ + Row( + **{ + k: v - timedelta(hours=3) if k == "8_timestamp_t" else v + for k, v in row.asDict().items() + } + ) + for row in result_la + ] + self.assertEqual(result_ny, result_la_corrected) + + def test_createDataFrame_pandas_with_schema(self): pdf = self.create_pandas_data_frame() df = self.spark.createDataFrame(pdf, schema=self.schema) self.assertEqual(self.schema, df.schema) pdf_arrow = df.toPandas() assert_frame_equal(pdf_arrow, pdf) - def test_createDataFrame_with_incorrect_schema(self): + def test_createDataFrame_pandas_with_incorrect_schema(self): with self.quiet(): - self.check_createDataFrame_with_incorrect_schema() + self.check_createDataFrame_pandas_with_incorrect_schema() - def check_createDataFrame_with_incorrect_schema(self): + def check_createDataFrame_pandas_with_incorrect_schema(self): pdf = self.create_pandas_data_frame() fields = list(self.schema) fields[5], fields[6] = fields[6], fields[5] # swap decimal with date @@ -485,7 +643,15 @@ def check_createDataFrame_with_incorrect_schema(self): self.assertEqual(len(exception.args), 1) self.assertRegex(exception.args[0], "[D|d]ecimal.*got.*date") - def test_createDataFrame_with_names(self): + def test_createDataFrame_arrow_with_incorrect_schema(self): + t = self.create_arrow_table() + fields = list(self.schema) + fields[5], fields[6] = fields[6], fields[5] # swap decimal with date + wrong_schema = StructType(fields) + with self.assertRaises(Exception): + self.spark.createDataFrame(t, schema=wrong_schema) + + def test_createDataFrame_pandas_with_names(self): pdf = self.create_pandas_data_frame() new_names = list(map(str, range(len(self.schema.fieldNames())))) # Test that schema as a list of column names gets applied @@ -495,7 +661,17 @@ def test_createDataFrame_with_names(self): df = self.spark.createDataFrame(pdf, schema=tuple(new_names)) self.assertEqual(df.schema.fieldNames(), new_names) - def test_createDataFrame_column_name_encoding(self): + def test_createDataFrame_arrow_with_names(self): + t = self.create_arrow_table() + new_names = list(map(str, range(len(self.schema.fieldNames())))) + # Test that schema as a list of column names gets applied + df = self.spark.createDataFrame(t, schema=list(new_names)) + self.assertEqual(df.schema.fieldNames(), new_names) + # Test that schema as tuple of column names gets applied + df = self.spark.createDataFrame(t, schema=tuple(new_names)) + self.assertEqual(df.schema.fieldNames(), new_names) + + def test_createDataFrame_pandas_column_name_encoding(self): pdf = pd.DataFrame({"a": [1]}) columns = self.spark.createDataFrame(pdf).columns self.assertTrue(isinstance(columns[0], str)) @@ -504,6 +680,15 @@ def test_createDataFrame_column_name_encoding(self): self.assertTrue(isinstance(columns[0], str)) self.assertEqual(columns[0], "b") + def test_createDataFrame_arrow_column_name_encoding(self): + t = pa.table({"a": [1]}) + columns = self.spark.createDataFrame(t).columns + self.assertTrue(isinstance(columns[0], str)) + self.assertEqual(columns[0], "a") + columns = self.spark.createDataFrame(t, ["b"]).columns + self.assertTrue(isinstance(columns[0], str)) + self.assertEqual(columns[0], "b") + def test_createDataFrame_with_single_data_type(self): with self.quiet(): self.check_createDataFrame_with_single_data_type() @@ -531,6 +716,17 @@ def test_createDataFrame_does_not_modify_input(self): self.spark.createDataFrame(pdf, schema=self.schema) self.assertTrue(pdf.equals(pdf_copy)) + def test_createDataFrame_arrow_truncate_timestamp(self): + t_in = pa.Table.from_arrays( + 
[pa.array([1234567890123456789], type=pa.timestamp("ns", tz="UTC"))], names=["ts"] + ) + df = self.spark.createDataFrame(t_in) + t_out = df.toArrow() + expected = pa.Table.from_arrays( + [pa.array([1234567890123456], type=pa.timestamp("us", tz="UTC"))], names=["ts"] + ) + self.assertTrue(t_out.equals(expected)) + def test_schema_conversion_roundtrip(self): from pyspark.sql.pandas.types import from_arrow_schema, to_arrow_schema @@ -565,12 +761,12 @@ def check_createDataFrame_with_ndarray(self, arrow_enabled): ): self.spark.createDataFrame(np.array(0)) - def test_createDataFrame_with_array_type(self): + def test_createDataFrame_pandas_with_array_type(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): - self.check_createDataFrame_with_array_type(arrow_enabled) + self.check_createDataFrame_pandas_with_array_type(arrow_enabled) - def check_createDataFrame_with_array_type(self, arrow_enabled): + def check_createDataFrame_pandas_with_array_type(self, arrow_enabled): pdf = pd.DataFrame({"a": [[1, 2], [3, 4]], "b": [["x", "y"], ["y", "z"]]}) with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": arrow_enabled}): df = self.spark.createDataFrame(pdf) @@ -580,6 +776,18 @@ def check_createDataFrame_with_array_type(self, arrow_enabled): for e in range(len(expected[r])): self.assertTrue(expected[r][e] == result[r][e]) + def test_createDataFrame_arrow_with_array_type_nulls(self): + t = pa.table({"a": [[1, 2], None, [3, 4]], "b": [["x", "y"], ["y", "z"], None]}) + df = self.spark.createDataFrame(t) + result = df.collect() + expected = [ + tuple(list(e) if e is not None else None for e in rec) + for rec in t.to_pandas().to_records(index=False) + ] + for r in range(len(expected)): + for e in range(len(expected[r])): + self.assertTrue(expected[r][e] == result[r][e]) + def test_toPandas_with_array_type(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): @@ -597,13 +805,28 @@ def check_toPandas_with_array_type(self, arrow_enabled): for e in range(len(expected[r])): self.assertTrue(expected[r][e] == result[r][e]) - def test_createDataFrame_with_map_type(self): + def test_toArrow_with_array_type_nulls(self): + expected = [([1, 2], ["x", "y"]), (None, ["y", "z"]), ([3, 4], None)] + array_schema = StructType( + [StructField("a", ArrayType(IntegerType())), StructField("b", ArrayType(StringType()))] + ) + df = self.spark.createDataFrame(expected, schema=array_schema) + t = df.toArrow() + result = [ + tuple(None if e is None else list(e) for e in rec) + for rec in t.to_pandas().to_records(index=False) + ] + for r in range(len(expected)): + for e in range(len(expected[r])): + self.assertTrue(expected[r][e] == result[r][e]) + + def test_createDataFrame_pandas_with_map_type(self): with self.quiet(): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): - self.check_createDataFrame_with_map_type(arrow_enabled) + self.check_createDataFrame_pandas_with_map_type(arrow_enabled) - def check_createDataFrame_with_map_type(self, arrow_enabled): + def check_createDataFrame_pandas_with_map_type(self, arrow_enabled): map_data = [{"a": 1}, {"b": 2, "c": 3}, {}, None, {"d": None}] pdf = pd.DataFrame({"id": [0, 1, 2, 3, 4], "m": map_data}) @@ -621,12 +844,52 @@ def check_createDataFrame_with_map_type(self, arrow_enabled): i, m = row self.assertEqual(m, map_data[i]) - def test_createDataFrame_with_struct_type(self): + def test_createDataFrame_arrow_with_map_type(self): + map_data = [{"a": 1}, {"b": 2, "c": 3}, {}, {}, 
{"d": None}] + + t = pa.table( + {"id": [0, 1, 2, 3, 4], "m": map_data}, + schema=pa.schema([("id", pa.int64()), ("m", pa.map_(pa.string(), pa.int64()))]), + ) + for schema in ( + "id long, m map", + StructType().add("id", LongType()).add("m", MapType(StringType(), LongType())), + ): + with self.subTest(schema=schema): + df = self.spark.createDataFrame(t, schema=schema) + + result = df.collect() + + for row in result: + i, m = row + self.assertEqual(m, map_data[i]) + + def test_createDataFrame_arrow_with_map_type_nulls(self): + map_data = [{"a": 1}, {"b": 2, "c": 3}, {}, None, {"d": None}] + + t = pa.table( + {"id": [0, 1, 2, 3, 4], "m": map_data}, + schema=pa.schema([("id", pa.int64()), ("m", pa.map_(pa.string(), pa.int64()))]), + ) + for schema in ( + "id long, m map", + StructType().add("id", LongType()).add("m", MapType(StringType(), LongType())), + ): + with self.subTest(schema=schema): + df = self.spark.createDataFrame(t, schema=schema) + + result = df.collect() + + for row in result: + i, m = row + self.assertEqual(m, map_data[i]) + + def test_createDataFrame_pandas_with_struct_type(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): - self.check_createDataFrame_with_struct_type(arrow_enabled) + self.check_createDataFrame_pandas_with_struct_type(arrow_enabled) - def check_createDataFrame_with_struct_type(self, arrow_enabled): + def check_createDataFrame_pandas_with_struct_type(self, arrow_enabled): pdf = pd.DataFrame( {"a": [Row(1, "a"), Row(2, "b")], "b": [{"s": 3, "t": "x"}, {"s": 4, "t": "y"}]} ) @@ -647,6 +910,42 @@ def check_createDataFrame_with_struct_type(self, arrow_enabled): expected[r][e] == result[r][e], f"{expected[r][e]} == {result[r][e]}" ) + def test_createDataFrame_pandas_with_struct_type(self): + for arrow_enabled in [True, False]: + with self.subTest(arrow_enabled=arrow_enabled): + self.check_createDataFrame_pandas_with_struct_type(arrow_enabled) + + def test_createDataFrame_arrow_with_struct_type_nulls(self): + t = pa.table( + { + "a": [{"x": 1, "y": "a"}, None, {"x": None, "y": "b"}], + "b": [{"s": 3, "t": None}, {"s": 4, "t": "y"}, None], + }, + ) + for schema in ( + "a struct, b struct", + StructType() + .add("a", StructType().add("x", LongType()).add("y", StringType())) + .add("b", StructType().add("s", LongType()).add("t", StringType())), + ): + with self.subTest(schema=schema): + df = self.spark.createDataFrame(t, schema) + result = df.collect() + expected = [ + ( + Row( + a=None if rec[0] is None else (Row(**rec[0])), + b=None if rec[1] is None else Row(**rec[1]), + ) + ) + for rec in t.to_pandas().to_records(index=False) + ] + for r in range(len(expected)): + for e in range(len(expected[r])): + self.assertTrue( + expected[r][e] == result[r][e], f"{expected[r][e]} == {result[r][e]}" + ) + def test_createDataFrame_with_string_dtype(self): # SPARK-34521: spark.createDataFrame does not support Pandas StringDtype extension type with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": True}): @@ -690,6 +989,22 @@ def check_toPandas_with_map_type(self, arrow_enabled): pdf = df.toPandas() assert_frame_equal(origin, pdf) + def test_toArrow_with_map_type(self): + origin = pa.table( + {"id": [0, 1, 2, 3], "m": [{}, {"a": 1}, {"a": 1, "b": 2}, {"a": 1, "b": 2, "c": 3}]}, + schema=pa.schema( + [pa.field("id", pa.int64()), pa.field("m", pa.map_(pa.string(), pa.int64()), True)] + ), + ) + for schema in [ + "id long, m map", + StructType().add("id", LongType()).add("m", MapType(StringType(), LongType())), + ]: + df = 
self.spark.createDataFrame(origin, schema=schema) + + t = df.toArrow() + self.assertTrue(origin.equals(t)) + def test_toPandas_with_map_type_nulls(self): with self.quiet(): for arrow_enabled in [True, False]: @@ -712,12 +1027,29 @@ def check_toPandas_with_map_type_nulls(self, arrow_enabled): pdf = df.toPandas() assert_frame_equal(origin, pdf) - def test_createDataFrame_with_int_col_names(self): + def test_toArrow_with_map_type_nulls(self): + map_data = [{"a": 1}, {"b": 2, "c": 3}, {}, None, {"d": None}] + + origin = pa.table( + {"id": [0, 1, 2, 3, 4], "m": map_data}, + schema=pa.schema( + [pa.field("id", pa.int64()), pa.field("m", pa.map_(pa.string(), pa.int64()), True)] + ), + ) + for schema in [ + "id long, m map", + StructType().add("id", LongType()).add("m", MapType(StringType(), LongType())), + ]: + df = self.spark.createDataFrame(origin, schema=schema) + pdf = df.toArrow().to_pandas() + assert_frame_equal(origin.to_pandas(), pdf) + + def test_createDataFrame_pandas_with_int_col_names(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): - self.check_createDataFrame_with_int_col_names(arrow_enabled) + self.check_createDataFrame_pandas_with_int_col_names(arrow_enabled) - def check_createDataFrame_with_int_col_names(self, arrow_enabled): + def check_createDataFrame_pandas_with_int_col_names(self, arrow_enabled): import numpy as np pdf = pd.DataFrame(np.random.rand(4, 2)) @@ -726,6 +1058,13 @@ def check_createDataFrame_with_int_col_names(self, arrow_enabled): pdf_col_names = [str(c) for c in pdf.columns] self.assertEqual(pdf_col_names, df.columns) + def test_createDataFrame_arrow_with_int_col_names(self): + import numpy as np + + t = pa.table(pd.DataFrame(np.random.rand(4, 2))) + df = self.spark.createDataFrame(t) + self.assertEqual(t.schema.names, df.columns) + # Regression test for SPARK-23314 def test_timestamp_dst(self): # Daylight saving time for Los Angeles for 2015 is Sun, Nov 1 at 2:00 am @@ -810,6 +1149,23 @@ def test_createDataFrame_with_category_type(self): self.assertIsInstance(arrow_first_category_element, str) self.assertIsInstance(spark_first_category_element, str) + def test_createDataFrame_with_dictionary_type_nulls(self): + import pyarrow.compute as pc + + t = pa.table({"A": ["a", "b", "c", None, "a"]}) + t = t.add_column(1, "B", pc.dictionary_encode(t["A"])) + category_first_element = sorted(t["B"].combine_chunks().dictionary.to_pylist())[0] + + df = self.spark.createDataFrame(t) + type = df.dtypes[1][1] + result = df.toArrow() + result_first_category_element = result["B"][0].as_py() + + # ensure original category elements are string + self.assertIsInstance(category_first_element, str) + self.assertEqual(type, "string") + self.assertIsInstance(result_first_category_element, str) + def test_createDataFrame_with_float_index(self): # SPARK-32098: float index should not produce duplicated or truncated Spark DataFrame self.assertEqual( @@ -830,7 +1186,8 @@ def test_createDataFrame_empty_partition(self): pdf = pd.DataFrame({"c1": [1], "c2": ["string"]}) df = self.spark.createDataFrame(pdf) self.assertEqual([Row(c1=1, c2="string")], df.collect()) - self.assertGreater(self.spark.sparkContext.defaultParallelism, len(pdf)) + if not is_remote_only(): + self.assertGreater(self._legacy_sc.defaultParallelism, len(pdf)) def test_toPandas_error(self): for arrow_enabled in [True, False]: @@ -847,6 +1204,15 @@ def check_toPandas_error(self, arrow_enabled): with self.assertRaises(ArithmeticException): self.spark.sql("select 1/0").toPandas() + def 
test_toArrow_error(self): + with self.sql_conf( + { + "spark.sql.ansi.enabled": True, + } + ): + with self.assertRaises(ArithmeticException): + self.spark.sql("select 1/0").toArrow() + def test_toPandas_duplicate_field_names(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): @@ -901,12 +1267,45 @@ def check_toPandas_duplicate_field_names(self, arrow_enabled): expected = pd.DataFrame.from_records(data, columns=schema.names) assert_frame_equal(df.toPandas(), expected) - def test_createDataFrame_duplicate_field_names(self): + def test_toArrow_duplicate_field_names(self): + data = [[1, 1], [2, 2]] + names = ["a", "a"] + df = self.spark.createDataFrame(data, names) + + expected = pa.table( + [[1, 2], [1, 2]], + schema=pa.schema([pa.field("a", pa.int64()), pa.field("a", pa.int64())]), + ) + + self.assertTrue(df.toArrow().equals(expected)) + + data = [Row(Row("a", 1), Row(2, 3, "b", 4, "c")), Row(Row("x", 6), Row(7, 8, "y", 9, "z"))] + schema = ( + StructType() + .add("struct", StructType().add("x", StringType()).add("x", IntegerType())) + .add( + "struct", + StructType() + .add("a", IntegerType()) + .add("x", IntegerType()) + .add("x", StringType()) + .add("y", IntegerType()) + .add("y", StringType()), + ) + ) + df = self.spark.createDataFrame(data, schema=schema) + + with self.assertRaisesRegex( + UnsupportedOperationException, "DUPLICATED_FIELD_NAME_IN_ARROW_STRUCT" + ): + df.toArrow() + + def test_createDataFrame_pandas_duplicate_field_names(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): - self.check_createDataFrame_duplicate_field_names(arrow_enabled) + self.check_createDataFrame_pandas_duplicate_field_names(arrow_enabled) - def check_createDataFrame_duplicate_field_names(self, arrow_enabled): + def check_createDataFrame_pandas_duplicate_field_names(self, arrow_enabled): schema = ( StructType() .add("struct", StructType().add("x", StringType()).add("x", IntegerType())) @@ -929,6 +1328,66 @@ def check_createDataFrame_duplicate_field_names(self, arrow_enabled): self.assertEqual(df.collect(), data) + def test_createDataFrame_arrow_duplicate_field_names(self): + t = pa.table( + [[1, 2], [1, 2]], + schema=pa.schema([pa.field("a", pa.int64()), pa.field("a", pa.int64())]), + ) + schema = StructType().add("a", LongType()).add("a", LongType()) + + df = self.spark.createDataFrame(t) + + self.assertTrue(df.toArrow().equals(t)) + + df = self.spark.createDataFrame(t, schema=schema) + + self.assertTrue(df.toArrow().equals(t)) + + t = pa.table( + [ + pa.StructArray.from_arrays( + [ + pa.array(["a", "x"], type=pa.string()), + pa.array([1, 6], type=pa.int32()), + ], + names=["x", "x"], + ), + pa.StructArray.from_arrays( + [ + pa.array([2, 7], type=pa.int32()), + pa.array([3, 8], type=pa.int32()), + pa.array(["b", "y"], type=pa.string()), + pa.array([4, 9], type=pa.int32()), + pa.array(["c", "z"], type=pa.string()), + ], + names=["a", "x", "x", "y", "y"], + ), + ], + names=["struct", "struct"], + ) + schema = ( + StructType() + .add("struct", StructType().add("x", StringType()).add("x", IntegerType())) + .add( + "struct", + StructType() + .add("a", IntegerType()) + .add("x", IntegerType()) + .add("x", StringType()) + .add("y", IntegerType()) + .add("y", StringType()), + ) + ) + with self.assertRaisesRegex( + UnsupportedOperationException, "DUPLICATED_FIELD_NAME_IN_ARROW_STRUCT" + ): + self.spark.createDataFrame(t) + + with self.assertRaisesRegex( + UnsupportedOperationException, "DUPLICATED_FIELD_NAME_IN_ARROW_STRUCT" + ): + 
self.spark.createDataFrame(t, schema) + def test_toPandas_empty_columns(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): @@ -940,38 +1399,39 @@ def check_toPandas_empty_columns(self, arrow_enabled): with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": arrow_enabled}): assert_frame_equal(df.toPandas(), pd.DataFrame(columns=[], index=range(2))) - def test_createDataFrame_nested_timestamp(self): + def test_toArrow_empty_columns(self): + df = self.spark.range(2).select([]) + + self.assertTrue(df.toArrow().equals(pa.table([]))) + + def test_toPandas_empty_rows(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): - self.check_createDataFrame_nested_timestamp(arrow_enabled) + self.check_toPandas_empty_rows(arrow_enabled) - def check_createDataFrame_nested_timestamp(self, arrow_enabled): - schema = ( - StructType() - .add("ts", TimestampType()) - .add("ts_ntz", TimestampNTZType()) - .add( - "struct", StructType().add("ts", TimestampType()).add("ts_ntz", TimestampNTZType()) + def check_toPandas_empty_rows(self, arrow_enabled): + df = self.spark.range(2).limit(0) + + with self.sql_conf({"spark.sql.execution.arrow.pyspark.enabled": arrow_enabled}): + assert_frame_equal(df.toPandas(), pd.DataFrame({"id": pd.Series([], dtype="int64")})) + + def test_toArrow_empty_rows(self): + df = self.spark.range(2).limit(0) + + self.assertTrue( + df.toArrow().equals( + pa.Table.from_arrays([[]], schema=pa.schema([pa.field("id", pa.int64(), False)])) ) - .add("array", ArrayType(TimestampType())) - .add("array_ntz", ArrayType(TimestampNTZType())) - .add("map", MapType(StringType(), TimestampType())) - .add("map_ntz", MapType(StringType(), TimestampNTZType())) ) - data = [ - Row( - datetime.datetime(2023, 1, 1, 0, 0, 0), - datetime.datetime(2023, 1, 1, 0, 0, 0), - Row( - datetime.datetime(2023, 1, 1, 0, 0, 0), - datetime.datetime(2023, 1, 1, 0, 0, 0), - ), - [datetime.datetime(2023, 1, 1, 0, 0, 0)], - [datetime.datetime(2023, 1, 1, 0, 0, 0)], - dict(ts=datetime.datetime(2023, 1, 1, 0, 0, 0)), - dict(ts_ntz=datetime.datetime(2023, 1, 1, 0, 0, 0)), - ) - ] + + def test_createDataFrame_pandas_nested_timestamp(self): + for arrow_enabled in [True, False]: + with self.subTest(arrow_enabled=arrow_enabled): + self.check_createDataFrame_pandas_nested_timestamp(arrow_enabled) + + def check_createDataFrame_pandas_nested_timestamp(self, arrow_enabled): + schema = self.schema_nested_timestamp + data = self.data_nested_timestamp pdf = pd.DataFrame.from_records(data, columns=schema.names) with self.sql_conf( @@ -982,22 +1442,26 @@ def check_createDataFrame_nested_timestamp(self, arrow_enabled): ): df = self.spark.createDataFrame(pdf, schema) - expected = Row( - ts=datetime.datetime(2022, 12, 31, 21, 0, 0), - ts_ntz=datetime.datetime(2023, 1, 1, 0, 0, 0), - struct=Row( - ts=datetime.datetime(2022, 12, 31, 21, 0, 0), - ts_ntz=datetime.datetime(2023, 1, 1, 0, 0, 0), - ), - array=[datetime.datetime(2022, 12, 31, 21, 0, 0)], - array_ntz=[datetime.datetime(2023, 1, 1, 0, 0, 0)], - map=dict(ts=datetime.datetime(2022, 12, 31, 21, 0, 0)), - map_ntz=dict(ts_ntz=datetime.datetime(2023, 1, 1, 0, 0, 0)), - ) + expected = self.data_nested_timestamp_expected_ny + + self.assertEqual(df.first(), expected) + + def test_createDataFrame_arrow_nested_timestamp(self): + from pyspark.sql.pandas.types import to_arrow_schema + + schema = self.schema_nested_timestamp + data = self.data_nested_timestamp + pdf = pd.DataFrame.from_records(data, columns=schema.names) + 
arrow_schema = to_arrow_schema(schema, timestamp_utc=False) + t = pa.Table.from_pandas(pdf, arrow_schema) + + with self.sql_conf({"spark.sql.session.timeZone": "America/New_York"}): + df = self.spark.createDataFrame(t, schema) + + expected = self.data_nested_timestamp_expected_ny self.assertEqual(df.first(), expected) - @unittest.skipIf(sys.version_info < (3, 9), "zoneinfo is available from Python 3.9+") def test_toPandas_timestmap_tzinfo(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): @@ -1031,32 +1495,8 @@ def test_toPandas_nested_timestamp(self): self.check_toPandas_nested_timestamp(arrow_enabled) def check_toPandas_nested_timestamp(self, arrow_enabled): - schema = ( - StructType() - .add("ts", TimestampType()) - .add("ts_ntz", TimestampNTZType()) - .add( - "struct", StructType().add("ts", TimestampType()).add("ts_ntz", TimestampNTZType()) - ) - .add("array", ArrayType(TimestampType())) - .add("array_ntz", ArrayType(TimestampNTZType())) - .add("map", MapType(StringType(), TimestampType())) - .add("map_ntz", MapType(StringType(), TimestampNTZType())) - ) - data = [ - Row( - datetime.datetime(2023, 1, 1, 0, 0, 0), - datetime.datetime(2023, 1, 1, 0, 0, 0), - Row( - datetime.datetime(2023, 1, 1, 0, 0, 0), - datetime.datetime(2023, 1, 1, 0, 0, 0), - ), - [datetime.datetime(2023, 1, 1, 0, 0, 0)], - [datetime.datetime(2023, 1, 1, 0, 0, 0)], - dict(ts=datetime.datetime(2023, 1, 1, 0, 0, 0)), - dict(ts_ntz=datetime.datetime(2023, 1, 1, 0, 0, 0)), - ) - ] + schema = self.schema_nested_timestamp + data = self.data_nested_timestamp df = self.spark.createDataFrame(data, schema) with self.sql_conf( @@ -1087,6 +1527,57 @@ def check_toPandas_nested_timestamp(self, arrow_enabled): assert_frame_equal(pdf, expected) + def test_toArrow_nested_timestamp(self): + schema = self.schema_nested_timestamp + data = self.data_nested_timestamp + df = self.spark.createDataFrame(data, schema) + + t = df.toArrow() + + from pyspark.sql.pandas.types import to_arrow_schema + + arrow_schema = to_arrow_schema(schema) + expected = pa.Table.from_pydict( + { + "ts": [datetime.datetime(2023, 1, 1, 8, 0, 0)], + "ts_ntz": [datetime.datetime(2023, 1, 1, 0, 0, 0)], + "struct": [ + Row( + datetime.datetime(2023, 1, 1, 8, 0, 0), + datetime.datetime(2023, 1, 1, 0, 0, 0), + ) + ], + "array": [[datetime.datetime(2023, 1, 1, 8, 0, 0)]], + "array_ntz": [[datetime.datetime(2023, 1, 1, 0, 0, 0)]], + "map": [dict(ts=datetime.datetime(2023, 1, 1, 8, 0, 0))], + "map_ntz": [dict(ts_ntz=datetime.datetime(2023, 1, 1, 0, 0, 0))], + }, + schema=arrow_schema, + ) + + self.assertTrue(t.equals(expected)) + + def test_arrow_map_timestamp_nulls_round_trip(self): + origin_schema = pa.schema([("map", pa.map_(pa.string(), pa.timestamp("us", tz="UTC")))]) + origin = pa.table( + [[dict(ts=datetime.datetime(2023, 1, 1, 8, 0, 0)), None]], + schema=origin_schema, + ) + df = self.spark.createDataFrame(origin) + t = df.toArrow() + + # SPARK-48302: PyArrow versions before 17.0.0 replaced nulls with empty lists when + # reconstructing MapArray columns to localize timestamps + if LooseVersion(pa.__version__) >= LooseVersion("17.0.0"): + expected = origin + else: + expected = pa.table( + [[dict(ts=datetime.datetime(2023, 1, 1, 8, 0, 0)), []]], + schema=origin_schema, + ) + + self.assertTrue(t.equals(expected)) + def test_createDataFrame_udt(self): for arrow_enabled in [True, False]: with self.subTest(arrow_enabled=arrow_enabled): @@ -1183,6 +1674,50 @@ def test_negative_and_zero_batch_size(self): pdf = pd.DataFrame({"a": 
[123]}) assert_frame_equal(pdf, self.spark.createDataFrame(pdf).toPandas()) + def test_createDataFrame_arrow_large_string(self): + a = pa.array(["a"] * 5, type=pa.large_string()) + t = pa.table([a], ["ls"]) + df = self.spark.createDataFrame(t) + self.assertIsInstance(df.schema["ls"].dataType, StringType) + + def test_createDataFrame_arrow_large_binary(self): + a = pa.array(["a"] * 5, type=pa.large_binary()) + t = pa.table([a], ["lb"]) + df = self.spark.createDataFrame(t) + self.assertIsInstance(df.schema["lb"].dataType, BinaryType) + + def test_createDataFrame_arrow_large_list(self): + a = pa.array([[-1, 3]] * 5, type=pa.large_list(pa.int32())) + t = pa.table([a], ["ll"]) + df = self.spark.createDataFrame(t) + self.assertIsInstance(df.schema["ll"].dataType, ArrayType) + + def test_createDataFrame_arrow_large_list_int64_offset(self): + # Check for expected failure if the large list contains an index >= 2^31 + a = pa.LargeListArray.from_arrays( + [0, 2**31], pa.NullArray.from_buffers(pa.null(), 2**31, [None]) + ) + t = pa.table([a], ["ll"]) + with self.assertRaises(Exception): + self.spark.createDataFrame(t) + + def test_createDataFrame_arrow_fixed_size_binary(self): + a = pa.array(["a"] * 5, type=pa.binary(1)) + t = pa.table([a], ["fsb"]) + df = self.spark.createDataFrame(t) + self.assertIsInstance(df.schema["fsb"].dataType, BinaryType) + + def test_createDataFrame_arrow_fixed_size_list(self): + a = pa.array([[-1, 3]] * 5, type=pa.list_(pa.int32(), 2)) + t = pa.table([a], ["fsl"]) + if LooseVersion(pa.__version__) < LooseVersion("14.0.0"): + # PyArrow versions before 14.0.0 do not support casting FixedSizeListArray to ListArray + with self.assertRaises(PySparkTypeError): + df = self.spark.createDataFrame(t) + else: + df = self.spark.createDataFrame(t) + self.assertIsInstance(df.schema["fsl"].dataType, ArrayType) + @unittest.skipIf( not have_pandas or not have_pyarrow, diff --git a/python/pyspark/sql/tests/test_arrow_map.py b/python/pyspark/sql/tests/test_arrow_map.py index f5fc2ea29ebad..2e82869230db4 100644 --- a/python/pyspark/sql/tests/test_arrow_map.py +++ b/python/pyspark/sql/tests/test_arrow_map.py @@ -103,14 +103,14 @@ def bad_iter_elem(_): with self.assertRaisesRegex( PythonException, "Return type of the user-defined function should be iterator " - "of pyarrow.RecordBatch, but is int.", + "of pyarrow.RecordBatch, but is int", ): (self.spark.range(10, numPartitions=3).mapInArrow(not_iter, "a int").count()) with self.assertRaisesRegex( PythonException, "Return type of the user-defined function should be iterator " - "of pyarrow.RecordBatch, but is iterator of int.", + "of pyarrow.RecordBatch, but is iterator of int", ): (self.spark.range(10, numPartitions=3).mapInArrow(bad_iter_elem, "a int").count()) diff --git a/python/pyspark/sql/tests/test_arrow_python_udf.py b/python/pyspark/sql/tests/test_arrow_python_udf.py index 23f302ec3c8d3..5a66d61cb66a2 100644 --- a/python/pyspark/sql/tests/test_arrow_python_udf.py +++ b/python/pyspark/sql/tests/test_arrow_python_udf.py @@ -17,7 +17,7 @@ import unittest -from pyspark.errors import PythonException, PySparkNotImplementedError +from pyspark.errors import AnalysisException, PythonException, PySparkNotImplementedError from pyspark.sql import Row from pyspark.sql.functions import udf from pyspark.sql.tests.test_udf import BaseUDFTestsMixin @@ -197,6 +197,28 @@ def test_warn_no_args(self): " without arguments.", ) + def test_named_arguments_negative(self): + @udf("int") + def test_udf(a, b): + return a + b + + self.spark.udf.register("test_udf", 
test_udf) + + with self.assertRaisesRegex( + AnalysisException, + "DUPLICATE_ROUTINE_PARAMETER_ASSIGNMENT.DOUBLE_NAMED_ARGUMENT_REFERENCE", + ): + self.spark.sql("SELECT test_udf(a => id, a => id * 10) FROM range(2)").show() + + with self.assertRaisesRegex(AnalysisException, "UNEXPECTED_POSITIONAL_ARGUMENT"): + self.spark.sql("SELECT test_udf(a => id, id * 10) FROM range(2)").show() + + with self.assertRaises(PythonException): + self.spark.sql("SELECT test_udf(c => 'x') FROM range(2)").show() + + with self.assertRaises(PythonException): + self.spark.sql("SELECT test_udf(id, a => id * 10) FROM range(2)").show() + class PythonUDFArrowTests(PythonUDFArrowTestsMixin, ReusedSQLTestCase): @classmethod diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py index e51ae69814bdd..ea17febc00e38 100644 --- a/python/pyspark/sql/tests/test_column.py +++ b/python/pyspark/sql/tests/test_column.py @@ -19,7 +19,7 @@ from itertools import chain from pyspark.sql import Column, Row from pyspark.sql import functions as sf -from pyspark.sql.types import StructType, StructField, LongType +from pyspark.sql.types import StructType, StructField, IntegerType, LongType from pyspark.errors import AnalysisException, PySparkTypeError, PySparkValueError from pyspark.testing.sqlutils import ReusedSQLTestCase @@ -42,7 +42,7 @@ def test_and_in_expression(self): def test_validate_column_types(self): from pyspark.sql.functions import udf, to_json - from pyspark.sql.column import _to_java_column + from pyspark.sql.classic.column import _to_java_column self.assertTrue("Column" in _to_java_column("a").getClass().toString()) self.assertTrue("Column" in _to_java_column("a").getClass().toString()) @@ -228,6 +228,17 @@ def test_alias_negative(self): message_parameters={"arg_name": "metadata"}, ) + def test_cast_str_representation(self): + self.assertEqual(str(sf.col("a").cast("int")), "Column<'CAST(a AS INT)'>") + self.assertEqual(str(sf.col("a").cast("INT")), "Column<'CAST(a AS INT)'>") + self.assertEqual(str(sf.col("a").cast(IntegerType())), "Column<'CAST(a AS INT)'>") + self.assertEqual(str(sf.col("a").cast(LongType())), "Column<'CAST(a AS BIGINT)'>") + + self.assertEqual(str(sf.col("a").try_cast("int")), "Column<'TRY_CAST(a AS INT)'>") + self.assertEqual(str(sf.col("a").try_cast("INT")), "Column<'TRY_CAST(a AS INT)'>") + self.assertEqual(str(sf.col("a").try_cast(IntegerType())), "Column<'TRY_CAST(a AS INT)'>") + self.assertEqual(str(sf.col("a").try_cast(LongType())), "Column<'TRY_CAST(a AS BIGINT)'>") + def test_cast_negative(self): with self.assertRaises(PySparkTypeError) as pe: self.spark.range(1).id.cast(123) @@ -248,6 +259,18 @@ def test_over_negative(self): message_parameters={"arg_name": "window", "arg_type": "int"}, ) + def test_eqnullsafe_classmethod_usage(self): + df = self.spark.range(1) + self.assertEqual(df.select(Column.eqNullSafe(df.id, df.id)).first()[0], True) + + def test_isinstance_dataframe(self): + self.assertIsInstance(self.spark.range(1).id, Column) + + def test_expr_str_representation(self): + expression = sf.expr("foo") + when_cond = sf.when(expression, sf.lit(None)) + self.assertEqual(str(when_cond), "Column<'CASE WHEN foo THEN NULL END'>") + class ColumnTests(ColumnTestsMixin, ReusedSQLTestCase): pass diff --git a/python/pyspark/sql/tests/test_context.py b/python/pyspark/sql/tests/test_context.py index b381833314861..f363b8748c0b9 100644 --- a/python/pyspark/sql/tests/test_context.py +++ b/python/pyspark/sql/tests/test_context.py @@ -26,13 +26,13 @@ from pyspark 
import SparkContext, SQLContext from pyspark.sql import Row, SparkSession from pyspark.sql.types import StructType, StringType, StructField -from pyspark.testing.utils import ReusedPySparkTestCase +from pyspark.testing.sqlutils import ReusedSQLTestCase -class HiveContextSQLTests(ReusedPySparkTestCase): +class HiveContextSQLTests(ReusedSQLTestCase): @classmethod def setUpClass(cls): - ReusedPySparkTestCase.setUpClass() + ReusedSQLTestCase.setUpClass() cls.tempdir = tempfile.NamedTemporaryFile(delete=False) cls.hive_available = True cls.spark = None @@ -58,7 +58,7 @@ def setUp(self): @classmethod def tearDownClass(cls): - ReusedPySparkTestCase.tearDownClass() + ReusedSQLTestCase.tearDownClass() shutil.rmtree(cls.tempdir.name, ignore_errors=True) if cls.spark is not None: cls.spark.stop() @@ -100,23 +100,20 @@ def test_save_and_load_table(self): self.spark.sql("DROP TABLE savedJsonTable") self.spark.sql("DROP TABLE externalJsonTable") - defaultDataSourceName = self.spark.conf.get( - "spark.sql.sources.default", "org.apache.spark.sql.parquet" - ) - self.spark.sql("SET spark.sql.sources.default=org.apache.spark.sql.json") - df.write.saveAsTable("savedJsonTable", path=tmpPath, mode="overwrite") - actual = self.spark.catalog.createTable("externalJsonTable", path=tmpPath) - self.assertEqual( - sorted(df.collect()), sorted(self.spark.sql("SELECT * FROM savedJsonTable").collect()) - ) - self.assertEqual( - sorted(df.collect()), - sorted(self.spark.sql("SELECT * FROM externalJsonTable").collect()), - ) - self.assertEqual(sorted(df.collect()), sorted(actual.collect())) - self.spark.sql("DROP TABLE savedJsonTable") - self.spark.sql("DROP TABLE externalJsonTable") - self.spark.sql("SET spark.sql.sources.default=" + defaultDataSourceName) + with self.sql_conf({"spark.sql.sources.default": "org.apache.spark.sql.json"}): + df.write.saveAsTable("savedJsonTable", path=tmpPath, mode="overwrite") + actual = self.spark.catalog.createTable("externalJsonTable", path=tmpPath) + self.assertEqual( + sorted(df.collect()), + sorted(self.spark.sql("SELECT * FROM savedJsonTable").collect()), + ) + self.assertEqual( + sorted(df.collect()), + sorted(self.spark.sql("SELECT * FROM externalJsonTable").collect()), + ) + self.assertEqual(sorted(df.collect()), sorted(actual.collect())) + self.spark.sql("DROP TABLE savedJsonTable") + self.spark.sql("DROP TABLE externalJsonTable") shutil.rmtree(tmpPath) diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 3f6a8eece5b09..c7cf43a334541 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -23,7 +23,7 @@ import io from contextlib import redirect_stdout -from pyspark.sql import Row, functions +from pyspark.sql import Row, functions, DataFrame from pyspark.sql.functions import col, lit, count, struct from pyspark.sql.types import ( StringType, @@ -37,9 +37,7 @@ AnalysisException, IllegalArgumentException, PySparkTypeError, - ArithmeticException, - QueryContextType, - NumberFormatException, + PySparkValueError, ) from pyspark.testing.sqlutils import ( ReusedSQLTestCase, @@ -255,24 +253,22 @@ def test_ordering_of_with_columns_renamed(self): self.assertEqual(df2.columns, ["a"]) def test_drop_duplicates(self): - # SPARK-36034 test that drop duplicates throws a type error when in correct type provided df = self.spark.createDataFrame([("Alice", 50), ("Alice", 60)], ["name", "age"]) # shouldn't drop a non-null row self.assertEqual(df.dropDuplicates().count(), 2) 
self.assertEqual(df.dropDuplicates(["name"]).count(), 1) - self.assertEqual(df.dropDuplicates(["name", "age"]).count(), 2) - with self.assertRaises(PySparkTypeError) as pe: - df.dropDuplicates("name") + self.assertEqual(df.drop_duplicates(["name"]).count(), 1) + self.assertEqual(df.drop_duplicates(["name", "age"]).count(), 2) - self.check_error( - exception=pe.exception, - error_class="NOT_LIST_OR_TUPLE", - message_parameters={"arg_name": "subset", "arg_type": "str"}, - ) + # SPARK-48482 dropDuplicates should take varargs + self.assertEqual(df.dropDuplicates("name").count(), 1) + self.assertEqual(df.dropDuplicates("name", "age").count(), 2) + self.assertEqual(df.drop_duplicates("name").count(), 1) + self.assertEqual(df.drop_duplicates("name", "age").count(), 2) def test_drop_duplicates_with_ambiguous_reference(self): df1 = self.spark.createDataFrame([(14, "Tom"), (23, "Alice"), (16, "Bob")], ["age", "name"]) @@ -433,6 +429,11 @@ def test_sample(self): IllegalArgumentException, lambda: self.spark.range(1).sample(-1.0).count() ) + def test_sample_with_random_seed(self): + df = self.spark.range(10000).sample(0.1) + cnts = [df.count() for i in range(10)] + self.assertEqual(1, len(set(cnts))) + def test_toDF_with_string(self): df = self.spark.createDataFrame([("John", 30), ("Alice", 25), ("Bob", 28)]) data = [("John", 30), ("Alice", 25), ("Bob", 28)] @@ -529,7 +530,7 @@ def test_join_without_on(self): def test_invalid_join_method(self): df1 = self.spark.createDataFrame([("Alice", 5), ("Bob", 8)], ["name", "age"]) df2 = self.spark.createDataFrame([("Alice", 80), ("Bob", 90)], ["name", "height"]) - self.assertRaises(IllegalArgumentException, lambda: df1.join(df2, how="invalid-join-type")) + self.assertRaises(AnalysisException, lambda: df1.join(df2, how="invalid-join-type")) # Cartesian products require cross join syntax def test_require_cross(self): @@ -835,491 +836,29 @@ def test_duplicate_field_names(self): self.assertEqual(df.schema, schema) self.assertEqual(df.collect(), data) - def test_dataframe_error_context(self): - # SPARK-47274: Add more useful contexts for PySpark DataFrame API errors. 
- with self.sql_conf({"spark.sql.ansi.enabled": True}): - df = self.spark.range(10) - - # DataFrameQueryContext with pysparkLoggingInfo - divide - with self.assertRaises(ArithmeticException) as pe: - df.withColumn("div_zero", df.id / 0).collect() - self.check_error( - exception=pe.exception, - error_class="DIVIDE_BY_ZERO", - message_parameters={"config": '"spark.sql.ansi.enabled"'}, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="divide", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - plus - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("plus_invalid_type", df.id + "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="plus", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - minus - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("minus_invalid_type", df.id - "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="minus", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - multiply - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("multiply_invalid_type", df.id * "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="multiply", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - mod - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("mod_invalid_type", df.id % "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="mod", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - equalTo - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("equalTo_invalid_type", df.id == "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="equalTo", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - lt - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("lt_invalid_type", df.id < "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="lt", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - leq - with 
self.assertRaises(NumberFormatException) as pe: - df.withColumn("leq_invalid_type", df.id <= "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="leq", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - geq - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("geq_invalid_type", df.id >= "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="geq", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - gt - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("gt_invalid_type", df.id > "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="gt", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - eqNullSafe - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("eqNullSafe_invalid_type", df.id.eqNullSafe("string")).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="eqNullSafe", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - and - with self.assertRaises(AnalysisException) as pe: - df.withColumn("and_invalid_type", df.id & "string").collect() - self.check_error( - exception=pe.exception, - error_class="DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE", - message_parameters={ - "inputType": '"BOOLEAN"', - "actualDataType": '"BIGINT"', - "sqlExpr": '"(id AND string)"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="and", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - or - with self.assertRaises(AnalysisException) as pe: - df.withColumn("or_invalid_type", df.id | "string").collect() - self.check_error( - exception=pe.exception, - error_class="DATATYPE_MISMATCH.BINARY_OP_WRONG_TYPE", - message_parameters={ - "inputType": '"BOOLEAN"', - "actualDataType": '"BIGINT"', - "sqlExpr": '"(id OR string)"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="or", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - bitwiseOR - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("bitwiseOR_invalid_type", df.id.bitwiseOR("string")).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="bitwiseOR", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - bitwiseAND - with self.assertRaises(NumberFormatException) as pe: - 
df.withColumn("bitwiseAND_invalid_type", df.id.bitwiseAND("string")).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="bitwiseAND", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - bitwiseXOR - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("bitwiseXOR_invalid_type", df.id.bitwiseXOR("string")).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="bitwiseXOR", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - chained (`divide` is problematic) - with self.assertRaises(ArithmeticException) as pe: - df.withColumn("multiply_ten", df.id * 10).withColumn( - "divide_zero", df.id / 0 - ).withColumn("plus_ten", df.id + 10).withColumn("minus_ten", df.id - 10).collect() - self.check_error( - exception=pe.exception, - error_class="DIVIDE_BY_ZERO", - message_parameters={"config": '"spark.sql.ansi.enabled"'}, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="divide", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - chained (`plus` is problematic) - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("multiply_ten", df.id * 10).withColumn( - "divide_ten", df.id / 10 - ).withColumn("plus_string", df.id + "string").withColumn( - "minus_ten", df.id - 10 - ).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="plus", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - chained (`minus` is problematic) - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("multiply_ten", df.id * 10).withColumn( - "divide_ten", df.id / 10 - ).withColumn("plus_ten", df.id + 10).withColumn( - "minus_string", df.id - "string" - ).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="minus", - ) - - # DataFrameQueryContext with pysparkLoggingInfo - chained (`multiply` is problematic) - with self.assertRaises(NumberFormatException) as pe: - df.withColumn("multiply_string", df.id * "string").withColumn( - "divide_ten", df.id / 10 - ).withColumn("plus_ten", df.id + 10).withColumn("minus_ten", df.id - 10).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="multiply", - ) + def test_union_classmethod_usage(self): + df = self.spark.range(1) + self.assertEqual(DataFrame.union(df, df).collect(), [Row(id=0), Row(id=0)]) - # Multiple 
expressions in df.select (`divide` is problematic) - with self.assertRaises(ArithmeticException) as pe: - df.select(df.id - 10, df.id + 4, df.id / 0, df.id * 5).collect() - self.check_error( - exception=pe.exception, - error_class="DIVIDE_BY_ZERO", - message_parameters={"config": '"spark.sql.ansi.enabled"'}, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="divide", - ) + def test_isinstance_dataframe(self): + self.assertIsInstance(self.spark.range(1), DataFrame) - # Multiple expressions in df.select (`plus` is problematic) - with self.assertRaises(NumberFormatException) as pe: - df.select(df.id - 10, df.id + "string", df.id / 10, df.id * 5).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="plus", - ) - - # Multiple expressions in df.select (`minus` is problematic) - with self.assertRaises(NumberFormatException) as pe: - df.select(df.id - "string", df.id + 4, df.id / 10, df.id * 5).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="minus", - ) - - # Multiple expressions in df.select (`multiply` is problematic) - with self.assertRaises(NumberFormatException) as pe: - df.select(df.id - 10, df.id + 4, df.id / 10, df.id * "string").collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="multiply", - ) - - # Multiple expressions with pre-declared expressions (`divide` is problematic) - a = df.id / 10 - b = df.id / 0 - with self.assertRaises(ArithmeticException) as pe: - df.select(a, df.id + 4, b, df.id * 5).collect() - self.check_error( - exception=pe.exception, - error_class="DIVIDE_BY_ZERO", - message_parameters={"config": '"spark.sql.ansi.enabled"'}, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="divide", - ) - - # Multiple expressions with pre-declared expressions (`plus` is problematic) - a = df.id + "string" - b = df.id + 4 - with self.assertRaises(NumberFormatException) as pe: - df.select(df.id / 10, a, b, df.id * 5).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="plus", - ) - - # Multiple expressions with pre-declared expressions (`minus` is problematic) - a = df.id - "string" - b = df.id - 5 - with self.assertRaises(NumberFormatException) as pe: - df.select(a, df.id / 10, b, df.id * 5).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - 
pyspark_fragment="minus", - ) - - # Multiple expressions with pre-declared expressions (`multiply` is problematic) - a = df.id * "string" - b = df.id * 10 - with self.assertRaises(NumberFormatException) as pe: - df.select(a, df.id / 10, b, df.id + 5).collect() - self.check_error( - exception=pe.exception, - error_class="CAST_INVALID_INPUT", - message_parameters={ - "expression": "'string'", - "sourceType": '"STRING"', - "targetType": '"BIGINT"', - "ansiConfig": '"spark.sql.ansi.enabled"', - }, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="multiply", - ) - - # DataFrameQueryContext without pysparkLoggingInfo - with self.assertRaises(AnalysisException) as pe: - df.select("non-existing-column") - self.check_error( - exception=pe.exception, - error_class="UNRESOLVED_COLUMN.WITH_SUGGESTION", - message_parameters={"objectName": "`non-existing-column`", "proposal": "`id`"}, - query_context_type=QueryContextType.DataFrame, - pyspark_fragment="", - ) - - # SQLQueryContext - with self.assertRaises(ArithmeticException) as pe: - self.spark.sql("select 10/0").collect() - self.check_error( - exception=pe.exception, - error_class="DIVIDE_BY_ZERO", - message_parameters={"config": '"spark.sql.ansi.enabled"'}, - query_context_type=QueryContextType.SQL, - ) - - # No QueryContext - with self.assertRaises(AnalysisException) as pe: - self.spark.sql("select * from non-existing-table") - self.check_error( - exception=pe.exception, - error_class="INVALID_IDENTIFIER", - message_parameters={"ident": "non-existing-table"}, - query_context_type=None, - ) + def test_checkpoint_dataframe(self): + with io.StringIO() as buf, redirect_stdout(buf): + self.spark.range(1).localCheckpoint().explain() + self.assertIn("ExistingRDD", buf.getvalue()) class DataFrameTests(DataFrameTestsMixin, ReusedSQLTestCase): - pass + def test_query_execution_unsupported_in_classic(self): + with self.assertRaises(PySparkValueError) as pe: + self.spark.range(1).executionInfo + + self.check_error( + exception=pe.exception, + error_class="CLASSIC_OPERATION_NOT_SUPPORTED_ON_DF", + message_parameters={"member": "queryExecution"}, + ) if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/test_dataframe_query_context.py b/python/pyspark/sql/tests/test_dataframe_query_context.py new file mode 100644 index 0000000000000..e1a3e33df8593 --- /dev/null +++ b/python/pyspark/sql/tests/test_dataframe_query_context.py @@ -0,0 +1,488 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import unittest +from pyspark.errors import ( + AnalysisException, + ArithmeticException, + QueryContextType, + NumberFormatException, +) +from pyspark.testing.sqlutils import ( + ReusedSQLTestCase, +) + + +class DataFrameQueryContextTestsMixin: + def test_dataframe_query_context(self): + # SPARK-47274: Add more useful contexts for PySpark DataFrame API errors. + with self.sql_conf({"spark.sql.ansi.enabled": True}): + df = self.spark.range(10) + + # DataFrameQueryContext with pysparkLoggingInfo - divide + with self.assertRaises(ArithmeticException) as pe: + df.withColumn("div_zero", df.id / 0).collect() + self.check_error( + exception=pe.exception, + error_class="DIVIDE_BY_ZERO", + message_parameters={"config": '"spark.sql.ansi.enabled"'}, + query_context_type=QueryContextType.DataFrame, + fragment="__truediv__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - plus + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("plus_invalid_type", df.id + "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__add__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - minus + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("minus_invalid_type", df.id - "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__sub__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - multiply + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("multiply_invalid_type", df.id * "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__mul__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - mod + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("mod_invalid_type", df.id % "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__mod__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - equalTo + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("equalTo_invalid_type", df.id == "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__eq__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - lt + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("lt_invalid_type", df.id < "string").collect() + self.check_error( + exception=pe.exception, + 
error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__lt__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - leq + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("leq_invalid_type", df.id <= "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__le__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - geq + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("geq_invalid_type", df.id >= "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__ge__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - gt + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("gt_invalid_type", df.id > "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__gt__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - eqNullSafe + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("eqNullSafe_invalid_type", df.id.eqNullSafe("string")).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="eqNullSafe", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - bitwiseOR + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("bitwiseOR_invalid_type", df.id.bitwiseOR("string")).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="bitwiseOR", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - bitwiseAND + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("bitwiseAND_invalid_type", df.id.bitwiseAND("string")).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="bitwiseAND", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - bitwiseXOR + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("bitwiseXOR_invalid_type", df.id.bitwiseXOR("string")).collect() + self.check_error( + exception=pe.exception, + 
error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="bitwiseXOR", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - chained (`divide` is problematic) + with self.assertRaises(ArithmeticException) as pe: + df.withColumn("multiply_ten", df.id * 10).withColumn( + "divide_zero", df.id / 0 + ).withColumn("plus_ten", df.id + 10).withColumn("minus_ten", df.id - 10).collect() + self.check_error( + exception=pe.exception, + error_class="DIVIDE_BY_ZERO", + message_parameters={"config": '"spark.sql.ansi.enabled"'}, + query_context_type=QueryContextType.DataFrame, + fragment="__truediv__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - chained (`plus` is problematic) + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("multiply_ten", df.id * 10).withColumn( + "divide_ten", df.id / 10 + ).withColumn("plus_string", df.id + "string").withColumn( + "minus_ten", df.id - 10 + ).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__add__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - chained (`minus` is problematic) + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("multiply_ten", df.id * 10).withColumn( + "divide_ten", df.id / 10 + ).withColumn("plus_ten", df.id + 10).withColumn( + "minus_string", df.id - "string" + ).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__sub__", + ) + + # DataFrameQueryContext with pysparkLoggingInfo - chained (`multiply` is problematic) + with self.assertRaises(NumberFormatException) as pe: + df.withColumn("multiply_string", df.id * "string").withColumn( + "divide_ten", df.id / 10 + ).withColumn("plus_ten", df.id + 10).withColumn("minus_ten", df.id - 10).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__mul__", + ) + + # Multiple expressions in df.select (`divide` is problematic) + with self.assertRaises(ArithmeticException) as pe: + df.select(df.id - 10, df.id + 4, df.id / 0, df.id * 5).collect() + self.check_error( + exception=pe.exception, + error_class="DIVIDE_BY_ZERO", + message_parameters={"config": '"spark.sql.ansi.enabled"'}, + query_context_type=QueryContextType.DataFrame, + fragment="__truediv__", + ) + + # Multiple expressions in df.select (`plus` is problematic) + with self.assertRaises(NumberFormatException) as pe: + df.select(df.id - 10, df.id + "string", df.id / 10, df.id * 5).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + 
query_context_type=QueryContextType.DataFrame, + fragment="__add__", + ) + + # Multiple expressions in df.select (`minus` is problematic) + with self.assertRaises(NumberFormatException) as pe: + df.select(df.id - "string", df.id + 4, df.id / 10, df.id * 5).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__sub__", + ) + + # Multiple expressions in df.select (`multiply` is problematic) + with self.assertRaises(NumberFormatException) as pe: + df.select(df.id - 10, df.id + 4, df.id / 10, df.id * "string").collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__mul__", + ) + + # Multiple expressions with pre-declared expressions (`divide` is problematic) + a = df.id / 10 + b = df.id / 0 + with self.assertRaises(ArithmeticException) as pe: + df.select(a, df.id + 4, b, df.id * 5).collect() + self.check_error( + exception=pe.exception, + error_class="DIVIDE_BY_ZERO", + message_parameters={"config": '"spark.sql.ansi.enabled"'}, + query_context_type=QueryContextType.DataFrame, + fragment="__truediv__", + ) + + # Multiple expressions with pre-declared expressions (`plus` is problematic) + a = df.id + "string" + b = df.id + 4 + with self.assertRaises(NumberFormatException) as pe: + df.select(df.id / 10, a, b, df.id * 5).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__add__", + ) + + # Multiple expressions with pre-declared expressions (`minus` is problematic) + a = df.id - "string" + b = df.id - 5 + with self.assertRaises(NumberFormatException) as pe: + df.select(a, df.id / 10, b, df.id * 5).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__sub__", + ) + + # Multiple expressions with pre-declared expressions (`multiply` is problematic) + a = df.id * "string" + b = df.id * 10 + with self.assertRaises(NumberFormatException) as pe: + df.select(a, df.id / 10, b, df.id + 5).collect() + self.check_error( + exception=pe.exception, + error_class="CAST_INVALID_INPUT", + message_parameters={ + "expression": "'string'", + "sourceType": '"STRING"', + "targetType": '"BIGINT"', + "ansiConfig": '"spark.sql.ansi.enabled"', + }, + query_context_type=QueryContextType.DataFrame, + fragment="__mul__", + ) + + def test_sql_query_context(self): + with self.sql_conf({"spark.sql.ansi.enabled": True}): + # SQLQueryContext + with self.assertRaises(ArithmeticException) as pe: + self.spark.sql("select 10/0").collect() + self.check_error( + exception=pe.exception, + error_class="DIVIDE_BY_ZERO", + message_parameters={"config": '"spark.sql.ansi.enabled"'}, + query_context_type=QueryContextType.SQL, + ) + + # No 
QueryContext + with self.assertRaises(AnalysisException) as pe: + self.spark.sql("select * from non-existing-table") + self.check_error( + exception=pe.exception, + error_class="INVALID_IDENTIFIER", + message_parameters={"ident": "non-existing-table"}, + query_context_type=None, + ) + + +class DataFrameQueryContextTests(DataFrameQueryContextTestsMixin, ReusedSQLTestCase): + pass + + +if __name__ == "__main__": + from pyspark.sql.tests.test_dataframe_query_context import * # noqa: F401 + + try: + import xmlrunner # type: ignore + + testRunner = xmlrunner.XMLTestRunner(output="target/test-reports", verbosity=2) + except ImportError: + testRunner = None + unittest.main(testRunner=testRunner, verbosity=2) diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 23f7f9e00216c..4e9b61f7d0d96 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -26,6 +26,7 @@ from pyspark.errors import PySparkTypeError, PySparkValueError, SparkRuntimeException from pyspark.sql import Row, Window, functions as F, types +from pyspark.sql.avro.functions import from_avro, to_avro from pyspark.sql.column import Column from pyspark.testing.sqlutils import ReusedSQLTestCase, SQLTestUtils from pyspark.testing.utils import have_numpy @@ -1315,6 +1316,35 @@ def test_parse_json(self): self.assertEqual("""{"a":1}""", actual["var"]) self.assertEqual("""{"b":[{"c":"str2"}]}""", actual["var_lit"]) + def test_variant_expressions(self): + df = self.spark.createDataFrame([Row(json="""{ "a" : 1 }"""), Row(json="""{ "b" : 2 }""")]) + v = F.parse_json(df.json) + + def check(resultDf, expected): + self.assertEqual([r[0] for r in resultDf.collect()], expected) + + check(df.select(F.is_variant_null(v)), [False, False]) + check(df.select(F.schema_of_variant(v)), ["STRUCT", "STRUCT"]) + check(df.select(F.schema_of_variant_agg(v)), ["STRUCT"]) + + check(df.select(F.variant_get(v, "$.a", "int")), [1, None]) + check(df.select(F.variant_get(v, "$.b", "int")), [None, 2]) + check(df.select(F.variant_get(v, "$.a", "double")), [1.0, None]) + + with self.assertRaises(SparkRuntimeException) as ex: + df.select(F.variant_get(v, "$.a", "binary")).collect() + + self.check_error( + exception=ex.exception, + error_class="INVALID_VARIANT_CAST", + message_parameters={"value": "1", "dataType": '"BINARY"'}, + ) + + check(df.select(F.try_variant_get(v, "$.a", "int")), [1, None]) + check(df.select(F.try_variant_get(v, "$.b", "int")), [None, 2]) + check(df.select(F.try_variant_get(v, "$.a", "double")), [1.0, None]) + check(df.select(F.try_variant_get(v, "$.a", "binary")), [None, None]) + def test_schema_of_json(self): with self.assertRaises(PySparkTypeError) as pe: F.schema_of_json(1) @@ -1325,6 +1355,14 @@ def test_schema_of_json(self): message_parameters={"arg_name": "json", "arg_type": "int"}, ) + def test_try_parse_json(self): + df = self.spark.createDataFrame([{"json": """{ "a" : 1 }"""}, {"json": """{ a : 1 }"""}]) + actual = df.select( + F.to_json(F.try_parse_json(df.json)).alias("var"), + ).collect() + self.assertEqual("""{"a":1}""", actual[0]["var"]) + self.assertEqual(None, actual[1]["var"]) + def test_schema_of_csv(self): with self.assertRaises(PySparkTypeError) as pe: F.schema_of_csv(1) @@ -1491,6 +1529,40 @@ def test_json_tuple_empty_fields(self): lambda: df.select(F.json_tuple(df.jstring)), ) + def test_avro_type_check(self): + parameters = ["data", "jsonFormatSchema", "options"] + expected_type = ["pyspark.sql.Column or str", "str", "dict, 
optional"] + dummyDF = self.spark.createDataFrame([Row(a=i, b=i) for i in range(5)]) + + # test from_avro type checks for each parameter + wrong_type_value = 1 + with self.assertRaises(PySparkTypeError) as pe1: + dummyDF.select(from_avro(wrong_type_value, "jsonSchema", None)) + with self.assertRaises(PySparkTypeError) as pe2: + dummyDF.select(from_avro("value", wrong_type_value, None)) + with self.assertRaises(PySparkTypeError) as pe3: + dummyDF.select(from_avro("value", "jsonSchema", wrong_type_value)) + from_avro_pes = [pe1, pe2, pe3] + for i in range(3): + self.check_error( + exception=from_avro_pes[i].exception, + error_class="INVALID_TYPE", + message_parameters={"arg_name": parameters[i], "arg_type": expected_type[i]}, + ) + + # test to_avro type checks for each parameter + with self.assertRaises(PySparkTypeError) as pe4: + dummyDF.select(to_avro(wrong_type_value, "jsonSchema")) + with self.assertRaises(PySparkTypeError) as pe5: + dummyDF.select(to_avro("value", wrong_type_value)) + to_avro_pes = [pe4, pe5] + for i in range(2): + self.check_error( + exception=to_avro_pes[i].exception, + error_class="INVALID_TYPE", + message_parameters={"arg_name": parameters[i], "arg_type": expected_type[i]}, + ) + class FunctionsTests(ReusedSQLTestCase, FunctionsTestsMixin): pass diff --git a/python/pyspark/sql/tests/test_group.py b/python/pyspark/sql/tests/test_group.py index 958fc4e65dac2..8e3d2d8d00033 100644 --- a/python/pyspark/sql/tests/test_group.py +++ b/python/pyspark/sql/tests/test_group.py @@ -26,7 +26,7 @@ pandas_requirement_message, pyarrow_requirement_message, ) -from pyspark.testing import assertDataFrameEqual, assertSchemaEqual +from pyspark.testing import assertDataFrameEqual class GroupTestsMixin: @@ -92,30 +92,25 @@ def test_group_by_ordinal(self): # basic case df1 = spark.sql("select a, sum(b) from v group by 1;") df2 = df.groupBy(1).agg(sf.sum("b")) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) # constant case df1 = spark.sql("select 1, 2, sum(b) from v group by 1, 2;") df2 = df.select(sf.lit(1), sf.lit(2), "b").groupBy(1, 2).agg(sf.sum("b")) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) # duplicate group by column df1 = spark.sql("select a, 1, sum(b) from v group by a, 1;") df2 = df.select("a", sf.lit(1), "b").groupBy("a", 2).agg(sf.sum("b")) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) df1 = spark.sql("select a, 1, sum(b) from v group by 1, 2;") df2 = df.select("a", sf.lit(1), "b").groupBy(1, 2).agg(sf.sum("b")) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) # group by a non-aggregate expression's ordinal df1 = spark.sql("select a, b + 2, count(2) from v group by a, 2;") df2 = df.select("a", df.b + 2).groupBy(1, 2).agg(sf.count(sf.lit(2))) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) # negative cases: ordinal out of range @@ -152,28 +147,23 @@ def test_order_by_ordinal(self): df1 = spark.sql("select * from v order by 1 desc;") df2 = df.orderBy(-1) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) df1 = spark.sql("select * from v order by 1 desc, b desc;") df2 = df.orderBy(-1, df.b.desc()) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) df1 = spark.sql("select * from v order by 1 desc, 2 desc;") df2 = df.orderBy(-1, -2) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) # groupby ordinal with orderby ordinal df1 = spark.sql("select a, 1, sum(b) from v group by 
1, 2 order by 1;") df2 = df.select("a", sf.lit(1), "b").groupBy(1, 2).agg(sf.sum("b")).sort(1) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) df1 = spark.sql("select a, 1, sum(b) from v group by 1, 2 order by 3, 1;") df2 = df.select("a", sf.lit(1), "b").groupBy(1, 2).agg(sf.sum("b")).sort(3, 1) - assertSchemaEqual(df1.schema, df2.schema) assertDataFrameEqual(df1, df2) # negative cases: ordinal out of range diff --git a/python/pyspark/sql/tests/test_python_datasource.py b/python/pyspark/sql/tests/test_python_datasource.py index d028a210b0071..8431e9b3e35d4 100644 --- a/python/pyspark/sql/tests/test_python_datasource.py +++ b/python/pyspark/sql/tests/test_python_datasource.py @@ -19,7 +19,7 @@ import unittest from typing import Callable, Union -from pyspark.errors import PythonException +from pyspark.errors import PythonException, AnalysisException from pyspark.sql.datasource import ( DataSource, DataSourceReader, @@ -154,7 +154,8 @@ def test_data_source_read_output_named_row_with_wrong_schema(self): read_func=lambda schema, partition: iter([Row(i=1, j=2), Row(j=3, k=4)]) ) with self.assertRaisesRegex( - PythonException, "PYTHON_DATA_SOURCE_READ_RETURN_SCHEMA_MISMATCH" + PythonException, + r"\[DATA_SOURCE_RETURN_SCHEMA_MISMATCH\] Return schema mismatch in the result", ): self.spark.read.format("test").load().show() @@ -373,6 +374,47 @@ def test_case_insensitive_dict(self): self.assertEqual(d2["BaR"], 3) self.assertEqual(d2["baz"], 3) + def test_data_source_type_mismatch(self): + class TestDataSource(DataSource): + @classmethod + def name(cls): + return "test" + + def schema(self): + return "id int" + + def reader(self, schema): + return TestReader() + + def writer(self, schema, overwrite): + return TestWriter() + + class TestReader: + def partitions(self): + return [] + + def read(self, partition): + yield (0,) + + class TestWriter: + def write(self, iterator): + return WriterCommitMessage() + + self.spark.dataSource.register(TestDataSource) + + with self.assertRaisesRegex( + AnalysisException, + r"\[DATA_SOURCE_TYPE_MISMATCH\] Expected an instance of DataSourceReader", + ): + self.spark.read.format("test").load().show() + + df = self.spark.range(10) + with self.assertRaisesRegex( + AnalysisException, + r"\[DATA_SOURCE_TYPE_MISMATCH\] Expected an instance of DataSourceWriter", + ): + df.write.format("test").mode("append").saveAsTable("test_table") + class PythonDataSourceTests(BasePythonDataSourceTestsMixin, ReusedSQLTestCase): ... 
diff --git a/python/pyspark/sql/tests/test_python_streaming_datasource.py b/python/pyspark/sql/tests/test_python_streaming_datasource.py index f7247599be839..183b0ad80d9d4 100644 --- a/python/pyspark/sql/tests/test_python_streaming_datasource.py +++ b/python/pyspark/sql/tests/test_python_streaming_datasource.py @@ -24,8 +24,10 @@ DataSourceStreamReader, InputPartition, DataSourceStreamWriter, + SimpleDataSourceStreamReader, WriterCommitMessage, ) +from pyspark.sql.streaming import StreamingQueryException from pyspark.sql.types import Row from pyspark.testing.sqlutils import ( have_pyarrow, @@ -147,53 +149,95 @@ def check_batch(df, batch_id): while len(q.recentProgress) < 10: time.sleep(0.2) q.stop() - q.awaitTermination + q.awaitTermination() self.assertIsNone(q.exception(), "No exception has to be propagated.") - def test_stream_writer(self): - input_dir = tempfile.TemporaryDirectory(prefix="test_data_stream_write_input") - output_dir = tempfile.TemporaryDirectory(prefix="test_data_stream_write_output") - checkpoint_dir = tempfile.TemporaryDirectory(prefix="test_data_stream_write_checkpoint") + def test_simple_stream_reader(self): + class SimpleStreamReader(SimpleDataSourceStreamReader): + def initialOffset(self): + return {"offset": 0} - self.spark.range(0, 30).repartition(2).write.format("json").mode("append").save( - input_dir.name - ) - self.spark.dataSource.register(self._get_test_data_source()) - df = self.spark.readStream.schema("id int").json(input_dir.name) - q = ( - df.writeStream.format("TestDataSource") - .option("checkpointLocation", checkpoint_dir.name) - .start(output_dir.name) - ) - while not q.recentProgress: - time.sleep(0.2) + def read(self, start: dict): + start_idx = start["offset"] + it = iter([(i,) for i in range(start_idx, start_idx + 2)]) + return (it, {"offset": start_idx + 2}) + + def commit(self, end): + pass - # Test stream writer write and commit. - # The first microbatch contain 30 rows and 2 partitions. - # Number of rows and partitions is writen by StreamWriter.commit(). - assertDataFrameEqual(self.spark.read.json(output_dir.name), [Row(2, 30)]) + def readBetweenOffsets(self, start: dict, end: dict): + start_idx = start["offset"] + end_idx = end["offset"] + return iter([(i,) for i in range(start_idx, end_idx)]) + + class SimpleDataSource(DataSource): + def schema(self): + return "id INT" - self.spark.range(50, 80).repartition(2).write.format("json").mode("append").save( - input_dir.name - ) + def simpleStreamReader(self, schema): + return SimpleStreamReader() - # Test StreamWriter write and abort. - # When row id > 50, write tasks throw exception and fail. - # 1.txt is written by StreamWriter.abort() to record the failure. 
- while q.exception() is None: + self.spark.dataSource.register(SimpleDataSource) + df = self.spark.readStream.format("SimpleDataSource").load() + + def check_batch(df, batch_id): + assertDataFrameEqual(df, [Row(batch_id * 2), Row(batch_id * 2 + 1)]) + + q = df.writeStream.foreachBatch(check_batch).start() + while len(q.recentProgress) < 10: time.sleep(0.2) - assertDataFrameEqual( - self.spark.read.text(os.path.join(output_dir.name, "1.txt")), [Row("failed in batch 1")] - ) - q.awaitTermination + q.stop() + q.awaitTermination() + self.assertIsNone(q.exception(), "No exception has to be propagated.") + + def test_stream_writer(self): + input_dir = tempfile.TemporaryDirectory(prefix="test_data_stream_write_input") + output_dir = tempfile.TemporaryDirectory(prefix="test_data_stream_write_output") + checkpoint_dir = tempfile.TemporaryDirectory(prefix="test_data_stream_write_checkpoint") - input_dir.cleanup() - output_dir.cleanup() - checkpoint_dir.cleanup() + try: + self.spark.range(0, 30).repartition(2).write.format("json").mode("append").save( + input_dir.name + ) + self.spark.dataSource.register(self._get_test_data_source()) + df = self.spark.readStream.schema("id int").json(input_dir.name) + q = ( + df.writeStream.format("TestDataSource") + .option("checkpointLocation", checkpoint_dir.name) + .start(output_dir.name) + ) + while not q.recentProgress: + time.sleep(0.2) + + # Test stream writer write and commit. + # The first microbatch contains 30 rows and 2 partitions. + # The number of rows and partitions is written by StreamWriter.commit(). + assertDataFrameEqual(self.spark.read.json(output_dir.name), [Row(2, 30)]) + + self.spark.range(50, 80).repartition(2).write.format("json").mode("append").save( + input_dir.name + ) + + # Test StreamWriter write and abort. + # When row id > 50, write tasks throw an exception and fail. + # 1.txt is written by StreamWriter.abort() to record the failure. + while q.exception() is None: + time.sleep(0.2) + assertDataFrameEqual( + self.spark.read.text(os.path.join(output_dir.name, "1.txt")), + [Row("failed in batch 1")], + ) + q.awaitTermination() + except StreamingQueryException as e: + self.assertIn("invalid value", str(e)) + finally: + input_dir.cleanup() + output_dir.cleanup() + checkpoint_dir.cleanup() class PythonStreamingDataSourceTests(BasePythonStreamingDataSourceTestsMixin, ReusedSQLTestCase): - ...
+ pass if __name__ == "__main__": diff --git a/python/pyspark/sql/tests/test_readwriter.py b/python/pyspark/sql/tests/test_readwriter.py index 5784d2c729739..8060a9ae8bc76 100644 --- a/python/pyspark/sql/tests/test_readwriter.py +++ b/python/pyspark/sql/tests/test_readwriter.py @@ -55,12 +55,9 @@ def test_save_and_load(self): ) self.assertEqual(sorted(df.collect()), sorted(actual.collect())) - try: - self.spark.sql("SET spark.sql.sources.default=org.apache.spark.sql.json") + with self.sql_conf({"spark.sql.sources.default": "org.apache.spark.sql.json"}): actual = self.spark.read.load(path=tmpPath) self.assertEqual(sorted(df.collect()), sorted(actual.collect())) - finally: - self.spark.sql("RESET spark.sql.sources.default") csvpath = os.path.join(tempfile.mkdtemp(), "data") df.write.option("quote", None).format("csv").save(csvpath) @@ -94,12 +91,9 @@ def test_save_and_load_builder(self): ) self.assertEqual(sorted(df.collect()), sorted(actual.collect())) - try: - self.spark.sql("SET spark.sql.sources.default=org.apache.spark.sql.json") + with self.sql_conf({"spark.sql.sources.default": "org.apache.spark.sql.json"}): actual = self.spark.read.load(path=tmpPath) self.assertEqual(sorted(df.collect()), sorted(actual.collect())) - finally: - self.spark.sql("RESET spark.sql.sources.default") finally: shutil.rmtree(tmpPath) @@ -247,10 +241,9 @@ def test_create(self): def test_create_without_provider(self): df = self.df - with self.assertRaisesRegex( - AnalysisException, "NOT_SUPPORTED_COMMAND_WITHOUT_HIVE_SUPPORT" - ): + with self.table("test_table"): df.writeTo("test_table").create() + self.assertEqual(100, self.spark.sql("select * from test_table").count()) def test_table_overwrite(self): df = self.df diff --git a/python/pyspark/sql/tests/test_resources.py b/python/pyspark/sql/tests/test_resources.py index 9dfb14d9c37f7..4ce61e9f763d6 100644 --- a/python/pyspark/sql/tests/test_resources.py +++ b/python/pyspark/sql/tests/test_resources.py @@ -16,7 +16,7 @@ # import unittest -from pyspark import SparkContext, TaskContext +from pyspark import TaskContext from pyspark.resource import TaskResourceRequests, ResourceProfileBuilder from pyspark.sql import SparkSession from pyspark.testing.sqlutils import ( @@ -41,7 +41,7 @@ def func(iterator): yield batch df = self.spark.range(10) - df.mapInArrow(func, "id long").collect() + df.mapInArrow(func, "id long").show(n=10) def test_map_in_arrow_with_profile(self): def func(iterator): @@ -54,7 +54,7 @@ def func(iterator): treqs = TaskResourceRequests().cpus(3) rp = ResourceProfileBuilder().require(treqs).build - df.mapInArrow(func, "id long", False, rp).collect() + df.mapInArrow(func, "id long", False, rp).show(n=10) def test_map_in_pandas_without_profile(self): def func(iterator): @@ -64,7 +64,7 @@ def func(iterator): yield batch df = self.spark.range(10) - df.mapInPandas(func, "id long").collect() + df.mapInPandas(func, "id long").show(n=10) def test_map_in_pandas_with_profile(self): def func(iterator): @@ -77,12 +77,14 @@ def func(iterator): treqs = TaskResourceRequests().cpus(3) rp = ResourceProfileBuilder().require(treqs).build - df.mapInPandas(func, "id long", False, rp).collect() + df.mapInPandas(func, "id long", False, rp).show(n=10) class ResourceProfileTests(ResourceProfileTestsMixin, ReusedPySparkTestCase): @classmethod def setUpClass(cls): + from pyspark.core.context import SparkContext + cls.sc = SparkContext("local-cluster[1, 4, 1024]", cls.__name__, conf=cls.conf()) cls.spark = SparkSession(cls.sc) diff --git a/python/pyspark/sql/tests/test_serde.py 
b/python/pyspark/sql/tests/test_serde.py index ef8bbd2c370f4..01cf3c51d7de0 100644 --- a/python/pyspark/sql/tests/test_serde.py +++ b/python/pyspark/sql/tests/test_serde.py @@ -95,6 +95,14 @@ def test_time_with_timezone(self): self.assertEqual(now, now1) self.assertEqual(now, utcnow1) + def test_ntz_from_internal(self): + for ts in [1, 22, 333, 44444444, 5555555555]: + t1 = datetime.datetime.utcfromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000) + t2 = datetime.datetime.fromtimestamp(ts // 1000000, datetime.timezone.utc).replace( + microsecond=ts % 1000000, tzinfo=None + ) + self.assertEqual(t1, t2) + # regression test for SPARK-19561 def test_datetime_at_epoch(self): epoch = datetime.datetime.fromtimestamp(0) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index af13adbc21bb2..4810cf40e2315 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -32,6 +32,7 @@ PySparkTypeError, PySparkValueError, PySparkRuntimeError, + PySparkNotImplementedError, ) from pyspark.sql.types import ( DataType, @@ -41,6 +42,7 @@ FloatType, DateType, TimestampType, + TimestampNTZType, DayTimeIntervalType, YearMonthIntervalType, CalendarIntervalType, @@ -191,6 +193,7 @@ def __init__(self): Row(a=1), Row("a")(1), A(), + Row(b=Row(c=datetime.datetime(1970, 1, 1, 0, 0))), ] df = self.spark.createDataFrame([data]) @@ -213,6 +216,7 @@ def __init__(self): "struct", "struct", "struct", + "struct>", ] self.assertEqual(actual, expected) @@ -235,14 +239,25 @@ def __init__(self): Row(a=1), Row(a=1), Row(a=1), + Row(b=Row(c=datetime.datetime(1970, 1, 1, 0, 0))), ] self.assertEqual(actual, expected) with self.sql_conf({"spark.sql.timestampType": "TIMESTAMP_NTZ"}): with self.sql_conf({"spark.sql.session.timeZone": "America/Sao_Paulo"}): - df = self.spark.createDataFrame([(datetime.datetime(1970, 1, 1, 0, 0),)]) + data = [ + ( + datetime.datetime(1970, 1, 1, 0, 0), + Row(a=Row(a=datetime.datetime(1970, 1, 1, 0, 0))), + ) + ] + df = self.spark.createDataFrame(data) self.assertEqual(list(df.schema)[0].dataType.simpleString(), "timestamp_ntz") self.assertEqual(df.first()[0], datetime.datetime(1970, 1, 1, 0, 0)) + self.assertEqual( + list(df.schema)[1].dataType.simpleString(), "struct>" + ) + self.assertEqual(df.first()[1], Row(a=Row(a=datetime.datetime(1970, 1, 1, 0, 0)))) df = self.spark.createDataFrame( [ @@ -366,7 +381,7 @@ def test_infer_array_merge_element_types_with_rdd(self): df = self.spark.createDataFrame(rdd) self.assertEqual(Row(f1=[1, None], f2=[None, 2]), df.first()) - def test_infer_array_element_type_empty(self): + def test_infer_array_element_type_empty_rdd(self): # SPARK-39168: Test inferring array element type from all rows ArrayRow = Row("f1") @@ -379,6 +394,12 @@ def test_infer_array_element_type_empty(self): self.assertEqual(Row(f1=[None]), rows[1]) self.assertEqual(Row(f1=[1]), rows[2]) + def test_infer_array_element_type_empty(self): + # SPARK-39168: Test inferring array element type from all rows + ArrayRow = Row("f1") + + data = [ArrayRow([]), ArrayRow([None]), ArrayRow([1])] + df = self.spark.createDataFrame(data) rows = df.collect() self.assertEqual(Row(f1=[]), rows[0]) @@ -392,12 +413,6 @@ def test_infer_array_element_type_with_struct(self): with self.sql_conf({"spark.sql.pyspark.inferNestedDictAsStruct.enabled": True}): data = [NestedRow([{"payment": 200.5}, {"name": "A"}])] - nestedRdd = self.sc.parallelize(data) - df = self.spark.createDataFrame(nestedRdd) - self.assertEqual( - 
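test_ntz_from_internal above pins down the replacement for the deprecated datetime.utcfromtimestamp used on the fromInternal path for timestamps. A standalone sketch of the same equivalence, pure stdlib, no Spark needed:

import datetime

ts = 5555555555  # microseconds since the epoch, as stored internally
old = datetime.datetime.utcfromtimestamp(ts // 1000000).replace(microsecond=ts % 1000000)
new = datetime.datetime.fromtimestamp(ts // 1000000, datetime.timezone.utc).replace(
    microsecond=ts % 1000000, tzinfo=None
)
assert old == new  # both produce the same naive UTC datetime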
Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first() - ) - df = self.spark.createDataFrame(data) self.assertEqual( Row(f1=[Row(payment=200.5, name=None), Row(payment=None, name="A")]), df.first() @@ -410,6 +425,57 @@ def test_infer_array_element_type_with_struct(self): df = self.spark.createDataFrame(data) self.assertEqual(Row(f1=[Row(payment=200.5), Row(payment=None)]), df.first()) + def test_infer_map_merge_pair_types_with_rdd(self): + # SPARK-48247: Test inferring map pair type from all values in array + MapRow = Row("f1", "f2") + + data = [MapRow({"a": 1, "b": None}, {"a": None, "b": 1})] + + rdd = self.sc.parallelize(data) + df = self.spark.createDataFrame(rdd) + self.assertEqual(Row(f1={"a": 1, "b": None}, f2={"a": None, "b": 1}), df.first()) + + def test_infer_map_pair_type_empty_rdd(self): + # SPARK-48247: Test inferring map pair type from all rows + MapRow = Row("f1") + + data = [MapRow({}), MapRow({"a": None}), MapRow({"a": 1})] + + rdd = self.sc.parallelize(data) + df = self.spark.createDataFrame(rdd) + rows = df.collect() + self.assertEqual(Row(f1={}), rows[0]) + self.assertEqual(Row(f1={"a": None}), rows[1]) + self.assertEqual(Row(f1={"a": 1}), rows[2]) + + def test_infer_map_pair_type_empty(self): + # SPARK-48247: Test inferring map pair type from all rows + MapRow = Row("f1") + + data = [MapRow({}), MapRow({"a": None}), MapRow({"a": 1})] + + df = self.spark.createDataFrame(data) + rows = df.collect() + self.assertEqual(Row(f1={}), rows[0]) + self.assertEqual(Row(f1={"a": None}), rows[1]) + self.assertEqual(Row(f1={"a": 1}), rows[2]) + + def test_infer_map_pair_type_with_nested_maps(self): + # SPARK-48247: Test inferring nested map + NestedRow = Row("f1", "f2") + + data = [ + NestedRow({"payment": 200.5, "name": "A"}, {"outer": {"payment": 200.5, "name": "A"}}) + ] + df = self.spark.createDataFrame(data) + self.assertEqual( + Row( + f1={"payment": "200.5", "name": "A"}, + f2={"outer": {"payment": "200.5", "name": "A"}}, + ), + df.first(), + ) + def test_create_dataframe_from_dict_respects_schema(self): df = self.spark.createDataFrame([{"a": 1}], ["b"]) self.assertEqual(df.columns, ["b"]) @@ -426,14 +492,11 @@ class User: self.assertEqual(asdict(user), r.asDict()) def test_negative_decimal(self): - try: - self.spark.sql("set spark.sql.legacy.allowNegativeScaleOfDecimal=true") + with self.sql_conf({"spark.sql.legacy.allowNegativeScaleOfDecimal": True}): df = self.spark.createDataFrame([(1,), (11,)], ["value"]) ret = df.select(F.col("value").cast(DecimalType(1, -1))).collect() actual = list(map(lambda r: int(r.value), ret)) self.assertEqual(actual, [0, 10]) - finally: - self.spark.sql("set spark.sql.legacy.allowNegativeScaleOfDecimal=false") def test_create_dataframe_from_objects(self): data = [MyObject(1, "1"), MyObject(2, "2")] @@ -549,6 +612,234 @@ def test_convert_list_to_str(self): self.assertEqual(df.count(), 1) self.assertEqual(df.head(), Row(name="[123]", income=120)) + def test_schema_with_collations_json_ser_de(self): + from pyspark.sql.types import _parse_datatype_json_string + + unicode_collation = "UNICODE" + + simple_struct = StructType([StructField("c1", StringType(unicode_collation))]) + + nested_struct = StructType([StructField("nested", simple_struct)]) + + array_in_schema = StructType( + [StructField("array", ArrayType(StringType(unicode_collation)))] + ) + + map_in_schema = StructType( + [ + StructField( + "map", MapType(StringType(unicode_collation), StringType(unicode_collation)) + ) + ] + ) + + nested_map = StructType( + [ + 
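The SPARK-48247 tests above check that map key and value types are now inferred by merging every pair, not just the first one, so a None in the first pair no longer degrades the type. A small sketch mirroring the RDD test, assuming a running SparkSession:

from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.getOrCreate()

MapRow = Row("f1", "f2")
data = [MapRow({"a": 1, "b": None}, {"a": None, "b": 1})]

rdd = spark.sparkContext.parallelize(data)
df = spark.createDataFrame(rdd)
# Both columns infer as maps with string keys and long values, even though each
# map has a None on one side of the pair.
print(df.first())  # Row(f1={'a': 1, 'b': None}, f2={'a': None, 'b': 1})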
StructField( + "nested", + StructType( + [ + StructField( + "mapField", + MapType( + StringType(unicode_collation), StringType(unicode_collation) + ), + ) + ] + ), + ) + ] + ) + + array_in_map = StructType( + [ + StructField( + "arrInMap", + MapType( + StringType(unicode_collation), ArrayType(StringType(unicode_collation)) + ), + ) + ] + ) + + nested_array_in_map_value = StructType( + [ + StructField( + "nestedArrayInMap", + ArrayType( + MapType( + StringType(unicode_collation), + ArrayType(ArrayType(StringType(unicode_collation))), + ) + ), + ) + ] + ) + + schema_with_multiple_fields = StructType( + simple_struct.fields + + nested_struct.fields + + array_in_schema.fields + + map_in_schema.fields + + nested_map.fields + + array_in_map.fields + + nested_array_in_map_value.fields + ) + + schemas = [ + simple_struct, + nested_struct, + array_in_schema, + map_in_schema, + nested_map, + nested_array_in_map_value, + array_in_map, + schema_with_multiple_fields, + ] + + for schema in schemas: + scala_datatype = self.spark._jsparkSession.parseDataType(schema.json()) + python_datatype = _parse_datatype_json_string(scala_datatype.json()) + assert schema == python_datatype + assert schema == _parse_datatype_json_string(schema.json()) + + def test_schema_with_collations_on_non_string_types(self): + from pyspark.sql.types import _parse_datatype_json_string, _COLLATIONS_METADATA_KEY + + collations_on_int_col_json = f""" + {{ + "type": "struct", + "fields": [ + {{ + "name": "c1", + "type": "integer", + "nullable": true, + "metadata": {{ + "{_COLLATIONS_METADATA_KEY}": {{ + "c1": "icu.UNICODE" + }} + }} + }} + ] + }} + """ + + collations_in_array_element_json = f""" + {{ + "type": "struct", + "fields": [ + {{ + "name": "arrayField", + "type": {{ + "type": "array", + "elementType": "integer", + "containsNull": true + }}, + "nullable": true, + "metadata": {{ + "{_COLLATIONS_METADATA_KEY}": {{ + "arrayField.element": "icu.UNICODE" + }} + }} + }} + ] + }} + """ + + collations_on_array_json = f""" + {{ + "type": "struct", + "fields": [ + {{ + "name": "arrayField", + "type": {{ + "type": "array", + "elementType": "integer", + "containsNull": true + }}, + "nullable": true, + "metadata": {{ + "{_COLLATIONS_METADATA_KEY}": {{ + "arrayField": "icu.UNICODE" + }} + }} + }} + ] + }} + """ + + collations_in_nested_map_json = f""" + {{ + "type": "struct", + "fields": [ + {{ + "name": "nested", + "type": {{ + "type": "struct", + "fields": [ + {{ + "name": "mapField", + "type": {{ + "type": "map", + "keyType": "string", + "valueType": "integer", + "valueContainsNull": true + }}, + "nullable": true, + "metadata": {{ + "{_COLLATIONS_METADATA_KEY}": {{ + "mapField.value": "icu.UNICODE" + }} + }} + }} + ] + }}, + "nullable": true, + "metadata": {{}} + }} + ] + }} + """ + + self.assertRaises( + PySparkTypeError, lambda: _parse_datatype_json_string(collations_on_int_col_json) + ) + + self.assertRaises( + PySparkTypeError, lambda: _parse_datatype_json_string(collations_in_array_element_json) + ) + + self.assertRaises( + PySparkTypeError, lambda: _parse_datatype_json_string(collations_on_array_json) + ) + + self.assertRaises( + PySparkTypeError, lambda: _parse_datatype_json_string(collations_in_nested_map_json) + ) + + def test_schema_with_bad_collations_provider(self): + from pyspark.sql.types import _parse_datatype_json_string, _COLLATIONS_METADATA_KEY + + schema_json = f""" + {{ + "type": "struct", + "fields": [ + {{ + "name": "c1", + "type": "string", + "nullable": "true", + "metadata": {{ + "{_COLLATIONS_METADATA_KEY}": {{ + 
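Per the metadata-based wire format these tests rely on, a collated string field serializes as a plain "string" type plus an entry under the __COLLATIONS metadata key, and parsing the JSON restores the collated StringType. A minimal round-trip sketch, using the internal helper the tests import:

import json
from pyspark.sql.types import (
    StringType, StructField, StructType, _parse_datatype_json_string,
)

schema = StructType([StructField("c1", StringType("UNICODE"))])

field = json.loads(schema.json())["fields"][0]
print(field["type"])                         # 'string'
print(field["metadata"]["__COLLATIONS"])     # {'c1': 'icu.UNICODE'}

# Deserializing puts the collation back onto the StringType.
assert _parse_datatype_json_string(schema.json()) == schema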
"c1": "badProvider.UNICODE" + }} + }} + }} + ] + }} + """ + + self.assertRaises(PySparkValueError, lambda: _parse_datatype_json_string(schema_json)) + def test_udt(self): from pyspark.sql.types import _parse_datatype_json_string, _infer_type, _make_type_verifier @@ -856,35 +1147,48 @@ def test_struct_type(self): self.assertRaises(IndexError, lambda: struct1[9]) self.assertRaises(TypeError, lambda: struct1[9.9]) + def test_parse_datatype_json_string(self): + from pyspark.sql.types import _parse_datatype_json_string + + for dataType in [ + StringType(), + CharType(5), + VarcharType(10), + BinaryType(), + BooleanType(), + DecimalType(), + DecimalType(10, 2), + FloatType(), + DoubleType(), + ByteType(), + ShortType(), + IntegerType(), + LongType(), + DateType(), + TimestampType(), + TimestampNTZType(), + NullType(), + VariantType(), + YearMonthIntervalType(), + YearMonthIntervalType(YearMonthIntervalType.YEAR), + YearMonthIntervalType(YearMonthIntervalType.YEAR, YearMonthIntervalType.MONTH), + DayTimeIntervalType(), + DayTimeIntervalType(DayTimeIntervalType.DAY), + DayTimeIntervalType(DayTimeIntervalType.HOUR, DayTimeIntervalType.SECOND), + CalendarIntervalType(), + ]: + json_str = dataType.json() + parsed = _parse_datatype_json_string(json_str) + self.assertEqual(dataType, parsed) + def test_parse_datatype_string(self): - from pyspark.sql.types import _all_atomic_types, _parse_datatype_string + from pyspark.sql.types import _all_mappable_types, _parse_datatype_string + + for k, t in _all_mappable_types.items(): + self.assertEqual(t(), _parse_datatype_string(k)) - for k, t in _all_atomic_types.items(): - if k != "varchar" and k != "char": - self.assertEqual(t(), _parse_datatype_string(k)) self.assertEqual(IntegerType(), _parse_datatype_string("int")) self.assertEqual(StringType(), _parse_datatype_string("string")) - self.assertEqual(StringType(), _parse_datatype_string("string collate UTF8_BINARY")) - self.assertEqual(StringType(), _parse_datatype_string("string COLLATE UTF8_BINARY")) - self.assertEqual( - StringType.fromCollationId(0), _parse_datatype_string("string COLLATE UTF8_BINARY") - ) - self.assertEqual( - StringType.fromCollationId(1), - _parse_datatype_string("string COLLATE UTF8_BINARY_LCASE"), - ) - self.assertEqual( - StringType.fromCollationId(2), _parse_datatype_string("string COLLATE UNICODE") - ) - self.assertEqual( - StringType.fromCollationId(2), _parse_datatype_string("string COLLATE `UNICODE`") - ) - self.assertEqual( - StringType.fromCollationId(3), _parse_datatype_string("string COLLATE UNICODE_CI") - ) - self.assertEqual( - StringType.fromCollationId(3), _parse_datatype_string("string COLLATE `UNICODE_CI`") - ) self.assertEqual(CharType(1), _parse_datatype_string("char(1)")) self.assertEqual(CharType(10), _parse_datatype_string("char( 10 )")) self.assertEqual(CharType(11), _parse_datatype_string("char( 11)")) @@ -912,6 +1216,313 @@ def test_parse_datatype_string(self): ) self.assertEqual(VariantType(), _parse_datatype_string("variant")) + def test_tree_string(self): + schema1 = DataType.fromDDL("c1 INT, c2 STRUCT>") + + self.assertEqual( + schema1.treeString().split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: struct (nullable = true)", + " | |-- c3: integer (nullable = true)", + " | |-- c4: struct (nullable = true)", + " | | |-- c5: integer (nullable = true)", + " | | |-- c6: integer (nullable = true)", + "", + ], + ) + self.assertEqual( + schema1.treeString(-1).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: 
struct (nullable = true)", + " | |-- c3: integer (nullable = true)", + " | |-- c4: struct (nullable = true)", + " | | |-- c5: integer (nullable = true)", + " | | |-- c6: integer (nullable = true)", + "", + ], + ) + self.assertEqual( + schema1.treeString(0).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: struct (nullable = true)", + " | |-- c3: integer (nullable = true)", + " | |-- c4: struct (nullable = true)", + " | | |-- c5: integer (nullable = true)", + " | | |-- c6: integer (nullable = true)", + "", + ], + ) + self.assertEqual( + schema1.treeString(1).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: struct (nullable = true)", + "", + ], + ) + self.assertEqual( + schema1.treeString(2).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: struct (nullable = true)", + " | |-- c3: integer (nullable = true)", + " | |-- c4: struct (nullable = true)", + "", + ], + ) + self.assertEqual( + schema1.treeString(3).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: struct (nullable = true)", + " | |-- c3: integer (nullable = true)", + " | |-- c4: struct (nullable = true)", + " | | |-- c5: integer (nullable = true)", + " | | |-- c6: integer (nullable = true)", + "", + ], + ) + self.assertEqual( + schema1.treeString(4).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: struct (nullable = true)", + " | |-- c3: integer (nullable = true)", + " | |-- c4: struct (nullable = true)", + " | | |-- c5: integer (nullable = true)", + " | | |-- c6: integer (nullable = true)", + "", + ], + ) + + schema2 = DataType.fromDDL( + "c1 INT, c2 ARRAY>, c4 STRUCT>>" + ) + self.assertEqual( + schema2.treeString(0).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: array (nullable = true)", + " | |-- element: struct (containsNull = true)", + " | | |-- c3: integer (nullable = true)", + " |-- c4: struct (nullable = true)", + " | |-- c5: integer (nullable = true)", + " | |-- c6: array (nullable = true)", + " | | |-- element: array (containsNull = true)", + " | | | |-- element: integer (containsNull = true)", + "", + ], + ) + self.assertEqual( + schema2.treeString(1).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: array (nullable = true)", + " |-- c4: struct (nullable = true)", + "", + ], + ) + self.assertEqual( + schema2.treeString(2).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: array (nullable = true)", + " | |-- element: struct (containsNull = true)", + " |-- c4: struct (nullable = true)", + " | |-- c5: integer (nullable = true)", + " | |-- c6: array (nullable = true)", + "", + ], + ) + self.assertEqual( + schema2.treeString(3).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: array (nullable = true)", + " | |-- element: struct (containsNull = true)", + " | | |-- c3: integer (nullable = true)", + " |-- c4: struct (nullable = true)", + " | |-- c5: integer (nullable = true)", + " | |-- c6: array (nullable = true)", + " | | |-- element: array (containsNull = true)", + "", + ], + ) + self.assertEqual( + schema2.treeString(4).split("\n"), + [ + "root", + " |-- c1: integer (nullable = true)", + " |-- c2: array (nullable = true)", + " | |-- element: struct (containsNull = true)", + " | | |-- c3: integer (nullable = true)", + " |-- c4: struct (nullable = true)", + " | |-- c5: integer (nullable = true)", + " | |-- c6: array (nullable = true)", + " | | |-- element: array 
(containsNull = true)", + " | | | |-- element: integer (containsNull = true)", + "", + ], + ) + + schema3 = DataType.fromDDL( + "c1 MAP>>, c3 STRUCT>>" + ) + self.assertEqual( + schema3.treeString(0).split("\n"), + [ + "root", + " |-- c1: map (nullable = true)", + " | |-- key: integer", + " | |-- value: struct (valueContainsNull = true)", + " | | |-- c2: map (nullable = true)", + " | | | |-- key: integer", + " | | | |-- value: integer (valueContainsNull = true)", + " |-- c3: struct (nullable = true)", + " | |-- c4: map (nullable = true)", + " | | |-- key: integer", + " | | |-- value: map (valueContainsNull = true)", + " | | | |-- key: integer", + " | | | |-- value: integer (valueContainsNull = true)", + "", + ], + ) + self.assertEqual( + schema3.treeString(1).split("\n"), + [ + "root", + " |-- c1: map (nullable = true)", + " |-- c3: struct (nullable = true)", + "", + ], + ) + self.assertEqual( + schema3.treeString(2).split("\n"), + [ + "root", + " |-- c1: map (nullable = true)", + " | |-- key: integer", + " | |-- value: struct (valueContainsNull = true)", + " |-- c3: struct (nullable = true)", + " | |-- c4: map (nullable = true)", + "", + ], + ) + self.assertEqual( + schema3.treeString(3).split("\n"), + [ + "root", + " |-- c1: map (nullable = true)", + " | |-- key: integer", + " | |-- value: struct (valueContainsNull = true)", + " | | |-- c2: map (nullable = true)", + " |-- c3: struct (nullable = true)", + " | |-- c4: map (nullable = true)", + " | | |-- key: integer", + " | | |-- value: map (valueContainsNull = true)", + "", + ], + ) + self.assertEqual( + schema3.treeString(4).split("\n"), + [ + "root", + " |-- c1: map (nullable = true)", + " | |-- key: integer", + " | |-- value: struct (valueContainsNull = true)", + " | | |-- c2: map (nullable = true)", + " | | | |-- key: integer", + " | | | |-- value: integer (valueContainsNull = true)", + " |-- c3: struct (nullable = true)", + " | |-- c4: map (nullable = true)", + " | | |-- key: integer", + " | | |-- value: map (valueContainsNull = true)", + " | | | |-- key: integer", + " | | | |-- value: integer (valueContainsNull = true)", + "", + ], + ) + + def test_tree_string_for_builtin_types(self): + schema = ( + StructType() + .add("n", NullType()) + .add("str", StringType()) + .add("c", CharType(10)) + .add("v", VarcharType(10)) + .add("bin", BinaryType()) + .add("bool", BooleanType()) + .add("date", DateType()) + .add("ts", TimestampType()) + .add("ts_ntz", TimestampNTZType()) + .add("dec", DecimalType(10, 2)) + .add("double", DoubleType()) + .add("float", FloatType()) + .add("long", LongType()) + .add("int", IntegerType()) + .add("short", ShortType()) + .add("byte", ByteType()) + .add("ym_interval_1", YearMonthIntervalType()) + .add("ym_interval_2", YearMonthIntervalType(YearMonthIntervalType.YEAR)) + .add( + "ym_interval_3", + YearMonthIntervalType(YearMonthIntervalType.YEAR, YearMonthIntervalType.MONTH), + ) + .add("dt_interval_1", DayTimeIntervalType()) + .add("dt_interval_2", DayTimeIntervalType(DayTimeIntervalType.DAY)) + .add( + "dt_interval_3", + DayTimeIntervalType(DayTimeIntervalType.HOUR, DayTimeIntervalType.SECOND), + ) + .add("cal_interval", CalendarIntervalType()) + .add("var", VariantType()) + ) + self.assertEqual( + schema.treeString().split("\n"), + [ + "root", + " |-- n: void (nullable = true)", + " |-- str: string (nullable = true)", + " |-- c: char(10) (nullable = true)", + " |-- v: varchar(10) (nullable = true)", + " |-- bin: binary (nullable = true)", + " |-- bool: boolean (nullable = true)", + " |-- date: date (nullable 
= true)", + " |-- ts: timestamp (nullable = true)", + " |-- ts_ntz: timestamp_ntz (nullable = true)", + " |-- dec: decimal(10,2) (nullable = true)", + " |-- double: double (nullable = true)", + " |-- float: float (nullable = true)", + " |-- long: long (nullable = true)", + " |-- int: integer (nullable = true)", + " |-- short: short (nullable = true)", + " |-- byte: byte (nullable = true)", + " |-- ym_interval_1: interval year to month (nullable = true)", + " |-- ym_interval_2: interval year (nullable = true)", + " |-- ym_interval_3: interval year to month (nullable = true)", + " |-- dt_interval_1: interval day to second (nullable = true)", + " |-- dt_interval_2: interval day (nullable = true)", + " |-- dt_interval_3: interval hour to second (nullable = true)", + " |-- cal_interval: interval (nullable = true)", + " |-- var: variant (nullable = true)", + "", + ], + ) + def test_metadata_null(self): schema = StructType( [ @@ -1230,7 +1841,7 @@ def test_repr(self): NullType(), StringType(), StringType("UTF8_BINARY"), - StringType("UTF8_BINARY_LCASE"), + StringType("UTF8_LCASE"), StringType("UNICODE"), StringType("UNICODE_CI"), CharType(10), @@ -1427,8 +2038,10 @@ def test_variant_type(self): ("-int4", "-69633", -69633), ("int8", "4295033089", 4295033089), ("-int8", "-4294967297", -4294967297), - ("float4", "1.23456789e-30", 1.23456789e-30), - ("-float4", "-4.56789e+29", -4.56789e29), + ("float4", "3.402e+38", 3.402e38), + ("-float4", "-3.402e+38", -3.402e38), + ("float8", "1.79769e+308", 1.79769e308), + ("-float8", "-1.79769e+308", -1.79769e308), ("dec4", "123.456", Decimal("123.456")), ("-dec4", "-321.654", Decimal("-321.654")), ("dec8", "429.4967297", Decimal("429.4967297")), @@ -1447,17 +2060,77 @@ def test_variant_type(self): F.struct([F.parse_json(F.lit('{"b": "2"}'))]).alias("s"), F.create_map([F.lit("k"), F.parse_json(F.lit('{"c": true}'))]).alias("m"), ).collect()[0] - variants = [row["v"], row["a"][0], row["s"]["col1"], row["m"]["k"]] + + # These data types are not supported by parse_json yet so they are being handled + # separately - Date, Timestamp, TimestampNTZ, Binary, Float (Single Precision) + date_columns = self.spark.sql( + "select cast(Date('2021-01-01')" + + " as variant) as d0, cast(Date('1800-12-31')" + + " as variant) as d1" + ).collect()[0] + float_columns = self.spark.sql( + "select cast(Float(5.5)" + " as variant) as f0, cast(Float(-5.5) as variant) as f1" + ).collect()[0] + binary_columns = self.spark.sql( + "select cast(binary(x'324FA69E')" + " as variant) as b" + ).collect()[0] + timetamp_ntz_columns = self.spark.sql( + "select cast(cast('1940-01-01 12:33:01.123'" + + " as timestamp_ntz) as variant) as tntz0, cast(cast('2522-12-31 05:57:13'" + + " as timestamp_ntz) as variant) as tntz1, cast(cast('0001-07-15 17:43:26+08:00'" + + " as timestamp_ntz) as variant) as tntz2" + ).collect()[0] + timetamp_columns = self.spark.sql( + "select cast(cast('1940-01-01 12:35:13.123+7:30'" + + " as timestamp) as variant) as t0, cast(cast('2522-12-31 00:00:00-5:23'" + + " as timestamp) as variant) as t1, cast(cast('0001-12-31 01:01:01+08:00'" + + " as timestamp) as variant) as t2" + ).collect()[0] + + variants = [ + row["v"], + row["a"][0], + row["s"]["col1"], + row["m"]["k"], + date_columns["d0"], + date_columns["d1"], + float_columns["f0"], + float_columns["f1"], + binary_columns["b"], + timetamp_ntz_columns["tntz0"], + timetamp_ntz_columns["tntz1"], + timetamp_ntz_columns["tntz2"], + timetamp_columns["t0"], + timetamp_columns["t1"], + timetamp_columns["t2"], + ] + for v in 
variants: self.assertEqual(type(v), VariantVal) - # check str + # check str (to_json) as_string = str(variants[0]) for key, expected, _ in expected_values: self.assertTrue('"%s":%s' % (key, expected) in as_string) self.assertEqual(str(variants[1]), '{"a":1}') self.assertEqual(str(variants[2]), '{"b":"2"}') self.assertEqual(str(variants[3]), '{"c":true}') + self.assertEqual(str(variants[4]), '"2021-01-01"') + self.assertEqual(str(variants[5]), '"1800-12-31"') + self.assertEqual(str(variants[6]), "5.5") + self.assertEqual(str(variants[7]), "-5.5") + self.assertEqual(str(variants[8]), '"Mk+mng=="') + self.assertEqual(str(variants[9]), '"1940-01-01 12:33:01.123000"') + self.assertEqual(str(variants[10]), '"2522-12-31 05:57:13"') + self.assertEqual(str(variants[11]), '"0001-07-15 17:43:26"') + self.assertEqual(str(variants[12]), '"1940-01-01 05:05:13.123000+00:00"') + self.assertEqual(str(variants[13]), '"2522-12-31 05:23:00+00:00"') + self.assertEqual(str(variants[14]), '"0001-12-30 17:01:01+00:00"') + + # Check to_json on timestamps with custom timezones + self.assertEqual( + variants[12].toJson("America/Los_Angeles"), '"1939-12-31 21:05:13.123000-08:00"' + ) # check toPython as_python = variants[0].toPython() @@ -1466,10 +2139,68 @@ def test_variant_type(self): self.assertEqual(variants[1].toPython(), {"a": 1}) self.assertEqual(variants[2].toPython(), {"b": "2"}) self.assertEqual(variants[3].toPython(), {"c": True}) + self.assertEqual(variants[4].toPython(), datetime.date(2021, 1, 1)) + self.assertEqual(variants[5].toPython(), datetime.date(1800, 12, 31)) + self.assertEqual(variants[6].toPython(), float(5.5)) + self.assertEqual(variants[7].toPython(), float(-5.5)) + self.assertEqual(variants[8].toPython(), bytearray(b"2O\xa6\x9e")) + self.assertEqual(variants[9].toPython(), datetime.datetime(1940, 1, 1, 12, 33, 1, 123000)) + self.assertEqual(variants[10].toPython(), datetime.datetime(2522, 12, 31, 5, 57, 13)) + self.assertEqual(variants[11].toPython(), datetime.datetime(1, 7, 15, 17, 43, 26)) + self.assertEqual( + variants[12].toPython(), + datetime.datetime( + 1940, + 1, + 1, + 12, + 35, + 13, + 123000, + tzinfo=datetime.timezone(datetime.timedelta(hours=7, minutes=30)), + ), + ) + self.assertEqual( + variants[13].toPython(), + datetime.datetime( + 2522, + 12, + 31, + 3, + 3, + 31, + tzinfo=datetime.timezone(datetime.timedelta(hours=-2, minutes=-20, seconds=31)), + ), + ) + self.assertEqual( + variants[14].toPython(), + datetime.datetime( + 1, + 12, + 31, + 16, + 3, + 23, + tzinfo=datetime.timezone(datetime.timedelta(hours=23, minutes=2, seconds=22)), + ), + ) # check repr self.assertEqual(str(variants[0]), str(eval(repr(variants[0])))) + metadata = bytes([1, 0, 0]) + self.assertEqual(str(VariantVal(bytes([32, 0, 1, 0, 0, 0]), metadata)), "1") + self.assertEqual(str(VariantVal(bytes([32, 1, 2, 0, 0, 0]), metadata)), "0.2") + self.assertEqual(str(VariantVal(bytes([32, 2, 3, 0, 0, 0]), metadata)), "0.03") + self.assertEqual(str(VariantVal(bytes([32, 0, 1, 0, 0, 0]), metadata)), "1") + self.assertEqual(str(VariantVal(bytes([32, 0, 255, 201, 154, 59]), metadata)), "999999999") + self.assertRaises( + PySparkValueError, lambda: str(VariantVal(bytes([32, 0, 0, 202, 154, 59]), metadata)) + ) + self.assertRaises( + PySparkValueError, lambda: str(VariantVal(bytes([32, 10, 1, 0, 0, 0]), metadata)) + ) + def test_from_ddl(self): self.assertEqual(DataType.fromDDL("long"), LongType()) self.assertEqual( @@ -1487,20 +2218,44 @@ def test_from_ddl(self): def test_collated_string(self): dfs = [ - 
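toJson is the timezone-aware counterpart of str(): the zone id only changes how timestamp-typed variants are rendered, not the instant they represent. A sketch reusing the cast from the test above, with the zone name as the only free parameter:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

v = spark.sql(
    "SELECT CAST(CAST('1940-01-01 12:35:13.123+7:30' AS TIMESTAMP) AS VARIANT) AS t"
).first()["t"]

print(v.toJson())                       # rendered in UTC, the default zone id
print(v.toJson("America/Los_Angeles"))  # same instant, printed as -08:00 wall time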
self.spark.sql("SELECT 'abc' collate UTF8_BINARY_LCASE"), + self.spark.sql("SELECT 'abc' collate UTF8_LCASE"), self.spark.createDataFrame( - [], StructType([StructField("id", StringType("UTF8_BINARY_LCASE"))]) + [], StructType([StructField("id", StringType("UTF8_LCASE"))]) ), ] for df in dfs: # performs both datatype -> proto & proto -> datatype conversions self.assertEqual( - df.to(StructType([StructField("new", StringType("UTF8_BINARY_LCASE"))])) + df.to(StructType([StructField("new", StringType("UTF8_LCASE"))])) .schema[0] .dataType, - StringType("UTF8_BINARY_LCASE"), + StringType("UTF8_LCASE"), + ) + + def test_infer_array_element_type_with_struct(self): + # SPARK-48248: Nested array to respect legacy conf of inferArrayTypeFromFirstElement + with self.sql_conf( + {"spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled": True} + ): + self.assertEqual( + ArrayType(ArrayType(LongType())), + self.spark.createDataFrame([[[[1, 1.0]]]]).schema.fields[0].dataType, ) + def test_ym_interval_in_collect(self): + with self.assertRaises(PySparkNotImplementedError): + self.spark.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval").first() + + with self.temp_env({"PYSPARK_YM_INTERVAL_LEGACY": "1"}): + self.assertEqual( + self.spark.sql("SELECT INTERVAL '10-8' YEAR TO MONTH AS interval").first(), + Row(interval=128), + ) + + def test_cal_interval_in_collect(self): + with self.assertRaises(PySparkNotImplementedError): + self.spark.sql("SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001)").first()[0] + class DataTypeTests(unittest.TestCase): # regression test for SPARK-6055 @@ -1638,7 +2393,7 @@ def __init__(self, **kwargs): (1.0, StringType()), ([], StringType()), ({}, StringType()), - ("", StringType("UTF8_BINARY_LCASE")), + ("", StringType("UTF8_LCASE")), # Char ("", CharType(10)), (1, CharType(10)), @@ -1707,7 +2462,7 @@ def __init__(self, **kwargs): failure_spec = [ # String (match anything but None) (None, StringType(), ValueError), - (None, StringType("UTF8_BINARY_LCASE"), ValueError), + (None, StringType("UTF8_LCASE"), ValueError), # CharType (match anything but None) (None, CharType(10), ValueError), # VarcharType (match anything but None) diff --git a/python/pyspark/sql/tests/test_udtf.py b/python/pyspark/sql/tests/test_udtf.py index 923fe4a2a8e8d..66f1a3090546f 100644 --- a/python/pyspark/sql/tests/test_udtf.py +++ b/python/pyspark/sql/tests/test_udtf.py @@ -1801,6 +1801,9 @@ def _add_archive(self, path): def test_udtf_with_analyze_using_archive(self): from pyspark.core.files import SparkFiles + self.check_udtf_with_analyze_using_archive(SparkFiles.getRootDirectory()) + + def check_udtf_with_analyze_using_archive(self, exec_root_dir): with tempfile.TemporaryDirectory(prefix="test_udtf_with_analyze_using_archive") as d: archive_path = os.path.join(d, "my_archive") os.mkdir(archive_path) @@ -1815,9 +1818,7 @@ class TestUDTF: @staticmethod def read_my_archive() -> str: with open( - os.path.join( - SparkFiles.getRootDirectory(), "my_files", "my_archive", "my_file.txt" - ), + os.path.join(exec_root_dir, "my_files", "my_archive", "my_file.txt"), "r", ) as my_file: return my_file.read().strip() @@ -1850,6 +1851,9 @@ def _add_file(self, path): def test_udtf_with_analyze_using_file(self): from pyspark.core.files import SparkFiles + self.check_udtf_with_analyze_using_file(SparkFiles.getRootDirectory()) + + def check_udtf_with_analyze_using_file(self, exec_root_dir): with tempfile.TemporaryDirectory(prefix="test_udtf_with_analyze_using_file") as d: file_path = os.path.join(d, 
"my_file.txt") with open(file_path, "w") as f: @@ -1860,9 +1864,7 @@ def test_udtf_with_analyze_using_file(self): class TestUDTF: @staticmethod def read_my_file() -> str: - with open( - os.path.join(SparkFiles.getRootDirectory(), "my_file.txt"), "r" - ) as my_file: + with open(os.path.join(exec_root_dir, "my_file.txt"), "r") as my_file: return my_file.read().strip() @staticmethod @@ -2557,16 +2559,18 @@ class TestUDTF: def eval(self): yield 1, - # We do not use `self.sql_conf` here to test the SQL SET command - # instead of using PySpark's `spark.conf.set`. old_value = self.spark.conf.get("spark.sql.execution.pythonUDTF.arrow.enabled") - self.spark.sql("SET spark.sql.execution.pythonUDTF.arrow.enabled=False") - self.assertEqual(udtf(TestUDTF, returnType="x: int").evalType, PythonEvalType.SQL_TABLE_UDF) - self.spark.sql("SET spark.sql.execution.pythonUDTF.arrow.enabled=True") - self.assertEqual( - udtf(TestUDTF, returnType="x: int").evalType, PythonEvalType.SQL_ARROW_TABLE_UDF - ) - self.spark.conf.set("spark.sql.execution.pythonUDTF.arrow.enabled", old_value) + try: + self.spark.conf.set("spark.sql.execution.pythonUDTF.arrow.enabled", False) + self.assertEqual( + udtf(TestUDTF, returnType="x: int").evalType, PythonEvalType.SQL_TABLE_UDF + ) + self.spark.conf.set("spark.sql.execution.pythonUDTF.arrow.enabled", True) + self.assertEqual( + udtf(TestUDTF, returnType="x: int").evalType, PythonEvalType.SQL_ARROW_TABLE_UDF + ) + finally: + self.spark.conf.set("spark.sql.execution.pythonUDTF.arrow.enabled", old_value) def test_udtf_eval_returning_non_tuple(self): @udtf(returnType="a: int") diff --git a/python/pyspark/sql/tests/typing/test_session.yml b/python/pyspark/sql/tests/typing/test_session.yml index 8f48edb7e579e..d6eee82a7678e 100644 --- a/python/pyspark/sql/tests/typing/test_session.yml +++ b/python/pyspark/sql/tests/typing/test_session.yml @@ -51,25 +51,6 @@ spark.createDataFrame(["foo", "bar"], "string") -- case: createDataFrameScalarsInvalid - main: | - from pyspark.sql import SparkSession - from pyspark.sql.types import StructType, StructField, StringType, IntegerType - - spark = SparkSession.builder.getOrCreate() - - schema = StructType([ - StructField("name", StringType(), True), - StructField("age", IntegerType(), True) - ]) - - # Invalid - scalars require schema - spark.createDataFrame(["foo", "bar"]) # E: Value of type variable "RowLike" of "createDataFrame" of "SparkSession" cannot be "str" [type-var] - - # Invalid - data has to match schema (either product -> struct or scalar -> atomic) - spark.createDataFrame([1, 2, 3], schema) # E: Value of type variable "RowLike" of "createDataFrame" of "SparkSession" cannot be "int" [type-var] - - - case: createDataFrameStructsInvalid main: | from pyspark.sql import SparkSession @@ -102,7 +83,9 @@ main:18: note: def [AtomicValue in (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: RDD[AtomicValue], schema: Union[AtomicType, str], verifySchema: bool = ...) -> DataFrame main:18: note: def [AtomicValue in (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: Iterable[AtomicValue], schema: Union[AtomicType, str], verifySchema: bool = ...) -> DataFrame main:18: note: def createDataFrame(self, data: DataFrame, samplingRatio: Optional[float] = ...) -> DataFrame + main:18: note: def createDataFrame(self, data: Any, samplingRatio: Optional[float] = ...) -> DataFrame main:18: note: def createDataFrame(self, data: DataFrame, schema: Union[StructType, str], verifySchema: bool = ...) 
-> DataFrame + main:18: note: def createDataFrame(self, data: Any, schema: Union[StructType, str], verifySchema: bool = ...) -> DataFrame - case: createDataFrameFromEmptyRdd main: | diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 3546fd8228141..d2adc53a3618f 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -15,6 +15,7 @@ # limitations under the License. # +import os import sys import decimal import time @@ -45,9 +46,14 @@ TYPE_CHECKING, ) -from pyspark.util import is_remote_only +from pyspark.util import is_remote_only, JVM_INT_MAX from pyspark.serializers import CloudPickleSerializer -from pyspark.sql.utils import has_numpy, get_active_spark_context +from pyspark.sql.utils import ( + has_numpy, + get_active_spark_context, + escape_meta_characters, + StringConcat, +) from pyspark.sql.variant_utils import VariantUtils from pyspark.errors import ( PySparkNotImplementedError, @@ -199,6 +205,35 @@ def fromDDL(cls, ddl: str) -> "DataType": assert len(schema) == 1 return schema[0].dataType + @classmethod + def _data_type_build_formatted_string( + cls, + dataType: "DataType", + prefix: str, + stringConcat: StringConcat, + maxDepth: int, + ) -> None: + if isinstance(dataType, (ArrayType, StructType, MapType)): + dataType._build_formatted_string(prefix, stringConcat, maxDepth - 1) + + # The method typeName() is not always the same as the Scala side. + # Add this helper method to make TreeString() compatible with Scala side. + @classmethod + def _get_jvm_type_name(cls, dataType: "DataType") -> str: + if isinstance( + dataType, + ( + DecimalType, + CharType, + VarcharType, + DayTimeIntervalType, + YearMonthIntervalType, + ), + ): + return dataType.simpleString() + else: + return dataType.typeName() + # This singleton pattern does not work with pickle, you will get # another object after pickle and unpickle @@ -254,38 +289,40 @@ class StringType(AtomicType): name of the collation, default is UTF8_BINARY. 
""" - collationNames = ["UTF8_BINARY", "UTF8_BINARY_LCASE", "UNICODE", "UNICODE_CI"] - - def __init__(self, collation: Optional[str] = None): - self.collationId = 0 if collation is None else self.collationNameToId(collation) + providerSpark = "spark" + providerICU = "icu" + providers = [providerSpark, providerICU] - @classmethod - def fromCollationId(self, collationId: int) -> "StringType": - return StringType(StringType.collationNames[collationId]) - - def collationIdToName(self) -> str: - if self.collationId == 0: - return "" - else: - return " collate %s" % StringType.collationNames[self.collationId] + def __init__(self, collation: str = "UTF8_BINARY"): + self.collation = collation @classmethod - def collationNameToId(cls, collationName: str) -> int: - return StringType.collationNames.index(collationName) + def collationProvider(cls, collationName: str) -> str: + # TODO: do this properly like on the scala side + if collationName.startswith("UTF8"): + return StringType.providerSpark + return StringType.providerICU def simpleString(self) -> str: - return "string" + self.collationIdToName() + if self.isUTF8BinaryCollation(): + return "string" + + return f"string collate ${self.collation}" + # For backwards compatibility and compatibility with other readers all string types + # are serialized in json as regular strings and the collation info is written to + # struct field metadata def jsonValue(self) -> str: - return "string" + self.collationIdToName() + return "string" def __repr__(self) -> str: return ( - "StringType('%s')" % StringType.collationNames[self.collationId] - if self.collationId != 0 - else "StringType()" + "StringType()" if self.isUTF8BinaryCollation() else "StringType('%s')" % self.collation ) + def isUTF8BinaryCollation(self) -> bool: + return self.collation == "UTF8_BINARY" + class CharType(AtomicType): """Char data type @@ -397,8 +434,8 @@ def toInternal(self, dt: datetime.datetime) -> int: def fromInternal(self, ts: int) -> datetime.datetime: if ts is not None: # using int to avoid precision loss in float - return datetime.datetime.utcfromtimestamp(ts // 1000000).replace( - microsecond=ts % 1000000 + return datetime.datetime.fromtimestamp(ts // 1000000, datetime.timezone.utc).replace( + microsecond=ts % 1000000, tzinfo=None ) @@ -550,7 +587,12 @@ def fromInternal(self, micros: int) -> Optional[datetime.timedelta]: class YearMonthIntervalType(AnsiIntervalType): - """YearMonthIntervalType, represents year-month intervals of the SQL standard""" + """YearMonthIntervalType, represents year-month intervals of the SQL standard + + Notes + ----- + This data type doesn't support collection: df.collect/take/head. + """ YEAR = 0 MONTH = 1 @@ -592,6 +634,24 @@ def _str_repr(self) -> str: jsonValue = _str_repr + def needConversion(self) -> bool: + # If PYSPARK_YM_INTERVAL_LEGACY is not set, needConversion is true, + # 'df.collect' fails with PySparkNotImplementedError; + # otherwise, no conversion is needed, and 'df.collect' returns the internal integers. 
+ return not os.environ.get("PYSPARK_YM_INTERVAL_LEGACY") == "1" + + def toInternal(self, obj: Any) -> Any: + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "YearMonthIntervalType.toInternal"}, + ) + + def fromInternal(self, obj: Any) -> Any: + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "YearMonthIntervalType.fromInternal"}, + ) + def __repr__(self) -> str: return "%s(%d, %d)" % (type(self).__name__, self.startField, self.endField) @@ -609,6 +669,21 @@ class CalendarIntervalType(DataType, metaclass=DataTypeSingleton): def typeName(cls) -> str: return "interval" + def needConversion(self) -> bool: + return True + + def toInternal(self, obj: Any) -> Any: + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "CalendarIntervalType.toInternal"}, + ) + + def fromInternal(self, obj: Any) -> Any: + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": "CalendarIntervalType.fromInternal"}, + ) + class ArrayType(DataType): """Array data type. @@ -693,8 +768,16 @@ def jsonValue(self) -> Dict[str, Any]: } @classmethod - def fromJson(cls, json: Dict[str, Any]) -> "ArrayType": - return ArrayType(_parse_datatype_json_value(json["elementType"]), json["containsNull"]) + def fromJson( + cls, + json: Dict[str, Any], + fieldPath: str, + collationsMap: Optional[Dict[str, str]], + ) -> "ArrayType": + elementType = _parse_datatype_json_value( + json["elementType"], fieldPath + ".element", collationsMap + ) + return ArrayType(elementType, json["containsNull"]) def needConversion(self) -> bool: return self.elementType.needConversion() @@ -709,6 +792,21 @@ def fromInternal(self, obj: List[Optional[T]]) -> List[Optional[T]]: return obj return obj and [self.elementType.fromInternal(v) for v in obj] + def _build_formatted_string( + self, + prefix: str, + stringConcat: StringConcat, + maxDepth: int = JVM_INT_MAX, + ) -> None: + if maxDepth > 0: + stringConcat.append( + f"{prefix}-- element: {DataType._get_jvm_type_name(self.elementType)} " + + f"(containsNull = {str(self.containsNull).lower()})\n" + ) + DataType._data_type_build_formatted_string( + self.elementType, f"{prefix} |", stringConcat, maxDepth + ) + class MapType(DataType): """Map data type. 
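For nested types, the collation map keys are field paths built with ".element", ".key" and ".value" suffixes, which is what the new fromJson overloads thread through. A sketch of the array case, matching the array_in_schema round-trip tested earlier:

import json
from pyspark.sql.types import (
    ArrayType, StringType, StructField, StructType, _parse_datatype_json_string,
)

schema = StructType([StructField("arr", ArrayType(StringType("UNICODE")))])

field = json.loads(schema.json())["fields"][0]
print(field["type"]["elementType"])        # 'string'
print(field["metadata"]["__COLLATIONS"])   # {'arr.element': 'icu.UNICODE'}

assert _parse_datatype_json_string(schema.json()) == schema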
@@ -810,10 +908,19 @@ def jsonValue(self) -> Dict[str, Any]: } @classmethod - def fromJson(cls, json: Dict[str, Any]) -> "MapType": + def fromJson( + cls, + json: Dict[str, Any], + fieldPath: str, + collationsMap: Optional[Dict[str, str]], + ) -> "MapType": + keyType = _parse_datatype_json_value(json["keyType"], fieldPath + ".key", collationsMap) + valueType = _parse_datatype_json_value( + json["valueType"], fieldPath + ".value", collationsMap + ) return MapType( - _parse_datatype_json_value(json["keyType"]), - _parse_datatype_json_value(json["valueType"]), + keyType, + valueType, json["valueContainsNull"], ) @@ -834,6 +941,25 @@ def fromInternal(self, obj: Dict[T, Optional[U]]) -> Dict[T, Optional[U]]: (self.keyType.fromInternal(k), self.valueType.fromInternal(v)) for k, v in obj.items() ) + def _build_formatted_string( + self, + prefix: str, + stringConcat: StringConcat, + maxDepth: int = JVM_INT_MAX, + ) -> None: + if maxDepth > 0: + stringConcat.append(f"{prefix}-- key: {DataType._get_jvm_type_name(self.keyType)}\n") + DataType._data_type_build_formatted_string( + self.keyType, f"{prefix} |", stringConcat, maxDepth + ) + stringConcat.append( + f"{prefix}-- value: {DataType._get_jvm_type_name(self.valueType)} " + + f"(valueContainsNull = {str(self.valueContainsNull).lower()})\n" + ) + DataType._data_type_build_formatted_string( + self.valueType, f"{prefix} |", stringConcat, maxDepth + ) + class StructField(DataType): """A field in :class:`StructType`. @@ -884,22 +1010,89 @@ def __repr__(self) -> str: return "StructField('%s', %s, %s)" % (self.name, self.dataType, str(self.nullable)) def jsonValue(self) -> Dict[str, Any]: + collationMetadata = self.getCollationMetadata() + metadata = ( + self.metadata + if not collationMetadata + else {**self.metadata, _COLLATIONS_METADATA_KEY: collationMetadata} + ) + return { "name": self.name, "type": self.dataType.jsonValue(), "nullable": self.nullable, - "metadata": self.metadata, + "metadata": metadata, } @classmethod def fromJson(cls, json: Dict[str, Any]) -> "StructField": + metadata = json.get("metadata") + collationsMap = {} + if metadata and _COLLATIONS_METADATA_KEY in metadata: + collationsMap = metadata[_COLLATIONS_METADATA_KEY] + for key, value in collationsMap.items(): + nameParts = value.split(".") + assert len(nameParts) == 2 + provider, name = nameParts[0], nameParts[1] + _assert_valid_collation_provider(provider) + collationsMap[key] = name + + metadata = { + key: value for key, value in metadata.items() if key != _COLLATIONS_METADATA_KEY + } + return StructField( json["name"], - _parse_datatype_json_value(json["type"]), + _parse_datatype_json_value(json["type"], json["name"], collationsMap), json.get("nullable", True), - json.get("metadata"), + metadata, ) + def getCollationsMap(self, metadata: Dict[str, Any]) -> Dict[str, str]: + if not metadata or _COLLATIONS_METADATA_KEY not in metadata: + return {} + + collationMetadata: Dict[str, str] = metadata[_COLLATIONS_METADATA_KEY] + collationsMap: Dict[str, str] = {} + + for key, value in collationMetadata.items(): + nameParts = value.split(".") + assert len(nameParts) == 2 + provider, name = nameParts[0], nameParts[1] + _assert_valid_collation_provider(provider) + collationsMap[key] = name + + return collationsMap + + def getCollationMetadata(self) -> Dict[str, str]: + def visitRecursively(dt: DataType, fieldPath: str) -> None: + if isinstance(dt, ArrayType): + processDataType(dt.elementType, fieldPath + ".element") + elif isinstance(dt, MapType): + processDataType(dt.keyType, fieldPath + 
".key") + processDataType(dt.valueType, fieldPath + ".value") + elif isinstance(dt, StringType) and self._isCollatedString(dt): + collationMetadata[fieldPath] = self.schemaCollationValue(dt) + + def processDataType(dt: DataType, fieldPath: str) -> None: + if self._isCollatedString(dt): + collationMetadata[fieldPath] = self.schemaCollationValue(dt) + else: + visitRecursively(dt, fieldPath) + + collationMetadata: Dict[str, str] = {} + visitRecursively(self.dataType, self.name) + return collationMetadata + + def _isCollatedString(self, dt: DataType) -> bool: + return isinstance(dt, StringType) and not dt.isUTF8BinaryCollation() + + def schemaCollationValue(self, dt: DataType) -> str: + assert isinstance(dt, StringType) + collationName = dt.collation + provider = StringType.collationProvider(collationName) + return f"{provider}.{collationName}" + def needConversion(self) -> bool: return self.dataType.needConversion() @@ -915,6 +1108,22 @@ def typeName(self) -> str: # type: ignore[override] message_parameters={}, ) + def _build_formatted_string( + self, + prefix: str, + stringConcat: StringConcat, + maxDepth: int = JVM_INT_MAX, + ) -> None: + if maxDepth > 0: + stringConcat.append( + f"{prefix}-- {escape_meta_characters(self.name)}: " + + f"{DataType._get_jvm_type_name(self.dataType)} " + + f"(nullable = {str(self.nullable).lower()})\n" + ) + DataType._data_type_build_formatted_string( + self.dataType, f"{prefix} |", stringConcat, maxDepth + ) + class StructType(DataType): """Struct type, consisting of a list of :class:`StructField`. @@ -1335,6 +1544,24 @@ def fromInternal(self, obj: Tuple) -> "Row": values = obj return _create_row(self.names, values) + def _build_formatted_string( + self, + prefix: str, + stringConcat: StringConcat, + maxDepth: int = JVM_INT_MAX, + ) -> None: + for field in self.fields: + field._build_formatted_string(prefix, stringConcat, maxDepth) + + def treeString(self, maxDepth: int = JVM_INT_MAX) -> str: + stringConcat = StringConcat() + stringConcat.append("root\n") + prefix = " |" + depth = maxDepth if maxDepth > 0 else JVM_INT_MAX + for field in self.fields: + field._build_formatted_string(prefix, stringConcat, depth) + return stringConcat.toString() + class VariantType(AtomicType): """ @@ -1521,6 +1748,19 @@ def toPython(self) -> Any: """ return VariantUtils.to_python(self.value, self.metadata) + def toJson(self, zone_id: str = "UTC") -> str: + """ + Convert the VariantVal to a JSON string. The zone ID represents the time zone that the + timestamp should be printed in. It is defaulted to UTC. The list of valid zone IDs can be + found by importing the `zoneinfo` module and running :code:`zoneinfo.available_timezones()`. + + Returns + ------- + str + A JSON string that represents the Variant. 
+ """ + return VariantUtils.to_json(self.value, self.metadata, zone_id) + _atomic_types: List[Type[DataType]] = [ StringType, @@ -1540,21 +1780,67 @@ def toPython(self) -> Any: TimestampNTZType, NullType, VariantType, + YearMonthIntervalType, + DayTimeIntervalType, ] -_all_atomic_types: Dict[str, Type[DataType]] = dict((t.typeName(), t) for t in _atomic_types) -_complex_types: List[Type[Union[ArrayType, MapType, StructType]]] = [ArrayType, MapType, StructType] -_all_complex_types: Dict[str, Type[Union[ArrayType, MapType, StructType]]] = dict( - (v.typeName(), v) for v in _complex_types -) +_complex_types: List[Type[Union[ArrayType, MapType, StructType]]] = [ + ArrayType, + MapType, + StructType, +] +_all_complex_types: Dict[str, Type[Union[ArrayType, MapType, StructType]]] = { + "array": ArrayType, + "map": MapType, + "struct": StructType, +} + +# Datatypes that can be directly parsed by mapping a json string without regex. +# This dict should be only used in json parsing. +# Note that: +# 1, CharType and VarcharType are not listed here, since they need regex; +# 2, DecimalType can be parsed by both mapping ('decimal') and regex ('decimal(10, 2)'); +# 3, CalendarIntervalType is not an atomic type, but can be mapped by 'interval'; +_all_mappable_types: Dict[str, Type[DataType]] = { + "string": StringType, + "binary": BinaryType, + "boolean": BooleanType, + "decimal": DecimalType, + "float": FloatType, + "double": DoubleType, + "byte": ByteType, + "short": ShortType, + "integer": IntegerType, + "long": LongType, + "date": DateType, + "timestamp": TimestampType, + "timestamp_ntz": TimestampNTZType, + "void": NullType, + "variant": VariantType, + "interval": CalendarIntervalType, +} -_COLLATED_STRING = re.compile(r"string\s+collate\s+([\w_]+|`[\w_]`)") _LENGTH_CHAR = re.compile(r"char\(\s*(\d+)\s*\)") _LENGTH_VARCHAR = re.compile(r"varchar\(\s*(\d+)\s*\)") _FIXED_DECIMAL = re.compile(r"decimal\(\s*(\d+)\s*,\s*(-?\d+)\s*\)") _INTERVAL_DAYTIME = re.compile(r"interval (day|hour|minute|second)( to (day|hour|minute|second))?") _INTERVAL_YEARMONTH = re.compile(r"interval (year|month)( to (year|month))?") +_COLLATIONS_METADATA_KEY = "__COLLATIONS" + + +def _drop_metadata(d: Union[DataType, StructField]) -> Union[DataType, StructField]: + assert isinstance(d, (DataType, StructField)) + if isinstance(d, StructField): + return StructField(d.name, _drop_metadata(d.dataType), d.nullable, None) + elif isinstance(d, StructType): + return StructType([cast(StructField, _drop_metadata(f)) for f in d.fields]) + elif isinstance(d, ArrayType): + return ArrayType(_drop_metadata(d.elementType), d.containsNull) + elif isinstance(d, MapType): + return MapType(_drop_metadata(d.keyType), _drop_metadata(d.valueType), d.valueContainsNull) + return d + def _parse_datatype_string(s: str) -> DataType: """ @@ -1600,35 +1886,48 @@ def _parse_datatype_string(s: str) -> DataType: ... ParseException:... 
""" - from py4j.java_gateway import JVMView + from pyspark.sql.utils import is_remote - sc = get_active_spark_context() + if is_remote(): + from pyspark.sql.connect.session import SparkSession - def from_ddl_schema(type_str: str) -> DataType: - return _parse_datatype_json_string( - cast(JVMView, sc._jvm).org.apache.spark.sql.types.StructType.fromDDL(type_str).json() + return cast( + DataType, + SparkSession.active()._client._analyze(method="ddl_parse", ddl_string=s).parsed, ) - def from_ddl_datatype(type_str: str) -> DataType: - return _parse_datatype_json_string( - cast(JVMView, sc._jvm) - .org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str) - .json() - ) + else: + from py4j.java_gateway import JVMView + + sc = get_active_spark_context() + + def from_ddl_schema(type_str: str) -> DataType: + return _parse_datatype_json_string( + cast(JVMView, sc._jvm) + .org.apache.spark.sql.types.StructType.fromDDL(type_str) + .json() + ) + + def from_ddl_datatype(type_str: str) -> DataType: + return _parse_datatype_json_string( + cast(JVMView, sc._jvm) + .org.apache.spark.sql.api.python.PythonSQLUtils.parseDataType(type_str) + .json() + ) - try: - # DDL format, "fieldname datatype, fieldname datatype". - return from_ddl_schema(s) - except Exception as e: try: - # For backwards compatibility, "integer", "struct" and etc. - return from_ddl_datatype(s) - except BaseException: + # DDL format, "fieldname datatype, fieldname datatype". + return from_ddl_schema(s) + except Exception as e: try: - # For backwards compatibility, "fieldname: datatype, fieldname: datatype" case. - return from_ddl_datatype("struct<%s>" % s.strip()) + # For backwards compatibility, "integer", "struct" and etc. + return from_ddl_datatype(s) except BaseException: - raise e + try: + # For backwards compatibility, "fieldname: datatype, fieldname: datatype" case. + return from_ddl_datatype("struct<%s>" % s.strip()) + except BaseException: + raise e def _parse_datatype_json_string(json_string: str) -> DataType: @@ -1644,11 +1943,8 @@ def _parse_datatype_json_string(json_string: str) -> DataType: ... python_datatype = _parse_datatype_json_string(scala_datatype.json()) ... assert datatype == python_datatype ... - >>> for cls in _all_atomic_types.values(): - ... if cls is not VarcharType and cls is not CharType: - ... check_datatype(cls()) - ... else: - ... check_datatype(cls(1)) + >>> for cls in _all_mappable_types.values(): + ... check_datatype(cls()) >>> # Simple ArrayType. 
>>> simple_arraytype = ArrayType(StringType(), True) @@ -1689,12 +1985,18 @@ def _parse_datatype_json_string(json_string: str) -> DataType: return _parse_datatype_json_value(json.loads(json_string)) -def _parse_datatype_json_value(json_value: Union[dict, str]) -> DataType: +def _parse_datatype_json_value( + json_value: Union[dict, str], + fieldPath: str = "", + collationsMap: Optional[Dict[str, str]] = None, +) -> DataType: if not isinstance(json_value, dict): - if json_value in _all_atomic_types.keys(): - return _all_atomic_types[json_value]() - elif json_value == "decimal": - return DecimalType() + if json_value in _all_mappable_types.keys(): + if collationsMap is not None and fieldPath in collationsMap: + _assert_valid_type_for_collation(fieldPath, json_value, collationsMap) + collation_name = collationsMap[fieldPath] + return StringType(collation_name) + return _all_mappable_types[json_value]() elif _FIXED_DECIMAL.match(json_value): m = _FIXED_DECIMAL.match(json_value) return DecimalType(int(m.group(1)), int(m.group(2))) # type: ignore[union-attr] @@ -1714,11 +2016,6 @@ def _parse_datatype_json_value(json_value: Union[dict, str]) -> DataType: if first_field is not None and second_field is None: return YearMonthIntervalType(first_field) return YearMonthIntervalType(first_field, second_field) - elif json_value == "interval": - return CalendarIntervalType() - elif _COLLATED_STRING.match(json_value): - m = _COLLATED_STRING.match(json_value) - return StringType(m.group(1)) # type: ignore[union-attr] elif _LENGTH_CHAR.match(json_value): m = _LENGTH_CHAR.match(json_value) return CharType(int(m.group(1))) # type: ignore[union-attr] @@ -1733,7 +2030,15 @@ def _parse_datatype_json_value(json_value: Union[dict, str]) -> DataType: else: tpe = json_value["type"] if tpe in _all_complex_types: - return _all_complex_types[tpe].fromJson(json_value) + if collationsMap is not None and fieldPath in collationsMap: + _assert_valid_type_for_collation(fieldPath, tpe, collationsMap) + + complex_type = _all_complex_types[tpe] + if complex_type is ArrayType: + return ArrayType.fromJson(json_value, fieldPath, collationsMap) + elif complex_type is MapType: + return MapType.fromJson(json_value, fieldPath, collationsMap) + return StructType.fromJson(json_value) elif tpe == "udt": return UserDefinedType.fromJson(json_value) else: @@ -1743,6 +2048,27 @@ def _parse_datatype_json_value(json_value: Union[dict, str]) -> DataType: ) +def _assert_valid_type_for_collation( + fieldPath: str, fieldType: Any, collationMap: Dict[str, str] +) -> None: + if fieldPath in collationMap and fieldType != "string": + raise PySparkTypeError( + error_class="INVALID_JSON_DATA_TYPE_FOR_COLLATIONS", + message_parameters={"jsonType": fieldType}, + ) + + +def _assert_valid_collation_provider(provider: str) -> None: + if provider.lower() not in StringType.providers: + raise PySparkValueError( + error_class="COLLATION_INVALID_PROVIDER", + message_parameters={ + "provider": provider, + "supportedProviders": ", ".join(StringType.providers), + }, + ) + + # Mapping Python types to Spark SQL DataType _type_mappings = { type(None): NullType, @@ -1862,6 +2188,7 @@ def _infer_type( obj: Any, infer_dict_as_struct: bool = False, infer_array_from_first_element: bool = False, + infer_map_from_first_pair: bool = False, prefer_timestamp_ntz: bool = False, ) -> DataType: """Infer the DataType from obj""" @@ -1897,12 +2224,13 @@ def _infer_type( value, infer_dict_as_struct, infer_array_from_first_element, + infer_map_from_first_pair, prefer_timestamp_ntz, ), 
True, ) return struct - else: + elif infer_map_from_first_pair: for key, value in obj.items(): if key is not None and value is not None: return MapType( @@ -1910,28 +2238,72 @@ def _infer_type( key, infer_dict_as_struct, infer_array_from_first_element, + infer_map_from_first_pair, prefer_timestamp_ntz, ), _infer_type( value, infer_dict_as_struct, infer_array_from_first_element, + infer_map_from_first_pair, prefer_timestamp_ntz, ), True, ) return MapType(NullType(), NullType(), True) + else: + key_type: DataType = NullType() + value_type: DataType = NullType() + for key, value in obj.items(): + if key is not None: + key_type = _merge_type( + key_type, + _infer_type( + key, + infer_dict_as_struct, + infer_array_from_first_element, + infer_map_from_first_pair, + prefer_timestamp_ntz, + ), + ) + if value is not None: + value_type = _merge_type( + value_type, + _infer_type( + value, + infer_dict_as_struct, + infer_array_from_first_element, + infer_map_from_first_pair, + prefer_timestamp_ntz, + ), + ) + + return MapType(key_type, value_type, True) elif isinstance(obj, list): if len(obj) > 0: if infer_array_from_first_element: return ArrayType( - _infer_type(obj[0], infer_dict_as_struct, prefer_timestamp_ntz), True + _infer_type( + obj[0], + infer_dict_as_struct, + infer_array_from_first_element, + prefer_timestamp_ntz, + ), + True, ) else: return ArrayType( reduce( _merge_type, - (_infer_type(v, infer_dict_as_struct, prefer_timestamp_ntz) for v in obj), + ( + _infer_type( + v, + infer_dict_as_struct, + infer_array_from_first_element, + prefer_timestamp_ntz, + ) + for v in obj + ), ), True, ) @@ -1950,6 +2322,7 @@ def _infer_type( obj, infer_dict_as_struct=infer_dict_as_struct, infer_array_from_first_element=infer_array_from_first_element, + prefer_timestamp_ntz=prefer_timestamp_ntz, ) except TypeError: raise PySparkTypeError( @@ -1963,6 +2336,7 @@ def _infer_schema( names: Optional[List[str]] = None, infer_dict_as_struct: bool = False, infer_array_from_first_element: bool = False, + infer_map_from_first_pair: bool = False, prefer_timestamp_ntz: bool = False, ) -> StructType: """Infer the schema from dict/namedtuple/object""" @@ -2001,6 +2375,7 @@ def _infer_schema( v, infer_dict_as_struct, infer_array_from_first_element, + infer_map_from_first_pair, prefer_timestamp_ntz, ), True, diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index 0d0fc9042e627..3d19a2b5458bd 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -27,7 +27,7 @@ from pyspark.util import PythonEvalType -from pyspark.sql.column import Column, _to_java_expr, _to_seq +from pyspark.sql.column import Column from pyspark.sql.types import ( DataType, StringType, @@ -205,54 +205,45 @@ def __init__( self.evalType = evalType self.deterministic = deterministic - @property - def returnType(self) -> DataType: - # This makes sure this is called after SparkContext is initialized. - # ``_parse_datatype_string`` accesses to JVM for parsing a DDL formatted string. 
- # TODO: PythonEvalType.SQL_BATCHED_UDF - if self._returnType_placeholder is None: - if isinstance(self._returnType, DataType): - self._returnType_placeholder = self._returnType - else: - self._returnType_placeholder = _parse_datatype_string(self._returnType) - if self.evalType == PythonEvalType.SQL_ARROW_BATCHED_UDF: + @staticmethod + def _check_return_type(returnType: DataType, evalType: int) -> None: + if evalType == PythonEvalType.SQL_ARROW_BATCHED_UDF: try: - to_arrow_type(self._returnType_placeholder) + to_arrow_type(returnType) except TypeError: raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ "feature": f"Invalid return type with Arrow-optimized Python UDF: " - f"{self._returnType_placeholder}" + f"{returnType}" }, ) elif ( - self.evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF - or self.evalType == PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF + evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF + or evalType == PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF ): try: - to_arrow_type(self._returnType_placeholder) + to_arrow_type(returnType) except TypeError: raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ - "feature": f"Invalid return type with scalar Pandas UDFs: " - f"{self._returnType_placeholder}" + "feature": f"Invalid return type with scalar Pandas UDFs: " f"{returnType}" }, ) elif ( - self.evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF - or self.evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF_WITH_STATE + evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF + or evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF_WITH_STATE ): - if isinstance(self._returnType_placeholder, StructType): + if isinstance(returnType, StructType): try: - to_arrow_type(self._returnType_placeholder) + to_arrow_type(returnType) except TypeError: raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ "feature": f"Invalid return type with grouped map Pandas UDFs or " - f"at groupby.applyInPandas(WithState): {self._returnType_placeholder}" + f"at groupby.applyInPandas(WithState): {returnType}" }, ) else: @@ -261,22 +252,21 @@ def returnType(self) -> DataType: message_parameters={ "eval_type": "SQL_GROUPED_MAP_PANDAS_UDF or " "SQL_GROUPED_MAP_PANDAS_UDF_WITH_STATE", - "return_type": str(self._returnType_placeholder), + "return_type": str(returnType), }, ) elif ( - self.evalType == PythonEvalType.SQL_MAP_PANDAS_ITER_UDF - or self.evalType == PythonEvalType.SQL_MAP_ARROW_ITER_UDF + evalType == PythonEvalType.SQL_MAP_PANDAS_ITER_UDF + or evalType == PythonEvalType.SQL_MAP_ARROW_ITER_UDF ): - if isinstance(self._returnType_placeholder, StructType): + if isinstance(returnType, StructType): try: - to_arrow_type(self._returnType_placeholder) + to_arrow_type(returnType) except TypeError: raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ - "feature": f"Invalid return type in mapInPandas: " - f"{self._returnType_placeholder}" + "feature": f"Invalid return type in mapInPandas: " f"{returnType}" }, ) else: @@ -284,19 +274,19 @@ def returnType(self) -> DataType: error_class="INVALID_RETURN_TYPE_FOR_PANDAS_UDF", message_parameters={ "eval_type": "SQL_MAP_PANDAS_ITER_UDF or SQL_MAP_ARROW_ITER_UDF", - "return_type": str(self._returnType_placeholder), + "return_type": str(returnType), }, ) - elif self.evalType == PythonEvalType.SQL_GROUPED_MAP_ARROW_UDF: - if isinstance(self._returnType_placeholder, StructType): + elif evalType == PythonEvalType.SQL_GROUPED_MAP_ARROW_UDF: + 
if isinstance(returnType, StructType): try: - to_arrow_type(self._returnType_placeholder) + to_arrow_type(returnType) except TypeError: raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ "feature": "Invalid return type with grouped map Arrow UDFs or " - f"at groupby.applyInArrow: {self._returnType_placeholder}" + f"at groupby.applyInArrow: {returnType}" }, ) else: @@ -304,19 +294,19 @@ def returnType(self) -> DataType: error_class="INVALID_RETURN_TYPE_FOR_ARROW_UDF", message_parameters={ "eval_type": "SQL_GROUPED_MAP_ARROW_UDF", - "return_type": str(self._returnType_placeholder), + "return_type": str(returnType), }, ) - elif self.evalType == PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF: - if isinstance(self._returnType_placeholder, StructType): + elif evalType == PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF: + if isinstance(returnType, StructType): try: - to_arrow_type(self._returnType_placeholder) + to_arrow_type(returnType) except TypeError: raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ "feature": f"Invalid return type in cogroup.applyInPandas: " - f"{self._returnType_placeholder}" + f"{returnType}" }, ) else: @@ -324,19 +314,19 @@ def returnType(self) -> DataType: error_class="INVALID_RETURN_TYPE_FOR_PANDAS_UDF", message_parameters={ "eval_type": "SQL_COGROUPED_MAP_PANDAS_UDF", - "return_type": str(self._returnType_placeholder), + "return_type": str(returnType), }, ) - elif self.evalType == PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF: - if isinstance(self._returnType_placeholder, StructType): + elif evalType == PythonEvalType.SQL_COGROUPED_MAP_ARROW_UDF: + if isinstance(returnType, StructType): try: - to_arrow_type(self._returnType_placeholder) + to_arrow_type(returnType) except TypeError: raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ "feature": "Invalid return type in cogroup.applyInArrow: " - f"{self._returnType_placeholder}" + f"{returnType}" }, ) else: @@ -344,30 +334,42 @@ def returnType(self) -> DataType: error_class="INVALID_RETURN_TYPE_FOR_ARROW_UDF", message_parameters={ "eval_type": "SQL_COGROUPED_MAP_ARROW_UDF", - "return_type": str(self._returnType_placeholder), + "return_type": str(returnType), }, ) - elif self.evalType == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF: + elif evalType == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF: try: # StructType is not yet allowed as a return type, explicitly check here to fail fast - if isinstance(self._returnType_placeholder, StructType): + if isinstance(returnType, StructType): raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ "feature": f"Invalid return type with grouped aggregate Pandas UDFs: " - f"{self._returnType_placeholder}" + f"{returnType}" }, ) - to_arrow_type(self._returnType_placeholder) + to_arrow_type(returnType) except TypeError: raise PySparkNotImplementedError( error_class="NOT_IMPLEMENTED", message_parameters={ "feature": f"Invalid return type with grouped aggregate Pandas UDFs: " - f"{self._returnType_placeholder}" + f"{returnType}" }, ) + @property + def returnType(self) -> DataType: + # Make sure this is called after SparkContext is initialized. + # ``_parse_datatype_string`` accesses to JVM for parsing a DDL formatted string. 
+ # TODO: PythonEvalType.SQL_BATCHED_UDF + if self._returnType_placeholder is None: + if isinstance(self._returnType, DataType): + self._returnType_placeholder = self._returnType + else: + self._returnType_placeholder = _parse_datatype_string(self._returnType) + + UserDefinedFunction._check_return_type(self._returnType_placeholder, self.evalType) return self._returnType_placeholder @property @@ -395,6 +397,8 @@ def _create_judf(self, func: Callable[..., Any]) -> "JavaObject": return judf def __call__(self, *args: "ColumnOrName", **kwargs: "ColumnOrName") -> Column: + from pyspark.sql.classic.column import _to_java_expr, _to_seq + sc = get_active_spark_context() assert sc._jvm is not None diff --git a/python/pyspark/sql/udtf.py b/python/pyspark/sql/udtf.py index 801ecc605e500..f560880202230 100644 --- a/python/pyspark/sql/udtf.py +++ b/python/pyspark/sql/udtf.py @@ -26,7 +26,6 @@ from pyspark.errors import PySparkAttributeError, PySparkPicklingError, PySparkTypeError from pyspark.util import PythonEvalType -from pyspark.sql.column import _to_java_column, _to_java_expr, _to_seq from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version from pyspark.sql.types import DataType, StructType, _parse_datatype_string from pyspark.sql.udf import _wrap_function @@ -374,6 +373,8 @@ def _create_judtf(self, func: Type) -> "JavaObject": return judtf def __call__(self, *args: "ColumnOrName", **kwargs: "ColumnOrName") -> "DataFrame": + from pyspark.sql.classic.column import _to_java_column, _to_java_expr, _to_seq + from pyspark.sql import DataFrame, SparkSession spark = SparkSession._getActiveSessionOrCreate() diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index be46203665714..76227851f9fe6 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -17,7 +17,17 @@ import inspect import functools import os -from typing import Any, Callable, Optional, Sequence, TYPE_CHECKING, cast, TypeVar, Union, Type +from typing import ( + Any, + Callable, + Optional, + List, + Sequence, + TYPE_CHECKING, + cast, + TypeVar, + Union, +) # For backward compatibility. 
from pyspark.errors import ( # noqa: F401 @@ -32,7 +42,7 @@ PySparkNotImplementedError, PySparkRuntimeError, ) -from pyspark.util import is_remote_only +from pyspark.util import is_remote_only, JVM_INT_MAX from pyspark.errors.exceptions.captured import CapturedException # noqa: F401 from pyspark.find_spark_home import _find_spark_home @@ -46,8 +56,6 @@ from pyspark import SparkContext from pyspark.sql.session import SparkSession from pyspark.sql.dataframe import DataFrame - from pyspark.sql.column import Column - from pyspark.sql.window import Window from pyspark.pandas._typing import IndexOpsLike, SeriesOrIndex has_numpy: bool = False @@ -124,6 +132,44 @@ class Java: implements = ["org.apache.spark.sql.execution.streaming.sources.PythonForeachBatchFunction"] +# Python implementation of 'org.apache.spark.sql.catalyst.util.StringConcat' +class StringConcat: + def __init__(self, maxLength: int = JVM_INT_MAX - 15): + self.maxLength: int = maxLength + self.strings: List[str] = [] + self.length: int = 0 + + def atLimit(self) -> bool: + return self.length >= self.maxLength + + def append(self, s: str) -> None: + if s is not None: + sLen = len(s) + if not self.atLimit(): + available = self.maxLength - self.length + stringToAppend = s if available >= sLen else s[0:available] + self.strings.append(stringToAppend) + + self.length = min(self.length + sLen, JVM_INT_MAX - 15) + + def toString(self) -> str: + # finalLength = self.maxLength if self.atLimit() else self.length + return "".join(self.strings) + + +# Python implementation of 'org.apache.spark.util.SparkSchemaUtils.escapeMetaCharacters' +def escape_meta_characters(s: str) -> str: + return ( + s.replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t") + .replace("\f", "\\f") + .replace("\b", "\\b") + .replace("\u000B", "\\v") + .replace("\u0007", "\\a") + ) + + def to_str(value: Any) -> Optional[str]: """ A wrapper over str(), but converts bool values to lower case strings. @@ -242,62 +288,111 @@ def wrapped(*args: Any, **kwargs: Any) -> Any: return cast(FuncT, wrapped) -def try_remote_window(f: FuncT) -> FuncT: +def get_active_spark_context() -> "SparkContext": + """Raise RuntimeError if SparkContext is not initialized, + otherwise, returns the active SparkContext.""" + from pyspark import SparkContext + + sc = SparkContext._active_spark_context + if sc is None or sc._jvm is None: + raise PySparkRuntimeError( + error_class="SESSION_OR_CONTEXT_NOT_EXISTS", + message_parameters={}, + ) + return sc + + +def try_remote_session_classmethod(f: FuncT) -> FuncT: """Mark API supported from Spark Connect.""" @functools.wraps(f) def wrapped(*args: Any, **kwargs: Any) -> Any: if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: - from pyspark.sql.connect.window import Window + from pyspark.sql.connect.session import SparkSession - return getattr(Window, f.__name__)(*args, **kwargs) + assert inspect.isclass(args[0]) + return getattr(SparkSession, f.__name__)(*args[1:], **kwargs) else: return f(*args, **kwargs) return cast(FuncT, wrapped) -def try_remote_windowspec(f: FuncT) -> FuncT: - """Mark API supported from Spark Connect.""" +def dispatch_df_method(f: FuncT) -> FuncT: + """ + For the usecases of direct DataFrame.union(df, ...), it checks if self + is a Connect DataFrame or Classic DataFrame, and dispatches. 
+ """ @functools.wraps(f) def wrapped(*args: Any, **kwargs: Any) -> Any: if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: - from pyspark.sql.connect.window import WindowSpec + from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame - return getattr(WindowSpec, f.__name__)(*args, **kwargs) + if isinstance(args[0], ConnectDataFrame): + return getattr(ConnectDataFrame, f.__name__)(*args, **kwargs) else: - return f(*args, **kwargs) + from pyspark.sql.classic.dataframe import DataFrame as ClassicDataFrame + + if isinstance(args[0], ClassicDataFrame): + return getattr(ClassicDataFrame, f.__name__)(*args, **kwargs) + + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": f"DataFrame.{f.__name__}"}, + ) return cast(FuncT, wrapped) -def get_active_spark_context() -> "SparkContext": - """Raise RuntimeError if SparkContext is not initialized, - otherwise, returns the active SparkContext.""" - from pyspark import SparkContext +def dispatch_col_method(f: FuncT) -> FuncT: + """ + For the usecases of direct Column.method(col, ...), it checks if self + is a Connect DataFrame or Classic DataFrame, and dispatches. + """ - sc = SparkContext._active_spark_context - if sc is None or sc._jvm is None: - raise PySparkRuntimeError( - error_class="SESSION_OR_CONTEXT_NOT_EXISTS", - message_parameters={}, + @functools.wraps(f) + def wrapped(*args: Any, **kwargs: Any) -> Any: + if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: + from pyspark.sql.connect.column import Column as ConnectColumn + + if isinstance(args[0], ConnectColumn): + return getattr(ConnectColumn, f.__name__)(*args, **kwargs) + else: + from pyspark.sql.classic.column import Column as ClassicColumn + + if isinstance(args[0], ClassicColumn): + return getattr(ClassicColumn, f.__name__)(*args, **kwargs) + + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": f"Column.{f.__name__}"}, ) - return sc + return cast(FuncT, wrapped) -def try_remote_session_classmethod(f: FuncT) -> FuncT: - """Mark API supported from Spark Connect.""" + +def dispatch_window_method(f: FuncT) -> FuncT: + """ + For the usecases of direct Window.method(col, ...), it checks if self + is a Connect Window or Classic Window, and dispatches. + """ @functools.wraps(f) def wrapped(*args: Any, **kwargs: Any) -> Any: if is_remote() and "PYSPARK_NO_NAMESPACE_SHARE" not in os.environ: - from pyspark.sql.connect.session import SparkSession + from pyspark.sql.connect.window import Window as ConnectWindow - assert inspect.isclass(args[0]) - return getattr(SparkSession, f.__name__)(*args[1:], **kwargs) + return getattr(ConnectWindow, f.__name__)(*args, **kwargs) else: - return f(*args, **kwargs) + from pyspark.sql.classic.window import Window as ClassicWindow + + return getattr(ClassicWindow, f.__name__)(*args, **kwargs) + + raise PySparkNotImplementedError( + error_class="NOT_IMPLEMENTED", + message_parameters={"feature": f"Window.{f.__name__}"}, + ) return cast(FuncT, wrapped) @@ -309,15 +404,9 @@ def pyspark_column_op( Wrapper function for column_op to get proper Column class. 
""" from pyspark.pandas.base import column_op - from pyspark.sql.column import Column as PySparkColumn + from pyspark.sql.column import Column from pyspark.pandas.data_type_ops.base import _is_extension_dtypes - if is_remote(): - from pyspark.sql.connect.column import Column as ConnectColumn - - Column = ConnectColumn - else: - Column = PySparkColumn # type: ignore[assignment] result = column_op(getattr(Column, func_name))(left, right) # It works as expected on extension dtype, so we don't need to call `fillna` for this case. if (fillna is not None) and (_is_extension_dtypes(left) or _is_extension_dtypes(right)): @@ -326,39 +415,6 @@ def pyspark_column_op( return result.fillna(fillna) if fillna is not None else result -def get_column_class() -> Type["Column"]: - from pyspark.sql.column import Column as PySparkColumn - - if is_remote(): - from pyspark.sql.connect.column import Column as ConnectColumn - - return ConnectColumn # type: ignore[return-value] - else: - return PySparkColumn - - -def get_dataframe_class() -> Type["DataFrame"]: - from pyspark.sql.dataframe import DataFrame as PySparkDataFrame - - if is_remote(): - from pyspark.sql.connect.dataframe import DataFrame as ConnectDataFrame - - return ConnectDataFrame # type: ignore[return-value] - else: - return PySparkDataFrame - - -def get_window_class() -> Type["Window"]: - from pyspark.sql.window import Window as PySparkWindow - - if is_remote(): - from pyspark.sql.connect.window import Window as ConnectWindow - - return ConnectWindow # type: ignore[return-value] - else: - return PySparkWindow - - def get_lit_sql_str(val: str) -> str: # Equivalent to `lit(val)._jc.expr().sql()` for string typed val # See `sql` definition in `sql/catalyst/src/main/scala/org/apache/spark/ diff --git a/python/pyspark/sql/variant_utils.py b/python/pyspark/sql/variant_utils.py index 11dc29503921f..95084fc7d932f 100644 --- a/python/pyspark/sql/variant_utils.py +++ b/python/pyspark/sql/variant_utils.py @@ -15,12 +15,15 @@ # limitations under the License. # +import base64 import decimal +import datetime import json import struct from array import array from typing import Any, Callable, Dict, List, Tuple from pyspark.errors import PySparkValueError +from zoneinfo import ZoneInfo class VariantUtils: @@ -86,19 +89,48 @@ class VariantUtils: DECIMAL8 = 9 # 16-byte decimal. Content is 1-byte scale + 16-byte little-endian signed integer. DECIMAL16 = 10 + # Date value. Content is 4-byte little-endian signed integer that represents the number of days + # from the Unix epoch. + DATE = 11 + # Timestamp value. Content is 8-byte little-endian signed integer that represents the number of + # microseconds elapsed since the Unix epoch, 1970-01-01 00:00:00 UTC. This is a timezone-aware + # field and when reading into a Python datetime object defaults to the UTC timezone. + TIMESTAMP = 12 + # Timestamp_ntz value. It has the same content as `TIMESTAMP` but should always be interpreted + # as if the local time zone is UTC. + TIMESTAMP_NTZ = 13 + # 4-byte IEEE float. + FLOAT = 14 + # Binary value. The content is (4-byte little-endian unsigned integer representing the binary + # size) + (size bytes of binary content). + BINARY = 15 # Long string value. The content is (4-byte little-endian unsigned integer representing the # string size) + (size bytes of string content). 
LONG_STR = 16 U32_SIZE = 4 + EPOCH = datetime.datetime( + year=1970, month=1, day=1, hour=0, minute=0, second=0, tzinfo=datetime.timezone.utc + ) + EPOCH_NTZ = datetime.datetime(year=1970, month=1, day=1, hour=0, minute=0, second=0) + + MAX_DECIMAL4_PRECISION = 9 + MAX_DECIMAL4_VALUE = 10**MAX_DECIMAL4_PRECISION + MAX_DECIMAL8_PRECISION = 18 + MAX_DECIMAL8_VALUE = 10**MAX_DECIMAL8_PRECISION + MAX_DECIMAL16_PRECISION = 38 + MAX_DECIMAL16_VALUE = 10**MAX_DECIMAL16_PRECISION + @classmethod - def to_json(cls, value: bytes, metadata: bytes) -> str: + def to_json(cls, value: bytes, metadata: bytes, zone_id: str = "UTC") -> str: """ - Convert the VariantVal to a JSON string. + Convert the VariantVal to a JSON string. The `zone_id` parameter denotes the time zone that + timestamp fields should be parsed in. It defaults to "UTC". The list of valid zone IDs can + found by importing the `zoneinfo` module and running `zoneinfo.available_timezones()`. :return: JSON string """ - return cls._to_json(value, metadata, 0) + return cls._to_json(value, metadata, 0, zone_id) @classmethod def to_python(cls, value: bytes, metadata: bytes) -> str: @@ -117,7 +149,7 @@ def _read_long(cls, data: bytes, pos: int, num_bytes: int, signed: bool) -> int: @classmethod def _check_index(cls, pos: int, length: int) -> None: if pos < 0 or pos >= length: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) @classmethod def _get_type_info(cls, value: bytes, pos: int) -> Tuple[int, int]: @@ -137,14 +169,14 @@ def _get_metadata_key(cls, metadata: bytes, id: int) -> str: offset_size = ((metadata[0] >> 6) & 0x3) + 1 dict_size = cls._read_long(metadata, 1, offset_size, signed=False) if id >= dict_size: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) string_start = 1 + (dict_size + 2) * offset_size offset = cls._read_long(metadata, 1 + (id + 1) * offset_size, offset_size, signed=False) next_offset = cls._read_long( metadata, 1 + (id + 2) * offset_size, offset_size, signed=False ) if offset > next_offset: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) cls._check_index(string_start + next_offset - 1, len(metadata)) return metadata[string_start + offset : (string_start + next_offset)].decode("utf-8") @@ -155,7 +187,7 @@ def _get_boolean(cls, value: bytes, pos: int) -> bool: if basic_type != VariantUtils.PRIMITIVE or ( type_info != VariantUtils.TRUE and type_info != VariantUtils.FALSE ): - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) return type_info == VariantUtils.TRUE @classmethod @@ -163,16 +195,45 @@ def _get_long(cls, value: bytes, pos: int) -> int: cls._check_index(pos, len(value)) basic_type, type_info = cls._get_type_info(value, pos) if basic_type != VariantUtils.PRIMITIVE: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) if type_info == VariantUtils.INT1: return cls._read_long(value, pos + 1, 1, signed=True) elif type_info == VariantUtils.INT2: return cls._read_long(value, pos + 1, 2, signed=True) - elif type_info == VariantUtils.INT4: + elif type_info == VariantUtils.INT4 or type_info == VariantUtils.DATE: return cls._read_long(value, pos + 1, 4, signed=True) elif type_info 
== VariantUtils.INT8: return cls._read_long(value, pos + 1, 8, signed=True) - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) + + @classmethod + def _get_date(cls, value: bytes, pos: int) -> datetime.date: + cls._check_index(pos, len(value)) + basic_type, type_info = cls._get_type_info(value, pos) + if basic_type != VariantUtils.PRIMITIVE: + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) + if type_info == VariantUtils.DATE: + days_since_epoch = cls._read_long(value, pos + 1, 4, signed=True) + return datetime.date.fromordinal(VariantUtils.EPOCH.toordinal() + days_since_epoch) + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) + + @classmethod + def _get_timestamp(cls, value: bytes, pos: int, zone_id: str) -> datetime.datetime: + cls._check_index(pos, len(value)) + basic_type, type_info = cls._get_type_info(value, pos) + if basic_type != VariantUtils.PRIMITIVE: + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) + if type_info == VariantUtils.TIMESTAMP_NTZ: + microseconds_since_epoch = cls._read_long(value, pos + 1, 8, signed=True) + return VariantUtils.EPOCH_NTZ + datetime.timedelta( + microseconds=microseconds_since_epoch + ) + if type_info == VariantUtils.TIMESTAMP: + microseconds_since_epoch = cls._read_long(value, pos + 1, 8, signed=True) + return ( + VariantUtils.EPOCH + datetime.timedelta(microseconds=microseconds_since_epoch) + ).astimezone(ZoneInfo(zone_id)) + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) @classmethod def _get_string(cls, value: bytes, pos: int) -> str: @@ -191,35 +252,64 @@ def _get_string(cls, value: bytes, pos: int) -> str: length = cls._read_long(value, pos + 1, VariantUtils.U32_SIZE, signed=False) cls._check_index(start + length - 1, len(value)) return value[start : start + length].decode("utf-8") - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) @classmethod def _get_double(cls, value: bytes, pos: int) -> float: cls._check_index(pos, len(value)) basic_type, type_info = cls._get_type_info(value, pos) - if basic_type != VariantUtils.PRIMITIVE or type_info != VariantUtils.DOUBLE: - raise PySparkValueError(error_class="MALFORMED_VARIANT") - return struct.unpack("d", value[pos + 1 : pos + 9])[0] + if basic_type != VariantUtils.PRIMITIVE: + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) + if type_info == VariantUtils.FLOAT: + cls._check_index(pos + 4, len(value)) + return struct.unpack(" None: + # max_unscaled == 10**max_scale, but we pass a literal parameter to avoid redundant + # computation. 
+ if unscaled >= max_unscaled or unscaled <= -max_unscaled or scale > max_scale: + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) @classmethod def _get_decimal(cls, value: bytes, pos: int) -> decimal.Decimal: cls._check_index(pos, len(value)) basic_type, type_info = cls._get_type_info(value, pos) if basic_type != VariantUtils.PRIMITIVE: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) scale = value[pos + 1] unscaled = 0 if type_info == VariantUtils.DECIMAL4: unscaled = cls._read_long(value, pos + 2, 4, signed=True) + cls._check_decimal(unscaled, scale, cls.MAX_DECIMAL4_VALUE, cls.MAX_DECIMAL4_PRECISION) elif type_info == VariantUtils.DECIMAL8: unscaled = cls._read_long(value, pos + 2, 8, signed=True) + cls._check_decimal(unscaled, scale, cls.MAX_DECIMAL8_VALUE, cls.MAX_DECIMAL8_PRECISION) elif type_info == VariantUtils.DECIMAL16: cls._check_index(pos + 17, len(value)) unscaled = int.from_bytes(value[pos + 2 : pos + 18], byteorder="little", signed=True) + cls._check_decimal( + unscaled, scale, cls.MAX_DECIMAL16_VALUE, cls.MAX_DECIMAL16_PRECISION + ) else: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) return decimal.Decimal(unscaled) * (decimal.Decimal(10) ** (-scale)) + @classmethod + def _get_binary(cls, value: bytes, pos: int) -> bytes: + cls._check_index(pos, len(value)) + basic_type, type_info = cls._get_type_info(value, pos) + if basic_type != VariantUtils.PRIMITIVE or type_info != VariantUtils.BINARY: + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) + start = pos + 1 + VariantUtils.U32_SIZE + length = cls._read_long(value, pos + 1, VariantUtils.U32_SIZE, signed=False) + cls._check_index(start + length - 1, len(value)) + return bytes(value[start : start + length]) + @classmethod def _get_type(cls, value: bytes, pos: int) -> Any: """ @@ -244,7 +334,7 @@ def _get_type(cls, value: bytes, pos: int) -> Any: or type_info == VariantUtils.INT8 ): return int - elif type_info == VariantUtils.DOUBLE: + elif type_info == VariantUtils.DOUBLE or type_info == VariantUtils.FLOAT: return float elif ( type_info == VariantUtils.DECIMAL4 @@ -252,18 +342,24 @@ def _get_type(cls, value: bytes, pos: int) -> Any: or type_info == VariantUtils.DECIMAL16 ): return decimal.Decimal + elif type_info == VariantUtils.BINARY: + return bytes + elif type_info == VariantUtils.DATE: + return datetime.date + elif type_info == VariantUtils.TIMESTAMP or type_info == VariantUtils.TIMESTAMP_NTZ: + return datetime.datetime elif type_info == VariantUtils.LONG_STR: return str - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) @classmethod - def _to_json(cls, value: bytes, metadata: bytes, pos: int) -> Any: + def _to_json(cls, value: bytes, metadata: bytes, pos: int, zone_id: str) -> str: variant_type = cls._get_type(value, pos) if variant_type == dict: def handle_object(key_value_pos_list: List[Tuple[str, int]]) -> str: key_value_list = [ - json.dumps(key) + ":" + cls._to_json(value, metadata, value_pos) + json.dumps(key) + ":" + cls._to_json(value, metadata, value_pos, zone_id) for (key, value_pos) in key_value_pos_list ] return "{" + ",".join(key_value_list) + "}" @@ -273,19 +369,25 @@ def handle_object(key_value_pos_list: List[Tuple[str, int]]) -> str: def handle_array(value_pos_list: 
List[int]) -> str: value_list = [ - cls._to_json(value, metadata, value_pos) for value_pos in value_pos_list + cls._to_json(value, metadata, value_pos, zone_id) + for value_pos in value_pos_list ] return "[" + ",".join(value_list) + "]" return cls._handle_array(value, pos, handle_array) else: - value = cls._get_scalar(variant_type, value, metadata, pos) + value = cls._get_scalar(variant_type, value, metadata, pos, zone_id) if value is None: return "null" if type(value) == bool: return "true" if value else "false" if type(value) == str: return json.dumps(value) + if type(value) == bytes: + # decoding simply converts byte array to string + return '"' + base64.b64encode(value).decode("utf-8") + '"' + if type(value) == datetime.date or type(value) == datetime.datetime: + return '"' + str(value) + '"' return str(value) @classmethod @@ -311,10 +413,12 @@ def handle_array(value_pos_list: List[int]) -> List[Any]: return cls._handle_array(value, pos, handle_array) else: - return cls._get_scalar(variant_type, value, metadata, pos) + return cls._get_scalar(variant_type, value, metadata, pos, zone_id="UTC") @classmethod - def _get_scalar(cls, variant_type: Any, value: bytes, metadata: bytes, pos: int) -> Any: + def _get_scalar( + cls, variant_type: Any, value: bytes, metadata: bytes, pos: int, zone_id: str + ) -> Any: if isinstance(None, variant_type): return None elif variant_type == bool: @@ -327,8 +431,14 @@ def _get_scalar(cls, variant_type: Any, value: bytes, metadata: bytes, pos: int) return cls._get_double(value, pos) elif variant_type == decimal.Decimal: return cls._get_decimal(value, pos) + elif variant_type == bytes: + return cls._get_binary(value, pos) + elif variant_type == datetime.date: + return cls._get_date(value, pos) + elif variant_type == datetime.datetime: + return cls._get_timestamp(value, pos, zone_id) else: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) @classmethod def _handle_object( @@ -341,7 +451,7 @@ def _handle_object( cls._check_index(pos, len(value)) basic_type, type_info = cls._get_type_info(value, pos) if basic_type != VariantUtils.OBJECT: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) large_size = ((type_info >> 4) & 0x1) != 0 size_bytes = VariantUtils.U32_SIZE if large_size else 1 num_fields = cls._read_long(value, pos + 1, size_bytes, signed=False) @@ -370,7 +480,7 @@ def _handle_array(cls, value: bytes, pos: int, func: Callable[[List[int]], Any]) cls._check_index(pos, len(value)) basic_type, type_info = cls._get_type_info(value, pos) if basic_type != VariantUtils.ARRAY: - raise PySparkValueError(error_class="MALFORMED_VARIANT") + raise PySparkValueError(error_class="MALFORMED_VARIANT", message_parameters={}) large_size = ((type_info >> 2) & 0x1) != 0 size_bytes = VariantUtils.U32_SIZE if large_size else 1 num_fields = cls._read_long(value, pos + 1, size_bytes, signed=False) diff --git a/python/pyspark/sql/window.py b/python/pyspark/sql/window.py index 42d50dc1b3bdf..22c9f697acde3 100644 --- a/python/pyspark/sql/window.py +++ b/python/pyspark/sql/window.py @@ -14,14 +14,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + +# mypy: disable-error-code="empty-body" + import sys -from typing import cast, Iterable, List, Tuple, TYPE_CHECKING, Union +from typing import List, TYPE_CHECKING, Union -from pyspark.sql.column import _to_seq, _to_java_column -from pyspark.sql.utils import ( - try_remote_window, - try_remote_windowspec, - get_active_spark_context, +from pyspark.sql.utils import dispatch_window_method +from pyspark.util import ( + JVM_LONG_MIN, + JVM_LONG_MAX, ) if TYPE_CHECKING: @@ -31,13 +33,6 @@ __all__ = ["Window", "WindowSpec"] -def _to_java_cols(cols: Tuple[Union["ColumnOrName", List["ColumnOrName_"]], ...]) -> "JavaObject": - if len(cols) == 1 and isinstance(cols[0], list): - cols = cols[0] # type: ignore[assignment] - sc = get_active_spark_context() - return _to_seq(sc, cast(Iterable["ColumnOrName"], cols), _to_java_column) - - class Window: """ Utility functions for defining window in DataFrames. @@ -62,19 +57,17 @@ class Window: >>> window = Window.orderBy("date").partitionBy("country").rangeBetween(-3, 3) """ - _JAVA_MIN_LONG = -(1 << 63) # -9223372036854775808 - _JAVA_MAX_LONG = (1 << 63) - 1 # 9223372036854775807 - _PRECEDING_THRESHOLD = max(-sys.maxsize, _JAVA_MIN_LONG) - _FOLLOWING_THRESHOLD = min(sys.maxsize, _JAVA_MAX_LONG) + _PRECEDING_THRESHOLD = max(-sys.maxsize, JVM_LONG_MIN) + _FOLLOWING_THRESHOLD = min(sys.maxsize, JVM_LONG_MAX) - unboundedPreceding: int = _JAVA_MIN_LONG + unboundedPreceding: int = JVM_LONG_MIN - unboundedFollowing: int = _JAVA_MAX_LONG + unboundedFollowing: int = JVM_LONG_MAX currentRow: int = 0 @staticmethod - @try_remote_window + @dispatch_window_method def partitionBy(*cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> "WindowSpec": """ Creates a :class:`WindowSpec` with the partitioning defined. @@ -124,16 +117,10 @@ def partitionBy(*cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> "WindowS | 3| b| 3| +---+--------+----------+ """ - from py4j.java_gateway import JVMView - - sc = get_active_spark_context() - jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.partitionBy( - _to_java_cols(cols) - ) - return WindowSpec(jspec) + ... @staticmethod - @try_remote_window + @dispatch_window_method def orderBy(*cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> "WindowSpec": """ Creates a :class:`WindowSpec` with the ordering defined. @@ -183,16 +170,10 @@ def orderBy(*cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> "WindowSpec" | 3| b| 1| +---+--------+----------+ """ - from py4j.java_gateway import JVMView - - sc = get_active_spark_context() - jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.orderBy( - _to_java_cols(cols) - ) - return WindowSpec(jspec) + ... @staticmethod - @try_remote_window + @dispatch_window_method def rowsBetween(start: int, end: int) -> "WindowSpec": """ Creates a :class:`WindowSpec` with the frame boundaries defined, @@ -266,20 +247,10 @@ def rowsBetween(start: int, end: int) -> "WindowSpec": +---+--------+---+ """ - from py4j.java_gateway import JVMView - - if start <= Window._PRECEDING_THRESHOLD: - start = Window.unboundedPreceding - if end >= Window._FOLLOWING_THRESHOLD: - end = Window.unboundedFollowing - sc = get_active_spark_context() - jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.rowsBetween( - start, end - ) - return WindowSpec(jspec) + ... 
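# --- Aside: a minimal, standalone sketch of the dispatch idea behind `dispatch_window_method`
# --- used just above. The public class keeps only stub methods (`...`) and a decorator forwards
# --- each call by name to a backend chosen at call time. The backend classes, the `use_remote`
# --- flag, and `ToyWindow` are illustrative assumptions, not Spark's actual classes.
import functools
from typing import Any, Callable

use_remote = False  # stand-in for pyspark.sql.utils.is_remote()


class _ClassicBackend:
    @staticmethod
    def partitionBy(*cols: str) -> str:
        return f"classic window partitioned by {cols}"


class _ConnectBackend:
    @staticmethod
    def partitionBy(*cols: str) -> str:
        return f"connect window partitioned by {cols}"


def dispatch(f: Callable[..., Any]) -> Callable[..., Any]:
    @functools.wraps(f)
    def wrapped(*args: Any, **kwargs: Any) -> Any:
        # Pick the backend at call time and forward by method name.
        backend = _ConnectBackend if use_remote else _ClassicBackend
        return getattr(backend, f.__name__)(*args, **kwargs)

    return wrapped


class ToyWindow:
    @staticmethod
    @dispatch
    def partitionBy(*cols: str) -> str:
        ...  # stub body; the selected backend does the real work


print(ToyWindow.partitionBy("country"))  # prints: classic window partitioned by ('country',)
# --- End aside.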
@staticmethod - @try_remote_window + @dispatch_window_method def rangeBetween(start: int, end: int) -> "WindowSpec": """ Creates a :class:`WindowSpec` with the frame boundaries defined, @@ -356,17 +327,7 @@ def rangeBetween(start: int, end: int) -> "WindowSpec": +---+--------+---+ """ - from py4j.java_gateway import JVMView - - if start <= Window._PRECEDING_THRESHOLD: - start = Window.unboundedPreceding - if end >= Window._FOLLOWING_THRESHOLD: - end = Window.unboundedFollowing - sc = get_active_spark_context() - jspec = cast(JVMView, sc._jvm).org.apache.spark.sql.expressions.Window.rangeBetween( - start, end - ) - return WindowSpec(jspec) + ... class WindowSpec: @@ -382,10 +343,11 @@ class WindowSpec: Supports Spark Connect. """ - def __init__(self, jspec: "JavaObject") -> None: - self._jspec = jspec + def __new__(cls, jspec: "JavaObject") -> "WindowSpec": + from pyspark.sql.classic.WindowSpec import WindowSpec # type: ignore[import-not-found] + + return WindowSpec.__new__(WindowSpec, jspec) - @try_remote_windowspec def partitionBy(self, *cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> "WindowSpec": """ Defines the partitioning columns in a :class:`WindowSpec`. @@ -397,9 +359,8 @@ def partitionBy(self, *cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> "W cols : str, :class:`Column` or list names of columns or expressions """ - return WindowSpec(self._jspec.partitionBy(_to_java_cols(cols))) + ... - @try_remote_windowspec def orderBy(self, *cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> "WindowSpec": """ Defines the ordering columns in a :class:`WindowSpec`. @@ -411,9 +372,8 @@ def orderBy(self, *cols: Union["ColumnOrName", List["ColumnOrName_"]]) -> "Windo cols : str, :class:`Column` or list names of columns or expressions """ - return WindowSpec(self._jspec.orderBy(_to_java_cols(cols))) + ... - @try_remote_windowspec def rowsBetween(self, start: int, end: int) -> "WindowSpec": """ Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). @@ -439,13 +399,8 @@ def rowsBetween(self, start: int, end: int) -> "WindowSpec": The frame is unbounded if this is ``Window.unboundedFollowing``, or any value greater than or equal to min(sys.maxsize, 9223372036854775807). """ - if start <= Window._PRECEDING_THRESHOLD: - start = Window.unboundedPreceding - if end >= Window._FOLLOWING_THRESHOLD: - end = Window.unboundedFollowing - return WindowSpec(self._jspec.rowsBetween(start, end)) + ... - @try_remote_windowspec def rangeBetween(self, start: int, end: int) -> "WindowSpec": """ Defines the frame boundaries, from `start` (inclusive) to `end` (inclusive). @@ -471,29 +426,4 @@ def rangeBetween(self, start: int, end: int) -> "WindowSpec": The frame is unbounded if this is ``Window.unboundedFollowing``, or any value greater than or equal to min(sys.maxsize, 9223372036854775807). 
""" - if start <= Window._PRECEDING_THRESHOLD: - start = Window.unboundedPreceding - if end >= Window._FOLLOWING_THRESHOLD: - end = Window.unboundedFollowing - return WindowSpec(self._jspec.rangeBetween(start, end)) - - -def _test() -> None: - import doctest - from pyspark.sql import SparkSession - import pyspark.sql.window - - globs = pyspark.sql.window.__dict__.copy() - spark = SparkSession.builder.master("local[4]").appName("sql.window tests").getOrCreate() - globs["spark"] = spark - - (failure_count, test_count) = doctest.testmod( - pyspark.sql.window, globs=globs, optionflags=doctest.NORMALIZE_WHITESPACE - ) - spark.stop() - if failure_count: - sys.exit(-1) - - -if __name__ == "__main__": - _test() + ... diff --git a/python/pyspark/sql/worker/analyze_udtf.py b/python/pyspark/sql/worker/analyze_udtf.py index d0a24363c0c1e..7dafb87c42211 100644 --- a/python/pyspark/sql/worker/analyze_udtf.py +++ b/python/pyspark/sql/worker/analyze_udtf.py @@ -264,4 +264,7 @@ def invalid_analyze_result_field(field_name: str, expected_field: str) -> PySpar java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + # TODO: Remove the following two lines and use `Process.pid()` when we drop JDK 8. + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/worker/commit_data_source_write.py b/python/pyspark/sql/worker/commit_data_source_write.py index 530f18ef8288d..1d9e53083d4d9 100644 --- a/python/pyspark/sql/worker/commit_data_source_write.py +++ b/python/pyspark/sql/worker/commit_data_source_write.py @@ -60,14 +60,7 @@ def main(infile: IO, outfile: IO) -> None: # Receive the data source writer instance. writer = pickleSer._read_with_length(infile) - if not isinstance(writer, DataSourceWriter): - raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", - message_parameters={ - "expected": "an instance of DataSourceWriter", - "actual": f"'{type(writer).__name__}'", - }, - ) + assert isinstance(writer, DataSourceWriter) # Receive the commit messages. num_messages = read_int(infile) @@ -76,7 +69,7 @@ def main(infile: IO, outfile: IO) -> None: message = pickleSer._read_with_length(infile) if message is not None and not isinstance(message, WriterCommitMessage): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "an instance of WriterCommitMessage", "actual": f"'{type(message).__name__}'", @@ -90,9 +83,9 @@ def main(infile: IO, outfile: IO) -> None: # Commit or abort the Python data source write. # Note the commit messages can be None if there are failed tasks. if abort: - writer.abort(commit_messages) # type: ignore[arg-type] + writer.abort(commit_messages) else: - writer.commit(commit_messages) # type: ignore[arg-type] + writer.commit(commit_messages) # Send a status code back to JVM. 
write_int(0, outfile) @@ -117,4 +110,6 @@ def main(infile: IO, outfile: IO) -> None: java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/worker/create_data_source.py b/python/pyspark/sql/worker/create_data_source.py index 1f11b65f44c7e..d6b59b04393d8 100644 --- a/python/pyspark/sql/worker/create_data_source.py +++ b/python/pyspark/sql/worker/create_data_source.py @@ -75,7 +75,7 @@ def main(infile: IO, outfile: IO) -> None: data_source_cls = read_command(pickleSer, infile) if not (isinstance(data_source_cls, type) and issubclass(data_source_cls, DataSource)): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "a subclass of DataSource", "actual": f"'{type(data_source_cls).__name__}'", @@ -85,7 +85,7 @@ def main(infile: IO, outfile: IO) -> None: # Check the name method is a class method. if not inspect.ismethod(data_source_cls.name): raise PySparkTypeError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "'name()' method to be a classmethod", "actual": f"'{type(data_source_cls.name).__name__}'", @@ -98,7 +98,7 @@ def main(infile: IO, outfile: IO) -> None: # Check if the provider name matches the data source's name. if provider.lower() != data_source_cls.name().lower(): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": f"provider with name {data_source_cls.name()}", "actual": f"'{provider}'", @@ -111,7 +111,7 @@ def main(infile: IO, outfile: IO) -> None: user_specified_schema = _parse_datatype_json_string(utf8_deserializer.loads(infile)) if not isinstance(user_specified_schema, StructType): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "the user-defined schema to be a 'StructType'", "actual": f"'{type(data_source_cls).__name__}'", @@ -187,4 +187,6 @@ def main(infile: IO, outfile: IO) -> None: java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/worker/lookup_data_sources.py b/python/pyspark/sql/worker/lookup_data_sources.py index 7f0127b719463..6da9d5925f636 100644 --- a/python/pyspark/sql/worker/lookup_data_sources.py +++ b/python/pyspark/sql/worker/lookup_data_sources.py @@ -95,4 +95,6 @@ def main(infile: IO, outfile: IO) -> None: java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/worker/plan_data_source_read.py b/python/pyspark/sql/worker/plan_data_source_read.py index 6c0d48caefeb8..51a90bba14547 100644 --- a/python/pyspark/sql/worker/plan_data_source_read.py +++ b/python/pyspark/sql/worker/plan_data_source_read.py @@ -18,8 +18,9 @@ import os import sys import functools +import pyarrow as 
pa from itertools import islice -from typing import IO, List, Iterator, Iterable +from typing import IO, List, Iterator, Iterable, Tuple, Union from pyspark.accumulators import _accumulatorRegistry from pyspark.errors import PySparkAssertionError, PySparkRuntimeError @@ -31,7 +32,13 @@ ) from pyspark.sql import Row from pyspark.sql.connect.conversion import ArrowTableToRowsConversion, LocalDataToArrowConversion -from pyspark.sql.datasource import DataSource, InputPartition +from pyspark.sql.datasource import ( + DataSource, + DataSourceReader, + DataSourceStreamReader, + InputPartition, +) +from pyspark.sql.datasource_internal import _streamReader from pyspark.sql.pandas.types import to_arrow_schema from pyspark.sql.types import ( _parse_datatype_json_string, @@ -51,6 +58,78 @@ ) +def records_to_arrow_batches( + output_iter: Iterator[Tuple], + max_arrow_batch_size: int, + return_type: StructType, + data_source: DataSource, +) -> Iterable[pa.RecordBatch]: + """ + Convert an iterator of Python tuples to an iterator of pyarrow record batches. + + For each python tuple, check the types of each field and append it to the records batch. + + """ + + def batched(iterator: Iterator, n: int) -> Iterator: + return iter(functools.partial(lambda it: list(islice(it, n)), iterator), []) + + pa_schema = to_arrow_schema(return_type) + column_names = return_type.fieldNames() + column_converters = [ + LocalDataToArrowConversion._create_converter(field.dataType) for field in return_type.fields + ] + # Convert the results from the `reader.read` method to an iterator of arrow batches. + num_cols = len(column_names) + col_mapping = {name: i for i, name in enumerate(column_names)} + col_name_set = set(column_names) + for batch in batched(output_iter, max_arrow_batch_size): + pylist: List[List] = [[] for _ in range(num_cols)] + for result in batch: + # Validate the output row schema. + if hasattr(result, "__len__") and len(result) != num_cols: + raise PySparkRuntimeError( + error_class="DATA_SOURCE_RETURN_SCHEMA_MISMATCH", + message_parameters={ + "expected": str(num_cols), + "actual": str(len(result)), + }, + ) + + # Validate the output row type. + if not isinstance(result, (list, tuple)): + raise PySparkRuntimeError( + error_class="DATA_SOURCE_INVALID_RETURN_TYPE", + message_parameters={ + "type": type(result).__name__, + "name": data_source.name(), + "supported_types": "tuple, list, `pyspark.sql.types.Row`", + }, + ) + + # Assign output values by name of the field, not position, if the result is a + # named `Row` object. + if isinstance(result, Row) and hasattr(result, "__fields__"): + # Check if the names are the same as the schema. + if set(result.__fields__) != col_name_set: + raise PySparkRuntimeError( + error_class="DATA_SOURCE_RETURN_SCHEMA_MISMATCH", + message_parameters={ + "expected": str(column_names), + "actual": str(result.__fields__), + }, + ) + # Assign the values by name. + for name in column_names: + idx = col_mapping[name] + pylist[idx].append(column_converters[idx](result[name])) + else: + for col in range(num_cols): + pylist[col].append(column_converters[col](result[col])) + batch = pa.RecordBatch.from_arrays(pylist, schema=pa_schema) + yield batch + + def main(infile: IO, outfile: IO) -> None: """ Main method for planning a data source read. 
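# --- Aside: a standalone sketch of the chunk-and-convert idea implemented by
# --- `records_to_arrow_batches` above: slice an iterator of Python tuples into fixed-size
# --- chunks and emit one pyarrow RecordBatch per chunk. The two-column layout used here
# --- (id: int64, name: string) is an assumption for illustration only; the real helper derives
# --- column names, types, and converters from the declared return schema and validates rows.
from itertools import islice
from typing import Iterable, Iterator, Tuple

import pyarrow as pa


def tuples_to_record_batches(
    rows: Iterator[Tuple[int, str]], batch_size: int
) -> Iterable[pa.RecordBatch]:
    while True:
        # Take at most `batch_size` rows from the iterator.
        chunk = list(islice(rows, batch_size))
        if not chunk:
            return
        # Build one Arrow array per column, then assemble a RecordBatch.
        ids = pa.array([r[0] for r in chunk], type=pa.int64())
        names = pa.array([r[1] for r in chunk], type=pa.string())
        yield pa.RecordBatch.from_arrays([ids, names], names=["id", "name"])


# Example: three rows with batch_size=2 yield batches of 2 and 1 rows.
for batch in tuples_to_record_batches(iter([(1, "a"), (2, "b"), (3, "c")]), 2):
    print(batch.num_rows)
# --- End aside.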
@@ -113,7 +192,7 @@ def main(infile: IO, outfile: IO) -> None: schema = _parse_datatype_json_string(schema_json) if not isinstance(schema, StructType): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "an output schema of type 'StructType'", "actual": f"'{type(schema).__name__}'", @@ -130,26 +209,27 @@ def main(infile: IO, outfile: IO) -> None: is_streaming = read_bool(infile) # Instantiate data source reader. - reader = ( - data_source.streamReader(schema=schema) - if is_streaming - else data_source.reader(schema=schema) - ) - - # Wrap the data source read logic in an mapInArrow UDF. - import pyarrow as pa + if is_streaming: + reader: Union[DataSourceReader, DataSourceStreamReader] = _streamReader( + data_source, schema + ) + else: + reader = data_source.reader(schema=schema) + # Validate the reader. + if not isinstance(reader, DataSourceReader): + raise PySparkAssertionError( + error_class="DATA_SOURCE_TYPE_MISMATCH", + message_parameters={ + "expected": "an instance of DataSourceReader", + "actual": f"'{type(reader).__name__}'", + }, + ) # Create input converter. converter = ArrowTableToRowsConversion._create_converter(BinaryType()) # Create output converter. return_type = schema - pa_schema = to_arrow_schema(return_type) - column_names = return_type.fieldNames() - column_converters = [ - LocalDataToArrowConversion._create_converter(field.dataType) - for field in return_type.fields - ] def data_source_read_func(iterator: Iterable[pa.RecordBatch]) -> Iterable[pa.RecordBatch]: partition_bytes = None @@ -176,7 +256,7 @@ def data_source_read_func(iterator: Iterable[pa.RecordBatch]) -> Iterable[pa.Rec f"but found '{type(partition).__name__}'." ) - output_iter = reader.read(partition) # type: ignore[attr-defined] + output_iter = reader.read(partition) # type: ignore[arg-type] # Validate the output iterator. if not isinstance(output_iter, Iterator): @@ -189,58 +269,9 @@ def data_source_read_func(iterator: Iterable[pa.RecordBatch]) -> Iterable[pa.Rec }, ) - def batched(iterator: Iterator, n: int) -> Iterator: - return iter(functools.partial(lambda it: list(islice(it, n)), iterator), []) - - # Convert the results from the `reader.read` method to an iterator of arrow batches. - num_cols = len(column_names) - col_mapping = {name: i for i, name in enumerate(column_names)} - col_name_set = set(column_names) - for batch in batched(output_iter, max_arrow_batch_size): - pylist: List[List] = [[] for _ in range(num_cols)] - for result in batch: - # Validate the output row schema. - if hasattr(result, "__len__") and len(result) != num_cols: - raise PySparkRuntimeError( - error_class="DATA_SOURCE_RETURN_SCHEMA_MISMATCH", - message_parameters={ - "expected": str(num_cols), - "actual": str(len(result)), - }, - ) - - # Validate the output row type. - if not isinstance(result, (list, tuple)): - raise PySparkRuntimeError( - error_class="DATA_SOURCE_INVALID_RETURN_TYPE", - message_parameters={ - "type": type(result).__name__, - "name": data_source.name(), - "supported_types": "tuple, list, `pyspark.sql.types.Row`", - }, - ) - - # Assign output values by name of the field, not position, if the result is a - # named `Row` object. - if isinstance(result, Row) and hasattr(result, "__fields__"): - # Check if the names are the same as the schema. 
- if set(result.__fields__) != col_name_set: - raise PySparkRuntimeError( - error_class="PYTHON_DATA_SOURCE_READ_RETURN_SCHEMA_MISMATCH", - message_parameters={ - "expected": str(column_names), - "actual": str(result.__fields__), - }, - ) - # Assign the values by name. - for name in column_names: - idx = col_mapping[name] - pylist[idx].append(column_converters[idx](result[name])) - else: - for col in range(num_cols): - pylist[col].append(column_converters[col](result[col])) - - yield pa.RecordBatch.from_arrays(pylist, schema=pa_schema) + return records_to_arrow_batches( + output_iter, max_arrow_batch_size, return_type, data_source + ) command = (data_source_read_func, return_type) pickleSer._write_with_length(command, outfile) @@ -248,7 +279,7 @@ def batched(iterator: Iterator, n: int) -> Iterator: if not is_streaming: # The partitioning of python batch source read is determined before query execution. try: - partitions = reader.partitions() # type: ignore[attr-defined] + partitions = reader.partitions() # type: ignore[call-arg] if not isinstance(partitions, list): raise PySparkRuntimeError( error_class="DATA_SOURCE_TYPE_MISMATCH", @@ -267,9 +298,9 @@ def batched(iterator: Iterator, n: int) -> Iterator: }, ) if len(partitions) == 0: - partitions = [None] + partitions = [None] # type: ignore[list-item] except NotImplementedError: - partitions = [None] + partitions = [None] # type: ignore[list-item] # Return the serialized partition values. write_int(len(partitions), outfile) @@ -299,4 +330,6 @@ def batched(iterator: Iterator, n: int) -> Iterator: java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/worker/python_streaming_sink_runner.py b/python/pyspark/sql/worker/python_streaming_sink_runner.py index ba0a8037de602..7d03157d705d6 100644 --- a/python/pyspark/sql/worker/python_streaming_sink_runner.py +++ b/python/pyspark/sql/worker/python_streaming_sink_runner.py @@ -21,7 +21,6 @@ from pyspark.accumulators import _accumulatorRegistry from pyspark.errors import PySparkAssertionError, PySparkRuntimeError -from pyspark.util import local_connect_and_auth from pyspark.serializers import ( read_bool, read_int, @@ -34,12 +33,13 @@ _parse_datatype_json_string, StructType, ) -from pyspark.util import handle_worker_exception +from pyspark.util import handle_worker_exception, local_connect_and_auth from pyspark.worker_util import ( check_python_version, read_command, pickleSer, send_accumulator_updates, + setup_broadcasts, setup_memory_limits, setup_spark_files, utf8_deserializer, @@ -47,9 +47,18 @@ def main(infile: IO, outfile: IO) -> None: + """ + Main method for committing or aborting a data source streaming write operation. + + This process is invoked from the `PythonStreamingSinkCommitRunner.runInPython` + method in the StreamingWrite implementation of the PythonDataSourceV2. It is + responsible for invoking either the `commit` or the `abort` method on a data source + writer instance, given a list of commit messages. 
+ """ try: check_python_version(infile) setup_spark_files(infile) + setup_broadcasts(infile) memory_limit_mb = int(os.environ.get("PYSPARK_PLANNER_MEMORY_MB", "-1")) setup_memory_limits(memory_limit_mb) @@ -61,7 +70,7 @@ def main(infile: IO, outfile: IO) -> None: if not isinstance(data_source, DataSource): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "a Python data source instance of type 'DataSource'", "actual": f"'{type(data_source).__name__}'", @@ -72,7 +81,7 @@ def main(infile: IO, outfile: IO) -> None: schema = _parse_datatype_json_string(schema_json) if not isinstance(schema, StructType): raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", + error_class="DATA_SOURCE_TYPE_MISMATCH", message_parameters={ "expected": "an output schema of type 'StructType'", "actual": f"'{type(schema).__name__}'", @@ -82,36 +91,36 @@ def main(infile: IO, outfile: IO) -> None: overwrite = read_bool(infile) # Instantiate data source reader. try: + # Create the data source writer instance. writer = data_source.streamWriter(schema=schema, overwrite=overwrite) - # Initialization succeed. + + # Receive the commit messages. + num_messages = read_int(infile) + commit_messages = [] + for _ in range(num_messages): + message = pickleSer._read_with_length(infile) + if message is not None and not isinstance(message, WriterCommitMessage): + raise PySparkAssertionError( + error_class="DATA_SOURCE_TYPE_MISMATCH", + message_parameters={ + "expected": "an instance of WriterCommitMessage", + "actual": f"'{type(message).__name__}'", + }, + ) + commit_messages.append(message) + + batch_id = read_long(infile) + abort = read_bool(infile) + + # Commit or abort the Python data source write. + # Note the commit messages can be None if there are failed tasks. + if abort: + writer.abort(commit_messages, batch_id) + else: + writer.commit(commit_messages, batch_id) + # Send a status code back to JVM. write_int(0, outfile) outfile.flush() - - # handle method call from socket - while True: - num_messages = read_int(infile) - commit_messages = [] - for _ in range(num_messages): - message = pickleSer._read_with_length(infile) - if message is not None and not isinstance(message, WriterCommitMessage): - raise PySparkAssertionError( - error_class="PYTHON_DATA_SOURCE_TYPE_MISMATCH", - message_parameters={ - "expected": "an instance of WriterCommitMessage", - "actual": f"'{type(message).__name__}'", - }, - ) - commit_messages.append(message) - batch_id = read_long(infile) - abort = read_bool(infile) - # Commit or abort the Python data source write. - # Note the commit messages can be None if there are failed tasks. 
- if abort: - writer.abort(commit_messages, batch_id) # type: ignore[arg-type] - else: - writer.commit(commit_messages, batch_id) # type: ignore[arg-type] - write_int(0, outfile) - outfile.flush() except Exception as e: error_msg = "data source {} throw exception: {}".format(data_source.name, e) raise PySparkRuntimeError( @@ -137,4 +146,6 @@ def main(infile: IO, outfile: IO) -> None: java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/sql/worker/write_into_data_source.py b/python/pyspark/sql/worker/write_into_data_source.py index ad8717cb33b5c..212a2754ec9f0 100644 --- a/python/pyspark/sql/worker/write_into_data_source.py +++ b/python/pyspark/sql/worker/write_into_data_source.py @@ -29,7 +29,12 @@ SpecialLengths, ) from pyspark.sql import Row -from pyspark.sql.datasource import DataSource, WriterCommitMessage, CaseInsensitiveDict +from pyspark.sql.datasource import ( + DataSource, + DataSourceWriter, + WriterCommitMessage, + CaseInsensitiveDict, +) from pyspark.sql.types import ( _parse_datatype_json_string, StructType, @@ -162,6 +167,14 @@ def main(infile: IO, outfile: IO) -> None: else: # Instantiate the data source writer. writer = data_source.writer(schema, overwrite) # type: ignore[assignment] + if not isinstance(writer, DataSourceWriter): + raise PySparkAssertionError( + error_class="DATA_SOURCE_TYPE_MISMATCH", + message_parameters={ + "expected": "an instance of DataSourceWriter", + "actual": f"'{type(writer).__name__}'", + }, + ) # Create a function that can be used in mapInArrow. import pyarrow as pa @@ -229,4 +242,6 @@ def batch_to_rows() -> Iterator[Row]: java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/pyspark/testing/connectutils.py b/python/pyspark/testing/connectutils.py index 191505741eb40..2f18cd8a6ccdc 100644 --- a/python/pyspark/testing/connectutils.py +++ b/python/pyspark/testing/connectutils.py @@ -45,6 +45,13 @@ googleapis_common_protos_requirement_message = str(e) have_googleapis_common_protos = googleapis_common_protos_requirement_message is None +graphviz_requirement_message = None +try: + import graphviz +except ImportError as e: + graphviz_requirement_message = str(e) +have_graphviz: bool = graphviz_requirement_message is None + from pyspark import Row, SparkConf from pyspark.util import is_remote_only from pyspark.testing.utils import PySparkErrorTestUtils @@ -170,6 +177,8 @@ def conf(cls): conf = SparkConf(loadDefaults=False) # Make the server terminate reattachable streams every 1 second and 123 bytes, # to make the tests exercise reattach. 
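Note on the connectutils change above: it follows the existing optional-dependency pattern — import once at module load, remember the error message, expose a have_graphviz flag. A dependent test can then skip itself with a precise reason; the test class and its body below are hypothetical:

import unittest

from pyspark.testing.connectutils import graphviz_requirement_message, have_graphviz


class HypotheticalPlanGraphTests(unittest.TestCase):
    @unittest.skipIf(not have_graphviz, graphviz_requirement_message)
    def test_render_plan_graph(self):
        import graphviz  # guaranteed importable past the skipIf guard

        dot = graphviz.Digraph(comment="query plan")
        dot.node("scan", "Scan parquet")
        dot.node("filter", "Filter")
        dot.edge("scan", "filter")
        self.assertIn("scan -> filter", dot.source)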
+ if conf._jconf is not None: + conf._jconf.remove("spark.master") conf.set("spark.connect.execute.reattachable.senderMaxStreamDuration", "1s") conf.set("spark.connect.execute.reattachable.senderMaxStreamSize", "123") return conf diff --git a/python/pyspark/testing/mlutils.py b/python/pyspark/testing/mlutils.py index 8981e97ea49ba..aa3e23bccb198 100644 --- a/python/pyspark/testing/mlutils.py +++ b/python/pyspark/testing/mlutils.py @@ -99,6 +99,11 @@ def tearDownClass(cls): class MockDataset(DataFrame): + def __new__(cls) -> "DataFrame": + self = object.__new__(cls) + self.__init__() + return self + def __init__(self): self.index = 0 diff --git a/python/pyspark/testing/sqlutils.py b/python/pyspark/testing/sqlutils.py index 690d5c37b22e4..9f07c44c084cf 100644 --- a/python/pyspark/testing/sqlutils.py +++ b/python/pyspark/testing/sqlutils.py @@ -247,6 +247,29 @@ def function(self, *functions): for f in functions: self.spark.sql("DROP FUNCTION IF EXISTS %s" % f) + @contextmanager + def temp_env(self, pairs): + assert isinstance(pairs, dict), "pairs should be a dictionary." + + keys = pairs.keys() + new_values = pairs.values() + old_values = [os.environ.get(key, None) for key in keys] + for key, new_value in zip(keys, new_values): + if new_value is None: + if key in os.environ: + del os.environ[key] + else: + os.environ[key] = new_value + try: + yield + finally: + for key, old_value in zip(keys, old_values): + if old_value is None: + if key in os.environ: + del os.environ[key] + else: + os.environ[key] = old_value + @staticmethod def assert_close(a, b): c = [j[0] for j in b] @@ -258,6 +281,7 @@ class ReusedSQLTestCase(ReusedPySparkTestCase, SQLTestUtils, PySparkErrorTestUti @classmethod def setUpClass(cls): super(ReusedSQLTestCase, cls).setUpClass() + cls._legacy_sc = cls.sc cls.spark = SparkSession(cls.sc) cls.tempdir = tempfile.NamedTemporaryFile(delete=False) os.unlink(cls.tempdir.name) diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py index fe25136864eef..c74291524daed 100644 --- a/python/pyspark/testing/utils.py +++ b/python/pyspark/testing/utils.py @@ -38,7 +38,7 @@ have_scipy = False have_numpy = False try: - import scipy.sparse # noqa: F401 + import scipy # noqa: F401 have_scipy = True except ImportError: @@ -287,7 +287,7 @@ def check_error( error_class: str, message_parameters: Optional[Dict[str, str]] = None, query_context_type: Optional[QueryContextType] = None, - pyspark_fragment: Optional[str] = None, + fragment: Optional[str] = None, ): query_context = exception.getQueryContext() assert bool(query_context) == (query_context_type is not None), ( @@ -326,10 +326,10 @@ def check_error( ) if actual == QueryContextType.DataFrame: assert ( - pyspark_fragment is not None - ), "`pyspark_fragment` is required when QueryContextType is DataFrame." - expected = pyspark_fragment - actual = actual_context.pysparkFragment() + fragment is not None + ), "`fragment` is required when QueryContextType is DataFrame." 
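Note on the new temp_env helper in sqlutils above: it scopes environment-variable overrides to a with block and restores (or removes) each key afterwards, with a None value meaning "unset this key". A small usage sketch in a ReusedSQLTestCase-based test; the variable name is illustrative:

import os

from pyspark.testing.sqlutils import ReusedSQLTestCase


class HypotheticalEnvTests(ReusedSQLTestCase):
    def test_env_is_scoped(self):
        with self.temp_env({"HYPOTHETICAL_FLAG": "1", "TZ": None}):
            # Overrides are visible only inside the block.
            self.assertEqual(os.environ.get("HYPOTHETICAL_FLAG"), "1")
            self.assertNotIn("TZ", os.environ)
        # On exit the previous values (or their absence) are restored.
        self.assertIsNone(os.environ.get("HYPOTHETICAL_FLAG"))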
+ expected = fragment + actual = actual_context.fragment() self.assertEqual( expected, actual, @@ -829,12 +829,7 @@ def assertDataFrameEqual( actual, expected, almost=True, rtol=rtol, atol=atol, check_row_order=checkRowOrder ) - from pyspark.sql.utils import get_dataframe_class - - # if is_remote(), allow Connect DataFrame - SparkDataFrame = get_dataframe_class() - - if not isinstance(actual, (DataFrame, SparkDataFrame, list)): + if not isinstance(actual, (DataFrame, list)): raise PySparkAssertionError( error_class="INVALID_TYPE_DF_EQUALITY_ARG", message_parameters={ @@ -843,7 +838,7 @@ def assertDataFrameEqual( "actual_type": type(actual), }, ) - elif not isinstance(expected, (DataFrame, SparkDataFrame, list)): + elif not isinstance(expected, (DataFrame, list)): raise PySparkAssertionError( error_class="INVALID_TYPE_DF_EQUALITY_ARG", message_parameters={ diff --git a/python/pyspark/util.py b/python/pyspark/util.py index bf1cf5b595533..49766913e6ee2 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -71,6 +71,16 @@ from pyspark.sql import SparkSession +JVM_BYTE_MIN: int = -(1 << 7) +JVM_BYTE_MAX: int = (1 << 7) - 1 +JVM_SHORT_MIN: int = -(1 << 15) +JVM_SHORT_MAX: int = (1 << 15) - 1 +JVM_INT_MIN: int = -(1 << 31) +JVM_INT_MAX: int = (1 << 31) - 1 +JVM_LONG_MIN: int = -(1 << 63) +JVM_LONG_MAX: int = (1 << 63) - 1 + + def print_exec(stream: TextIO) -> None: ei = sys.exc_info() traceback.print_exception(ei[0], ei[1], ei[2], None, stream) @@ -107,6 +117,22 @@ def majorMinorVersion(sparkVersion: str) -> Tuple[int, int]: ) +class LogUtils: + """ + Utils for querying structured Spark logs with Spark SQL. + """ + + LOG_SCHEMA = ( + "ts TIMESTAMP, " + "level STRING, " + "msg STRING, " + "context map, " + "exception STRUCT>>," + "logger STRING" + ) + + def fail_on_stopiteration(f: Callable) -> Callable: """ Wraps the input function to fail on 'StopIteration' by raising a 'RuntimeError' @@ -747,6 +773,9 @@ def is_remote_only() -> bool: """ global _is_remote_only + if "SPARK_SKIP_CONNECT_COMPAT_TESTS" in os.environ: + return True + if _is_remote_only is not None: return _is_remote_only try: diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 41f6c35bc4452..e9c259e68a27a 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -1868,4 +1868,7 @@ def process(): java_port = int(os.environ["PYTHON_WORKER_FACTORY_PORT"]) auth_secret = os.environ["PYTHON_WORKER_FACTORY_SECRET"] (sock_file, _) = local_connect_and_auth(java_port, auth_secret) + # TODO: Remove the following two lines and use `Process.pid()` when we drop JDK 8. + write_int(os.getpid(), sock_file) + sock_file.flush() main(sock_file, sock_file) diff --git a/python/run-tests b/python/run-tests index 401fcae3e350c..b5492a080d6a5 100755 --- a/python/run-tests +++ b/python/run-tests @@ -21,9 +21,9 @@ FWDIR="$(cd "`dirname $0`"/..; pwd)" cd "$FWDIR" -PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 8, 0))') +PYTHON_VERSION_CHECK=$(python3 -c 'import sys; print(sys.version_info < (3, 9, 0))') if [[ "$PYTHON_VERSION_CHECK" == "True" ]]; then - echo "Python versions prior to 3.8 are not supported." + echo "Python versions prior to 3.9 are not supported." exit -1 fi diff --git a/python/run-tests.py b/python/run-tests.py index ebdd4a9a21798..64ac48e210db4 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -62,13 +62,15 @@ def get_valid_filename(s): # Find out where the assembly jars are located. 
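Note on the pyspark.util additions above: LogUtils.LOG_SCHEMA is a DDL string for loading Spark's structured JSON logs back into a DataFrame (the angle-bracketed type parameters of the context map and the nested exception struct do not survive in the text above, so the sketch below relies only on the flat columns). The log path is made up:

from pyspark.sql import SparkSession
from pyspark.util import LogUtils

spark = SparkSession.builder.getOrCreate()

# Parse a structured (JSON-lines) Spark log with the documented schema.
logs = spark.read.schema(LogUtils.LOG_SCHEMA).json("/tmp/spark-logs/driver.log")

logs.filter(logs.level == "ERROR").select("ts", "logger", "msg").show(truncate=False)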
# TODO: revisit for Scala 2.13 -for scala in ["2.13"]: - build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala) - if os.path.isdir(build_dir): - SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*") - break -else: - raise RuntimeError("Cannot find assembly build directory, please build Spark first.") +SPARK_DIST_CLASSPATH = "" +if "SPARK_SKIP_CONNECT_COMPAT_TESTS" not in os.environ: + for scala in ["2.13"]: + build_dir = os.path.join(SPARK_HOME, "assembly", "target", "scala-" + scala) + if os.path.isdir(build_dir): + SPARK_DIST_CLASSPATH = os.path.join(build_dir, "jars", "*") + break + else: + raise RuntimeError("Cannot find assembly build directory, please build Spark first.") def run_individual_python_test(target_dir, test_name, pyspark_python, keep_test_output): @@ -100,6 +102,8 @@ def run_individual_python_test(target_dir, test_name, pyspark_python, keep_test_ if "SPARK_CONNECT_TESTING_REMOTE" in os.environ: env.update({"SPARK_CONNECT_TESTING_REMOTE": os.environ["SPARK_CONNECT_TESTING_REMOTE"]}) + if "SPARK_SKIP_CONNECT_COMPAT_TESTS" in os.environ: + env.update({"SPARK_SKIP_JVM_REQUIRED_TESTS": os.environ["SPARK_SKIP_CONNECT_COMPAT_TESTS"]}) # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is # recognized by the tempfile module to override the default system temp directory. diff --git a/python/test_support/sql/streaming/time/text-with-time-test.txt b/python/test_support/sql/streaming/time/text-with-time-test.txt new file mode 100644 index 0000000000000..cf9edcafe5a49 --- /dev/null +++ b/python/test_support/sql/streaming/time/text-with-time-test.txt @@ -0,0 +1,10 @@ +2024-05-24 15:03:20;1 +2024-05-24 15:03:21;2 +2024-05-24 15:03:24;3 +2024-05-24 15:03:25;3 +2024-05-24 15:03:31;4 +2024-05-24 15:03:31;1 +2024-05-24 15:03:32;3 +2024-05-24 15:03:45;2 +2024-05-24 15:03:46;5 +2024-05-24 15:03:50;6 \ No newline at end of file diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala index 385734c557a38..ead3188aa6494 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Constants.scala @@ -16,7 +16,16 @@ */ package org.apache.spark.deploy.k8s -private[spark] object Constants { +import org.apache.spark.annotation.{DeveloperApi, Stable} + +/** + * :: DeveloperApi :: + * + * This is used in both K8s module and Spark K8s Operator. 
+ */ +@Stable +@DeveloperApi +object Constants { // Labels val SPARK_VERSION_LABEL = "spark-version" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala index 9fdd9518d2d81..deb178eb90e17 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala @@ -22,14 +22,15 @@ import io.fabric8.kubernetes.api.model.{LocalObjectReference, LocalObjectReferen import org.apache.commons.lang3.StringUtils import org.apache.spark.{SPARK_VERSION, SparkConf} +import org.apache.spark.annotation.{DeveloperApi, Since, Unstable} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ +import org.apache.spark.deploy.k8s.features.DriverServiceFeatureStep._ import org.apache.spark.deploy.k8s.submit._ -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CONFIG, EXECUTOR_ENV_REGEX} +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.resource.ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -import org.apache.spark.util.Utils +import org.apache.spark.util.{Clock, SystemClock, Utils} /** * Structure containing metadata for Kubernetes logic to build Spark pods. @@ -77,18 +78,43 @@ private[spark] abstract class KubernetesConf(val sparkConf: SparkConf) { def getOption(key: String): Option[String] = sparkConf.getOption(key) } -private[spark] class KubernetesDriverConf( +/** + * :: DeveloperApi :: + * + * Used for K8s operations internally and Spark K8s operator. + */ +@Unstable +@DeveloperApi +@Since("4.0.0") +class KubernetesDriverConf( sparkConf: SparkConf, val appId: String, val mainAppResource: MainAppResource, val mainClass: String, val appArgs: Array[String], - val proxyUser: Option[String]) - extends KubernetesConf(sparkConf) { + val proxyUser: Option[String], + clock: Clock = new SystemClock()) + extends KubernetesConf(sparkConf) with Logging { def driverNodeSelector: Map[String, String] = KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_DRIVER_NODE_SELECTOR_PREFIX) + lazy val driverServiceName: String = { + val preferredServiceName = s"$resourceNamePrefix$DRIVER_SVC_POSTFIX" + if (preferredServiceName.length <= MAX_SERVICE_NAME_LENGTH) { + preferredServiceName + } else { + val randomServiceId = KubernetesUtils.uniqueID(clock) + val shorterServiceName = s"spark-$randomServiceId$DRIVER_SVC_POSTFIX" + logWarning(log"Driver's hostname would preferably be " + + log"${MDC(LogKeys.PREFERRED_SERVICE_NAME, preferredServiceName)}, but this is too long " + + log"(must be <= ${MDC(LogKeys.MAX_SERVICE_NAME_LENGTH, MAX_SERVICE_NAME_LENGTH)} " + + log"characters). 
Falling back to use " + + log"${MDC(LogKeys.SHORTER_SERVICE_NAME, shorterServiceName)} as the driver service's name.") + shorterServiceName + } + } + override val resourceNamePrefix: String = { val custom = if (Utils.isTesting) get(KUBERNETES_DRIVER_POD_NAME_PREFIX) else None custom.getOrElse(KubernetesConf.getResourceNamePrefix(appName)) @@ -100,8 +126,9 @@ private[spark] class KubernetesDriverConf( SPARK_APP_ID_LABEL -> appId, SPARK_APP_NAME_LABEL -> KubernetesConf.getAppNameLabel(appName), SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) - val driverCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_DRIVER_LABEL_PREFIX) + val driverCustomLabels = + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_DRIVER_LABEL_PREFIX) + .map { case(k, v) => (k, Utils.substituteAppNExecIds(v, appId, "")) } presetLabels.keys.foreach { key => require( @@ -173,8 +200,9 @@ private[spark] class KubernetesExecutorConf( SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE, SPARK_RESOURCE_PROFILE_ID_LABEL -> resourceProfileId.toString) - val executorCustomLabels = KubernetesUtils.parsePrefixedKeyValuePairs( - sparkConf, KUBERNETES_EXECUTOR_LABEL_PREFIX) + val executorCustomLabels = + KubernetesUtils.parsePrefixedKeyValuePairs(sparkConf, KUBERNETES_EXECUTOR_LABEL_PREFIX) + .map { case(k, v) => (k, Utils.substituteAppNExecIds(v, appId, executorId)) } presetLabels.keys.foreach { key => require( @@ -215,10 +243,10 @@ private[spark] class KubernetesExecutorConf( if (executorEnvRegex.pattern.matcher(key).matches()) { true } else { - logWarning(log"Invalid key: ${MDC(CONFIG, key)}, " + + logWarning(log"Invalid key: ${MDC(LogKeys.CONFIG, key)}, " + log"a valid environment variable name must consist of alphabetic characters, " + log"digits, '_', '-', or '.', and must not start with a digit. " + - log"Regex used for validation is '${MDC(EXECUTOR_ENV_REGEX, executorEnvRegex)}'") + log"Regex used for validation is '${MDC(LogKeys.EXECUTOR_ENV_REGEX, executorEnvRegex)}'") false } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesDriverSpec.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesDriverSpec.scala index a603cb08ba9a1..0fd2cf16e74ed 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesDriverSpec.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesDriverSpec.scala @@ -18,7 +18,18 @@ package org.apache.spark.deploy.k8s import io.fabric8.kubernetes.api.model.HasMetadata -private[spark] case class KubernetesDriverSpec( +import org.apache.spark.annotation.{DeveloperApi, Since, Unstable} + +/** + * :: DeveloperApi :: + * + * Spec for driver pod and resources, used for K8s operations internally + * and Spark K8s operator. 
+ */ +@Unstable +@DeveloperApi +@Since("3.3.0") +case class KubernetesDriverSpec( pod: SparkPod, driverPreKubernetesResources: Seq[HasMetadata], driverKubernetesResources: Seq[HasMetadata], diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala index 50ecefdb6a5dc..c2fcfe179bbeb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesUtils.scala @@ -33,7 +33,7 @@ import org.apache.spark.annotation.{DeveloperApi, Since, Unstable} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.k8s.Config.KUBERNETES_FILE_UPLOAD_PATH import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.POD_ID +import org.apache.spark.internal.LogKeys.POD_ID import org.apache.spark.launcher.SparkLauncher import org.apache.spark.resource.ResourceUtils import org.apache.spark.util.{Clock, SystemClock, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala index baa519658c2e1..ee2108e8234d3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesVolumeUtils.scala @@ -19,9 +19,17 @@ package org.apache.spark.deploy.k8s import java.lang.Long.parseLong import org.apache.spark.SparkConf +import org.apache.spark.annotation.{DeveloperApi, Since, Unstable} import org.apache.spark.deploy.k8s.Config._ -private[spark] object KubernetesVolumeUtils { +/** + * :: DeveloperApi :: + * + * A utility class used for K8s operations internally and Spark K8s operator. + */ +@Unstable +@DeveloperApi +object KubernetesVolumeUtils { /** * Extract Spark volume configuration properties with a given name prefix. 
* @@ -29,6 +37,7 @@ private[spark] object KubernetesVolumeUtils { * @param prefix the given property name prefix * @return a Map storing with volume name as key and spec as value */ + @Since("3.0.0") def parseVolumesWithPrefix(sparkConf: SparkConf, prefix: String): Seq[KubernetesVolumeSpec] = { val properties = sparkConf.getAllWithPrefix(prefix).toMap diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala index 3763aeadea0ef..79f76e96474e3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/SparkKubernetesClientFactory.scala @@ -32,7 +32,8 @@ import okhttp3.OkHttpClient import org.apache.spark.SparkConf import org.apache.spark.annotation.{DeveloperApi, Since, Stable} import org.apache.spark.deploy.k8s.Config._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.K8S_CONTEXT import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.util.ThreadUtils @@ -84,9 +85,9 @@ object SparkKubernetesClientFactory extends Logging { // Allow for specifying a context used to auto-configure from the users K8S config file val kubeContext = sparkConf.get(KUBERNETES_CONTEXT).filter(_.nonEmpty) - logInfo("Auto-configuring K8S client using " + - kubeContext.map("context " + _).getOrElse("current context") + - " from users K8S config file") + logInfo(log"Auto-configuring K8S client using " + + log"${MDC(K8S_CONTEXT, kubeContext.map("context " + _).getOrElse("current context"))}" + + log" from users K8S config file") // if backoff limit is not set then set it to 3 if (getSystemPropertyOrEnvVar(KUBERNETES_REQUEST_RETRY_BACKOFFLIMIT_SYSTEM_PROPERTY) == null) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala index de15bf9b24d90..a8706370eead6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverCommandFeatureStep.scala @@ -25,7 +25,7 @@ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit._ import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CONFIG, CONFIG2, CONFIG3, CONFIG4, CONFIG5} +import org.apache.spark.internal.LogKeys.{CONFIG, CONFIG2, CONFIG3, CONFIG4, CONFIG5} import org.apache.spark.internal.config.{PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} import org.apache.spark.launcher.SparkLauncher diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala index cba4f442371c9..71f8340f5b5b6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala @@ -20,15 +20,13 @@ import scala.jdk.CollectionConverters._ import io.fabric8.kubernetes.api.model.{HasMetadata, ServiceBuilder} -import org.apache.spark.deploy.k8s.{KubernetesDriverConf, KubernetesUtils, SparkPod} +import org.apache.spark.deploy.k8s.{KubernetesDriverConf, SparkPod} import org.apache.spark.deploy.k8s.Config.{KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH, KUBERNETES_DRIVER_SERVICE_IP_FAMILIES, KUBERNETES_DRIVER_SERVICE_IP_FAMILY_POLICY} import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.{config, Logging} -import org.apache.spark.util.{Clock, SystemClock} private[spark] class DriverServiceFeatureStep( - kubernetesConf: KubernetesDriverConf, - clock: Clock = new SystemClock()) + kubernetesConf: KubernetesDriverConf) extends KubernetesFeatureConfigStep with Logging { import DriverServiceFeatureStep._ @@ -39,17 +37,7 @@ private[spark] class DriverServiceFeatureStep( s"$DRIVER_HOST_KEY is not supported in Kubernetes mode, as the driver's hostname will be " + "managed via a Kubernetes service.") - private val preferredServiceName = s"${kubernetesConf.resourceNamePrefix}$DRIVER_SVC_POSTFIX" - private val resolvedServiceName = if (preferredServiceName.length <= MAX_SERVICE_NAME_LENGTH) { - preferredServiceName - } else { - val randomServiceId = KubernetesUtils.uniqueID(clock = clock) - val shorterServiceName = s"spark-$randomServiceId$DRIVER_SVC_POSTFIX" - logWarning(s"Driver's hostname would preferably be $preferredServiceName, but this is " + - s"too long (must be <= $MAX_SERVICE_NAME_LENGTH characters). Falling back to use " + - s"$shorterServiceName as the driver service's name.") - shorterServiceName - } + private val resolvedServiceName = kubernetesConf.driverServiceName private val ipFamilyPolicy = kubernetesConf.sparkConf.get(KUBERNETES_DRIVER_SERVICE_IP_FAMILY_POLICY) private val ipFamilies = diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala index 662f5ddbd4a7e..042f984933a7e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientApplication.scala @@ -31,7 +31,8 @@ import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.KubernetesUtils.addOwnerReference -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{APP_ID, APP_NAME, SUBMISSION_ID} import org.apache.spark.util.Utils /** @@ -203,8 +204,9 @@ private[spark] class Client( } } } else { - logInfo(s"Deployed Spark application ${conf.appName} with application ID ${conf.appId} " + - s"and submission ID $sId into Kubernetes") + logInfo(log"Deployed Spark application ${MDC(APP_NAME, conf.appName)} with " + + log"application ID ${MDC(APP_ID, conf.appId)} and " + + log"submission ID ${MDC(SUBMISSION_ID, sId)} into Kubernetes") } } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala index beb7ff6bfe22c..d6b1da39bcbb5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala @@ -28,23 +28,34 @@ import scala.jdk.CollectionConverters._ import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, KeyToPath} import org.apache.spark.SparkConf +import org.apache.spark.annotation.{DeveloperApi, Since, Unstable} import org.apache.spark.deploy.k8s.{Config, Constants, KubernetesUtils} import org.apache.spark.deploy.k8s.Config.{KUBERNETES_DNS_SUBDOMAIN_NAME_MAX_LENGTH, KUBERNETES_NAMESPACE} import org.apache.spark.deploy.k8s.Constants.ENV_SPARK_CONF_DIR import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CONFIG, PATH, PATHS} +import org.apache.spark.internal.LogKeys.{CONFIG, PATH, PATHS} import org.apache.spark.util.ArrayImplicits._ -private[spark] object KubernetesClientUtils extends Logging { +/** + * :: DeveloperApi :: + * + * A utility class used for K8s operations internally and Spark K8s operator. + */ +@Unstable +@DeveloperApi +object KubernetesClientUtils extends Logging { // Config map name can be KUBERNETES_DNS_SUBDOMAIN_NAME_MAX_LENGTH chars at max. + @Since("3.3.0") def configMapName(prefix: String): String = { val suffix = "-conf-map" s"${prefix.take(KUBERNETES_DNS_SUBDOMAIN_NAME_MAX_LENGTH - suffix.length)}$suffix" } + @Since("3.1.0") val configMapNameExecutor: String = configMapName(s"spark-exec-${KubernetesUtils.uniqueID()}") + @Since("3.1.0") val configMapNameDriver: String = configMapName(s"spark-drv-${KubernetesUtils.uniqueID()}") private def buildStringFromPropertiesMap(configMapName: String, @@ -62,6 +73,7 @@ private[spark] object KubernetesClientUtils extends Logging { /** * Build, file -> 'file's content' map of all the selected files in SPARK_CONF_DIR. */ + @Since("3.1.1") def buildSparkConfDirFilesMap( configMapName: String, sparkConf: SparkConf, @@ -77,6 +89,7 @@ private[spark] object KubernetesClientUtils extends Logging { } } + @Since("3.1.0") def buildKeyToPathObjects(confFilesMap: Map[String, String]): Seq[KeyToPath] = { confFilesMap.map { case (fileName: String, _: String) => @@ -89,6 +102,7 @@ private[spark] object KubernetesClientUtils extends Logging { * Build a Config Map that will hold the content for environment variable SPARK_CONF_DIR * on remote pods. 
*/ + @Since("3.1.0") def buildConfigMap(configMapName: String, confFileMap: Map[String, String], withLabels: Map[String, String] = Map()): ConfigMap = { val configMapNameSpace = @@ -141,8 +155,8 @@ private[spark] object KubernetesClientUtils extends Logging { } } if (truncatedMap.nonEmpty) { - logInfo(s"Spark configuration files loaded from $confDir :" + - s" ${truncatedMap.keys.mkString(",")}") + logInfo(log"Spark configuration files loaded from ${MDC(PATH, confDir)} : " + + log"${MDC(PATHS, truncatedMap.keys.mkString(","))}") } if (skippedFiles.nonEmpty) { logWarning(log"Skipped conf file(s) ${MDC(PATHS, skippedFiles.mkString(","))}, due to " + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala index 3b69754b9cdf1..12626a8676efe 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesDriverBuilder.scala @@ -19,12 +19,22 @@ package org.apache.spark.deploy.k8s.submit import io.fabric8.kubernetes.client.KubernetesClient import org.apache.spark.SparkException +import org.apache.spark.annotation.{DeveloperApi, Since, Unstable} import org.apache.spark.deploy.k8s._ import org.apache.spark.deploy.k8s.features._ import org.apache.spark.util.Utils -private[spark] class KubernetesDriverBuilder { +/** + * ::DeveloperApi:: + * + * KubernetesDriverBuilder builds k8s spec for driver, used for K8s operations internally + * and Spark K8s operator. + */ +@Unstable +@DeveloperApi +class KubernetesDriverBuilder { + @Since("3.0.0") def buildFromFeatures( conf: KubernetesDriverConf, client: KubernetesClient): KubernetesDriverSpec = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala index 3227a72a8371b..465c5e605b8cc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/LoggingPodStatusWatcher.scala @@ -23,7 +23,8 @@ import io.fabric8.kubernetes.client.Watcher.Action import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.KubernetesDriverConf import org.apache.spark.deploy.k8s.KubernetesUtils._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{APP_ID, APP_NAME, POD_PHASE, POD_STATE, STATUS, SUBMISSION_ID} private[k8s] trait LoggingPodStatusWatcher extends Watcher[Pod] { def watchOrStop(submissionId: String): Boolean @@ -83,7 +84,8 @@ private[k8s] class LoggingPodStatusWatcherImpl(conf: KubernetesDriverConf) } private def logLongStatus(): Unit = { - logInfo("State changed, new state: " + pod.map(formatPodState).getOrElse("unknown")) + logInfo(log"State changed, new state: " + + log"${MDC(POD_STATE, pod.map(formatPodState).getOrElse("unknown"))}") } private def hasCompleted(): Boolean = { @@ -96,22 +98,22 @@ private[k8s] class LoggingPodStatusWatcherImpl(conf: KubernetesDriverConf) } override def watchOrStop(sId: String): Boolean = { - logInfo(s"Waiting for application ${conf.appName} with application 
ID $appId " + - s"and submission ID $sId to finish...") + logInfo(log"Waiting for application ${MDC(APP_NAME, conf.appName)}} with application ID " + + log"${MDC(APP_ID, appId)} and submission ID ${MDC(SUBMISSION_ID, sId)} to finish...") val interval = conf.get(REPORT_INTERVAL) synchronized { while (!podCompleted && !resourceTooOldReceived) { wait(interval) - logInfo(s"Application status for $appId (phase: $phase)") + logInfo(log"Application status for ${MDC(APP_ID, appId)} (phase: ${MDC(POD_PHASE, phase)})") } } if(podCompleted) { logInfo( - pod.map { p => s"Container final statuses:\n\n${containersDescription(p)}" } - .getOrElse("No containers were found in the driver pod.")) - logInfo(s"Application ${conf.appName} with application ID $appId " + - s"and submission ID $sId finished") + pod.map { p => log"Container final statuses:\n\n${MDC(STATUS, containersDescription(p))}" } + .getOrElse(log"No containers were found in the driver pod.")) + logInfo(log"Application ${MDC(APP_NAME, conf.appName)} with application ID " + + log"${MDC(APP_ID, appId)} and submission ID ${MDC(SUBMISSION_ID, sId)} finished") } podCompleted } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/MainAppResource.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/MainAppResource.scala index a2e01fa2d9a0e..398bb76376cfb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/MainAppResource.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/MainAppResource.scala @@ -16,15 +16,38 @@ */ package org.apache.spark.deploy.k8s.submit -private[spark] sealed trait MainAppResource +import org.apache.spark.annotation.{DeveloperApi, Since, Stable} -private[spark] sealed trait NonJVMResource +/** + * :: DeveloperApi :: + * + * All traits and classes in this file are used by K8s module and Spark K8s operator. 
+ */ + +@Stable +@DeveloperApi +@Since("2.3.0") +sealed trait MainAppResource + +@Stable +@DeveloperApi +@Since("2.4.0") +sealed trait NonJVMResource -private[spark] case class JavaMainAppResource(primaryResource: Option[String]) +@Stable +@DeveloperApi +@Since("3.0.0") +case class JavaMainAppResource(primaryResource: Option[String]) extends MainAppResource -private[spark] case class PythonMainAppResource(primaryResource: String) +@Stable +@DeveloperApi +@Since("2.4.0") +case class PythonMainAppResource(primaryResource: String) extends MainAppResource with NonJVMResource -private[spark] case class RMainAppResource(primaryResource: String) +@Stable +@DeveloperApi +@Since("2.4.0") +case class RMainAppResource(primaryResource: String) extends MainAppResource with NonJVMResource diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index a48e1fba99546..ef3547fd389fd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -33,8 +33,7 @@ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.KubernetesConf import org.apache.spark.deploy.k8s.KubernetesUtils.addOwnerReference -import org.apache.spark.internal.{Logging, LogKey, MDC} -import org.apache.spark.internal.LogKey.{COUNT, EXECUTOR_IDS, TIMEOUT} +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config._ import org.apache.spark.resource.ResourceProfile import org.apache.spark.scheduler.cluster.SchedulerBackendUtils.DEFAULT_NUMBER_EXECUTORS @@ -145,7 +144,7 @@ class ExecutorPodsAllocator( onNewSnapshots(applicationId, schedulerBackend, executorPodsSnapshot) if (failureTracker.numFailedExecutors > maxNumExecutorFailures) { logError(log"Max number of executor failures " + - log"(${MDC(LogKey.MAX_EXECUTOR_FAILURES, maxNumExecutorFailures)}) reached") + log"(${MDC(LogKeys.MAX_EXECUTOR_FAILURES, maxNumExecutorFailures)}) reached") stopApplication(EXCEED_MAX_EXECUTOR_FAILURES) } } @@ -211,10 +210,11 @@ class ExecutorPodsAllocator( } if (timedOut.nonEmpty) { - logWarning(log"Executors with ids ${MDC(EXECUTOR_IDS, timedOut.mkString(","))}} were not " + - log"detected in the Kubernetes cluster after ${MDC(TIMEOUT, podCreationTimeout)} ms " + - log"despite the fact that a previous allocation attempt tried to create them. " + - log"The executors may have been deleted but the application missed the deletion event.") + logWarning(log"Executors with ids ${MDC(LogKeys.EXECUTOR_IDS, timedOut.mkString(","))}} " + + log"were not detected in the Kubernetes cluster after " + + log"${MDC(LogKeys.TIMEOUT, podCreationTimeout)} ms despite the fact that a previous " + + log"allocation attempt tried to create them. 
The executors may have been deleted but the " + + log"application missed the deletion event.") newlyCreatedExecutors --= timedOut if (shouldDeleteExecutors) { @@ -283,7 +283,7 @@ class ExecutorPodsAllocator( val newFailedExecutorIds = currentFailedExecutorIds.diff(failedExecutorIds) if (newFailedExecutorIds.nonEmpty) { - logWarning(log"${MDC(COUNT, newFailedExecutorIds.size)} new failed executors.") + logWarning(log"${MDC(LogKeys.COUNT, newFailedExecutorIds.size)} new failed executors.") newFailedExecutorIds.foreach { _ => failureTracker.registerExecutorFailure() } } failedExecutorIds = failedExecutorIds ++ currentFailedExecutorIds @@ -343,7 +343,8 @@ class ExecutorPodsAllocator( val toDelete = newlyCreatedToDelete ++ pendingToDelete if (toDelete.nonEmpty) { - logInfo(s"Deleting ${toDelete.size} excess pod requests (${toDelete.mkString(",")}).") + logInfo(log"Deleting ${MDC(LogKeys.COUNT, toDelete.size)} excess pod requests " + + log"(${MDC(LogKeys.RESOURCE_PROFILE_IDS, toDelete.mkString(","))}).") _deletedExecutorIds = _deletedExecutorIds ++ toDelete Utils.tryLogNonFatalError { @@ -397,9 +398,11 @@ class ExecutorPodsAllocator( val numMissingPodsForRpId = targetNum - podCountForRpId val numExecutorsToAllocate = math.min(math.min(numMissingPodsForRpId, podAllocationSize), sharedSlotFromPendingPods) - logInfo(s"Going to request $numExecutorsToAllocate executors from Kubernetes for " + - s"ResourceProfile Id: $rpId, target: $targetNum, known: $podCountForRpId, " + - s"sharedSlotFromPendingPods: $sharedSlotFromPendingPods.") + logInfo(log"Going to request ${MDC(LogKeys.COUNT, numExecutorsToAllocate)} executors from" + + log" Kubernetes for ResourceProfile Id: ${MDC(LogKeys.RESOURCE_PROFILE_ID, rpId)}, " + + log"target: ${MDC(LogKeys.NUM_POD_TARGET, targetNum)}, " + + log"known: ${MDC(LogKeys.NUM_POD, podCountForRpId)}, sharedSlotFromPendingPods: " + + log"${MDC(LogKeys.NUM_POD_SHARED_SLOT, sharedSlotFromPendingPods)}.") requestNewExecutors(numExecutorsToAllocate, applicationId, rpId, k8sKnownPVCNames) } } @@ -428,7 +431,8 @@ class ExecutorPodsAllocator( .filterNot(pvc => pvcsInUse.contains(pvc.getMetadata.getName)) .filter(pvc => now - Instant.parse(pvc.getMetadata.getCreationTimestamp).toEpochMilli > podAllocationDelay) - logInfo(s"Found ${reusablePVCs.size} reusable PVCs from ${createdPVCs.size} PVCs") + logInfo(log"Found ${MDC(LogKeys.COUNT, reusablePVCs.size)} reusable PVCs from " + + log"${MDC(LogKeys.TOTAL, createdPVCs.size)} PVCs") reusablePVCs } catch { case _: KubernetesClientException => @@ -449,7 +453,8 @@ class ExecutorPodsAllocator( val reusablePVCs = getReusablePVCs(applicationId, pvcsInUse) for ( _ <- 0 until numExecutorsToAllocate) { if (reusablePVCs.isEmpty && podAllocOnPVC && maxPVCs <= PVC_COUNTER.get()) { - logInfo(s"Wait to reuse one of the existing ${PVC_COUNTER.get()} PVCs.") + logInfo( + log"Wait to reuse one of the existing ${MDC(LogKeys.COUNT, PVC_COUNTER.get())} PVCs.") return } val newExecutorId = EXECUTOR_ID_COUNTER.incrementAndGet() @@ -480,8 +485,9 @@ class ExecutorPodsAllocator( addOwnerReference(driverPod.get, Seq(resource)) } val pvc = resource.asInstanceOf[PersistentVolumeClaim] - logInfo(s"Trying to create PersistentVolumeClaim ${pvc.getMetadata.getName} with " + - s"StorageClass ${pvc.getSpec.getStorageClassName}") + logInfo(log"Trying to create PersistentVolumeClaim " + + log"${MDC(LogKeys.PVC_METADATA_NAME, pvc.getMetadata.getName)} with " + + log"StorageClass ${MDC(LogKeys.CLASS_NAME, pvc.getSpec.getStorageClassName)}") 
kubernetesClient.persistentVolumeClaims().inNamespace(namespace).resource(pvc).create() PVC_COUNTER.incrementAndGet() } @@ -519,7 +525,8 @@ class ExecutorPodsAllocator( if (volume.nonEmpty) { val matchedPVC = reusablePVCs.remove(index) replacedResources.add(pvc) - logInfo(s"Reuse PersistentVolumeClaim ${matchedPVC.getMetadata.getName}") + logInfo(log"Reuse PersistentVolumeClaim " + + log"${MDC(LogKeys.PVC_METADATA_NAME, matchedPVC.getMetadata.getName)}") volume.get.getPersistentVolumeClaim.setClaimName(matchedPVC.getMetadata.getName) } } @@ -535,7 +542,7 @@ class ExecutorPodsAllocator( } catch { case e: Exception => logError(log"Cannot get the creationTimestamp of the pod: " + - log"${MDC(LogKey.POD_ID, state.pod)}", e) + log"${MDC(LogKeys.POD_ID, state.pod)}", e) true } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala index 5590311bf6614..0d79efa06e497 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsLifecycleManager.scala @@ -30,7 +30,8 @@ import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.KubernetesUtils._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.EXECUTOR_ID import org.apache.spark.scheduler.ExecutorExited import org.apache.spark.util.Utils @@ -99,8 +100,9 @@ private[spark] class ExecutorPodsLifecycleManager( if (onFinalNonDeletedState(succeeded, execId, schedulerBackend, deleteFromK8s)) { execIdsRemovedInThisRound += execId if (schedulerBackend.isExecutorActive(execId.toString)) { - logInfo(s"Snapshot reported succeeded executor with id $execId, " + - "even though the application has not requested for it to be removed.") + logInfo(log"Snapshot reported succeeded executor with id " + + log"${MDC(EXECUTOR_ID, execId)}, even though the application has not " + + log"requested for it to be removed.") } else { logDebug(s"Snapshot reported succeeded executor with id $execId," + s" pod name ${state.pod.getMetadata.getName}.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala index 99cef671d2e41..44daf57d76ebf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsSnapshot.scala @@ -25,7 +25,7 @@ import io.fabric8.kubernetes.api.model.Pod import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{POD_NAME, POD_NAMESPACE, POD_PHASE} +import org.apache.spark.internal.LogKeys.{POD_NAME, POD_NAMESPACE, POD_PHASE} /** * An immutable view of the current executor pods that are running in the cluster. 
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala index 1c0de8e2afded..e84aae1f27b3b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala @@ -27,7 +27,7 @@ import org.apache.spark.api.plugin.{DriverPlugin, ExecutorPlugin, PluginContext, import org.apache.spark.deploy.k8s.Config.{EXECUTOR_ROLL_INTERVAL, EXECUTOR_ROLL_POLICY, ExecutorRollPolicy, MINIMUM_TASKS_PER_EXECUTOR_BEFORE_ROLLING} import org.apache.spark.executor.ExecutorMetrics import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CLASS_NAME, CONFIG, INTERVAL} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, CONFIG, EXECUTOR_ID, INTERVAL} import org.apache.spark.internal.config.DECOMMISSION_ENABLED import org.apache.spark.scheduler.ExecutorDecommissionInfo import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND @@ -82,7 +82,7 @@ class ExecutorRollDriverPlugin extends DriverPlugin with Logging { choose(executorSummaryList, policy) match { case Some(id) => // Use decommission to be safe. - logInfo(s"Ask to decommission executor $id") + logInfo(log"Ask to decommission executor ${MDC(EXECUTOR_ID, id)}") val now = System.currentTimeMillis() scheduler.decommissionExecutor( id, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala index 3235d922204b2..ddcdc2cf663ac 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterManager.scala @@ -25,7 +25,8 @@ import org.apache.spark.{SparkConf, SparkContext, SparkMasterRegex} import org.apache.spark.deploy.k8s.{KubernetesConf, KubernetesUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants.DEFAULT_EXECUTOR_CONTAINER_NAME -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.MASTER_URL import org.apache.spark.internal.config.TASK_MAX_FAILURES import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} import org.apache.spark.scheduler.local.LocalSchedulerBackend @@ -61,7 +62,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit if (threads == "*") localCpuCount else threads.toInt case _ => 1 } - logInfo(s"Running Spark with ${sc.conf.get(KUBERNETES_DRIVER_MASTER_URL)}") + logInfo(log"Running Spark with ${MDC(MASTER_URL, sc.conf.get(KUBERNETES_DRIVER_MASTER_URL))}") val schedulerImpl = scheduler.asInstanceOf[TaskSchedulerImpl] // KubernetesClusterSchedulerBackend respects `spark.app.id` while LocalSchedulerBackend // does not. Propagate `spark.app.id` via `spark.test.appId` to match the behavior. 
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index daf8d5e3f58a2..4e4634504a0f3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -32,6 +32,8 @@ import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.deploy.k8s.submit.KubernetesClientUtils import org.apache.spark.deploy.security.HadoopDelegationTokenManager +import org.apache.spark.internal.LogKeys.{COUNT, HOST_PORT, TOTAL} +import org.apache.spark.internal.MDC import org.apache.spark.internal.config.SCHEDULER_MIN_REGISTERED_RESOURCES_RATIO import org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc.{RpcAddress, RpcCallContext} @@ -255,9 +257,10 @@ private[spark] class KubernetesClusterSchedulerBackend( .withLabel(SPARK_ROLE_LABEL, SPARK_POD_EXECUTOR_ROLE) .withLabelIn(SPARK_EXECUTOR_ID_LABEL, executorIds: _*) - if (!running.list().getItems().isEmpty()) { - logInfo(s"Forcefully deleting ${running.list().getItems().size()} pods " + - s"(out of ${executorIds.size}) that are still running after graceful shutdown period.") + if (!running.list().getItems.isEmpty) { + logInfo(log"Forcefully deleting ${MDC(COUNT, running.list().getItems.size())} pods " + + log"(out of ${MDC(TOTAL, executorIds.size)}) that are still running after graceful " + + log"shutdown period.") running.delete() } } @@ -353,7 +356,7 @@ private[spark] class KubernetesClusterSchedulerBackend( execIDRequester -= rpcAddress // Expected, executors re-establish a connection with an ID case _ => - logInfo(s"No executor found for ${rpcAddress}") + logInfo(log"No executor found for ${MDC(HOST_PORT, rpcAddress)}") } } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala index 376218df57702..2728385874f6d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala @@ -27,7 +27,7 @@ import org.apache.commons.io.FileExistsException import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.deploy.k8s.Config.KUBERNETES_DRIVER_REUSE_PVC -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.{SHUFFLE_CHECKSUM_ALGORITHM, SHUFFLE_CHECKSUM_ENABLED} import org.apache.spark.shuffle.ShuffleChecksumUtils.{compareChecksums, getChecksumFileName} import org.apache.spark.shuffle.api.{ShuffleExecutorComponents, ShuffleMapOutputWriter, SingleSpillShuffleMapOutputWriter} @@ -54,7 +54,8 @@ class KubernetesLocalDiskShuffleExecutorComponents(sparkConf: SparkConf) KubernetesLocalDiskShuffleExecutorComponents.recoverDiskStore(sparkConf, blockManager) } } else { - logInfo(s"Skip recovery because ${KUBERNETES_DRIVER_REUSE_PVC.key} is disabled.") + logInfo(log"Skip recovery because 
${MDC(LogKeys.CONFIG, KUBERNETES_DRIVER_REUSE_PVC.key)} " + + log"is disabled.") } } @@ -94,20 +95,23 @@ object KubernetesLocalDiskShuffleExecutorComponents extends Logging { .partition(_.getName.contains(".checksum")) val (indexFiles, dataFiles) = files.partition(_.getName.endsWith(".index")) - logInfo(s"Found ${dataFiles.size} data files, ${indexFiles.size} index files, " + - s"and ${checksumFiles.size} checksum files.") + logInfo(log"Found ${MDC(LogKeys.NUM_DATA_FILE, dataFiles.length)} data files, " + + log"${MDC(LogKeys.NUM_INDEX_FILE, indexFiles.length)} index files, " + + log"and ${MDC(LogKeys.NUM_CHECKSUM_FILE, checksumFiles.length)} checksum files.") // Build a hashmap with checksum file name as a key val checksumFileMap = new mutable.HashMap[String, File]() val algorithm = conf.get(SHUFFLE_CHECKSUM_ALGORITHM) checksumFiles.foreach { f => - logInfo(s"${f.getName} -> ${f.getAbsolutePath}") + logInfo(log"${MDC(LogKeys.FILE_NAME, f.getName)} -> " + + log"${MDC(LogKeys.FILE_ABSOLUTE_PATH, f.getAbsolutePath)}") checksumFileMap.put(f.getName, f) } // Build a hashmap with shuffle data file name as a key val indexFileMap = new mutable.HashMap[String, File]() indexFiles.foreach { f => - logInfo(s"${f.getName.replace(".index", ".data")} -> ${f.getAbsolutePath}") + logInfo(log"${MDC(LogKeys.FILE_NAME, f.getName.replace(".index", ".data"))} -> " + + log"${MDC(LogKeys.FILE_ABSOLUTE_PATH, f.getAbsolutePath)}") indexFileMap.put(f.getName.replace(".index", ".data"), f) } @@ -116,7 +120,7 @@ object KubernetesLocalDiskShuffleExecutorComponents extends Logging { val level = StorageLevel.DISK_ONLY val checksumDisabled = !conf.get(SHUFFLE_CHECKSUM_ENABLED) (dataFiles ++ indexFiles).foreach { f => - logInfo(s"Try to recover ${f.getAbsolutePath}") + logInfo(log"Try to recover ${MDC(LogKeys.FILE_ABSOLUTE_PATH, f.getAbsolutePath)}") try { val id = BlockId(f.getName) // To make it sure to handle only shuffle blocks @@ -129,7 +133,8 @@ object KubernetesLocalDiskShuffleExecutorComponents extends Logging { val decryptedSize = f.length() bm.TempFileBasedBlockStoreUpdater(id, level, classTag, f, decryptedSize).save() } else { - logInfo(s"Ignore ${f.getAbsolutePath} due to the verification failure.") + logInfo(log"Ignore ${MDC(LogKeys.FILE_ABSOLUTE_PATH, f.getAbsolutePath)} " + + log"due to the verification failure.") } } else { logInfo("Ignore a non-shuffle block file.") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala index 9963db016ad9b..3c53e9b74f924 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala @@ -40,7 +40,9 @@ class KubernetesConfSuite extends SparkFunSuite { "execNodeSelectorKey2" -> "execNodeSelectorValue2") private val CUSTOM_LABELS = Map( "customLabel1Key" -> "customLabel1Value", - "customLabel2Key" -> "customLabel2Value") + "customLabel2Key" -> "customLabel2Value", + "customLabel3Key" -> "{{APP_ID}}", + "customLabel4Key" -> "{{EXECUTOR_ID}}") private val CUSTOM_ANNOTATIONS = Map( "customAnnotation1Key" -> "customAnnotation1Value", "customAnnotation2Key" -> "customAnnotation2Value", @@ -95,7 +97,9 @@ class KubernetesConfSuite extends SparkFunSuite { SPARK_APP_ID_LABEL -> KubernetesTestConf.APP_ID, SPARK_APP_NAME_LABEL -> 
KubernetesConf.getAppNameLabel(conf.appName), SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) ++ - CUSTOM_LABELS) + CUSTOM_LABELS.map { + case (k, v) => (k, Utils.substituteAppNExecIds(v, conf.appId, "")) + }) assert(conf.annotations === CUSTOM_ANNOTATIONS.map { case (k, v) => (k, Utils.substituteAppNExecIds(v, conf.appId, "")) }) @@ -165,7 +169,10 @@ class KubernetesConfSuite extends SparkFunSuite { SPARK_APP_ID_LABEL -> KubernetesTestConf.APP_ID, SPARK_APP_NAME_LABEL -> KubernetesConf.getAppNameLabel(conf.appName), SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE, - SPARK_RESOURCE_PROFILE_ID_LABEL -> DEFAULT_RESOURCE_PROFILE_ID.toString) ++ CUSTOM_LABELS) + SPARK_RESOURCE_PROFILE_ID_LABEL -> DEFAULT_RESOURCE_PROFILE_ID.toString) ++ + CUSTOM_LABELS.map { + case (k, v) => (k, Utils.substituteAppNExecIds(v, conf.appId, EXECUTOR_ID)) + }) assert(conf.annotations === CUSTOM_ANNOTATIONS.map { case (k, v) => (k, Utils.substituteAppNExecIds(v, conf.appId, EXECUTOR_ID)) }) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala index d6a60b1edea2f..b70b9348d23b4 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesTestConf.scala @@ -22,6 +22,7 @@ import io.fabric8.kubernetes.api.model.Pod import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.Config._ import org.apache.spark.deploy.k8s.submit.{JavaMainAppResource, MainAppResource} +import org.apache.spark.util.{Clock, SystemClock} /** * Builder methods for KubernetesConf that allow easy control over what to return for a few @@ -52,7 +53,8 @@ object KubernetesTestConf { secretEnvNamesToKeyRefs: Map[String, String] = Map.empty, secretNamesToMountPaths: Map[String, String] = Map.empty, volumes: Seq[KubernetesVolumeSpec] = Seq.empty, - proxyUser: Option[String] = None): KubernetesDriverConf = { + proxyUser: Option[String] = None, + clock: Clock = new SystemClock()): KubernetesDriverConf = { val conf = sparkConf.clone() resourceNamePrefix.foreach { prefix => @@ -67,7 +69,7 @@ object KubernetesTestConf { setPrefixedConfigs(conf, KUBERNETES_DRIVER_SECRET_KEY_REF_PREFIX, secretEnvNamesToKeyRefs) setVolumeSpecs(conf, KUBERNETES_DRIVER_VOLUMES_PREFIX, volumes) - new KubernetesDriverConf(conf, appId, mainAppResource, mainClass, appArgs, proxyUser) + new KubernetesDriverConf(conf, appId, mainAppResource, mainClass, appArgs, proxyUser, clock) } // scalastyle:on argcount diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index f102851e6c3b9..bf022ac630158 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -35,11 +35,13 @@ import org.apache.spark.util.Utils class BasicDriverFeatureStepSuite extends SparkFunSuite { - private val CUSTOM_DRIVER_LABELS = Map("labelkey" -> "labelvalue") + private val CUSTOM_DRIVER_LABELS = Map( + "labelkey" -> "labelvalue", + "customAppIdLabelKey" -> "{{APP_ID}}") private val CONTAINER_IMAGE_PULL_POLICY = 
"IfNotPresent" private val DRIVER_ANNOTATIONS = Map( "customAnnotation" -> "customAnnotationValue", - "yunikorn.apache.org/app-id" -> "{{APPID}}") + "customAppIdAnnotation" -> "{{APP_ID}}") private val DRIVER_ENVS = Map( "customDriverEnv1" -> "customDriverEnv1Value", "customDriverEnv2" -> "customDriverEnv2Value") @@ -121,10 +123,11 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { assert(driverPodMetadata.getName === "spark-driver-pod") // Check custom and preset labels are as expected + val labels = driverPodMetadata.getLabels CUSTOM_DRIVER_LABELS.foreach { case (k, v) => - assert(driverPodMetadata.getLabels.get(k) === v) + assert(labels.get(k) === Utils.substituteAppNExecIds(v, KubernetesTestConf.APP_ID, "")) } - assert(driverPodMetadata.getLabels === kubernetesConf.labels.asJava) + assert(labels === kubernetesConf.labels.asJava) val annotations = driverPodMetadata.getAnnotations.asScala DRIVER_ANNOTATIONS.foreach { case (k, v) => diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala index 06d322c9d19b5..d69fcf89e1337 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStepSuite.scala @@ -109,17 +109,18 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite { } test("Long prefixes should switch to using a generated unique name.") { + val clock = new ManualClock() val sparkConf = new SparkConf(false) .set(KUBERNETES_NAMESPACE, "my-namespace") - val kconf = KubernetesTestConf.createDriverConf( - sparkConf = sparkConf, - resourceNamePrefix = Some(LONG_RESOURCE_NAME_PREFIX), - labels = DRIVER_LABELS) - val clock = new ManualClock() // Ensure that multiple services created at the same time generate unique names. 
val services = (1 to 10).map { _ => - val configurationStep = new DriverServiceFeatureStep(kconf, clock = clock) + val kconf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf, + resourceNamePrefix = Some(LONG_RESOURCE_NAME_PREFIX), + labels = DRIVER_LABELS, + clock = clock) + val configurationStep = new DriverServiceFeatureStep(kconf) val serviceName = configurationStep .getAdditionalKubernetesResources() .head @@ -130,11 +131,11 @@ class DriverServiceFeatureStepSuite extends SparkFunSuite { val hostAddress = configurationStep .getAdditionalPodSystemProperties()(DRIVER_HOST_ADDRESS.key) - (serviceName -> hostAddress) - }.toMap + Tuple3(kconf, serviceName, hostAddress) + } assert(services.size === 10) - services.foreach { case (name, address) => + services.foreach { case (kconf, name, address) => assert(!name.startsWith(kconf.resourceNamePrefix)) assert(!address.startsWith(kconf.resourceNamePrefix)) assert(InternetDomainName.isValid(address)) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala index d6911aadfa237..0dafe30c364ae 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/BasicTestsSuite.scala @@ -102,16 +102,18 @@ private[spark] trait BasicTestsSuite { k8sSuite: KubernetesSuite => sparkAppConf .set("spark.kubernetes.driver.label.label1", "label1-value") .set("spark.kubernetes.driver.label.label2", "label2-value") + .set("spark.kubernetes.driver.label.customAppIdLabelKey", "{{APP_ID}}") .set("spark.kubernetes.driver.annotation.annotation1", "annotation1-value") .set("spark.kubernetes.driver.annotation.annotation2", "annotation2-value") - .set("spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}") + .set("spark.kubernetes.driver.annotation.customAppIdAnnotation", "{{APP_ID}}") .set("spark.kubernetes.driverEnv.ENV1", "VALUE1") .set("spark.kubernetes.driverEnv.ENV2", "VALUE2") .set("spark.kubernetes.executor.label.label1", "label1-value") .set("spark.kubernetes.executor.label.label2", "label2-value") + .set("spark.kubernetes.executor.label.customAppIdLabelKey", "{{APP_ID}}") .set("spark.kubernetes.executor.annotation.annotation1", "annotation1-value") .set("spark.kubernetes.executor.annotation.annotation2", "annotation2-value") - .set("spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}") + .set("spark.kubernetes.executor.annotation.customAppIdAnnotation", "{{APP_ID}}") .set("spark.executorEnv.ENV1", "VALUE1") .set("spark.executorEnv.ENV2", "VALUE2") diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index 1b9b5310c2ee2..ae5f037c6b7d4 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -175,7 +175,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite 
=> expectedDriverLogOnCompletion = Seq( "Finished waiting, stopping Spark", "Decommission executors", - "Remove reason statistics: (gracefully decommissioned: 1, decommision unfinished: 0, " + + "Remove reason statistics: (gracefully decommissioned: 1, decommission unfinished: 0, " + "driver killed: 0, unexpectedly exited: 0)."), appArgs = Array.empty[String], driverPodChecker = doBasicDriverPyPodCheck, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index 5f95b8daa66cb..c0f5e0fe265d7 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -38,7 +38,7 @@ import org.apache.spark.deploy.k8s.integrationtest.DepsTestsSuite.{DEPS_TIMEOUT, import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite._ import org.apache.spark.deploy.k8s.integrationtest.Utils.getExamplesJarName import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.Minikube -import org.apache.spark.internal.{LogKey, MDC} +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.internal.config.{ARCHIVES, PYSPARK_DRIVER_PYTHON, PYSPARK_PYTHON} private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => @@ -327,7 +327,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => s3client.createBucket(createBucketRequest) } catch { case e: Exception => - logError(log"Failed to create bucket ${MDC(LogKey.BUCKET, BUCKET)}", e) + logError(log"Failed to create bucket ${MDC(LogKeys.BUCKET, BUCKET)}", e) throw new SparkException(s"Failed to create bucket $BUCKET.", e) } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 868461fd5b9e1..0b0b30e5e04fd 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -589,7 +589,8 @@ class KubernetesSuite extends SparkFunSuite assert(pod.getMetadata.getLabels.get("label2") === "label2-value") assert(pod.getMetadata.getAnnotations.get("annotation1") === "annotation1-value") assert(pod.getMetadata.getAnnotations.get("annotation2") === "annotation2-value") - val appId = pod.getMetadata.getAnnotations.get("yunikorn.apache.org/app-id") + val appIdLabel = pod.getMetadata.getLabels.get("customAppIdLabelKey") + val appIdAnnotation = pod.getMetadata.getAnnotations.get("customAppIdAnnotation") val container = pod.getSpec.getContainers.get(0) val envVars = container @@ -601,7 +602,8 @@ class KubernetesSuite extends SparkFunSuite .toMap assert(envVars("ENV1") === "VALUE1") assert(envVars("ENV2") === "VALUE2") - assert(appId === envVars(ENV_APPLICATION_ID)) + assert(appIdLabel === envVars(ENV_APPLICATION_ID)) + assert(appIdAnnotation === envVars(ENV_APPLICATION_ID)) } private def deleteDriverPod(): Unit = { diff --git 
a/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpFilter.java b/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpFilter.java new file mode 100644 index 0000000000000..60e880d1ac4aa --- /dev/null +++ b/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpFilter.java @@ -0,0 +1,239 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.yarn; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Time; + +import jakarta.servlet.*; +import jakarta.servlet.http.Cookie; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import java.io.IOException; +import java.net.*; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; + +// This class is copied from Hadoop 3.4.0 +// org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter +// +// Modification: +// Migrate from javax.servlet to jakarta.servlet +// Copy constant string definitions to strip external dependency +// - RM_HA_URLS +// - PROXY_USER_COOKIE_NAME +@Public +public class AmIpFilter implements Filter { + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(AmIpFilter.class); + + @Deprecated + public static final String PROXY_HOST = "PROXY_HOST"; + @Deprecated + public static final String PROXY_URI_BASE = "PROXY_URI_BASE"; + public static final String PROXY_HOSTS = "PROXY_HOSTS"; + public static final String PROXY_HOSTS_DELIMITER = ","; + public static final String PROXY_URI_BASES = "PROXY_URI_BASES"; + public static final String PROXY_URI_BASES_DELIMITER = ","; + private static final String PROXY_PATH = "/proxy"; + // RM_HA_URLS is defined in AmFilterInitializer in the original Hadoop code + private static final String RM_HA_URLS = "RM_HA_URLS"; + // WebAppProxyServlet is defined in WebAppProxyServlet in the original Hadoop code + public static final String PROXY_USER_COOKIE_NAME = "proxy-user"; + // update the proxy IP list about every 5 min + private static long updateInterval = TimeUnit.MINUTES.toMillis(5); + + private String[] proxyHosts; + private Set proxyAddresses = null; + private long lastUpdate; + @VisibleForTesting + Map proxyUriBases; + String[] rmUrls = null; + + @Override + public void init(FilterConfig conf) throws ServletException { + // Maintain for backwards compatibility + if (conf.getInitParameter(PROXY_HOST) != null + && conf.getInitParameter(PROXY_URI_BASE) != null) { + 
proxyHosts = new String[]{conf.getInitParameter(PROXY_HOST)}; + proxyUriBases = new HashMap<>(1); + proxyUriBases.put("dummy", conf.getInitParameter(PROXY_URI_BASE)); + } else { + proxyHosts = conf.getInitParameter(PROXY_HOSTS) + .split(PROXY_HOSTS_DELIMITER); + + String[] proxyUriBasesArr = conf.getInitParameter(PROXY_URI_BASES) + .split(PROXY_URI_BASES_DELIMITER); + proxyUriBases = new HashMap<>(proxyUriBasesArr.length); + for (String proxyUriBase : proxyUriBasesArr) { + try { + URL url = new URL(proxyUriBase); + proxyUriBases.put(url.getHost() + ":" + url.getPort(), proxyUriBase); + } catch(MalformedURLException e) { + LOG.warn(proxyUriBase + " does not appear to be a valid URL", e); + } + } + } + + if (conf.getInitParameter(RM_HA_URLS) != null) { + rmUrls = conf.getInitParameter(RM_HA_URLS).split(","); + } + } + + protected Set getProxyAddresses() throws ServletException { + long now = Time.monotonicNow(); + synchronized(this) { + if (proxyAddresses == null || (lastUpdate + updateInterval) <= now) { + proxyAddresses = new HashSet<>(); + for (String proxyHost : proxyHosts) { + try { + for (InetAddress add : InetAddress.getAllByName(proxyHost)) { + LOG.debug("proxy address is: {}", add.getHostAddress()); + proxyAddresses.add(add.getHostAddress()); + } + lastUpdate = now; + } catch (UnknownHostException e) { + LOG.warn("Could not locate " + proxyHost + " - skipping", e); + } + } + if (proxyAddresses.isEmpty()) { + throw new ServletException("Could not locate any of the proxy hosts"); + } + } + return proxyAddresses; + } + } + + @Override + public void destroy() { + // Empty + } + + @Override + public void doFilter(ServletRequest req, ServletResponse resp, + FilterChain chain) throws IOException, ServletException { + ProxyUtils.rejectNonHttpRequests(req); + + HttpServletRequest httpReq = (HttpServletRequest)req; + HttpServletResponse httpResp = (HttpServletResponse)resp; + + LOG.debug("Remote address for request is: {}", httpReq.getRemoteAddr()); + + if (!getProxyAddresses().contains(httpReq.getRemoteAddr())) { + StringBuilder redirect = new StringBuilder(findRedirectUrl()); + + redirect.append(httpReq.getRequestURI()); + + int insertPoint = redirect.indexOf(PROXY_PATH); + + if (insertPoint >= 0) { + // Add /redirect as the second component of the path so that the RM web + // proxy knows that this request was a redirect. 
+ insertPoint += PROXY_PATH.length(); + redirect.insert(insertPoint, "/redirect"); + } + // add the query parameters on the redirect if there were any + String queryString = httpReq.getQueryString(); + if (queryString != null && !queryString.isEmpty()) { + redirect.append("?"); + redirect.append(queryString); + } + + ProxyUtils.sendRedirect(httpReq, httpResp, redirect.toString()); + } else { + String user = null; + + if (httpReq.getCookies() != null) { + for (Cookie c: httpReq.getCookies()) { + if (PROXY_USER_COOKIE_NAME.equals(c.getName())){ + user = c.getValue(); + break; + } + } + } + if (user == null) { + LOG.debug("Could not find {} cookie, so user will not be set", + PROXY_USER_COOKIE_NAME); + + chain.doFilter(req, resp); + } else { + AmIpPrincipal principal = new AmIpPrincipal(user); + ServletRequest requestWrapper = new AmIpServletRequestWrapper(httpReq, + principal); + + chain.doFilter(requestWrapper, resp); + } + } + } + + @VisibleForTesting + public String findRedirectUrl() throws ServletException { + String addr = null; + if (proxyUriBases.size() == 1) { + // external proxy or not RM HA + addr = proxyUriBases.values().iterator().next(); + } else if (rmUrls != null) { + for (String url : rmUrls) { + String host = proxyUriBases.get(url); + if (isValidUrl(host)) { + addr = host; + break; + } + } + } + + if (addr == null) { + throw new ServletException( + "Could not determine the proxy server for redirection"); + } + return addr; + } + + @VisibleForTesting + public boolean isValidUrl(String url) { + boolean isValid = false; + try { + HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection(); + conn.connect(); + isValid = conn.getResponseCode() == HttpURLConnection.HTTP_OK; + // If security is enabled, any valid RM which can give 401 Unauthorized is + // good enough to access. Since AM doesn't have enough credential, auth + // cannot be completed and hence 401 is fine in such case. + if (!isValid && UserGroupInformation.isSecurityEnabled()) { + isValid = (conn.getResponseCode() == HttpURLConnection.HTTP_UNAUTHORIZED) + || (conn.getResponseCode() == HttpURLConnection.HTTP_FORBIDDEN); + return isValid; + } + } catch (Exception e) { + LOG.warn("Failed to connect to " + url + ": " + e.toString()); + } + return isValid; + } + + @VisibleForTesting + protected static void setUpdateInterval(long updateInterval) { + AmIpFilter.updateInterval = updateInterval; + } +} diff --git a/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpPrincipal.java b/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpPrincipal.java new file mode 100644 index 0000000000000..9d5a5e3b04568 --- /dev/null +++ b/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpPrincipal.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.yarn; + +import java.security.Principal; + +// This class is copied from Hadoop 3.4.0 +// org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpPrincipal +public class AmIpPrincipal implements Principal { + private final String name; + + public AmIpPrincipal(String name) { + this.name = name; + } + + @Override + public String getName() { + return name; + } +} diff --git a/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpServletRequestWrapper.java b/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpServletRequestWrapper.java new file mode 100644 index 0000000000000..9082378fe89c7 --- /dev/null +++ b/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/AmIpServletRequestWrapper.java @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.yarn; + +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletRequestWrapper; +import java.security.Principal; + +// This class is copied from Hadoop 3.4.0 +// org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpServletRequestWrapper +// +// Modification: +// Migrate from javax.servlet to jakarta.servlet +public class AmIpServletRequestWrapper extends HttpServletRequestWrapper { + private final AmIpPrincipal principal; + + public AmIpServletRequestWrapper(HttpServletRequest request, + AmIpPrincipal principal) { + super(request); + this.principal = principal; + } + + @Override + public Principal getUserPrincipal() { + return principal; + } + + @Override + public String getRemoteUser() { + return principal.getName(); + } + + @Override + public boolean isUserInRole(String role) { + // No role info so far + return false; + } + +} diff --git a/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/ProxyUtils.java b/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/ProxyUtils.java new file mode 100644 index 0000000000000..c7a49a76c655f --- /dev/null +++ b/resource-managers/yarn/src/main/java/org/apache/spark/deploy/yarn/ProxyUtils.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.yarn; + +import org.apache.hadoop.yarn.webapp.MimeType; +import org.apache.hadoop.yarn.webapp.hamlet2.Hamlet; + +import jakarta.servlet.ServletException; +import jakarta.servlet.ServletRequest; +import jakarta.servlet.http.HttpServletRequest; +import jakarta.servlet.http.HttpServletResponse; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.EnumSet; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; + +// Class containing general purpose proxy utilities +// +// This class is copied from Hadoop 3.4.0 +// org.apache.hadoop.yarn.server.webproxy.ProxyUtils +// +// Modification: +// Migrate from javax.servlet to jakarta.servlet +public class ProxyUtils { + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(ProxyUtils.class); + public static final String E_HTTP_HTTPS_ONLY = + "This filter only works for HTTP/HTTPS"; + public static final String LOCATION = "Location"; + + public static class __ implements Hamlet.__ { + // Empty + } + + public static class Page extends Hamlet { + Page(PrintWriter out) { + super(out, 0, false); + } + + public HTML html() { + return new HTML<>("html", null, EnumSet.of(EOpt.ENDTAG)); + } + } + + /** + * Handle redirects with a status code that can in future support verbs other + * than GET, thus supporting full REST functionality. + *
<p>
+ * The target URL is included in the redirect text returned + *
<p>
      + * At the end of this method, the output stream is closed. + * + * @param request request (hence: the verb and any other information + * relevant to a redirect) + * @param response the response + * @param target the target URL -unencoded + * + */ + public static void sendRedirect(HttpServletRequest request, + HttpServletResponse response, + String target) + throws IOException { + LOG.debug("Redirecting {} {} to {}", + request.getMethod(), + request.getRequestURI(), + target); + String location = response.encodeRedirectURL(target); + response.setStatus(HttpServletResponse.SC_FOUND); + response.setHeader(LOCATION, location); + response.setContentType(MimeType.HTML); + PrintWriter writer = response.getWriter(); + Page p = new Page(writer); + p.html() + .head().title("Moved").__() + .body() + .h1("Moved") + .div() + .__("Content has moved ") + .a(location, "here").__() + .__().__(); + writer.close(); + } + + + /** + * Output 404 with appropriate message. + * @param resp the http response. + * @param message the message to include on the page. + * @throws IOException on any error. + */ + public static void notFound(HttpServletResponse resp, String message) + throws IOException { + resp.setStatus(HttpServletResponse.SC_NOT_FOUND); + resp.setContentType(MimeType.HTML); + Page p = new Page(resp.getWriter()); + p.html().h1(message).__(); + } + + /** + * Reject any request that isn't from an HTTP servlet + * @param req request + * @throws ServletException if the request is of the wrong type + */ + public static void rejectNonHttpRequests(ServletRequest req) throws + ServletException { + if (!(req instanceof HttpServletRequest)) { + throw new ServletException(E_HTTP_HTTPS_ONLY); + } + } +} diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index eb944244fc9da..11d22a3225d8a 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -43,8 +43,7 @@ import org.apache.spark.deploy.{ExecutorFailureTracker, SparkHadoopUtil} import org.apache.spark.deploy.history.HistoryServer import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.deploy.yarn.config._ -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{EXIT_CODE, FAILURES, HOST_PORT} +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.UI._ import org.apache.spark.metrics.{MetricsSystem, MetricsSystemInstances} @@ -220,7 +219,7 @@ private[spark] class ApplicationMaster( "APPMASTER", sparkConf.get(APP_CALLER_CONTEXT), Option(appAttemptId.getApplicationId.toString), attemptID).setCurrentContext() - logInfo("ApplicationAttemptId: " + appAttemptId) + logInfo(log"ApplicationAttemptId: ${MDC(LogKeys.APP_ATTEMPT_ID, appAttemptId)}") // During shutdown, we may not be able to create an FileSystem object. So, pre-create here. 
val stagingDirPath = new Path(System.getenv("SPARK_YARN_STAGING_DIR")) @@ -368,8 +367,9 @@ private[spark] class ApplicationMaster( final def unregister(status: FinalApplicationStatus, diagnostics: String = null): Unit = { synchronized { if (registered && !unregistered) { - logInfo(s"Unregistering ApplicationMaster with $status" + - Option(diagnostics).map(msg => s" (diag message: $msg)").getOrElse("")) + logInfo(log"Unregistering ApplicationMaster with ${MDC(LogKeys.APP_STATE, status)}" + + Option(diagnostics).map( + msg => log" (diag message: ${MDC(LogKeys.MESSAGE, msg)})").getOrElse(log"")) unregistered = true client.unregister(status, Option(diagnostics).getOrElse("")) } @@ -387,8 +387,9 @@ private[spark] class ApplicationMaster( finalStatus = FinalApplicationStatus.FAILED exitCode = ApplicationMaster.EXIT_SC_NOT_INITED } - logInfo(s"Final app status: $finalStatus, exitCode: $exitCode" + - Option(msg).map(msg => s", (reason: $msg)").getOrElse("")) + logInfo(log"Final app status: ${MDC(LogKeys.APP_STATE, finalStatus)}, " + + log"exitCode: ${MDC(LogKeys.EXIT_CODE, exitCode)}" + + Option(msg).map(msg => log", (reason: ${MDC(LogKeys.REASON, msg)})").getOrElse(log"")) finalMsg = ComStrUtils.abbreviate(msg, sparkConf.get(AM_FINAL_MSG_LIMIT).toInt) finished = true if (!inShutdown && Thread.currentThread() != reporterThread && reporterThread != null) { @@ -481,8 +482,8 @@ private[spark] class ApplicationMaster( // the allocator is ready to service requests. rpcEnv.setupEndpoint("YarnAM", new AMEndpoint(rpcEnv, driverRef)) if (_sparkConf.get(SHUFFLE_SERVICE_ENABLED)) { - logInfo("Initializing service data for shuffle service using name '" + - s"${_sparkConf.get(SHUFFLE_SERVICE_NAME)}'") + logInfo(log"Initializing service data for shuffle service using name '" + + log"${MDC(LogKeys.SHUFFLE_SERVICE_NAME, _sparkConf.get(SHUFFLE_SERVICE_NAME))}'") } allocator.allocateResources() val ms = MetricsSystem.createMetricsSystem(MetricsSystemInstances.APPLICATION_MASTER, sparkConf) @@ -526,9 +527,9 @@ private[spark] class ApplicationMaster( userClassThread.join() } catch { case e: SparkException if e.getCause().isInstanceOf[TimeoutException] => - logError( - s"SparkContext did not initialize after waiting for $totalWaitTime ms. " + - "Please check earlier log output for errors. Failing the application.") + logError(log"SparkContext did not initialize after waiting for " + + log"${MDC(LogKeys.TIMEOUT, totalWaitTime)} ms. " + + log"Please check earlier log output for errors. 
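Most of the changes in this file, like those in the Kubernetes scheduler backend above, follow the same structured-logging migration: s-interpolated strings become log-interpolated messages whose variable parts are wrapped in MDC with a LogKeys entry, and longer messages are built by concatenating log parts with +. A minimal sketch of the call pattern, assuming spark-core on the classpath (the class and method names here are illustrative, not from the patch):

  import org.apache.spark.internal.{Logging, LogKeys, MDC}

  // Hypothetical component, for illustration only.
  class ReporterExample extends Logging {
    def report(appId: String, count: Int): Unit = {
      // Each interpolated value is wrapped in MDC with a LogKeys constant so it is
      // also emitted as a structured field; message parts are joined with +.
      logInfo(log"Application ${MDC(LogKeys.APP_ID, appId)} is running " +
        log"${MDC(LogKeys.COUNT, count)} executor(s)")
    }
  }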
Failing the application.") finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_SC_NOT_INITED, "Timed out waiting for SparkContext.") @@ -597,8 +598,8 @@ private[spark] class ApplicationMaster( ApplicationMaster.EXIT_REPORTER_FAILURE, "Exception was thrown " + s"$failureCount time(s) from Reporter thread.") } else { - logWarning( - log"Reporter thread fails ${MDC(FAILURES, failureCount)} time(s) in a row.", e) + logWarning(log"Reporter thread fails ${MDC(LogKeys.FAILURES, failureCount)} " + + log"time(s) in a row.", e) } } try { @@ -656,8 +657,9 @@ private[spark] class ApplicationMaster( t.setDaemon(true) t.setName("Reporter") t.start() - logInfo(s"Started progress reporter thread with (heartbeat : $heartbeatInterval, " + - s"initial allocation : $initialAllocationInterval) intervals") + logInfo(log"Started progress reporter thread with " + + log"(heartbeat: ${MDC(LogKeys.HEARTBEAT_INTERVAL, heartbeatInterval)}, initial allocation: " + + log"${MDC(LogKeys.INITIAL_HEARTBEAT_INTERVAL, initialAllocationInterval)}) intervals") t } @@ -683,18 +685,18 @@ private[spark] class ApplicationMaster( try { val preserveFiles = sparkConf.get(PRESERVE_STAGING_FILES) if (!preserveFiles) { - logInfo("Deleting staging directory " + stagingDirPath) + logInfo(log"Deleting staging directory ${MDC(LogKeys.PATH, stagingDirPath)}") fs.delete(stagingDirPath, true) } } catch { case ioe: IOException => - logError("Failed to cleanup staging dir " + stagingDirPath, ioe) + logError(log"Failed to cleanup staging dir ${MDC(LogKeys.PATH, stagingDirPath)}", ioe) } } /** Add the Yarn IP filter that is required for properly securing the UI. */ private def addAmIpFilter(driver: Option[RpcEndpointRef], proxyBase: String) = { - val amFilter = "org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter" + val amFilter = classOf[AmIpFilter].getName val params = client.getAmIpFilterParams(yarnConf, proxyBase) driver match { case Some(d) => @@ -734,7 +736,8 @@ private[spark] class ApplicationMaster( override def run(): Unit = { try { if (!Modifier.isStatic(mainMethod.getModifiers)) { - logError(s"Could not find static main method in object ${args.userClass}") + logError(log"Could not find static main method in object " + + log"${MDC(LogKeys.CLASS_NAME, args.userClass)}") finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_EXCEPTION_USER_CLASS) } else { mainMethod.invoke(null, userArgs.toArray) @@ -748,7 +751,7 @@ private[spark] class ApplicationMaster( // Reporter thread can interrupt to stop user class case SparkUserAppException(exitCode) => val msg = log"User application exited with status " + - log"${MDC(EXIT_CODE, exitCode)}" + log"${MDC(LogKeys.EXIT_CODE, exitCode)}" logError(msg) finish(FinalApplicationStatus.FAILED, exitCode, msg.message) case cause: Throwable => @@ -791,9 +794,9 @@ private[spark] class ApplicationMaster( override def onStart(): Unit = { driver.send(RegisterClusterManager(self)) - // if deployment mode for yarn Application is client + // if deployment mode for yarn Application is managed client // then send the AM Log Info to spark driver - if (!isClusterMode) { + if (!isClusterMode && !sparkConf.get(YARN_UNMANAGED_AM)) { val hostPort = YarnContainerInfoHelper.getNodeManagerHttpAddress(None) val yarnAMID = "yarn-am" val info = new MiscellaneousProcessDetails(hostPort, @@ -831,7 +834,8 @@ private[spark] class ApplicationMaster( } case KillExecutors(executorIds) => - logInfo(s"Driver requested to kill executor(s) ${executorIds.mkString(", ")}.") + logInfo(log"Driver requested to kill 
executor(s) " + + log"${MDC(LogKeys.EXECUTOR_IDS, executorIds.mkString(", "))}.") Option(allocator) match { case Some(a) => executorIds.foreach(a.killExecutor) case None => logWarning("Container allocator is not ready to kill executors yet.") @@ -854,15 +858,17 @@ private[spark] class ApplicationMaster( if (!(isClusterMode || sparkConf.get(YARN_UNMANAGED_AM))) { if (shutdown || !clientModeTreatDisconnectAsFailed) { if (exitCode == 0) { - logInfo(s"Driver terminated or disconnected! Shutting down. $remoteAddress") + logInfo(log"Driver terminated or disconnected! Shutting down. " + + log"${MDC(LogKeys.HOST_PORT, remoteAddress)}") finish(FinalApplicationStatus.SUCCEEDED, ApplicationMaster.EXIT_SUCCESS) } else { - logError(log"Driver terminated with exit code ${MDC(EXIT_CODE, exitCode)}! " + - log"Shutting down. ${MDC(HOST_PORT, remoteAddress)}") + logError(log"Driver terminated with exit code ${MDC(LogKeys.EXIT_CODE, exitCode)}! " + + log"Shutting down. ${MDC(LogKeys.HOST_PORT, remoteAddress)}") finish(FinalApplicationStatus.FAILED, exitCode) } } else { - logError(s"Application Master lost connection with driver! Shutting down. $remoteAddress") + logError(log"Application Master lost connection with driver! Shutting down. " + + log"${MDC(LogKeys.HOST_PORT, remoteAddress)}") finish(FinalApplicationStatus.FAILED, ApplicationMaster.EXIT_DISCONNECTED) } } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index bed7c859003a0..b2c4d97bc7b07 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -55,8 +55,7 @@ import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.deploy.yarn.ResourceRequestHelper._ import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.deploy.yarn.config._ -import org.apache.spark.internal.{Logging, LogKey, MDC} -import org.apache.spark.internal.LogKey.{APP_ID, CONFIG, CONFIG2, PATH} +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Python._ import org.apache.spark.launcher.{JavaModuleOptions, LauncherBackend, SparkAppHandle, YarnCommandBuilderUtils} @@ -142,7 +141,8 @@ private[spark] class Client( val principal = sparkConf.get(PRINCIPAL).orNull require((principal == null) == (keytab == null), "Both principal and keytab must be defined, or neither.") - logInfo(s"Kerberos credentials: principal = $principal, keytab = $keytab") + logInfo(log"Kerberos credentials: principal = ${MDC(LogKeys.PRINCIPAL, principal)}, " + + log"keytab = ${MDC(LogKeys.KEYTAB, keytab)}") // Generate a file name that can be used for the keytab file, that does not conflict // with any user file. 
Some(new File(keytab).getName() + "-" + UUID.randomUUID().toString) @@ -229,7 +229,7 @@ private[spark] class Client( val appContext = createApplicationSubmissionContext(newApp, containerContext) // Finally, submit and monitor the application - logInfo(s"Submitting application $appId to ResourceManager") + logInfo(log"Submitting application ${MDC(LogKeys.APP_ID, appId)} to ResourceManager") yarnClient.submitApplication(appContext) launcherBackend.setAppId(appId.toString) reportLauncherState(SparkAppHandle.State.SUBMITTED) @@ -254,11 +254,11 @@ private[spark] class Client( try { val fs = stagingDirPath.getFileSystem(hadoopConf) if (fs.delete(stagingDirPath, true)) { - logInfo(s"Deleted staging directory $stagingDirPath") + logInfo(log"Deleted staging directory ${MDC(LogKeys.PATH, stagingDirPath)}") } } catch { case ioe: IOException => - logWarning(log"Failed to cleanup staging dir ${MDC(PATH, stagingDirPath)}", ioe) + logWarning(log"Failed to cleanup staging dir ${MDC(LogKeys.PATH, stagingDirPath)}", ioe) } } @@ -332,7 +332,7 @@ private[spark] class Client( appContext.setLogAggregationContext(logAggregationContext) } catch { case NonFatal(e) => - logWarning(log"Ignoring ${MDC(CONFIG, ROLLED_LOG_INCLUDE_PATTERN.key)}} " + + logWarning(log"Ignoring ${MDC(LogKeys.CONFIG, ROLLED_LOG_INCLUDE_PATTERN.key)}} " + log"because the version of YARN does not support it", e) } } @@ -371,14 +371,16 @@ private[spark] class Client( // SPARK-37205: this regex is used to grep a list of configurations and send them to YARN RM // for fetching delegation tokens. See YARN-5910 for more details. sparkConf.get(config.AM_TOKEN_CONF_REGEX).foreach { regex => - logInfo(s"Processing token conf (spark.yarn.am.tokenConfRegex) with regex $regex") + logInfo(log"Processing token conf (spark.yarn.am.tokenConfRegex) with " + + log"regex ${MDC(LogKeys.TOKEN_REGEX, regex)}") val dob = new DataOutputBuffer() val copy = new Configuration(false) copy.clear() hadoopConf.asScala.foreach { entry => if (entry.getKey.matches(regex)) { copy.set(entry.getKey, entry.getValue) - logInfo(s"Captured key: ${entry.getKey} -> value: ${entry.getValue}") + logInfo(log"Captured key: ${MDC(LogKeys.KEY, entry.getKey)} -> " + + log"value: ${MDC(LogKeys.VALUE, entry.getValue)}") } } copy.write(dob); @@ -403,8 +405,8 @@ private[spark] class Client( */ private def verifyClusterResources(newAppResponse: GetNewApplicationResponse): Unit = { val maxMem = newAppResponse.getMaximumResourceCapability.getMemorySize - logInfo("Verifying our application has not requested more than the maximum " + - s"memory capability of the cluster ($maxMem MB per container)") + logInfo(log"Verifying our application has not requested more than the maximum memory " + + log"capability of the cluster (${MDC(LogKeys.MAX_MEMORY_SIZE, maxMem)} MB per container)") val executorMem = executorMemory + executorOffHeapMemory + executorMemoryOverhead + pysparkWorkerMemory if (executorMem > maxMem) { @@ -421,9 +423,8 @@ private[spark] class Client( "Please check the values of 'yarn.scheduler.maximum-allocation-mb' and/or " + "'yarn.nodemanager.resource.memory-mb'.") } - logInfo("Will allocate AM container, with %d MB memory including %d MB overhead".format( - amMem, - amMemoryOverhead)) + logInfo(log"Will allocate AM container, with ${MDC(LogKeys.MEMORY_SIZE, amMem)} MB memory " + + log"including ${MDC(LogKeys.OVERHEAD_MEMORY_SIZE, amMemoryOverhead)} MB overhead") // We could add checks to make sure the entire cluster has enough resources but that involves // getting all the node reports and 
computing ourselves. @@ -447,7 +448,8 @@ private[spark] class Client( var destPath = srcPath if (force || !compareFs(srcFs, destFs) || "file".equals(srcFs.getScheme)) { destPath = new Path(destDir, destName.getOrElse(srcPath.getName())) - logInfo(s"Uploading resource $srcPath -> $destPath") + logInfo(log"Uploading resource ${MDC(LogKeys.SRC_PATH, srcPath)} -> " + + log"${MDC(LogKeys.TARGET_PATH, destPath)}") try { FileUtil.copy(srcFs, srcPath, destFs, destPath, false, hadoopConf) } catch { @@ -458,7 +460,8 @@ private[spark] class Client( replication.foreach(repl => destFs.setReplication(destPath, repl)) destFs.setPermission(destPath, new FsPermission(APP_FILE_PERMISSION)) } else { - logInfo(s"Source and destination file systems are the same. Not copying $srcPath") + logInfo(log"Source and destination file systems are the same. " + + log"Not copying ${MDC(LogKeys.SRC_PATH, srcPath)}") } // Resolve any symlinks in the URI path so using a "current" symlink to point to a specific // version shows the specific version in the distributed cache configuration @@ -558,11 +561,11 @@ private[spark] class Client( val uriStr = uri.toString() val fileName = new File(uri.getPath).getName if (distributedUris.contains(uriStr)) { - logWarning(log"Same path resource ${MDC(LogKey.URI, uri)} added multiple times " + + logWarning(log"Same path resource ${MDC(LogKeys.URI, uri)} added multiple times " + log"to distributed cache.") false } else if (distributedNames.contains(fileName)) { - logWarning(log"Same name resource ${MDC(LogKey.URI, uri)} added multiple times " + + logWarning(log"Same name resource ${MDC(LogKeys.URI, uri)} added multiple times " + log"to distributed cache") false } else { @@ -701,8 +704,9 @@ private[spark] class Client( case None => // No configuration, so fall back to uploading local jar files. logWarning( - log"Neither ${MDC(CONFIG, SPARK_JARS.key)} nor ${MDC(CONFIG2, SPARK_ARCHIVE.key)}} " + - log"is set, falling back to uploading libraries under SPARK_HOME.") + log"Neither ${MDC(LogKeys.CONFIG, SPARK_JARS.key)} nor " + + log"${MDC(LogKeys.CONFIG2, SPARK_ARCHIVE.key)}} is set, falling back to uploading " + + log"libraries under SPARK_HOME.") val jarsDir = new File(YarnCommandBuilderUtils.findJarsDir( sparkConf.getenv("SPARK_HOME"))) val jarsArchive = File.createTempFile(LOCALIZED_LIB_DIR, ".zip", @@ -881,7 +885,7 @@ private[spark] class Client( if (dir.isDirectory()) { val files = dir.listFiles() if (files == null) { - logWarning(log"Failed to list files under directory ${MDC(PATH, dir)}") + logWarning(log"Failed to list files under directory ${MDC(LogKeys.PATH, dir)}") } else { files.foreach { file => if (file.isFile && !hadoopConfFiles.contains(file.getName())) { @@ -1070,7 +1074,8 @@ private[spark] class Client( sparkConf)) } if (sparkConf.get(AM_JAVA_OPTIONS).isDefined) { - logWarning(log"${MDC(CONFIG, AM_JAVA_OPTIONS.key)} will not take effect in cluster mode") + logWarning(log"${MDC(LogKeys.CONFIG, AM_JAVA_OPTIONS.key)} will not take effect " + + log"in cluster mode") } } else { // Validate and include yarn am specific java options in yarn-client mode. 
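One detail worth noting in the hunks above: when a single message carries two values of the same kind, the migrated call sites use distinct keys (LogKeys.CONFIG and LogKeys.CONFIG2 here, COUNT and TOTAL earlier in the Kubernetes backend) so the structured fields do not collide, and the logWarning/logError overloads that take a Throwable still accept log-interpolated messages. A small sketch under the same assumptions as the example above (hypothetical class and parameter names):

  import org.apache.spark.internal.{Logging, LogKeys, MDC}

  // Hypothetical component, for illustration only.
  class FallbackExample extends Logging {
    def warnFallback(jarsKey: String, archiveKey: String, cause: Throwable): Unit = {
      // Two config keys in one message -> two distinct LogKeys entries,
      // passed alongside the exception via the (message, throwable) overload.
      logWarning(log"Neither ${MDC(LogKeys.CONFIG, jarsKey)} nor " +
        log"${MDC(LogKeys.CONFIG2, archiveKey)} is set, falling back to defaults.", cause)
    }
  }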
@@ -1202,21 +1207,22 @@ private[spark] class Client( getApplicationReport() } catch { case e: ApplicationNotFoundException => - logError(log"Application ${MDC(APP_ID, appId)} not found.") + logError(log"Application ${MDC(LogKeys.APP_ID, appId)} not found.") cleanupStagingDir() return YarnAppReport(YarnApplicationState.KILLED, FinalApplicationStatus.KILLED, None) case NonFatal(e) if !e.isInstanceOf[InterruptedIOException] => - val msg = s"Failed to contact YARN for application $appId." + val msg = log"Failed to contact YARN for application ${MDC(LogKeys.APP_ID, appId)}." logError(msg, e) // Don't necessarily clean up staging dir because status is unknown return YarnAppReport(YarnApplicationState.FAILED, FinalApplicationStatus.FAILED, - Some(msg)) + Some(msg.message)) } val state = report.getYarnApplicationState reportsSinceLastLog += 1 if (logApplicationReport) { if (lastState != state || reportsSinceLastLog >= reportsTillNextLog) { - logInfo(s"Application report for $appId (state: $state)") + logInfo(log"Application report for ${MDC(LogKeys.APP_ID, appId)} " + + log"(state: ${MDC(LogKeys.APP_STATE, state)})") reportsSinceLastLog = 0 } @@ -1225,7 +1231,8 @@ private[spark] class Client( if (log.isDebugEnabled) { logDebug(formatReportDetails(report, getDriverLogsLink(report))) } else if (lastState != state) { - logInfo(formatReportDetails(report, getDriverLogsLink(report))) + logInfo(log"${MDC(LogKeys.REPORT_DETAILS, + formatReportDetails(report, getDriverLogsLink(report)))}") } } @@ -1347,7 +1354,7 @@ private[spark] class Client( .getOrElse(IMap.empty) } catch { case e: Exception => - logWarning(log"Unable to get driver log links for ${MDC(APP_ID, appId)}: ", e) + logWarning(log"Unable to get driver log links for ${MDC(LogKeys.APP_ID, appId)}: ", e) // Include the full stack trace only at DEBUG level to reduce verbosity logDebug(s"Unable to get driver log links for $appId", e) IMap.empty @@ -1367,8 +1374,10 @@ private[spark] class Client( if (!launcherBackend.isConnected() && fireAndForget) { val report = getApplicationReport() val state = report.getYarnApplicationState - logInfo(s"Application report for $appId (state: $state)") - logInfo(formatReportDetails(report, getDriverLogsLink(report))) + logInfo(log"Application report for ${MDC(LogKeys.APP_ID, appId)} " + + log"(state: ${MDC(LogKeys.APP_STATE, state)})") + logInfo(log"${MDC(LogKeys.REPORT_DETAILS, + formatReportDetails(report, getDriverLogsLink(report)))}") if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) { throw new SparkException(s"Application $appId finished with status: $state") } @@ -1376,7 +1385,7 @@ private[spark] class Client( val YarnAppReport(appState, finalState, diags) = monitorApplication() if (appState == YarnApplicationState.FAILED || finalState == FinalApplicationStatus.FAILED) { diags.foreach { err => - logError(s"Application diagnostics message: $err") + logError(log"Application diagnostics message: ${MDC(LogKeys.ERROR, err)}") } throw new SparkException(s"Application $appId finished with failed status") } @@ -1674,8 +1683,8 @@ private[spark] object Client extends Logging { def getClusterPath(conf: SparkConf, path: String): String = { val localPath = conf.get(GATEWAY_ROOT_PATH) val clusterPath = conf.get(REPLACEMENT_ROOT_PATH) - if (localPath != null && clusterPath != null) { - path.replace(localPath, clusterPath) + if (localPath.isDefined && clusterPath.isDefined) { + path.replace(localPath.get, clusterPath.get) } else { path } diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala index 202ef36166d2a..62753f35ae76c 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientArguments.scala @@ -19,7 +19,8 @@ package org.apache.spark.deploy.yarn import scala.collection.mutable.ArrayBuffer -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.ARGS // TODO: Add code and support for ensuring that yarn resource 'tasks' are location aware ! private[spark] class ClientArguments(args: Array[String]) extends Logging { @@ -75,7 +76,7 @@ private[spark] class ClientArguments(args: Array[String]) extends Logging { } if (verbose) { - logInfo(s"Parsed user args for YARN application: [${userArgs.mkString(" ")}]") + logInfo(log"Parsed user args for YARN application: [${MDC(ARGS, userArgs.mkString(" "))}]") } } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index 81b210a2297a5..983ab5b4341b8 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -38,7 +38,8 @@ import org.apache.hadoop.yarn.ipc.YarnRPC import org.apache.hadoop.yarn.util.Records import org.apache.spark.{SecurityManager, SparkConf, SparkException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC, MessageWithContext} +import org.apache.spark.internal.LogKeys.{EXECUTOR_ENVS, EXECUTOR_LAUNCH_COMMANDS, EXECUTOR_RESOURCES} import org.apache.spark.internal.config._ import org.apache.spark.network.util.JavaUtils import org.apache.spark.util.Utils @@ -68,21 +69,23 @@ private[yarn] class ExecutorRunnable( startContainer() } - def launchContextDebugInfo(): String = { + def launchContextDebugInfo(): MessageWithContext = { val commands = prepareCommand() val env = prepareEnvironment() - s""" - |=============================================================================== - |Default YARN executor launch context: - | env: - |${Utils.redact(sparkConf, env.toSeq).map { case (k, v) => s" $k -> $v\n" }.mkString} - | command: - | ${Utils.redactCommandLineArgs(sparkConf, commands).mkString(" \\ \n ")} - | - | resources: - |${localResources.map { case (k, v) => s" $k -> $v\n" }.mkString} - |===============================================================================""".stripMargin + // scalastyle:off line.size.limit + log""" + |=============================================================================== + |Default YARN executor launch context: + | env: + |${MDC(EXECUTOR_ENVS, Utils.redact(sparkConf, env.toSeq).map { case (k, v) => s" $k -> $v\n" }.mkString)} + | command: + | ${MDC(EXECUTOR_LAUNCH_COMMANDS, Utils.redactCommandLineArgs(sparkConf, commands).mkString(" \\ \n "))} + | + | resources: + |${MDC(EXECUTOR_RESOURCES, localResources.map { case (k, v) => s" $k -> $v\n" }.mkString)} + |===============================================================================""".stripMargin + // scalastyle:on line.size.limit } def startContainer(): java.util.Map[String, ByteBuffer] = { diff --git 
a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala index 755a69520ce41..a747f99f1b85d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.yarn.exceptions.ResourceNotFoundException import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ERROR, RESOURCE_NAME} +import org.apache.spark.internal.LogKeys.{ERROR, RESOURCE_NAME} import org.apache.spark.internal.config._ import org.apache.spark.resource.ResourceID import org.apache.spark.resource.ResourceUtils.{AMOUNT, FPGA, GPU} diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/SparkRackResolver.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/SparkRackResolver.scala index e0d66af348e29..618f0dc8a4daa 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/SparkRackResolver.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/SparkRackResolver.scala @@ -29,7 +29,8 @@ import org.apache.hadoop.yarn.util.RackResolver import org.apache.logging.log4j.{Level, LogManager} import org.apache.logging.log4j.core.Logger -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.NODE_LOCATION /** * Re-implement YARN's [[RackResolver]] for hadoop releases without YARN-9332. @@ -77,8 +78,8 @@ private[spark] class SparkRackResolver(conf: Configuration) extends Logging { val rNameList = dnsToSwitchMapping.resolve(hostNames.toList.asJava).asScala if (rNameList == null || rNameList.isEmpty) { hostNames.foreach(nodes += new NodeBase(_, NetworkTopology.DEFAULT_RACK)) - logInfo(s"Got an error when resolving hostNames. " + - s"Falling back to ${NetworkTopology.DEFAULT_RACK} for all") + logInfo(log"Got an error when resolving hostNames. 
" + + log"Falling back to ${MDC(NODE_LOCATION, NetworkTopology.DEFAULT_RACK)} for all") } else { for ((hostName, rName) <- hostNames.zip(rNameList)) { if (Strings.isNullOrEmpty(rName)) { diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index efe766be8356d..c86195d0ef31e 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -41,7 +41,7 @@ import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.executor.ExecutorExitCode import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{APP_STATE, CONFIG, CONFIG2, CONFIG3, CONTAINER_ID, ERROR, EXECUTOR_ID, HOST, REASON} +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config._ import org.apache.spark.resource.ResourceProfile import org.apache.spark.resource.ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID @@ -195,7 +195,8 @@ private[yarn] class YarnAllocator( case (true, false) => true case (true, true) => logWarning(log"Yarn Executor Decommissioning is supported only " + - log"when ${MDC(CONFIG, SHUFFLE_SERVICE_ENABLED.key)} is set to false. See: SPARK-39018.") + log"when ${MDC(LogKeys.CONFIG, SHUFFLE_SERVICE_ENABLED.key)} is set to false. " + + log"See: SPARK-39018.") false case (false, _) => false } @@ -313,7 +314,8 @@ private[yarn] class YarnAllocator( if (!rpIdToYarnResource.containsKey(rp.id)) { // track the resource profile if not already there getOrUpdateRunningExecutorForRPId(rp.id) - logInfo(s"Resource profile ${rp.id} doesn't exist, adding it") + logInfo(log"Resource profile ${MDC(LogKeys.RESOURCE_PROFILE_ID, rp.id)} doesn't exist, " + + log"adding it") val resourcesWithDefaults = ResourceProfile.getResourcesForClusterManager(rp.id, rp.executorResources, @@ -399,8 +401,8 @@ private[yarn] class YarnAllocator( val res = resourceProfileToTotalExecs.map { case (rp, numExecs) => createYarnResourceForResourceProfile(rp) if (numExecs != getOrUpdateTargetNumExecutorsForRPId(rp.id)) { - logInfo(s"Driver requested a total number of $numExecs executor(s) " + - s"for resource profile id: ${rp.id}.") + logInfo(log"Driver requested a total number of ${MDC(LogKeys.COUNT, numExecs)} " + + log"executor(s) for resource profile id: ${MDC(LogKeys.RESOURCE_PROFILE_ID, rp.id)}.") targetNumExecutorsPerResourceProfileId(rp.id) = numExecs allocatorNodeHealthTracker.setSchedulerExcludedNodes(excludedNodes) true @@ -421,7 +423,8 @@ private[yarn] class YarnAllocator( val (_, rpId) = containerIdToExecutorIdAndResourceProfileId(container.getId) internalReleaseContainer(container) getOrUpdateRunningExecutorForRPId(rpId).remove(executorId) - case _ => logWarning(log"Attempted to kill unknown executor ${MDC(EXECUTOR_ID, executorId)}!") + case _ => logWarning(log"Attempted to kill unknown executor " + + log"${MDC(LogKeys.EXECUTOR_ID, executorId)}!") } } @@ -520,12 +523,13 @@ private[yarn] class YarnAllocator( if (missing > 0) { val resource = rpIdToYarnResource.get(rpId) if (log.isInfoEnabled()) { - var requestContainerMessage = s"Will request $missing executor container(s) for " + - s" ResourceProfile Id: $rpId, each with " + - s"${resource.getVirtualCores} core(s) and " + - s"${resource.getMemorySize} MB memory." 
- if (resource.getResources().nonEmpty) { - requestContainerMessage ++= s" with custom resources: $resource" + var requestContainerMessage = log"Will request ${MDC(LogKeys.COUNT, missing)} executor " + + log"container(s) for ResourceProfile Id: ${MDC(LogKeys.RESOURCE_PROFILE_ID, rpId)}, " + + log"each with ${MDC(LogKeys.VIRTUAL_CORES, resource.getVirtualCores)} core(s) and " + + log"${MDC(LogKeys.MEMORY_SIZE, resource.getMemorySize)} MB memory." + if (resource.getResources.nonEmpty) { + requestContainerMessage = requestContainerMessage + + log" with custom resources: ${MDC(LogKeys.RESOURCE, resource)}" } logInfo(requestContainerMessage) } @@ -536,7 +540,8 @@ private[yarn] class YarnAllocator( } val cancelledContainers = staleRequests.size if (cancelledContainers > 0) { - logInfo(s"Canceled $cancelledContainers container request(s) (locality no longer needed)") + logInfo(log"Canceled ${MDC(LogKeys.COUNT, cancelledContainers)} container request(s) " + + log"(locality no longer needed)") } // consider the number of new containers and cancelled stale containers available @@ -570,8 +575,8 @@ private[yarn] class YarnAllocator( amClient.removeContainerRequest(nonLocal) } if (numToCancel > 0) { - logInfo(s"Canceled $numToCancel unlocalized container requests to " + - s"resubmit with locality") + logInfo(log"Canceled ${MDC(LogKeys.COUNT, numToCancel)} unlocalized container " + + log"requests to resubmit with locality") } } @@ -582,16 +587,20 @@ private[yarn] class YarnAllocator( if (log.isInfoEnabled()) { val (localized, anyHost) = newLocalityRequests.partition(_.getNodes() != null) if (anyHost.nonEmpty) { - logInfo(s"Submitted ${anyHost.size} unlocalized container requests.") + logInfo(log"Submitted ${MDC(LogKeys.COUNT, anyHost.size)}} unlocalized container " + + log"requests.") } localized.foreach { request => - logInfo(s"Submitted container request for host ${hostStr(request)}.") + logInfo(log"Submitted container request for host " + + log"${MDC(LogKeys.HOST, hostStr(request))}.") } } } else if (numPendingAllocate > 0 && missing < 0) { val numToCancel = math.min(numPendingAllocate, -missing) - logInfo(s"Canceling requests for $numToCancel executor container(s) to have a new " + - s"desired total ${getOrUpdateTargetNumExecutorsForRPId(rpId)} executors.") + logInfo(log"Canceling requests for ${MDC(LogKeys.COUNT, numToCancel)} executor " + + log"container(s) to have a new desired total " + + log"${MDC(LogKeys.NUM_EXECUTOR_DESIRED, + getOrUpdateTargetNumExecutorsForRPId(rpId))} executors.") // cancel pending allocate requests by taking locality preference into account val cancelRequests = (staleRequests ++ anyHostRequests ++ localRequests).take(numToCancel) cancelRequests.foreach(amClient.removeContainerRequest) @@ -697,8 +706,9 @@ private[yarn] class YarnAllocator( runAllocatedContainers(containersToUse) - logInfo("Received %d containers from YARN, launching executors on %d of them." 
- .format(allocatedContainers.size, containersToUse.size)) + logInfo(log"Received ${MDC(LogKeys.COUNT, allocatedContainers.size)} containers from YARN, " + + log"launching executors on ${MDC(LogKeys.NUM_EXECUTOR_LAUNCH, containersToUse.size)} " + + log"of them.") } /** @@ -751,8 +761,10 @@ private[yarn] class YarnAllocator( val executorId = executorIdCounter.toString val yarnResourceForRpId = rpIdToYarnResource.get(rpId) assert(container.getResource.getMemorySize >= yarnResourceForRpId.getMemorySize) - logInfo(s"Launching container $containerId on host $executorHostname " + - s"for executor with ID $executorId for ResourceProfile Id $rpId") + logInfo(log"Launching container ${MDC(LogKeys.CONTAINER_ID, containerId)} " + + log"on host ${MDC(LogKeys.HOST, executorHostname)} for " + + log"executor with ID ${MDC(LogKeys.EXECUTOR_ID, executorId)} for " + + log"ResourceProfile Id ${MDC(LogKeys.RESOURCE_PROFILE_ID, rpId)}") val rp = rpIdToResourceProfile(rpId) val defaultResources = ResourceProfile.getDefaultProfileExecutorResources(sparkConf) @@ -790,8 +802,8 @@ private[yarn] class YarnAllocator( getOrUpdateNumExecutorsStartingForRPId(rpId).decrementAndGet() launchingExecutorContainerIds.remove(containerId) if (NonFatal(e)) { - logError(log"Failed to launch executor ${MDC(EXECUTOR_ID, executorId)} " + - log"on container ${MDC(CONTAINER_ID, containerId)}", e) + logError(log"Failed to launch executor ${MDC(LogKeys.EXECUTOR_ID, executorId)} " + + log"on container ${MDC(LogKeys.CONTAINER_ID, containerId)}", e) // Assigned container should be released immediately // to avoid unnecessary resource occupation. amClient.releaseAssignedContainer(containerId) @@ -805,9 +817,9 @@ private[yarn] class YarnAllocator( updateInternalState(rpId, executorId, container) } } else { - logInfo(("Skip launching executorRunnable as running executors count: %d " + - "reached target executors count: %d.").format(rpRunningExecs, - getOrUpdateTargetNumExecutorsForRPId(rpId))) + logInfo(log"Skip launching executorRunnable as running executors count: " + + log"${MDC(LogKeys.COUNT, rpRunningExecs)} reached target executors count: " + + log"${MDC(LogKeys.NUM_EXECUTOR_TARGET, getOrUpdateTargetNumExecutorsForRPId(rpId))}.") } } } @@ -849,47 +861,47 @@ private[yarn] class YarnAllocator( case Some((executorId, _)) => getOrUpdateRunningExecutorForRPId(rpId).remove(executorId) case None => logWarning(log"Cannot find executorId for container: " + - log"${MDC(CONTAINER_ID, containerId)}") + log"${MDC(LogKeys.CONTAINER_ID, containerId)}") } - logInfo("Completed container %s%s (state: %s, exit status: %s)".format( - containerId, - onHostStr, - completedContainer.getState, - completedContainer.getExitStatus)) + logInfo(log"Completed container ${MDC(LogKeys.CONTAINER_ID, containerId)}" + + log"${MDC(LogKeys.HOST, onHostStr)} " + + log"(state: ${MDC(LogKeys.CONTAINER_STATE, completedContainer.getState)}, " + + log"exit status: ${MDC(LogKeys.EXIT_CODE, completedContainer.getExitStatus)}") val exitStatus = completedContainer.getExitStatus val (exitCausedByApp, containerExitReason) = exitStatus match { case _ if shutdown => - (false, log"Executor for container ${MDC(CONTAINER_ID, containerId)} exited after " + - log"Application shutdown.") + (false, log"Executor for container ${MDC(LogKeys.CONTAINER_ID, containerId)} " + + log"exited after Application shutdown.") case ContainerExitStatus.SUCCESS => - (false, log"Executor for container ${MDC(CONTAINER_ID, containerId)} exited because " + - log"of a YARN event (e.g., preemption) and not because 
of an error in the running " + - log"job.") + (false, log"Executor for container ${MDC(LogKeys.CONTAINER_ID, containerId)} " + + log"exited because of a YARN event (e.g., preemption) and not because of an " + + log"error in the running job.") case ContainerExitStatus.PREEMPTED => // Preemption is not the fault of the running tasks, since YARN preempts containers // merely to do resource sharing, and tasks that fail due to preempted executors could // just as easily finish on any other executor. See SPARK-8167. - (false, log"Container ${MDC(CONTAINER_ID, containerId)}${MDC(HOST, onHostStr)} " + - log"was preempted.") + (false, log"Container ${MDC(LogKeys.CONTAINER_ID, containerId)}" + + log"${MDC(LogKeys.HOST, onHostStr)} was preempted.") // Should probably still count memory exceeded exit codes towards task failures case ContainerExitStatus.KILLED_EXCEEDED_VMEM => val vmemExceededPattern = raw"$MEM_REGEX of $MEM_REGEX virtual memory used".r val diag = vmemExceededPattern.findFirstIn(completedContainer.getDiagnostics) .map(_.concat(".")).getOrElse("") val message = log"Container killed by YARN for exceeding virtual memory limits. " + - log"${MDC(ERROR, diag)} Consider boosting " + - log"${MDC(CONFIG, EXECUTOR_MEMORY_OVERHEAD.key)} or boosting " + - log"${MDC(CONFIG2, YarnConfiguration.NM_VMEM_PMEM_RATIO)} or disabling " + - log"${MDC(CONFIG3, YarnConfiguration.NM_VMEM_CHECK_ENABLED)} because of YARN-4714." + log"${MDC(LogKeys.ERROR, diag)} Consider boosting " + + log"${MDC(LogKeys.CONFIG, EXECUTOR_MEMORY_OVERHEAD.key)} or boosting " + + log"${MDC(LogKeys.CONFIG2, YarnConfiguration.NM_VMEM_PMEM_RATIO)} or disabling " + + log"${MDC(LogKeys.CONFIG3, YarnConfiguration.NM_VMEM_CHECK_ENABLED)} " + + log"because of YARN-4714." (true, message) case ContainerExitStatus.KILLED_EXCEEDED_PMEM => val pmemExceededPattern = raw"$MEM_REGEX of $MEM_REGEX physical memory used".r val diag = pmemExceededPattern.findFirstIn(completedContainer.getDiagnostics) .map(_.concat(".")).getOrElse("") val message = log"Container killed by YARN for exceeding physical memory limits. " + - log"${MDC(ERROR, diag)} Consider boosting " + - log"${MDC(CONFIG, EXECUTOR_MEMORY_OVERHEAD.key)}." + log"${MDC(LogKeys.ERROR, diag)} Consider boosting " + + log"${MDC(LogKeys.CONFIG, EXECUTOR_MEMORY_OVERHEAD.key)}." (true, message) case other_exit_status => val exitStatus = completedContainer.getExitStatus @@ -900,17 +912,19 @@ private[yarn] class YarnAllocator( // SPARK-26269: follow YARN's behaviour, see details in // org.apache.hadoop.yarn.util.Apps#shouldCountTowardsNodeBlacklisting if (NOT_APP_AND_SYSTEM_FAULT_EXIT_STATUS.contains(other_exit_status)) { - (false, log"Container marked as failed: ${MDC(CONTAINER_ID, containerId)}" + - log"${MDC(HOST, onHostStr)}. Exit status: ${MDC(APP_STATE, exitStatus)}. " + - log"Possible causes: ${MDC(REASON, sparkExitCodeReason)} " + - log"Diagnostics: ${MDC(ERROR, completedContainer.getDiagnostics)}.") + (false, log"Container marked as failed: ${MDC(LogKeys.CONTAINER_ID, containerId)}" + + log"${MDC(LogKeys.HOST, onHostStr)}. " + + log"Exit status: ${MDC(LogKeys.EXIT_CODE, exitStatus)}. " + + log"Possible causes: ${MDC(LogKeys.REASON, sparkExitCodeReason)} " + + log"Diagnostics: ${MDC(LogKeys.ERROR, completedContainer.getDiagnostics)}.") } else { // completed container from a bad node allocatorNodeHealthTracker.handleResourceAllocationFailure(hostOpt) - (true, log"Container from a bad node: ${MDC(CONTAINER_ID, containerId)}" + - log"${MDC(HOST, onHostStr)}. 
Exit status: ${MDC(APP_STATE, exitStatus)}. " + - log"Possible causes: ${MDC(REASON, sparkExitCodeReason)} " + - log"Diagnostics: ${MDC(ERROR, completedContainer.getDiagnostics)}.") + (true, log"Container from a bad node: ${MDC(LogKeys.CONTAINER_ID, containerId)}" + + log"${MDC(LogKeys.HOST, onHostStr)}. " + + log"Exit status: ${MDC(LogKeys.EXIT_CODE, exitStatus)}. " + + log"Possible causes: ${MDC(LogKeys.REASON, sparkExitCodeReason)} " + + log"Diagnostics: ${MDC(LogKeys.ERROR, completedContainer.getDiagnostics)}.") } } if (exitCausedByApp) { @@ -981,7 +995,7 @@ private[yarn] class YarnAllocator( context.reply(releasedExecutorLossReasons.remove(eid).get) } else { logWarning(log"Tried to get the loss reason for non-existent executor " + - log"${MDC(EXECUTOR_ID, eid)}") + log"${MDC(LogKeys.EXECUTOR_ID, eid)}") context.sendFailure( new SparkException(s"Fail to find loss reason for non-existent executor $eid")) } diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala index 22937ed8117a1..6938c0d7f8020 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala @@ -25,7 +25,8 @@ import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest import org.apache.spark.SparkConf import org.apache.spark.deploy.ExecutorFailureTracker import org.apache.spark.deploy.yarn.config._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{FAILURES, HOST, NODES} import org.apache.spark.internal.config._ import org.apache.spark.scheduler.HealthTracker @@ -90,7 +91,8 @@ private[spark] class YarnAllocatorNodeHealthTracker( private def updateAllocationExcludedNodes(hostname: String): Unit = { val failuresOnHost = failureTracker.numFailuresOnHost(hostname) if (failuresOnHost > maxFailuresPerHost) { - logInfo(s"excluding $hostname as YARN allocation failed $failuresOnHost times") + logInfo(log"excluding ${MDC(HOST, hostname)} as YARN allocation failed " + + log"${MDC(FAILURES, failuresOnHost)} times") allocatorExcludedNodeList.put( hostname, failureTracker.clock.getTimeMillis() + excludeOnFailureTimeoutMillis) @@ -125,10 +127,12 @@ private[spark] class YarnAllocatorNodeHealthTracker( val additions = (nodesToExclude -- currentExcludededYarnNodes).toList.sorted val removals = (currentExcludededYarnNodes -- nodesToExclude).toList.sorted if (additions.nonEmpty) { - logInfo(s"adding nodes to YARN application master's excluded node list: $additions") + logInfo(log"adding nodes to YARN application master's " + + log"excluded node list: ${MDC(NODES, additions)}") } if (removals.nonEmpty) { - logInfo(s"removing nodes from YARN application master's excluded node list: $removals") + logInfo(log"removing nodes from YARN application master's " + + log"excluded node list: ${MDC(NODES, removals)}") } if (additions.nonEmpty || removals.nonEmpty) { // Note YARNs api for excluding nodes is updateBlacklist. 
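The YARN-side hunks above all apply the same migration: plain s"..." interpolation inside log calls is replaced by the structured log"..." interpolator, with each interpolated value wrapped in MDC(LogKeys.<KEY>, value) so it is emitted as a keyed field rather than as raw text. A minimal sketch of the pattern, assuming a hypothetical class and method name (the imports, the HOST/FAILURES log keys, and the logInfo(log"..." + log"...") call style are taken from the hunks above):

package org.apache.spark.deploy.yarn

import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.internal.LogKeys

private[yarn] class AllocationLogger extends Logging {
  // Hypothetical helper illustrating the structured-logging call style used in this patch.
  def reportHostExclusion(hostname: String, failuresOnHost: Int): Unit = {
    // Each value is tagged with a LogKeys entry; message fragments are concatenated
    // with `+` instead of being built via String.format or s"..." interpolation.
    logInfo(log"excluding ${MDC(LogKeys.HOST, hostname)} as YARN allocation failed " +
      log"${MDC(LogKeys.FAILURES, failuresOnHost)} times")
  }
}
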
diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala index c119610080199..51e5e0bfb9087 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala @@ -121,14 +121,14 @@ package object config extends Logging { "with the corresponding path in cluster machines.") .version("1.5.0") .stringConf - .createWithDefault(null) + .createOptional private[spark] val REPLACEMENT_ROOT_PATH = ConfigBuilder("spark.yarn.config.replacementPath") .doc(s"Path to use as a replacement for ${GATEWAY_ROOT_PATH.key} when launching processes " + "in the YARN cluster.") .version("1.5.0") .stringConf - .createWithDefault(null) + .createOptional private[spark] val QUEUE_NAME = ConfigBuilder("spark.yarn.queue") .version("1.0.0") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index ccc0bc9f715e4..8032d782cf4fc 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -26,8 +26,8 @@ import org.apache.hadoop.yarn.api.records.{FinalApplicationStatus, YarnApplicati import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnAppReport} import org.apache.spark.deploy.yarn.config._ -import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.APP_STATE +import org.apache.spark.internal.{config, Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{APP_ID, APP_STATE} import org.apache.spark.launcher.SparkAppHandle import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ @@ -99,7 +99,7 @@ private[spark] class YarnClientSchedulerBackend( throw new SparkException(exceptionMsg) } if (state == YarnApplicationState.RUNNING) { - logInfo(s"Application ${appId.get} has started running.") + logInfo(log"Application ${MDC(APP_ID, appId.get)} has started running.") } } @@ -120,7 +120,7 @@ private[spark] class YarnClientSchedulerBackend( logError(log"YARN application has exited unexpectedly with state " + log"${MDC(APP_STATE, state)}! 
Check the YARN application logs for more details.") diags.foreach { err => - logError(s"Diagnostics message: $err") + logError(log"Diagnostics message: ${MDC(LogKeys.ERROR, err)}") } allowInterrupt = false sc.stop() diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index d7f285aeb892b..cd81f11510fee 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -30,7 +30,7 @@ import org.apache.hadoop.yarn.api.records.{ApplicationAttemptId, ApplicationId} import org.apache.spark.SparkContext import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.internal.{config, Logging, MDC} -import org.apache.spark.internal.LogKey.{EXECUTOR_ID, HOST_PORT, REASON} +import org.apache.spark.internal.LogKeys import org.apache.spark.internal.config.UI._ import org.apache.spark.resource.ResourceProfile import org.apache.spark.rpc._ @@ -221,7 +221,9 @@ private[spark] abstract class YarnSchedulerBackend( if (hasFilter) { // SPARK-26255: Append user provided filters(spark.ui.filters) with yarn filter. val allFilters = Seq(filterName) ++ conf.get(UI_FILTERS) - logInfo(s"Add WebUI Filter. $filterName, $filterParams, $proxyBase") + logInfo(log"Add WebUI Filter. ${MDC(LogKeys.UI_FILTER, filterName)}, " + + log"${MDC(LogKeys.UI_FILTER_PARAMS, filterParams)}, " + + log"${MDC(LogKeys.UI_PROXY_BASE, proxyBase)}") // For already installed handlers, prepend the filter. scheduler.sc.ui.foreach { ui => @@ -306,8 +308,8 @@ private[spark] abstract class YarnSchedulerBackend( .recover { case NonFatal(e) => logWarning(log"Attempted to get executor loss reason for executor id " + - log"${MDC(EXECUTOR_ID, executorId)} at RPC address " + - log"${MDC(HOST_PORT, executorRpcAddress)}, but got no response. " + + log"${MDC(LogKeys.EXECUTOR_ID, executorId)} at RPC address " + + log"${MDC(LogKeys.HOST_PORT, executorRpcAddress)}, but got no response. 
" + log"Marking as agent lost.", e) RemoveExecutor(executorId, ExecutorProcessLost()) }(ThreadUtils.sameThread) @@ -332,7 +334,7 @@ private[spark] abstract class YarnSchedulerBackend( override def receive: PartialFunction[Any, Unit] = { case RegisterClusterManager(am) => - logInfo(s"ApplicationMaster registered as $am") + logInfo(log"ApplicationMaster registered as ${MDC(LogKeys.RPC_ENDPOINT_REF, am)}") amEndpoint = Option(am) reset() @@ -346,7 +348,7 @@ private[spark] abstract class YarnSchedulerBackend( case r @ RemoveExecutor(executorId, reason) => if (!stopped.get) { logWarning(log"Requesting driver to remove executor " + - log"${MDC(EXECUTOR_ID, executorId)} for reason ${MDC(REASON, reason)}") + log"${MDC(LogKeys.EXECUTOR_ID, executorId)} for reason ${MDC(LogKeys.REASON, reason)}") driverEndpoint.send(r) } @@ -364,7 +366,8 @@ private[spark] abstract class YarnSchedulerBackend( am.ask[Boolean](r).andThen { case Success(b) => context.reply(b) case Failure(NonFatal(e)) => - logError(s"Sending $r to AM was unsuccessful", e) + logError( + log"Sending ${MDC(LogKeys.REQUEST_EXECUTORS, r)} to AM was unsuccessful", e) context.sendFailure(e) }(ThreadUtils.sameThread) case None => @@ -378,7 +381,7 @@ private[spark] abstract class YarnSchedulerBackend( am.ask[Boolean](k).andThen { case Success(b) => context.reply(b) case Failure(NonFatal(e)) => - logError(s"Sending $k to AM was unsuccessful", e) + logError(log"Sending ${MDC(LogKeys.KILL_EXECUTORS, k)} to AM was unsuccessful", e) context.sendFailure(e) }(ThreadUtils.sameThread) case None => @@ -395,7 +398,8 @@ private[spark] abstract class YarnSchedulerBackend( override def onDisconnected(remoteAddress: RpcAddress): Unit = { if (amEndpoint.exists(_.address == remoteAddress)) { - logWarning(log"ApplicationMaster has disassociated: ${MDC(HOST_PORT, remoteAddress)}") + logWarning(log"ApplicationMaster has disassociated: " + + log"${MDC(LogKeys.HOST_PORT, remoteAddress)}") amEndpoint = None } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/AmIpFilterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/AmIpFilterSuite.scala new file mode 100644 index 0000000000000..e25bd665dec0d --- /dev/null +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/AmIpFilterSuite.scala @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.yarn + +import java.io.{IOException, PrintWriter, StringWriter} +import java.net.HttpURLConnection +import java.util +import java.util.{Collections, Locale} +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicBoolean + +import scala.jdk.CollectionConverters._ + +import jakarta.servlet.{FilterChain, FilterConfig, ServletContext, ServletException, ServletOutputStream, ServletRequest, ServletResponse} +import jakarta.servlet.http.{Cookie, HttpServlet, HttpServletRequest, HttpServletResponse} +import jakarta.ws.rs.core.MediaType +import org.eclipse.jetty.server.{Server, ServerConnector} +import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} +import org.eclipse.jetty.util.thread.QueuedThreadPool +import org.mockito.Mockito.{mock, when} +import org.scalatest.concurrent.Eventually._ +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkFunSuite + +// A port of org.apache.hadoop.yarn.server.webproxy.amfilter.TestAmFilter +class AmIpFilterSuite extends SparkFunSuite { + + private val proxyHost = "localhost" + private val proxyUri = "http://bogus" + + class TestAmIpFilter extends AmIpFilter { + override def getProxyAddresses: util.Set[String] = Set(proxyHost).asJava + } + + class DummyFilterConfig (val map: util.Map[String, String]) extends FilterConfig { + override def getFilterName: String = "dummy" + + override def getInitParameter(arg0: String): String = map.get(arg0) + + override def getInitParameterNames: util.Enumeration[String] = + Collections.enumeration(map.keySet) + + override def getServletContext: ServletContext = null + } + + test("filterNullCookies") { + val request = mock(classOf[HttpServletRequest]) + + when(request.getCookies).thenReturn(null) + when(request.getRemoteAddr).thenReturn(proxyHost) + + val response = mock(classOf[HttpServletResponse]) + val invoked = new AtomicBoolean + + val chain = new FilterChain() { + @throws[IOException] + @throws[ServletException] + override def doFilter(req: ServletRequest, resp: ServletResponse): Unit = { + invoked.set(true) + } + } + + val params = new util.HashMap[String, String] + params.put(AmIpFilter.PROXY_HOST, proxyHost) + params.put(AmIpFilter.PROXY_URI_BASE, proxyUri) + val conf = new DummyFilterConfig(params) + val filter = new TestAmIpFilter + filter.init(conf) + filter.doFilter(request, response, chain) + assert(invoked.get) + filter.destroy() + } + + test("testFindRedirectUrl") { + class EchoServlet extends HttpServlet { + @throws[IOException] + @throws[ServletException] + override def doGet(request: HttpServletRequest, response: HttpServletResponse): Unit = { + response.setContentType(MediaType.TEXT_PLAIN + "; charset=utf-8") + val out = response.getWriter + request.getParameterNames.asScala.toSeq.sorted.foreach { key => + out.print(key) + out.print(':') + out.print(request.getParameter(key)) + out.print('\n') + } + out.close() + } + } + + def withHttpEchoServer(body: String => Unit): Unit = { + val server = new Server(0) + server.getThreadPool.asInstanceOf[QueuedThreadPool].setMaxThreads(20) + val context = new ServletContextHandler + context.setContextPath("/foo") + server.setHandler(context) + val servletPath = "/bar" + context.addServlet(new ServletHolder(new EchoServlet), servletPath) + server.getConnectors.head.asInstanceOf[ServerConnector].setHost("localhost") + try { + server.start() + body(server.getURI.toString + servletPath) + } finally { + server.stop() + } + } + + // generate a valid URL + withHttpEchoServer { rm1Url 
=> + val rm1 = "rm1" + val rm2 = "rm2" + // invalid url + val rm2Url = "host2:8088" + + val filter = new TestAmIpFilter + // make sure findRedirectUrl() go to HA branch + filter.proxyUriBases = Map(rm1 -> rm1Url, rm2 -> rm2Url).asJava + filter.rmUrls = Array[String](rm1, rm2) + + assert(filter.findRedirectUrl === rm1Url) + } + } + + test("testProxyUpdate") { + var params = new util.HashMap[String, String] + params.put(AmIpFilter.PROXY_HOSTS, proxyHost) + params.put(AmIpFilter.PROXY_URI_BASES, proxyUri) + + var conf = new DummyFilterConfig(params) + val filter = new AmIpFilter + val updateInterval = TimeUnit.SECONDS.toMillis(1) + AmIpFilter.setUpdateInterval(updateInterval) + filter.init(conf) + + // check that the configuration was applied + assert(filter.getProxyAddresses.contains("127.0.0.1")) + + // change proxy configurations + params = new util.HashMap[String, String] + params.put(AmIpFilter.PROXY_HOSTS, "unknownhost") + params.put(AmIpFilter.PROXY_URI_BASES, proxyUri) + conf = new DummyFilterConfig(params) + filter.init(conf) + + // configurations shouldn't be updated now + assert(!filter.getProxyAddresses.isEmpty) + // waiting for configuration update + eventually(timeout(5.seconds), interval(500.millis)) { + assertThrows[ServletException] { + filter.getProxyAddresses.isEmpty + } + } + } + + test("testFilter") { + var doFilterRequest: String = null + var servletWrapper: AmIpServletRequestWrapper = null + + val params = new util.HashMap[String, String] + params.put(AmIpFilter.PROXY_HOST, proxyHost) + params.put(AmIpFilter.PROXY_URI_BASE, proxyUri) + val config = new DummyFilterConfig(params) + + // dummy filter + val chain = new FilterChain() { + @throws[IOException] + @throws[ServletException] + override def doFilter(req: ServletRequest, resp: ServletResponse): Unit = { + doFilterRequest = req.getClass.getName + req match { + case wrapper: AmIpServletRequestWrapper => servletWrapper = wrapper + case _ => + } + } + } + val testFilter = new AmIpFilter + testFilter.init(config) + + val response = new HttpServletResponseForTest + + // Test request should implements HttpServletRequest + val failRequest = mock(classOf[ServletRequest]) + val throws = intercept[ServletException] { + testFilter.doFilter(failRequest, response, chain) + } + assert(ProxyUtils.E_HTTP_HTTPS_ONLY === throws.getMessage) + + + // request with HttpServletRequest + val request = mock(classOf[HttpServletRequest]) + when(request.getRemoteAddr).thenReturn("nowhere") + when(request.getRequestURI).thenReturn("/app/application_00_0") + + // address "redirect" is not in host list for non-proxy connection + testFilter.doFilter(request, response, chain) + assert(HttpURLConnection.HTTP_MOVED_TEMP === response.status) + var redirect = response.getHeader(ProxyUtils.LOCATION) + assert("http://bogus/app/application_00_0" === redirect) + + // address "redirect" is not in host list for proxy connection + when(request.getRequestURI).thenReturn("/proxy/application_00_0") + testFilter.doFilter(request, response, chain) + assert(HttpURLConnection.HTTP_MOVED_TEMP === response.status) + redirect = response.getHeader(ProxyUtils.LOCATION) + assert("http://bogus/proxy/redirect/application_00_0" === redirect) + + // check for query parameters + when(request.getRequestURI).thenReturn("/proxy/application_00_0") + when(request.getQueryString).thenReturn("id=0") + testFilter.doFilter(request, response, chain) + assert(HttpURLConnection.HTTP_MOVED_TEMP === response.status) + redirect = response.getHeader(ProxyUtils.LOCATION) + 
assert("http://bogus/proxy/redirect/application_00_0?id=0" === redirect) + + // "127.0.0.1" contains in host list. Without cookie + when(request.getRemoteAddr).thenReturn("127.0.0.1") + testFilter.doFilter(request, response, chain) + assert(doFilterRequest.contains("HttpServletRequest")) + + // cookie added + val cookies = Array[Cookie](new Cookie(AmIpFilter.PROXY_USER_COOKIE_NAME, "user")) + + when(request.getCookies).thenReturn(cookies) + testFilter.doFilter(request, response, chain) + + assert(doFilterRequest === classOf[AmIpServletRequestWrapper].getName) + // request contains principal from cookie + assert(servletWrapper.getUserPrincipal.getName === "user") + assert(servletWrapper.getRemoteUser === "user") + assert(!servletWrapper.isUserInRole("")) + } + + private class HttpServletResponseForTest extends HttpServletResponse { + private var redirectLocation = "" + var status = 0 + private var contentType: String = _ + final private val headers = new util.HashMap[String, String](1) + private var body: StringWriter = _ + + def getRedirect: String = redirectLocation + + @throws[IOException] + override def sendRedirect(location: String): Unit = redirectLocation = location + + override def setDateHeader(name: String, date: Long): Unit = {} + + override def addDateHeader(name: String, date: Long): Unit = {} + + override def addCookie(cookie: Cookie): Unit = {} + + override def containsHeader(name: String): Boolean = false + + override def encodeURL(url: String): String = null + + override def encodeRedirectURL(url: String): String = url + + override def encodeUrl(url: String): String = null + + override def encodeRedirectUrl(url: String): String = null + + @throws[IOException] + override def sendError(sc: Int, msg: String): Unit = {} + + @throws[IOException] + override def sendError(sc: Int): Unit = {} + + override def setStatus(status: Int): Unit = this.status = status + + override def setStatus(sc: Int, sm: String): Unit = {} + + override def getStatus: Int = 0 + + override def setContentType(contentType: String): Unit = this.contentType = contentType + + override def setBufferSize(size: Int): Unit = {} + + override def getBufferSize: Int = 0 + + @throws[IOException] + override def flushBuffer(): Unit = {} + + override def resetBuffer(): Unit = {} + + override def isCommitted: Boolean = false + + override def reset(): Unit = {} + + override def setLocale(loc: Locale): Unit = {} + + override def getLocale: Locale = null + + override def setHeader(name: String, value: String): Unit = headers.put(name, value) + + override def addHeader(name: String, value: String): Unit = {} + + override def setIntHeader(name: String, value: Int): Unit = {} + + override def addIntHeader(name: String, value: Int): Unit = {} + + override def getHeader(name: String): String = headers.get(name) + + override def getHeaders(name: String): util.Collection[String] = null + + override def getHeaderNames: util.Collection[String] = null + + override def getCharacterEncoding: String = null + + override def getContentType: String = null + + @throws[IOException] + override def getOutputStream: ServletOutputStream = null + + @throws[IOException] + override def getWriter: PrintWriter = { + body = new StringWriter + new PrintWriter(body) + } + + override def setCharacterEncoding(charset: String): Unit = {} + + override def setContentLength(len: Int): Unit = {} + + override def setContentLengthLong(len: Long): Unit = {} + } + +} diff --git a/sbin/spark-daemon.sh b/sbin/spark-daemon.sh index 28d205f03e0fa..b7233e6e9bf3d 100755 
--- a/sbin/spark-daemon.sh +++ b/sbin/spark-daemon.sh @@ -98,6 +98,10 @@ spark_rotate_log () . "${SPARK_HOME}/bin/load-spark-env.sh" if [ "$SPARK_IDENT_STRING" = "" ]; then + # if for some reason the shell doesn't have $USER defined + # (e.g., ssh'd in to execute a command) + # let's get the effective username and use that + USER=${USER:-$(id -nu)} export SPARK_IDENT_STRING="$USER" fi diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index e2b178d34b568..85a4633e80502 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -69,6 +69,35 @@ lexer grammar SqlBaseLexer; public void markUnclosedComment() { has_unclosed_bracketed_comment = true; } + + /** + * When greater than zero, it's in the middle of parsing ARRAY/MAP/STRUCT type. + */ + public int complex_type_level_counter = 0; + + /** + * Increase the counter by one when hits KEYWORD 'ARRAY', 'MAP', 'STRUCT'. + */ + public void incComplexTypeLevelCounter() { + complex_type_level_counter++; + } + + /** + * Decrease the counter by one when hits close tag '>' && the counter greater than zero + * which means we are in the middle of complex type parsing. Otherwise, it's a dangling + * GT token and we do nothing. + */ + public void decComplexTypeLevelCounter() { + if (complex_type_level_counter > 0) complex_type_level_counter--; + } + + /** + * If the counter is zero, it's a shift right operator. It can be closing tags of an complex + * type definition, such as MAP>. + */ + public boolean isShiftRightOperator() { + return complex_type_level_counter == 0 ? true : false; + } } SEMICOLON: ';'; @@ -100,14 +129,16 @@ ANTI: 'ANTI'; ANY: 'ANY'; ANY_VALUE: 'ANY_VALUE'; ARCHIVE: 'ARCHIVE'; -ARRAY: 'ARRAY'; +ARRAY: 'ARRAY' {incComplexTypeLevelCounter();}; AS: 'AS'; ASC: 'ASC'; AT: 'AT'; AUTHORIZATION: 'AUTHORIZATION'; +BEGIN: 'BEGIN'; BETWEEN: 'BETWEEN'; BIGINT: 'BIGINT'; BINARY: 'BINARY'; +BINDING: 'BINDING'; BOOLEAN: 'BOOLEAN'; BOTH: 'BOTH'; BUCKET: 'BUCKET'; @@ -115,6 +146,7 @@ BUCKETS: 'BUCKETS'; BY: 'BY'; BYTE: 'BYTE'; CACHE: 'CACHE'; +CALLED: 'CALLED'; CASCADE: 'CASCADE'; CASE: 'CASE'; CAST: 'CAST'; @@ -137,9 +169,11 @@ COMMENT: 'COMMENT'; COMMIT: 'COMMIT'; COMPACT: 'COMPACT'; COMPACTIONS: 'COMPACTIONS'; +COMPENSATION: 'COMPENSATION'; COMPUTE: 'COMPUTE'; CONCATENATE: 'CONCATENATE'; CONSTRAINT: 'CONSTRAINT'; +CONTAINS: 'CONTAINS'; COST: 'COST'; CREATE: 'CREATE'; CROSS: 'CROSS'; @@ -166,10 +200,12 @@ DECIMAL: 'DECIMAL'; DECLARE: 'DECLARE'; DEFAULT: 'DEFAULT'; DEFINED: 'DEFINED'; +DEFINER: 'DEFINER'; DELETE: 'DELETE'; DELIMITED: 'DELIMITED'; DESC: 'DESC'; DESCRIBE: 'DESCRIBE'; +DETERMINISTIC: 'DETERMINISTIC'; DFS: 'DFS'; DIRECTORIES: 'DIRECTORIES'; DIRECTORY: 'DIRECTORY'; @@ -182,6 +218,7 @@ ELSE: 'ELSE'; END: 'END'; ESCAPE: 'ESCAPE'; ESCAPED: 'ESCAPED'; +EVOLUTION: 'EVOLUTION'; EXCEPT: 'EXCEPT'; EXCHANGE: 'EXCHANGE'; EXCLUDE: 'EXCLUDE'; @@ -227,6 +264,7 @@ INDEX: 'INDEX'; INDEXES: 'INDEXES'; INNER: 'INNER'; INPATH: 'INPATH'; +INPUT: 'INPUT'; INPUTFORMAT: 'INPUTFORMAT'; INSERT: 'INSERT'; INTERSECT: 'INTERSECT'; @@ -234,10 +272,12 @@ INTERVAL: 'INTERVAL'; INT: 'INT'; INTEGER: 'INTEGER'; INTO: 'INTO'; +INVOKER: 'INVOKER'; IS: 'IS'; ITEMS: 'ITEMS'; JOIN: 'JOIN'; KEYS: 'KEYS'; +LANGUAGE: 'LANGUAGE'; LAST: 'LAST'; LATERAL: 'LATERAL'; LAZY: 'LAZY'; @@ -256,7 +296,7 @@ LOCKS: 'LOCKS'; LOGICAL: 'LOGICAL'; LONG: 'LONG'; 
MACRO: 'MACRO'; -MAP: 'MAP'; +MAP: 'MAP' {incComplexTypeLevelCounter();}; MATCHED: 'MATCHED'; MERGE: 'MERGE'; MICROSECOND: 'MICROSECOND'; @@ -265,6 +305,7 @@ MILLISECOND: 'MILLISECOND'; MILLISECONDS: 'MILLISECONDS'; MINUTE: 'MINUTE'; MINUTES: 'MINUTES'; +MODIFIES: 'MODIFIES'; MONTH: 'MONTH'; MONTHS: 'MONTHS'; MSCK: 'MSCK'; @@ -297,8 +338,6 @@ OVERWRITE: 'OVERWRITE'; PARTITION: 'PARTITION'; PARTITIONED: 'PARTITIONED'; PARTITIONS: 'PARTITIONS'; -PERCENTILE_CONT: 'PERCENTILE_CONT'; -PERCENTILE_DISC: 'PERCENTILE_DISC'; PERCENTLIT: 'PERCENT'; PIVOT: 'PIVOT'; PLACING: 'PLACING'; @@ -311,6 +350,7 @@ PURGE: 'PURGE'; QUARTER: 'QUARTER'; QUERY: 'QUERY'; RANGE: 'RANGE'; +READS: 'READS'; REAL: 'REAL'; RECORDREADER: 'RECORDREADER'; RECORDWRITER: 'RECORDWRITER'; @@ -325,6 +365,8 @@ REPLACE: 'REPLACE'; RESET: 'RESET'; RESPECT: 'RESPECT'; RESTRICT: 'RESTRICT'; +RETURN: 'RETURN'; +RETURNS: 'RETURNS'; REVOKE: 'REVOKE'; RIGHT: 'RIGHT'; RLIKE: 'RLIKE' | 'REGEXP'; @@ -338,6 +380,7 @@ SECOND: 'SECOND'; SECONDS: 'SECONDS'; SCHEMA: 'SCHEMA'; SCHEMAS: 'SCHEMAS'; +SECURITY: 'SECURITY'; SELECT: 'SELECT'; SEMI: 'SEMI'; SEPARATED: 'SEPARATED'; @@ -356,12 +399,14 @@ SOME: 'SOME'; SORT: 'SORT'; SORTED: 'SORTED'; SOURCE: 'SOURCE'; +SPECIFIC: 'SPECIFIC'; +SQL: 'SQL'; START: 'START'; STATISTICS: 'STATISTICS'; STORED: 'STORED'; STRATIFY: 'STRATIFY'; STRING: 'STRING'; -STRUCT: 'STRUCT'; +STRUCT: 'STRUCT' {incComplexTypeLevelCounter();}; SUBSTR: 'SUBSTR'; SUBSTRING: 'SUBSTRING'; SYNC: 'SYNC'; @@ -438,8 +483,11 @@ NEQ : '<>'; NEQJ: '!='; LT : '<'; LTE : '<=' | '!>'; -GT : '>'; +GT : '>' {decComplexTypeLevelCounter();}; GTE : '>=' | '!<'; +SHIFT_LEFT: '<<'; +SHIFT_RIGHT: '>>' {isShiftRightOperator()}?; +SHIFT_RIGHT_UNSIGNED: '>>>' {isShiftRightOperator()}?; PLUS: '+'; MINUS: '-'; diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 3d008516589b2..54eff14b6d4df 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -42,6 +42,28 @@ options { tokenVocab = SqlBaseLexer; } public boolean double_quoted_identifiers = false; } +compoundOrSingleStatement + : singleStatement + | singleCompoundStatement + ; + +singleCompoundStatement + : beginEndCompoundBlock SEMICOLON? EOF + ; + +beginEndCompoundBlock + : BEGIN compoundBody END + ; + +compoundBody + : (compoundStatements+=compoundStatement SEMICOLON)* + ; + +compoundStatement + : statement + | beginEndCompoundBlock + ; + singleStatement : statement SEMICOLON* EOF ; @@ -77,29 +99,31 @@ statement | USE identifierReference #use | USE namespace identifierReference #useNamespace | SET CATALOG (errorCapturingIdentifier | stringLit) #setCatalog - | CREATE namespace (IF NOT EXISTS)? identifierReference + | CREATE namespace (IF errorCapturingNot EXISTS)? identifierReference (commentSpec | locationSpec | (WITH (DBPROPERTIES | PROPERTIES) propertyList))* #createNamespace | ALTER namespace identifierReference SET (DBPROPERTIES | PROPERTIES) propertyList #setNamespaceProperties + | ALTER namespace identifierReference + UNSET (DBPROPERTIES | PROPERTIES) propertyList #unsetNamespaceProperties | ALTER namespace identifierReference SET locationSpec #setNamespaceLocation | DROP namespace (IF EXISTS)? identifierReference (RESTRICT | CASCADE)? #dropNamespace | SHOW namespaces ((FROM | IN) multipartIdentifier)? (LIKE? pattern=stringLit)? 
#showNamespaces - | createTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? + | createTableHeader (LEFT_PAREN colDefinitionList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #createTable - | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier + | CREATE TABLE (IF errorCapturingNot EXISTS)? target=tableIdentifier LIKE source=tableIdentifier (tableProvider | rowFormat | createFileFormat | locationSpec | (TBLPROPERTIES tableProps=propertyList))* #createTableLike - | replaceTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? + | replaceTableHeader (LEFT_PAREN colDefinitionList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #replaceTable | ANALYZE TABLE identifierReference partitionSpec? COMPUTE STATISTICS @@ -141,7 +165,7 @@ statement SET SERDE stringLit (WITH SERDEPROPERTIES propertyList)? #setTableSerDe | ALTER TABLE identifierReference (partitionSpec)? SET SERDEPROPERTIES propertyList #setTableSerDe - | ALTER (TABLE | VIEW) identifierReference ADD (IF NOT EXISTS)? + | ALTER (TABLE | VIEW) identifierReference ADD (IF errorCapturingNot EXISTS)? partitionSpecLocation+ #addTablePartition | ALTER TABLE identifierReference from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition @@ -153,9 +177,10 @@ statement | DROP TABLE (IF EXISTS)? identifierReference PURGE? #dropTable | DROP VIEW (IF EXISTS)? identifierReference #dropView | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)? - VIEW (IF NOT EXISTS)? identifierReference + VIEW (IF errorCapturingNot EXISTS)? identifierReference identifierCommentList? (commentSpec | + schemaBinding | (PARTITIONED ON identifierList) | (TBLPROPERTIES propertyList))* AS query #createView @@ -163,9 +188,15 @@ statement tableIdentifier (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider (OPTIONS propertyList)? #createTempViewUsing | ALTER VIEW identifierReference AS? query #alterViewQuery - | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)? + | ALTER VIEW identifierReference schemaBinding #alterViewSchemaBinding + | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF errorCapturingNot EXISTS)? identifierReference AS className=stringLit (USING resource (COMMA resource)*)? #createFunction + | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF errorCapturingNot EXISTS)? + identifierReference LEFT_PAREN parameters=colDefinitionList? RIGHT_PAREN + (RETURNS (dataType | TABLE LEFT_PAREN returnParams=colTypeList RIGHT_PAREN))? + routineCharacteristics + RETURN (query | expression) #createUserDefinedFunction | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction | DECLARE (OR REPLACE)? VARIABLE? identifierReference dataType? variableDefaultExpression? #createVariable @@ -224,7 +255,7 @@ statement | SET .*? #setConfiguration | RESET configKey #resetQuotedConfiguration | RESET .*? #resetConfiguration - | CREATE INDEX (IF NOT EXISTS)? identifier ON TABLE? + | CREATE INDEX (IF errorCapturingNot EXISTS)? identifier ON TABLE? identifierReference (USING indexType=identifier)? LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN (OPTIONS options=propertyList)? #createIndex @@ -315,7 +346,7 @@ unsupportedHiveNativeCommands ; createTableHeader - : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? identifierReference + : CREATE TEMPORARY? EXTERNAL? TABLE (IF errorCapturingNot EXISTS)? 
identifierReference ; replaceTableHeader @@ -342,6 +373,10 @@ locationSpec : LOCATION stringLit ; +schemaBinding + : WITH SCHEMA (BINDING | COMPENSATION | EVOLUTION | TYPE EVOLUTION) + ; + commentSpec : COMMENT stringLit ; @@ -351,8 +386,8 @@ query ; insertInto - : INSERT OVERWRITE TABLE? identifierReference (partitionSpec (IF NOT EXISTS)?)? ((BY NAME) | identifierList)? #insertOverwriteTable - | INSERT INTO TABLE? identifierReference partitionSpec? (IF NOT EXISTS)? ((BY NAME) | identifierList)? #insertIntoTable + : INSERT OVERWRITE TABLE? identifierReference (partitionSpec (IF errorCapturingNot EXISTS)?)? ((BY NAME) | identifierList)? #insertOverwriteTable + | INSERT INTO TABLE? identifierReference partitionSpec? (IF errorCapturingNot EXISTS)? ((BY NAME) | identifierList)? #insertIntoTable | INSERT INTO TABLE? identifierReference REPLACE whereClause #insertIntoReplaceWhere | INSERT OVERWRITE LOCAL? DIRECTORY path=stringLit rowFormat? createFileFormat? #insertOverwriteHiveDir | INSERT OVERWRITE LOCAL? DIRECTORY (path=stringLit)? tableProvider (OPTIONS options=propertyList)? #insertOverwriteDir @@ -389,6 +424,7 @@ describeFuncName | comparisonOperator | arithmeticOperator | predicateOperator + | shiftOperator | BANG ; @@ -480,7 +516,7 @@ dmlStatementNoWith | fromClause multiInsertQueryBody+ #multiInsertQuery | DELETE FROM identifierReference tableAlias whereClause? #deleteFromTable | UPDATE identifierReference tableAlias setClause whereClause? #updateTable - | MERGE INTO target=identifierReference targetAlias=tableAlias + | MERGE (WITH SCHEMA EVOLUTION)? INTO target=identifierReference targetAlias=tableAlias USING (source=identifierReference | LEFT_PAREN sourceQuery=query RIGHT_PAREN) sourceAlias=tableAlias ON mergeCondition=booleanExpression @@ -588,11 +624,11 @@ matchedClause : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction ; notMatchedClause - : WHEN NOT MATCHED (BY TARGET)? (AND notMatchedCond=booleanExpression)? THEN notMatchedAction + : WHEN errorCapturingNot MATCHED (BY TARGET)? (AND notMatchedCond=booleanExpression)? THEN notMatchedAction ; notMatchedBySourceClause - : WHEN NOT MATCHED BY SOURCE (AND notMatchedBySourceCond=booleanExpression)? THEN notMatchedBySourceAction + : WHEN errorCapturingNot MATCHED BY SOURCE (AND notMatchedBySourceCond=booleanExpression)? THEN notMatchedBySourceAction ; matchedAction @@ -838,9 +874,11 @@ tableArgumentPartitioning : ((WITH SINGLE PARTITION) | ((PARTITION | DISTRIBUTE) BY (((LEFT_PAREN partition+=expression (COMMA partition+=expression)* RIGHT_PAREN)) + | (expression (COMMA invalidMultiPartitionExpression=expression)+) | partition+=expression))) ((ORDER | SORT) BY (((LEFT_PAREN sortItem (COMMA sortItem)* RIGHT_PAREN) + | (sortItem (COMMA invalidMultiSortItem=sortItem)+) | sortItem)))? ; @@ -956,15 +994,20 @@ booleanExpression ; predicate - : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression - | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN - | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN - | NOT? kind=RLIKE pattern=valueExpression - | NOT? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) - | NOT? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=stringLit)? - | IS NOT? kind=NULL - | IS NOT? kind=(TRUE | FALSE | UNKNOWN) - | IS NOT? kind=DISTINCT FROM right=valueExpression + : errorCapturingNot? kind=BETWEEN lower=valueExpression AND upper=valueExpression + | errorCapturingNot? 
kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN + | errorCapturingNot? kind=IN LEFT_PAREN query RIGHT_PAREN + | errorCapturingNot? kind=RLIKE pattern=valueExpression + | errorCapturingNot? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN) + | errorCapturingNot? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=stringLit)? + | IS errorCapturingNot? kind=NULL + | IS errorCapturingNot? kind=(TRUE | FALSE | UNKNOWN) + | IS errorCapturingNot? kind=DISTINCT FROM right=valueExpression + ; + +errorCapturingNot + : NOT + | BANG ; valueExpression @@ -972,12 +1015,19 @@ valueExpression | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary + | left=valueExpression shiftOperator right=valueExpression #shiftExpression | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary | left=valueExpression comparisonOperator right=valueExpression #comparison ; +shiftOperator + : SHIFT_LEFT + | SHIFT_RIGHT + | SHIFT_RIGHT_UNSIGNED + ; + datetimeUnit : YEAR | QUARTER | MONTH | WEEK | DAY | DAYOFYEAR @@ -1143,7 +1193,7 @@ qualifiedColTypeWithPosition ; colDefinitionDescriptorWithPosition - : NOT NULL + : errorCapturingNot NULL | defaultExpression | commentSpec | colPosition @@ -1162,19 +1212,19 @@ colTypeList ; colType - : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? + : colName=errorCapturingIdentifier dataType (errorCapturingNot NULL)? commentSpec? ; -createOrReplaceTableColTypeList - : createOrReplaceTableColType (COMMA createOrReplaceTableColType)* +colDefinitionList + : colDefinition (COMMA colDefinition)* ; -createOrReplaceTableColType +colDefinition : colName=errorCapturingIdentifier dataType colDefinitionOption* ; colDefinitionOption - : NOT NULL + : errorCapturingNot NULL | defaultExpression | generationExpression | commentSpec @@ -1189,9 +1239,49 @@ complexColTypeList ; complexColType - : errorCapturingIdentifier COLON? dataType (NOT NULL)? commentSpec? + : errorCapturingIdentifier COLON? dataType (errorCapturingNot NULL)? commentSpec? 
+ ; + +routineCharacteristics + : (routineLanguage + | specificName + | deterministic + | sqlDataAccess + | nullCall + | commentSpec + | rightsClause)* + ; + +routineLanguage + : LANGUAGE (SQL | IDENTIFIER) + ; + +specificName + : SPECIFIC specific=errorCapturingIdentifier ; +deterministic + : DETERMINISTIC + | errorCapturingNot DETERMINISTIC + ; + +sqlDataAccess + : access=NO SQL + | access=CONTAINS SQL + | access=READS SQL DATA + | access=MODIFIES SQL DATA + ; + +nullCall + : RETURNS NULL ON NULL INPUT + | CALLED ON NULL INPUT + ; + +rightsClause + : SQL SECURITY INVOKER + | SQL SECURITY DEFINER + ; + whenClause : WHEN condition=expression THEN result=expression ; @@ -1296,7 +1386,7 @@ alterColumnAction : TYPE dataType | commentSpec | colPosition - | setOrDrop=(SET | DROP) NOT NULL + | setOrDrop=(SET | DROP) errorCapturingNot NULL | SET defaultExpression | dropDefault=DROP DEFAULT ; @@ -1339,16 +1429,19 @@ ansiNonReserved | ARRAY | ASC | AT + | BEGIN | BETWEEN | BIGINT | BINARY | BINARY_HEX + | BINDING | BOOLEAN | BUCKET | BUCKETS | BY | BYTE | CACHE + | CALLED | CASCADE | CATALOG | CATALOGS @@ -1365,8 +1458,10 @@ ansiNonReserved | COMMIT | COMPACT | COMPACTIONS + | COMPENSATION | COMPUTE | CONCATENATE + | CONTAINS | COST | CUBE | CURRENT @@ -1387,10 +1482,12 @@ ansiNonReserved | DECLARE | DEFAULT | DEFINED + | DEFINER | DELETE | DELIMITED | DESC | DESCRIBE + | DETERMINISTIC | DFS | DIRECTORIES | DIRECTORY @@ -1399,6 +1496,7 @@ ansiNonReserved | DOUBLE | DROP | ESCAPED + | EVOLUTION | EXCHANGE | EXCLUDE | EXISTS @@ -1430,13 +1528,16 @@ ansiNonReserved | INDEX | INDEXES | INPATH + | INPUT | INPUTFORMAT | INSERT | INT | INTEGER | INTERVAL + | INVOKER | ITEMS | KEYS + | LANGUAGE | LAST | LAZY | LIKE @@ -1461,6 +1562,7 @@ ansiNonReserved | MILLISECONDS | MINUTE | MINUTES + | MODIFIES | MONTH | MONTHS | MSCK @@ -1494,6 +1596,7 @@ ansiNonReserved | QUARTER | QUERY | RANGE + | READS | REAL | RECORDREADER | RECORDWRITER @@ -1507,6 +1610,8 @@ ansiNonReserved | RESET | RESPECT | RESTRICT + | RETURN + | RETURNS | REVOKE | RLIKE | ROLE @@ -1519,6 +1624,7 @@ ansiNonReserved | SCHEMAS | SECOND | SECONDS + | SECURITY | SEMI | SEPARATED | SERDE @@ -1534,6 +1640,7 @@ ansiNonReserved | SORT | SORTED | SOURCE + | SPECIFIC | START | STATISTICS | STORED @@ -1638,10 +1745,12 @@ nonReserved | ASC | AT | AUTHORIZATION + | BEGIN | BETWEEN | BIGINT | BINARY | BINARY_HEX + | BINDING | BOOLEAN | BOTH | BUCKET @@ -1649,6 +1758,7 @@ nonReserved | BY | BYTE | CACHE + | CALLED | CASCADE | CASE | CAST @@ -1671,9 +1781,11 @@ nonReserved | COMMIT | COMPACT | COMPACTIONS + | COMPENSATION | COMPUTE | CONCATENATE | CONSTRAINT + | CONTAINS | COST | CREATE | CUBE @@ -1699,10 +1811,12 @@ nonReserved | DECLARE | DEFAULT | DEFINED + | DEFINER | DELETE | DELIMITED | DESC | DESCRIBE + | DETERMINISTIC | DFS | DIRECTORIES | DIRECTORY @@ -1715,6 +1829,7 @@ nonReserved | END | ESCAPE | ESCAPED + | EVOLUTION | EXCHANGE | EXCLUDE | EXECUTE @@ -1757,15 +1872,18 @@ nonReserved | INDEX | INDEXES | INPATH + | INPUT | INPUTFORMAT | INSERT | INT | INTEGER | INTERVAL | INTO + | INVOKER | IS | ITEMS | KEYS + | LANGUAGE | LAST | LAZY | LEADING @@ -1792,6 +1910,7 @@ nonReserved | MILLISECONDS | MINUTE | MINUTES + | MODIFIES | MONTH | MONTHS | MSCK @@ -1822,8 +1941,6 @@ nonReserved | PARTITION | PARTITIONED | PARTITIONS - | PERCENTILE_CONT - | PERCENTILE_DISC | PERCENTLIT | PIVOT | PLACING @@ -1836,6 +1953,7 @@ nonReserved | QUARTER | QUERY | RANGE + | READS | REAL | RECORDREADER | RECORDWRITER @@ -1850,6 +1968,8 @@ nonReserved | RESET | RESPECT | 
RESTRICT + | RETURN + | RETURNS | REVOKE | RLIKE | ROLE @@ -1862,6 +1982,7 @@ nonReserved | SCHEMAS | SECOND | SECONDS + | SECURITY | SELECT | SEPARATED | SERDE @@ -1878,6 +1999,8 @@ nonReserved | SORT | SORTED | SOURCE + | SPECIFIC + | SQL | START | STATISTICS | STORED diff --git a/sql/api/src/main/java/org/apache/spark/sql/connector/catalog/Identifier.java b/sql/api/src/main/java/org/apache/spark/sql/connector/catalog/Identifier.java index 88c51d6c43ddf..efb71f196f5f7 100644 --- a/sql/api/src/main/java/org/apache/spark/sql/connector/catalog/Identifier.java +++ b/sql/api/src/main/java/org/apache/spark/sql/connector/catalog/Identifier.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/sql/api/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java b/sql/api/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java index 17895e73d9fcf..ba883b8042c4d 100644 --- a/sql/api/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java +++ b/sql/api/src/main/java/org/apache/spark/sql/connector/catalog/IdentifierImpl.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/sql/api/src/main/java/org/apache/spark/sql/types/DataTypes.java b/sql/api/src/main/java/org/apache/spark/sql/types/DataTypes.java index 32c20dedac4c6..0034b8e715183 100644 --- a/sql/api/src/main/java/org/apache/spark/sql/types/DataTypes.java +++ b/sql/api/src/main/java/org/apache/spark/sql/types/DataTypes.java @@ -259,4 +259,22 @@ public static StructType createStructType(StructField[] fields) { return StructType$.MODULE$.apply(fields); } + + /** + * Creates a CharType with the given length. + * + * @since 4.0.0 + */ + public static CharType createCharType(int length) { + return new CharType(length); + } + + /** + * Creates a VarcharType with the given length. + * + * @since 4.0.0 + */ + public static VarcharType createVarcharType(int length) { + return new VarcharType(length); + } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/ObservationBase.scala b/sql/api/src/main/scala/org/apache/spark/sql/ObservationBase.scala new file mode 100644 index 0000000000000..4789ae8975d12 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/ObservationBase.scala @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import scala.jdk.CollectionConverters.MapHasAsJava + +/** + * Helper class to simplify usage of `Dataset.observe(String, Column, Column*)`: + * + * {{{ + * // Observe row count (rows) and highest id (maxid) in the Dataset while writing it + * val observation = Observation("my metrics") + * val observed_ds = ds.observe(observation, count(lit(1)).as("rows"), max($"id").as("maxid")) + * observed_ds.write.parquet("ds.parquet") + * val metrics = observation.get + * }}} + * + * This collects the metrics while the first action is executed on the observed dataset. Subsequent + * actions do not modify the metrics returned by [[get]]. Retrieval of the metric via [[get]] + * blocks until the first action has finished and metrics become available. + * + * This class does not support streaming datasets. + * + * @param name name of the metric + * @since 3.3.0 + */ +abstract class ObservationBase(val name: String) { + + if (name.isEmpty) throw new IllegalArgumentException("Name must not be empty") + + @volatile protected var metrics: Option[Map[String, Any]] = None + + /** + * (Scala-specific) Get the observed metrics. This waits for the observed dataset to finish + * its first action. Only the result of the first action is available. Subsequent actions do not + * modify the result. + * + * @return the observed metrics as a `Map[String, Any]` + * @throws InterruptedException interrupted while waiting + */ + @throws[InterruptedException] + def get: Map[String, _] = { + synchronized { + // we need to loop as wait might return without us calling notify + // https://en.wikipedia.org/w/index.php?title=Spurious_wakeup&oldid=992601610 + while (this.metrics.isEmpty) { + wait() + } + } + + this.metrics.get + } + + /** + * (Java-specific) Get the observed metrics. This waits for the observed dataset to finish + * its first action. Only the result of the first action is available. Subsequent actions do not + * modify the result. + * + * @return the observed metrics as a `java.util.Map[String, Object]` + * @throws InterruptedException interrupted while waiting + */ + @throws[InterruptedException] + def getAsJava: java.util.Map[String, AnyRef] = { + get.map { case (key, value) => (key, value.asInstanceOf[Object]) }.asJava + } + + /** + * Get the observed metrics. This returns the metrics if they are available, otherwise an empty. + * + * @return the observed metrics as a `Map[String, Any]` + */ + @throws[InterruptedException] + private[sql] def getOrEmpty: Map[String, _] = { + synchronized { + if (metrics.isEmpty) { + wait(100) // Wait for 100ms to see if metrics are available + } + metrics.getOrElse(Map.empty) + } + } + + /** + * Set the observed metrics and notify all waiting threads to resume. + * + * @return `true` if all waiting threads were notified, `false` if otherwise. 
+ */ + private[spark] def setMetricsAndNotify(metrics: Option[Map[String, Any]]): Boolean = { + synchronized { + this.metrics = metrics + if(metrics.isDefined) { + notifyAll() + true + } else { + false + } + } + } +} diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index 16ac283eccb15..c507e952630f6 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -20,9 +20,9 @@ package org.apache.spark.sql.catalyst.encoders import scala.collection.mutable import scala.reflect.classTag -import org.apache.spark.sql.Row +import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{BinaryEncoder, BoxedBooleanEncoder, BoxedByteEncoder, BoxedDoubleEncoder, BoxedFloatEncoder, BoxedIntEncoder, BoxedLongEncoder, BoxedShortEncoder, CalendarIntervalEncoder, DateEncoder, DayTimeIntervalEncoder, EncoderField, InstantEncoder, IterableEncoder, JavaDecimalEncoder, LocalDateEncoder, LocalDateTimeEncoder, MapEncoder, NullEncoder, RowEncoder => AgnosticRowEncoder, StringEncoder, TimestampEncoder, UDTEncoder, VariantEncoder, YearMonthIntervalEncoder} -import org.apache.spark.sql.errors.ExecutionErrors +import org.apache.spark.sql.errors.{DataTypeErrorsBase, ExecutionErrors} import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types._ import org.apache.spark.util.ArrayImplicits._ @@ -59,7 +59,7 @@ import org.apache.spark.util.ArrayImplicits._ * StructType -> org.apache.spark.sql.Row * }}} */ -object RowEncoder { +object RowEncoder extends DataTypeErrorsBase { def encoderFor(schema: StructType): AgnosticEncoder[Row] = { encoderFor(schema, lenient = false) } @@ -124,5 +124,11 @@ object RowEncoder { field.nullable, field.metadata) }.toImmutableArraySeq) + + case _ => + throw new AnalysisException( + errorClass = "UNSUPPORTED_DATA_TYPE_FOR_ENCODER", + messageParameters = Map("dataType" -> toSQLType(dataType)) + ) } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala index 1c2456f00bcdc..2b3f4674539e3 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/QueryContexts.scala @@ -145,36 +145,30 @@ case class DataFrameQueryContext( override def stopIndex: Int = throw SparkUnsupportedOperationException() override val fragment: String = { - stackTrace.headOption.map { firstElem => - val methodName = firstElem.getMethodName - if (methodName.length > 1 && methodName(0) == '$') { - methodName.substring(1) - } else { - methodName - } - }.getOrElse("") + pysparkErrorContext.map(_._1).getOrElse { + stackTrace.headOption.map { firstElem => + val methodName = firstElem.getMethodName + if (methodName.length > 1 && methodName(0) == '$') { + methodName.substring(1) + } else { + methodName + } + }.getOrElse("") + } } - override val callSite: String = stackTrace.tail.mkString("\n") - - val pysparkFragment: String = pysparkErrorContext.map(_._1).getOrElse("") - val pysparkCallSite: String = pysparkErrorContext.map(_._2).getOrElse("") - - val (displayedFragment, displayedCallsite) = if (pysparkErrorContext.nonEmpty) { - (pysparkFragment, pysparkCallSite) - } else { - (fragment, callSite) - } + 
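A minimal sketch of the blocking `get()` / `setMetricsAndNotify()` handshake defined by `ObservationBase` above; the package placement (needed only so the `private[spark]` setter resolves) and the `DemoObservation` subclass are assumptions made purely for illustration.

package org.apache.spark.sql

object ObservationHandshakeSketch {
  // Hypothetical concrete subclass; ObservationBase itself is abstract.
  private class DemoObservation(name: String) extends ObservationBase(name)

  def main(args: Array[String]): Unit = {
    val obs = new DemoObservation("my metrics")
    // get blocks until metrics have been published via setMetricsAndNotify.
    val reader = new Thread(() => println(s"observed: ${obs.get}"))
    reader.start()
    // Stand-in for the listener that publishes metrics once the first action finishes.
    obs.setMetricsAndNotify(Some(Map("rows" -> 42L)))
    reader.join()
  }
}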
override val callSite: String = pysparkErrorContext.map( + _._2).getOrElse(stackTrace.tail.mkString("\n")) override lazy val summary: String = { val builder = new StringBuilder builder ++= "== DataFrame ==\n" builder ++= "\"" - builder ++= displayedFragment + builder ++= fragment builder ++= "\"" builder ++= " was called from\n" - builder ++= displayedCallsite + builder ++= callSite builder += '\n' builder.result() diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/origin.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/origin.scala index 9d3968b025350..4ecbfd631e7e8 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/origin.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/trees/origin.scala @@ -85,3 +85,20 @@ object CurrentOrigin { ret } } + +/** + * Provides detailed error context information on PySpark. + */ +object PySparkCurrentOrigin { + private val pysparkErrorContext = new ThreadLocal[Option[(String, String)]]() { + override def initialValue(): Option[(String, String)] = None + } + + def set(fragment: String, callSite: String): Unit = { + pysparkErrorContext.set(Some((fragment, callSite))) + } + + def get(): Option[(String, String)] = pysparkErrorContext.get() + + def clear(): Unit = pysparkErrorContext.remove() +} diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala index 8db8c3cd39d74..0447d813e26a5 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkDateTimeUtils.scala @@ -198,7 +198,7 @@ trait SparkDateTimeUtils { } private val zoneInfoClassName = "sun.util.calendar.ZoneInfo" - private val getOffsetsByWallHandle = { + private lazy val getOffsetsByWallHandle = { val lookup = MethodHandles.lookup() val classType = SparkClassUtils.classForName(zoneInfoClassName) val methodName = "getOffsetsByWall" diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala index a4ce5fb120340..7597cb1d9087d 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/SparkParserUtils.scala @@ -16,8 +16,7 @@ */ package org.apache.spark.sql.catalyst.util -import java.lang.{Long => JLong} -import java.nio.CharBuffer +import java.lang.{Long => JLong, StringBuilder => JStringBuilder} import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.misc.Interval @@ -26,16 +25,10 @@ import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} trait SparkParserUtils { - val U16_CHAR_PATTERN = """\\u([a-fA-F0-9]{4})(?s).*""".r - val U32_CHAR_PATTERN = """\\U([a-fA-F0-9]{8})(?s).*""".r - val OCTAL_CHAR_PATTERN = """\\([01][0-7]{2})(?s).*""".r - val ESCAPED_CHAR_PATTERN = """\\((?s).)(?s).*""".r /** Unescape backslash-escaped string enclosed by quotes. 
*/ def unescapeSQLString(b: String): String = { - val sb = new StringBuilder(b.length()) - - def appendEscapedChar(n: Char): Unit = { + def appendEscapedChar(n: Char, sb: JStringBuilder): Unit = { n match { case '0' => sb.append('\u0000') case 'b' => sb.append('\b') @@ -50,22 +43,64 @@ trait SparkParserUtils { } } - if (b.startsWith("r") || b.startsWith("R")) { + def allCharsAreHex(s: String, start: Int, length: Int): Boolean = { + val end = start + length + var i = start + while (i < end) { + val c = s.charAt(i) + val cIsHex = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') + if (!cIsHex) { + return false + } + i += 1 + } + true + } + + def isThreeDigitOctalEscape(s: String, start: Int): Boolean = { + val firstChar = s.charAt(start) + val secondChar = s.charAt(start + 1) + val thirdChar = s.charAt(start + 2) + (firstChar == '0' || firstChar == '1') && + (secondChar >= '0' && secondChar <= '7') && + (thirdChar >= '0' && thirdChar <= '7') + } + + val isRawString = { + val firstChar = b.charAt(0) + firstChar == 'r' || firstChar == 'R' + } + + if (isRawString) { + // Skip the 'r' or 'R' and the first and last quotations enclosing the string literal. b.substring(2, b.length - 1) + } else if (b.indexOf('\\') == -1) { + // Fast path for the common case where the string has no escaped characters, + // in which case we just skip the first and last quotations enclosing the string literal. + b.substring(1, b.length - 1) } else { + val sb = new JStringBuilder(b.length()) // Skip the first and last quotations enclosing the string literal. - val charBuffer = CharBuffer.wrap(b, 1, b.length - 1) - - while (charBuffer.remaining() > 0) { - charBuffer match { - case U16_CHAR_PATTERN(cp) => + var i = 1 + val length = b.length - 1 + while (i < length) { + val c = b.charAt(i) + if (c != '\\' || i + 1 == length) { + // Either a regular character or a backslash at the end of the string: + sb.append(c) + i += 1 + } else { + // A backslash followed by at least one character: + i += 1 + val cAfterBackslash = b.charAt(i) + if (cAfterBackslash == 'u' && i + 1 + 4 <= length && allCharsAreHex(b, i + 1, 4)) { // \u0000 style 16-bit unicode character literals. - sb.append(Integer.parseInt(cp, 16).toChar) - charBuffer.position(charBuffer.position() + 6) - case U32_CHAR_PATTERN(cp) => + sb.append(Integer.parseInt(b, i + 1, i + 1 + 4, 16).toChar) + i += 1 + 4 + } else if (cAfterBackslash == 'U' && i + 1 + 8 <= length && allCharsAreHex(b, i + 1, 8)) { // \U00000000 style 32-bit unicode character literals. // Use Long to treat codePoint as unsigned in the range of 32-bit. - val codePoint = JLong.parseLong(cp, 16) + val codePoint = JLong.parseLong(b, i + 1, i + 1 + 8, 16) if (codePoint < 0x10000) { sb.append((codePoint & 0xFFFF).toChar) } else { @@ -74,21 +109,18 @@ trait SparkParserUtils { sb.append(highSurrogate.toChar) sb.append(lowSurrogate.toChar) } - charBuffer.position(charBuffer.position() + 10) - case OCTAL_CHAR_PATTERN(cp) => + i += 1 + 8 + } else if (i + 3 <= length && isThreeDigitOctalEscape(b, i)) { // \000 style character literals. - sb.append(Integer.parseInt(cp, 8).toChar) - charBuffer.position(charBuffer.position() + 4) - case ESCAPED_CHAR_PATTERN(c) => - // escaped character literals. - appendEscapedChar(c.charAt(0)) - charBuffer.position(charBuffer.position() + 2) - case _ => - // non-escaped character literals. 
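A small sketch of the behaviour the rewritten `unescapeSQLString` is expected to preserve; `UnescapeSketch` merely mixes in the public trait, and the input literals are illustrative.

import org.apache.spark.sql.catalyst.util.SparkParserUtils

object UnescapeSketch extends SparkParserUtils {
  def main(args: Array[String]): Unit = {
    // Fast path: no backslash present, only the enclosing quotes are stripped.
    println(unescapeSQLString("'hello'"))  // hello
    // Simple escape and three-digit octal escape.
    println(unescapeSQLString("'a\\tb'"))  // a<TAB>b
    println(unescapeSQLString("'\\101'"))  // A (octal 101 = 65)
    // Raw string: the r prefix and quotes are stripped, escapes stay verbatim.
    println(unescapeSQLString("r'\\n'"))   // \n (two characters, not a newline)
  }
}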
- sb.append(charBuffer.get()) + sb.append(Integer.parseInt(b, i, i + 3, 8).toChar) + i += 3 + } else { + appendEscapedChar(cAfterBackslash, sb) + i += 1 + } } } - sb.toString() + sb.toString } } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index aa8826dd48b66..edb1ee371b156 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.sql.catalyst.util +import java.util.HexFormat import java.util.concurrent.atomic.AtomicBoolean import org.apache.spark.internal.Logging @@ -101,11 +102,16 @@ object SparkStringUtils extends Logging { truncatedString(seq, "", sep, "", maxFields) } + private final lazy val SPACE_DELIMITED_UPPERCASE_HEX = + HexFormat.of().withDelimiter(" ").withUpperCase() + /** * Returns a pretty string of the byte array which prints each byte as a hex digit and add spaces * between them. For example, [1A C0]. */ - def getHexString(bytes: Array[Byte]): String = bytes.map("%02X".format(_)).mkString("[", " ", "]") + def getHexString(bytes: Array[Byte]): String = { + s"[${SPACE_DELIMITED_UPPERCASE_HEX.formatHex(bytes)}]" + } def sideBySide(left: String, right: String): Seq[String] = { sideBySide(left.split("\n").toImmutableArraySeq, right.split("\n").toImmutableArraySeq) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index d59b52a3818ac..9f57f8375c54d 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -162,6 +162,9 @@ class Iso8601TimestampFormatter( protected lazy val formatter: DateTimeFormatter = getOrCreateFormatter(pattern, locale, isParsing) + @transient + private lazy val zonedFormatter: DateTimeFormatter = formatter.withZone(zoneId) + @transient protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter( pattern, zoneId, locale, legacyFormat) @@ -231,7 +234,7 @@ class Iso8601TimestampFormatter( override def format(instant: Instant): String = { try { - formatter.withZone(zoneId).format(instant) + zonedFormatter.format(instant) } catch checkFormattedDiff(toJavaTimestamp(instantToMicros(instant)), (t: Timestamp) => format(t)) } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrorsBase.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrorsBase.scala index d1d9dd806b3b8..930f92db26826 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrorsBase.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/DataTypeErrorsBase.scala @@ -20,7 +20,7 @@ import java.util.Locale import org.apache.spark.QueryContext import org.apache.spark.sql.catalyst.util.{AttributeNameParser, QuotingUtils} -import org.apache.spark.sql.types.{AbstractDataType, DataType, TypeCollection} +import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String private[sql] trait DataTypeErrorsBase { @@ -50,6 +50,7 @@ private[sql] trait DataTypeErrorsBase { def toSQLType(t: AbstractDataType): String = t match { case TypeCollection(types) => types.map(toSQLType).mkString("(", " or ", ")") + case u: UserDefinedType[_] => s"UDT(${toSQLType(u.sqlType)})" case dt: DataType => 
quoteByDefault(dt.sql) case at => quoteByDefault(at.simpleString.toUpperCase(Locale.ROOT)) } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 9d0d4ea799746..e7ae9f2bfb7bb 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -36,6 +36,12 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { new ParseException(errorClass = "_LEGACY_ERROR_TEMP_0001", ctx) } + def parserStackOverflow(parserRuleContext: ParserRuleContext): Throwable = { + throw new ParseException( + errorClass = "FAILED_TO_PARSE_TOO_COMPLEX", + ctx = parserRuleContext) + } + def insertOverwriteDirectoryUnsupportedError(): Throwable = { SparkException.internalError("INSERT OVERWRITE DIRECTORY is not supported.") } @@ -283,7 +289,7 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { def nestedTypeMissingElementTypeError( dataType: String, ctx: PrimitiveDataTypeContext): Throwable = { - dataType match { + dataType.toUpperCase(Locale.ROOT) match { case "ARRAY" => new ParseException( errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY", @@ -540,6 +546,12 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { new ParseException(errorClass = "_LEGACY_ERROR_TEMP_0052", ctx) } + def temporaryViewWithSchemaBindingMode(ctx: StatementContext): Throwable = { + new ParseException(errorClass = "UNSUPPORTED_FEATURE.TEMPORARY_VIEW_WITH_SCHEMA_BINDING_MODE", + messageParameters = Map.empty, + ctx) + } + def parameterMarkerNotAllowed(statement: String, origin: Origin): Throwable = { new ParseException( command = origin.sqlText, @@ -562,19 +574,19 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { ctx) } - def createFuncWithBothIfNotExistsAndReplaceError(ctx: CreateFunctionContext): Throwable = { + def createFuncWithBothIfNotExistsAndReplaceError(ctx: ParserRuleContext): Throwable = { new ParseException( - errorClass = "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_IF_NOT_EXISTS_AND_REPLACE", + errorClass = "INVALID_SQL_SYNTAX.CREATE_ROUTINE_WITH_IF_NOT_EXISTS_AND_REPLACE", ctx) } - def defineTempFuncWithIfNotExistsError(ctx: CreateFunctionContext): Throwable = { + def defineTempFuncWithIfNotExistsError(ctx: ParserRuleContext): Throwable = { new ParseException( errorClass = "INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_IF_NOT_EXISTS", ctx) } - def unsupportedFunctionNameError(funcName: Seq[String], ctx: CreateFunctionContext): Throwable = { + def unsupportedFunctionNameError(funcName: Seq[String], ctx: ParserRuleContext): Throwable = { new ParseException( errorClass = "INVALID_SQL_SYNTAX.MULTI_PART_NAME", messageParameters = Map( @@ -585,7 +597,7 @@ private[sql] object QueryParsingErrors extends DataTypeErrorsBase { def specifyingDBInCreateTempFuncError( databaseName: String, - ctx: CreateFunctionContext): Throwable = { + ctx: ParserRuleContext): Throwable = { new ParseException( errorClass = "INVALID_SQL_SYNTAX.CREATE_TEMP_FUNC_WITH_DATABASE", messageParameters = Map("database" -> toSQLId(databaseName)), diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractMapType.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractMapType.scala new file mode 100644 index 0000000000000..62f422f6f80a7 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractMapType.scala @@ -0,0 +1,43 @@ +/* + 
* Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.internal.types + +import org.apache.spark.sql.types.{AbstractDataType, DataType, MapType} + + +/** + * Use AbstractMapType(AbstractDataType, AbstractDataType) + * for defining expected types for expression parameters. + */ +case class AbstractMapType( + keyType: AbstractDataType, + valueType: AbstractDataType + ) extends AbstractDataType { + + override private[sql] def defaultConcreteType: DataType = + MapType(keyType.defaultConcreteType, valueType.defaultConcreteType, valueContainsNull = true) + + override private[sql] def acceptsType(other: DataType): Boolean = { + other.isInstanceOf[MapType] && + keyType.acceptsType(other.asInstanceOf[MapType].keyType) && + valueType.acceptsType(other.asInstanceOf[MapType].valueType) + } + + override private[spark] def simpleString: String = + s"map<${keyType.simpleString}, ${valueType.simpleString}>" +} diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractStringType.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractStringType.scala index 6403295fe20c4..0828c2d6fc104 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractStringType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/types/AbstractStringType.scala @@ -17,13 +17,14 @@ package org.apache.spark.sql.internal.types +import org.apache.spark.sql.internal.SqlApiConf import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType} /** * StringTypeCollated is an abstract class for StringType with collation support. */ abstract class AbstractStringType extends AbstractDataType { - override private[sql] def defaultConcreteType: DataType = StringType + override private[sql] def defaultConcreteType: DataType = SqlApiConf.get.defaultStringType override private[sql] def simpleString: String = "string" } diff --git a/sql/api/src/main/scala/org/apache/spark/sql/streaming/StatefulProcessorHandle.scala b/sql/api/src/main/scala/org/apache/spark/sql/streaming/StatefulProcessorHandle.scala index f662b685c4e4f..4dc2ca875ef0e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/streaming/StatefulProcessorHandle.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/streaming/StatefulProcessorHandle.scala @@ -108,6 +108,29 @@ private[sql] trait StatefulProcessorHandle extends Serializable { userKeyEnc: Encoder[K], valEncoder: Encoder[V]): MapState[K, V] + /** + * Function to create new or return existing map state variable of given type + * with ttl. State values will not be returned past ttlDuration, and will be eventually removed + * from the state store. 
Any values in mapState which have expired after ttlDuration will not be + returned on get() and will be eventually removed from the state. + * + * The user must ensure to call this function only within the `init()` method of the + * StatefulProcessor. + * + * @param stateName - name of the state variable + * @param userKeyEnc - spark sql encoder for the map key + * @param valEncoder - SQL encoder for state variable + * @param ttlConfig - the ttl configuration (time to live duration etc.) + * @tparam K - type of key for map state variable + * @tparam V - type of value for map state variable + * @return - instance of MapState of type [K,V] that can be used to store state persistently + */ + def getMapState[K, V]( + stateName: String, + userKeyEnc: Encoder[K], + valEncoder: Encoder[V], + ttlConfig: TTLConfig): MapState[K, V] + /** Function to return queryInfo for currently running task */ def getQueryInfo(): QueryInfo diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala index 16cf6224ce27b..12c7905f62d1a 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -117,7 +117,8 @@ object DataType { private val FIXED_DECIMAL = """decimal\(\s*(\d+)\s*,\s*(\-?\d+)\s*\)""".r private val CHAR_TYPE = """char\(\s*(\d+)\s*\)""".r private val VARCHAR_TYPE = """varchar\(\s*(\d+)\s*\)""".r - private val COLLATED_STRING_TYPE = """string\s+collate\s+([\w_]+|`[\w_]`)""".r + + val COLLATIONS_METADATA_KEY = "__COLLATIONS" def fromDDL(ddl: String): DataType = { parseTypeWithFallback( @@ -182,9 +183,6 @@ object DataType { /** Given the string representation of a type, return its DataType */ private def nameToType(name: String): DataType = { name match { - case COLLATED_STRING_TYPE(collation) => - val collationId = CollationFactory.collationNameToId(collation) - StringType(collationId) case "decimal" => DecimalType.USER_DEFAULT case FIXED_DECIMAL(precision, scale) => DecimalType(precision.toInt, scale.toInt) case CHAR_TYPE(length) => CharType(length.toInt) @@ -208,26 +206,40 @@ object DataType { } // NOTE: Map fields must be sorted in alphabetical order to keep consistent with the Python side.
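A minimal sketch of requesting a TTL-backed map state through the `getMapState` overload added above; the package placement (so the `private[sql]` handle trait resolves), the state name, the key/value types and the one-hour TTL are all illustrative assumptions, and the call is meant to run inside `StatefulProcessor.init()`.

package org.apache.spark.sql.streaming.sketch

import java.time.Duration

import org.apache.spark.sql.Encoders
import org.apache.spark.sql.streaming.{MapState, StatefulProcessorHandle, TTLConfig}

object MapStateWithTtlSketch {
  // Intended to be invoked from StatefulProcessor.init(), which receives the handle.
  def createCountsState(handle: StatefulProcessorHandle): MapState[String, Long] = {
    handle.getMapState[String, Long](
      stateName = "counts",
      userKeyEnc = Encoders.STRING,
      valEncoder = Encoders.scalaLong,
      ttlConfig = TTLConfig(Duration.ofHours(1)))
  }
}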
- private[sql] def parseDataType(json: JValue): DataType = json match { + private[sql] def parseDataType( + json: JValue, + fieldPath: String = "", + collationsMap: Map[String, String] = Map.empty): DataType = json match { case JString(name) => - nameToType(name) + collationsMap.get(fieldPath) match { + case Some(collation) => + assertValidTypeForCollations(fieldPath, name, collationsMap) + stringTypeWithCollation(collation) + case _ => nameToType(name) + } case JSortedObject( ("containsNull", JBool(n)), ("elementType", t: JValue), ("type", JString("array"))) => - ArrayType(parseDataType(t), n) + assertValidTypeForCollations(fieldPath, "array", collationsMap) + val elementType = parseDataType(t, fieldPath + ".element", collationsMap) + ArrayType(elementType, n) case JSortedObject( ("keyType", k: JValue), ("type", JString("map")), ("valueContainsNull", JBool(n)), ("valueType", v: JValue)) => - MapType(parseDataType(k), parseDataType(v), n) + assertValidTypeForCollations(fieldPath, "map", collationsMap) + val keyType = parseDataType(k, fieldPath + ".key", collationsMap) + val valueType = parseDataType(v, fieldPath + ".value", collationsMap) + MapType(keyType, valueType, n) case JSortedObject( ("fields", JArray(fields)), ("type", JString("struct"))) => + assertValidTypeForCollations(fieldPath, "struct", collationsMap) StructType(fields.map(parseStructField)) // Scala/Java UDT @@ -253,11 +265,18 @@ object DataType { private def parseStructField(json: JValue): StructField = json match { case JSortedObject( - ("metadata", metadata: JObject), + ("metadata", JObject(metadataFields)), ("name", JString(name)), ("nullable", JBool(nullable)), ("type", dataType: JValue)) => - StructField(name, parseDataType(dataType), nullable, Metadata.fromJObject(metadata)) + val collationsMap = getCollationsMap(metadataFields) + val metadataWithoutCollations = + JObject(metadataFields.filterNot(_._1 == COLLATIONS_METADATA_KEY)) + StructField( + name, + parseDataType(dataType, name, collationsMap), + nullable, + Metadata.fromJObject(metadataWithoutCollations)) // Support reading schema when 'metadata' is missing. case JSortedObject( ("name", JString(name)), @@ -270,8 +289,43 @@ object DataType { ("type", dataType: JValue)) => StructField(name, parseDataType(dataType)) case other => throw new SparkIllegalArgumentException( - errorClass = "_LEGACY_ERROR_TEMP_3250", - messageParameters = Map("other" -> compact(render(other)))) + errorClass = "INVALID_JSON_DATA_TYPE", + messageParameters = Map("invalidType" -> compact(render(other)))) + } + + private def assertValidTypeForCollations( + fieldPath: String, + fieldType: String, + collationMap: Map[String, String]): Unit = { + if (collationMap.contains(fieldPath) && fieldType != "string") { + throw new SparkIllegalArgumentException( + errorClass = "INVALID_JSON_DATA_TYPE_FOR_COLLATIONS", + messageParameters = Map("jsonType" -> fieldType)) + } + } + + /** + * Returns a map of field path to collation name. 
+ */ + private def getCollationsMap(metadataFields: List[JField]): Map[String, String] = { + val collationsJsonOpt = metadataFields.find(_._1 == COLLATIONS_METADATA_KEY).map(_._2) + collationsJsonOpt match { + case Some(JObject(fields)) => + fields.collect { + case (fieldPath, JString(collation)) => + collation.split("\\.", 2) match { + case Array(provider: String, collationName: String) => + CollationFactory.assertValidProvider(provider) + fieldPath -> collationName + } + }.toMap + + case _ => Map.empty + } + } + + private def stringTypeWithCollation(collationName: String): StringType = { + StringType(CollationFactory.collationNameToId(collationName)) } protected[types] def buildFormattedString( @@ -354,6 +408,41 @@ object DataType { } } + /** + * Check if `from` is equal to `to` type except for collations, which are checked to be + * compatible so that data of type `from` can be interpreted as of type `to`. + */ + private[sql] def equalsIgnoreCompatibleCollation( + from: DataType, + to: DataType): Boolean = { + (from, to) match { + // String types with possibly different collations are compatible. + case (_: StringType, _: StringType) => true + + case (ArrayType(fromElement, fromContainsNull), ArrayType(toElement, toContainsNull)) => + (fromContainsNull == toContainsNull) && + equalsIgnoreCompatibleCollation(fromElement, toElement) + + case (MapType(fromKey, fromValue, fromContainsNull), + MapType(toKey, toValue, toContainsNull)) => + fromContainsNull == toContainsNull && + // Map keys cannot change collation. + fromKey == toKey && + equalsIgnoreCompatibleCollation(fromValue, toValue) + + case (StructType(fromFields), StructType(toFields)) => + fromFields.length == toFields.length && + fromFields.zip(toFields).forall { case (fromField, toField) => + fromField.name == toField.name && + fromField.nullable == toField.nullable && + fromField.metadata == toField.metadata && + equalsIgnoreCompatibleCollation(fromField.dataType, toField.dataType) + } + + case (fromDataType, toDataType) => fromDataType == toDataType + } + } + /** * Returns true if the two data types share the same "shape", i.e. the types * are the same, but the field names don't need to be the same. diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/Metadata.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/Metadata.scala index 2ffd0f13ca10f..70e03905d4b05 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/Metadata.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/Metadata.scala @@ -49,6 +49,13 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any]) /** Tests whether this Metadata contains a binding for a key. */ def contains(key: String): Boolean = map.contains(key) + /** + * Tests whether this Metadata is empty. + * + * @since 4.0.0 + */ + def isEmpty: Boolean = map.isEmpty + /** Gets a Long. 
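A short sketch of how the new `equalsIgnoreCompatibleCollation` check above behaves; the package placement (so the `private[sql]` helper resolves) and the use of `UTF8_LCASE` as the non-default collation are illustrative assumptions.

package org.apache.spark.sql.types.sketch

import org.apache.spark.sql.catalyst.util.CollationFactory
import org.apache.spark.sql.types.{DataType, MapType, StringType}

object CollationCompatSketch {
  def main(args: Array[String]): Unit = {
    val lcase = StringType(CollationFactory.UTF8_LCASE_COLLATION_ID)
    // Map values may change collation and still be considered compatible ...
    println(DataType.equalsIgnoreCompatibleCollation(
      MapType(StringType, StringType), MapType(StringType, lcase)))  // true
    // ... but map keys must keep their exact type, collation included.
    println(DataType.equalsIgnoreCompatibleCollation(
      MapType(StringType, StringType), MapType(lcase, StringType)))  // false
  }
}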
*/ def getLong(key: String): Long = get(key) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala index 47d85b2c645c8..6ec55db008c75 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StringType.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.types +import org.json4s.JsonAST.{JString, JValue} + import org.apache.spark.annotation.Stable import org.apache.spark.sql.catalyst.util.CollationFactory @@ -36,8 +38,12 @@ class StringType private(val collationId: Int) extends AtomicType with Serializa */ def supportsBinaryEquality: Boolean = CollationFactory.fetchCollation(collationId).supportsBinaryEquality + + def isUTF8BinaryCollation: Boolean = + collationId == CollationFactory.UTF8_BINARY_COLLATION_ID + def isUTF8BinaryLcaseCollation: Boolean = - collationId == CollationFactory.UTF8_BINARY_LCASE_COLLATION_ID + collationId == CollationFactory.UTF8_LCASE_COLLATION_ID /** * Support for Binary Ordering implies that strings are considered equal only @@ -54,9 +60,14 @@ class StringType private(val collationId: Int) extends AtomicType with Serializa * If this is an UTF8_BINARY collation output is `string` due to backwards compatibility. */ override def typeName: String = - if (collationId == 0) "string" + if (isUTF8BinaryCollation) "string" else s"string collate ${CollationFactory.fetchCollation(collationId).collationName}" + // Due to backwards compatibility and compatibility with other readers + // all string types are serialized in json as regular strings and + // the collation information is written to struct field metadata + override def jsonValue: JValue = JString("string") + override def equals(obj: Any): Boolean = obj.isInstanceOf[StringType] && obj.asInstanceOf[StringType].collationId == collationId diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala index 66f9557db213e..3ff96fea9ee04 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructField.scala @@ -17,11 +17,15 @@ package org.apache.spark.sql.types +import scala.collection.mutable + +import org.json4s.{JObject, JString} import org.json4s.JsonAST.JValue import org.json4s.JsonDSL._ +import org.apache.spark.SparkException import org.apache.spark.annotation.Stable -import org.apache.spark.sql.catalyst.util.{QuotingUtils, StringConcat} +import org.apache.spark.sql.catalyst.util.{CollationFactory, QuotingUtils, StringConcat} import org.apache.spark.sql.catalyst.util.ResolveDefaultColumnsUtils.{CURRENT_DEFAULT_COLUMN_METADATA_KEY, EXISTS_DEFAULT_COLUMN_METADATA_KEY} import org.apache.spark.util.SparkSchemaUtils @@ -63,7 +67,61 @@ case class StructField( ("name" -> name) ~ ("type" -> dataType.jsonValue) ~ ("nullable" -> nullable) ~ - ("metadata" -> metadata.jsonValue) + ("metadata" -> metadataJson) + } + + private def metadataJson: JValue = { + val metadataJsonValue = metadata.jsonValue + metadataJsonValue match { + case JObject(fields) if collationMetadata.nonEmpty => + val collationFields = collationMetadata.map(kv => kv._1 -> JString(kv._2)).toList + JObject(fields :+ (DataType.COLLATIONS_METADATA_KEY -> JObject(collationFields))) + + case _ => metadataJsonValue + } + } + + /** Map of field path to collation name. 
*/ + private lazy val collationMetadata: Map[String, String] = { + val fieldToCollationMap = mutable.Map[String, String]() + + def visitRecursively(dt: DataType, path: String): Unit = dt match { + case at: ArrayType => + processDataType(at.elementType, path + ".element") + + case mt: MapType => + processDataType(mt.keyType, path + ".key") + processDataType(mt.valueType, path + ".value") + + case st: StringType if isCollatedString(st) => + fieldToCollationMap(path) = schemaCollationValue(st) + + case _ => + } + + def processDataType(dt: DataType, path: String): Unit = { + if (isCollatedString(dt)) { + fieldToCollationMap(path) = schemaCollationValue(dt) + } else { + visitRecursively(dt, path) + } + } + + visitRecursively(dataType, name) + fieldToCollationMap.toMap + } + + private def isCollatedString(dt: DataType): Boolean = dt match { + case st: StringType => !st.isUTF8BinaryCollation + case _ => false + } + + private def schemaCollationValue(dt: DataType): String = dt match { + case st: StringType => + val collation = CollationFactory.fetchCollation(st.collationId) + collation.identifier().toStringWithoutVersion() + case _ => + throw SparkException.internalError(s"Unexpected data type $dt") } /** diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala index 42c8c783e54c7..9219c1d139b99 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/UDTRegistration.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.types import scala.collection.mutable import org.apache.spark.annotation.{DeveloperApi, Since} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.errors.DataTypeErrors import org.apache.spark.util.SparkClassUtils @@ -58,7 +58,8 @@ object UDTRegistration extends Serializable with Logging { */ def register(userClass: String, udtClass: String): Unit = { if (udtMap.contains(userClass)) { - logWarning(s"Cannot register UDT for ${userClass}, which is already registered.") + logWarning(log"Cannot register UDT for ${MDC(LogKeys.CLASS_NAME, userClass)}, " + + log"which is already registered.") } else { // When register UDT with class name, we can't check if the UDT class is an UserDefinedType, // or not. The check is deferred. diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala index e4ab802c5bd82..7ec00bde0b25f 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/UpCastRule.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.sql.types -import scala.collection.immutable.IndexedSeq - /** * Rule that defines which upcasts are allow in Spark. 
*/ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala index d9bd3b0e612b6..6852fe09ef96b 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala @@ -51,7 +51,7 @@ private[sql] object ArrowUtils { case BinaryType if !largeVarTypes => ArrowType.Binary.INSTANCE case _: StringType if largeVarTypes => ArrowType.LargeUtf8.INSTANCE case BinaryType if largeVarTypes => ArrowType.LargeBinary.INSTANCE - case DecimalType.Fixed(precision, scale) => new ArrowType.Decimal(precision, scale) + case DecimalType.Fixed(precision, scale) => new ArrowType.Decimal(precision, scale, 8 * 16) case DateType => new ArrowType.Date(DateUnit.DAY) case TimestampType if timeZoneId == null => throw SparkException.internalError("Missing timezoneId where it is mandatory.") diff --git a/sql/catalyst/benchmarks/CalendarIntervalBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/CalendarIntervalBenchmark-jdk21-results.txt index b398bfea784fc..0f3cb3d9d3ac2 100644 --- a/sql/catalyst/benchmarks/CalendarIntervalBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/CalendarIntervalBenchmark-jdk21-results.txt @@ -2,10 +2,10 @@ CalendarInterval ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor CalendarInterval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Call setInterval & getInterval 1024 1024 0 131.1 7.6 1.0X +Call setInterval & getInterval 1028 1028 1 130.6 7.7 1.0X diff --git a/sql/catalyst/benchmarks/CalendarIntervalBenchmark-results.txt b/sql/catalyst/benchmarks/CalendarIntervalBenchmark-results.txt index efcd1362dd227..28e1630ae9624 100644 --- a/sql/catalyst/benchmarks/CalendarIntervalBenchmark-results.txt +++ b/sql/catalyst/benchmarks/CalendarIntervalBenchmark-results.txt @@ -2,10 +2,10 @@ CalendarInterval ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor CalendarInterval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Call setInterval & getInterval 1020 1021 1 131.6 7.6 1.0X +Call setInterval & getInterval 1092 1093 1 122.9 8.1 1.0X diff --git a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk21-results.txt index 5fc70c010384d..0c5014db37346 100644 --- a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-jdk21-results.txt @@ -1,105 +1,105 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 1389.9 0.7 1.0X -Use EnumSet 5 5 0 212.5 4.7 0.2X +Use HashSet 1 1 0 1391.6 0.7 1.0X +Use EnumSet 2 2 0 441.1 2.3 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 464.2 2.2 1.0X -Use EnumSet 2 2 0 544.3 1.8 1.2X +Use HashSet 2 2 0 494.0 2.0 1.0X +Use EnumSet 2 2 0 564.3 1.8 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 3 0 413.1 2.4 1.0X -Use EnumSet 2 2 0 563.5 1.8 1.4X +Use HashSet 2 2 0 486.6 2.1 1.0X +Use EnumSet 2 2 0 502.8 2.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 9 10 2 108.0 9.3 1.0X -Use EnumSet 2 2 0 544.5 1.8 5.0X +Use HashSet 9 9 0 114.6 8.7 1.0X +Use EnumSet 2 2 0 424.0 2.4 3.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 10 0 98.3 10.2 1.0X -Use EnumSet 2 2 0 544.5 1.8 5.5X +Use HashSet 10 10 0 100.0 10.0 1.0X +Use EnumSet 2 2 0 423.9 2.4 4.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 0 0 0 406.5 2.5 1.0X -Use EnumSet 1 1 0 136.5 7.3 0.3X +Use HashSet 0 0 0 407.9 2.5 1.0X +Use EnumSet 1 1 0 136.9 7.3 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 102.2 9.8 1.0X -Use EnumSet 0 0 0 291.4 3.4 2.9X +Use HashSet 1 1 0 102.8 9.7 1.0X +Use EnumSet 0 0 0 291.7 3.4 2.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create 3 items Set: 
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 9 10 0 10.6 94.4 1.0X -Use EnumSet 1 1 0 132.3 7.6 12.5X +Use HashSet 10 10 0 10.5 95.5 1.0X +Use EnumSet 1 1 0 132.6 7.5 12.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 38 38 0 2.7 376.8 1.0X -Use EnumSet 1 1 0 144.3 6.9 54.4X +Use HashSet 30 30 0 3.3 300.1 1.0X +Use EnumSet 1 1 0 144.8 6.9 43.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 58 59 1 1.7 580.1 1.0X -Use EnumSet 1 1 0 132.6 7.5 76.9X +Use HashSet 59 61 1 1.7 594.5 1.0X +Use EnumSet 1 1 0 129.9 7.7 77.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 4 4 0 269.5 3.7 1.0X -Use EnumSet 5 5 0 212.9 4.7 0.8X +Use HashSet 4 4 0 230.7 4.3 1.0X +Use EnumSet 6 6 0 179.2 5.6 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 12 12 1 85.4 11.7 1.0X -Use EnumSet 6 6 0 167.6 6.0 2.0X +Use HashSet 13 14 0 75.4 13.3 1.0X +Use EnumSet 7 7 0 147.3 6.8 2.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 27 28 0 36.6 27.3 1.0X -Use EnumSet 6 6 0 169.5 5.9 4.6X +Use HashSet 27 28 1 37.6 26.6 1.0X +Use EnumSet 7 7 0 149.6 6.7 4.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 58 58 1 17.3 57.9 1.0X -Use EnumSet 6 6 0 166.4 6.0 9.6X +Use HashSet 48 49 1 20.7 48.3 1.0X +Use EnumSet 7 7 0 147.4 6.8 7.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Use HashSet 79 79 1 12.7 78.8 1.0X -Use EnumSet 6 6 0 157.9 6.3 12.4X +Use HashSet 79 80 1 12.6 79.2 1.0X +Use EnumSet 7 7 0 140.3 7.1 11.1X diff --git a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt index 31c2877a42c21..1714661841022 100644 --- a/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt +++ b/sql/catalyst/benchmarks/EnumTypeSetBenchmark-results.txt @@ -1,105 +1,105 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 1 1 0 1435.7 0.7 1.0X -Use EnumSet 2 2 0 516.0 1.9 0.4X +Use HashSet 1 1 0 1391.5 0.7 1.0X +Use EnumSet 2 2 0 503.5 2.0 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 501.5 2.0 1.0X -Use EnumSet 2 2 0 481.2 2.1 1.0X +Use HashSet 2 2 0 509.3 2.0 1.0X +Use EnumSet 2 2 0 488.5 2.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 501.2 2.0 1.0X -Use EnumSet 2 2 0 564.9 1.8 1.1X +Use HashSet 2 2 0 501.9 2.0 1.0X +Use EnumSet 2 2 0 564.7 1.8 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 9 9 1 107.7 9.3 1.0X -Use EnumSet 2 2 0 598.6 1.7 5.6X +Use HashSet 8 8 0 122.9 8.1 1.0X +Use EnumSet 2 2 0 545.7 1.8 4.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 10 0 98.2 10.2 1.0X -Use EnumSet 2 2 0 587.5 1.7 6.0X +Use HashSet 9 9 0 107.8 9.3 1.0X +Use EnumSet 2 2 0 545.7 1.8 5.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 
64-Core Processor Test create empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 0 0 0 408.3 2.4 1.0X -Use EnumSet 0 0 0 291.8 3.4 0.7X +Use HashSet 0 0 0 395.7 2.5 1.0X +Use EnumSet 1 1 0 132.7 7.5 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 2 2 0 59.7 16.7 1.0X -Use EnumSet 1 1 0 150.1 6.7 2.5X +Use HashSet 2 2 0 59.6 16.8 1.0X +Use EnumSet 1 1 0 151.1 6.6 2.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 11 0 9.6 104.4 1.0X -Use EnumSet 1 1 0 132.4 7.6 13.8X +Use HashSet 10 10 0 10.2 98.3 1.0X +Use EnumSet 1 1 0 132.4 7.6 13.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 36 37 1 2.8 362.5 1.0X -Use EnumSet 1 1 0 132.3 7.6 48.0X +Use HashSet 35 36 1 2.8 351.5 1.0X +Use EnumSet 1 1 0 132.4 7.6 46.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 58 59 1 1.7 582.1 1.0X -Use EnumSet 1 1 0 127.2 7.9 74.1X +Use HashSet 61 62 1 1.6 607.7 1.0X +Use EnumSet 1 1 0 127.2 7.9 77.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use empty Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 4 4 0 274.1 3.6 1.0X -Use EnumSet 5 5 0 216.0 4.6 0.8X +Use HashSet 4 4 0 248.3 4.0 1.0X +Use EnumSet 5 5 0 188.3 5.3 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use 1 item Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 10 11 0 97.0 10.3 1.0X -Use EnumSet 6 7 0 162.4 6.2 1.7X +Use HashSet 11 11 0 92.9 10.8 1.0X +Use EnumSet 7 7 0 141.6 7.1 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use 3 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 32 33 1 30.8 32.5 1.0X -Use EnumSet 6 6 0 155.8 6.4 5.1X +Use HashSet 32 32 0 31.7 31.5 1.0X +Use EnumSet 6 7 0 154.0 6.5 4.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use 5 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Use HashSet 61 63 3 16.4 61.1 1.0X -Use EnumSet 7 7 0 153.4 6.5 9.4X +Use HashSet 58 59 1 17.2 58.1 1.0X +Use EnumSet 7 7 0 140.3 7.1 8.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test create and contains use 10 items Set: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Use HashSet 83 85 2 12.0 83.0 1.0X -Use EnumSet 7 7 0 152.6 6.6 12.7X +Use HashSet 84 85 1 11.9 83.8 1.0X +Use EnumSet 7 7 0 144.7 6.9 12.1X diff --git a/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt new file mode 100644 index 0000000000000..3d16c874e8c9b --- /dev/null +++ b/sql/catalyst/benchmarks/EscapePathBenchmark-jdk21-results.txt @@ -0,0 +1,24 @@ +================================================================================================ +Escape +================================================================================================ + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Escape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Legacy 6996 7009 9 0.1 6996.5 1.0X +New 771 776 3 1.3 770.7 9.1X + + +================================================================================================ +Unescape +================================================================================================ + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Unescape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Legacy 5127 5137 6 0.2 5127.3 1.0X +New 579 583 4 1.7 579.3 8.9X + + diff --git a/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt b/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt new file mode 100644 index 0000000000000..7cfa134652c27 --- /dev/null +++ b/sql/catalyst/benchmarks/EscapePathBenchmark-results.txt @@ -0,0 +1,24 @@ +================================================================================================ +Escape +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 
6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Escape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Legacy 6966 6978 12 0.1 6965.9 1.0X +New 725 730 4 1.4 725.4 9.6X + + +================================================================================================ +Unescape +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Unescape Tests: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Legacy 6665 6677 11 0.2 6664.6 1.0X +New 602 606 2 1.7 602.1 11.1X + + diff --git a/sql/catalyst/benchmarks/GenericArrayDataBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/GenericArrayDataBenchmark-jdk21-results.txt index fa1a8ea23b99f..f5ac49b25f6e1 100644 --- a/sql/catalyst/benchmarks/GenericArrayDataBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/GenericArrayDataBenchmark-jdk21-results.txt @@ -1,10 +1,10 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor constructor: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -arrayOfAny 6 6 0 1620.1 0.6 1.0X -arrayOfAnyAsObject 6 6 0 1620.1 0.6 1.0X +arrayOfAny 6 6 1 1619.9 0.6 1.0X +arrayOfAnyAsObject 6 6 0 1619.8 0.6 1.0X arrayOfAnyAsSeq 215 216 1 46.5 21.5 0.0X arrayOfInt 270 271 1 37.0 27.0 0.0X -arrayOfIntAsObject 249 250 1 40.1 24.9 0.0X +arrayOfIntAsObject 250 251 1 40.0 25.0 0.0X diff --git a/sql/catalyst/benchmarks/GenericArrayDataBenchmark-results.txt b/sql/catalyst/benchmarks/GenericArrayDataBenchmark-results.txt index 70ab4313ee8d7..5431cc0ccd8bb 100644 --- a/sql/catalyst/benchmarks/GenericArrayDataBenchmark-results.txt +++ b/sql/catalyst/benchmarks/GenericArrayDataBenchmark-results.txt @@ -1,10 +1,10 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor constructor: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -arrayOfAny 6 6 0 1620.2 0.6 1.0X +arrayOfAny 6 6 0 1620.1 0.6 1.0X arrayOfAnyAsObject 6 6 0 1620.1 0.6 1.0X arrayOfAnyAsSeq 155 155 1 64.7 15.5 0.0X -arrayOfInt 252 252 0 39.7 25.2 0.0X -arrayOfIntAsObject 249 250 0 40.2 24.9 0.0X +arrayOfInt 253 254 1 39.6 25.3 0.0X +arrayOfIntAsObject 252 253 1 39.7 25.2 0.0X diff --git a/sql/catalyst/benchmarks/HashBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/HashBenchmark-jdk21-results.txt index 27d1cc6f3b036..1a1d7bb5627e0 100644 --- a/sql/catalyst/benchmarks/HashBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/HashBenchmark-jdk21-results.txt @@ -2,69 +2,69 @@ single ints ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 
Hash For single ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2165 2180 22 248.0 4.0 1.0X -codegen version 3582 3583 1 149.9 6.7 0.6X -codegen version 64-bit 3404 3413 13 157.7 6.3 0.6X -codegen HiveHash version 2837 2857 28 189.3 5.3 0.8X +interpreted version 2149 2153 5 249.8 4.0 1.0X +codegen version 3579 3579 1 150.0 6.7 0.6X +codegen version 64-bit 3401 3403 2 157.8 6.3 0.6X +codegen HiveHash version 2799 2802 5 191.8 5.2 0.8X ================================================================================================ single longs ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For single longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 3483 3487 6 154.1 6.5 1.0X -codegen version 5078 5081 4 105.7 9.5 0.7X -codegen version 64-bit 4143 4148 7 129.6 7.7 0.8X -codegen HiveHash version 3320 3346 37 161.7 6.2 1.0X +interpreted version 2761 2793 45 194.4 5.1 1.0X +codegen version 5093 5095 4 105.4 9.5 0.5X +codegen version 64-bit 4112 4115 4 130.6 7.7 0.7X +codegen HiveHash version 3215 3216 1 167.0 6.0 0.9X ================================================================================================ normal ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For normal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 1422 1436 19 1.5 678.2 1.0X -codegen version 1881 1881 1 1.1 896.9 0.8X -codegen version 64-bit 739 741 2 2.8 352.3 1.9X -codegen HiveHash version 3770 3771 1 0.6 1797.7 0.4X +interpreted version 1462 1462 1 1.4 696.9 1.0X +codegen version 1868 1868 1 1.1 890.7 0.8X +codegen version 64-bit 732 734 1 2.9 349.0 2.0X +codegen HiveHash version 3733 3734 2 0.6 1780.0 0.4X ================================================================================================ array ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 989 992 3 0.1 7543.5 1.0X -codegen version 3498 3499 0 0.0 26691.2 0.3X -codegen version 64-bit 2397 2398 1 0.1 18289.8 0.4X -codegen HiveHash version 726 726 0 0.2 5535.4 1.4X +interpreted version 1084 1084 0 0.1 8269.4 1.0X +codegen version 3681 3688 10 0.0 28080.6 0.3X +codegen version 64-bit 2527 2527 0 0.1 19280.1 0.4X +codegen HiveHash version 810 810 0 0.2 6178.0 1.3X 
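A note on the HashBenchmark tables in this file: the "interpreted version" rows evaluate the catalyst hash expressions directly, while the "codegen" rows exercise generated code for 32-bit Murmur3, the 64-bit xxHash64 variant, and HiveHash. A minimal sketch of the interpreted path, with expression names assumed from the row labels rather than taken from this diff:

    import org.apache.spark.sql.catalyst.expressions.{HiveHash, Literal, Murmur3Hash, XxHash64}

    // Interpreted evaluation over literal inputs; the benchmark's codegen rows run the
    // same expressions through generated Java instead.
    val inputs  = Seq(Literal(1), Literal(2L))
    val murmur3 = new Murmur3Hash(inputs).eval()   // 32-bit Murmur3, default seed 42
    val xxHash  = new XxHash64(inputs).eval()      // 64-bit xxHash64, default seed 42
    val hive    = HiveHash(inputs).eval()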
================================================================================================ map ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 0 0 0 93.0 10.7 1.0X -codegen version 233 233 0 0.0 56943.4 0.0X -codegen version 64-bit 159 160 0 0.0 38922.6 0.0X -codegen HiveHash version 26 26 0 0.2 6303.0 0.0X +interpreted version 0 0 0 84.9 11.8 1.0X +codegen version 260 260 0 0.0 63397.9 0.0X +codegen version 64-bit 176 176 0 0.0 43056.2 0.0X +codegen HiveHash version 29 29 0 0.1 6968.9 0.0X diff --git a/sql/catalyst/benchmarks/HashBenchmark-results.txt b/sql/catalyst/benchmarks/HashBenchmark-results.txt index e004245c2731a..a864b60913439 100644 --- a/sql/catalyst/benchmarks/HashBenchmark-results.txt +++ b/sql/catalyst/benchmarks/HashBenchmark-results.txt @@ -2,69 +2,69 @@ single ints ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For single ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2253 2253 0 238.3 4.2 1.0X -codegen version 3602 3608 9 149.1 6.7 0.6X -codegen version 64-bit 3457 3466 13 155.3 6.4 0.7X -codegen HiveHash version 3002 3005 4 178.8 5.6 0.8X +interpreted version 2174 2175 1 246.9 4.1 1.0X +codegen version 3591 3602 17 149.5 6.7 0.6X +codegen version 64-bit 3475 3475 0 154.5 6.5 0.6X +codegen HiveHash version 2849 2852 4 188.5 5.3 0.8X ================================================================================================ single longs ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For single longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 2886 2890 5 186.0 5.4 1.0X -codegen version 5308 5313 7 101.1 9.9 0.5X -codegen version 64-bit 3728 3730 2 144.0 6.9 0.8X -codegen HiveHash version 3382 3384 2 158.7 6.3 0.9X +interpreted version 3000 3001 2 179.0 5.6 1.0X +codegen version 5207 5220 17 103.1 9.7 0.6X +codegen version 64-bit 3619 3645 36 148.3 6.7 0.8X +codegen HiveHash version 3408 3456 69 157.6 6.3 0.9X ================================================================================================ normal ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For normal: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -interpreted version 2491 2493 2 0.8 1188.0 1.0X -codegen version 2253 2253 0 0.9 1074.1 1.1X -codegen version 64-bit 699 700 2 3.0 333.2 3.6X -codegen HiveHash version 3677 3678 1 0.6 1753.4 0.7X +interpreted version 2521 2524 3 0.8 1202.3 1.0X +codegen version 2232 2232 0 0.9 1064.4 1.1X +codegen version 64-bit 700 701 2 3.0 333.8 3.6X +codegen HiveHash version 3672 3682 14 0.6 1750.8 0.7X ================================================================================================ array ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 936 937 1 0.1 7139.3 1.0X -codegen version 3374 3382 11 0.0 25742.7 0.3X -codegen version 64-bit 2282 2284 4 0.1 17406.8 0.4X -codegen HiveHash version 686 687 2 0.2 5231.1 1.4X +interpreted version 971 976 4 0.1 7410.9 1.0X +codegen version 3558 3582 34 0.0 27147.3 0.3X +codegen version 64-bit 2357 2363 9 0.1 17985.0 0.4X +codegen HiveHash version 721 723 4 0.2 5497.9 1.3X ================================================================================================ map ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash For map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -interpreted version 0 0 0 90.0 11.1 1.0X -codegen version 229 230 1 0.0 55936.5 0.0X -codegen version 64-bit 155 155 0 0.0 37821.3 0.0X -codegen HiveHash version 25 25 0 0.2 6172.4 0.0X +interpreted version 0 0 0 90.3 11.1 1.0X +codegen version 213 214 0 0.0 52051.3 0.0X +codegen version 64-bit 144 144 1 0.0 35164.7 0.0X +codegen HiveHash version 24 24 3 0.2 5812.5 0.0X diff --git a/sql/catalyst/benchmarks/HashByteArrayBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/HashByteArrayBenchmark-jdk21-results.txt index a88ce01f163df..f7dc5d3a8a87d 100644 --- a/sql/catalyst/benchmarks/HashByteArrayBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/HashByteArrayBenchmark-jdk21-results.txt @@ -2,76 +2,76 @@ Benchmark for MurMurHash 3 and xxHash64 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 8: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 10 10 0 206.7 4.8 1.0X -xxHash 64-bit 10 10 0 201.7 5.0 1.0X +Murmur3_x86_32 10 10 0 206.8 4.8 1.0X +xxHash 64-bit 10 10 0 201.8 5.0 1.0X HiveHasher 14 14 0 152.3 6.6 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 
64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 16: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ Murmur3_x86_32 14 14 0 145.8 6.9 1.0X -xxHash 64-bit 12 12 0 168.8 5.9 1.2X -HiveHasher 23 23 0 91.6 10.9 0.6X +xxHash 64-bit 12 12 0 169.6 5.9 1.2X +HiveHasher 23 23 0 92.0 10.9 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 24: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 20 20 0 104.8 9.5 1.0X -xxHash 64-bit 14 14 0 145.6 6.9 1.4X -HiveHasher 33 33 0 63.0 15.9 0.6X +Murmur3_x86_32 20 20 0 104.5 9.6 1.0X +xxHash 64-bit 15 15 0 144.0 6.9 1.4X +HiveHasher 33 34 1 62.9 15.9 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 31: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 34 34 0 61.9 16.1 1.0X -xxHash 64-bit 27 27 0 78.2 12.8 1.3X -HiveHasher 43 43 0 48.4 20.6 0.8X +Murmur3_x86_32 31 31 0 68.3 14.7 1.0X +xxHash 64-bit 27 27 0 77.5 12.9 1.1X +HiveHasher 43 43 0 48.3 20.7 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 95: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 71 72 0 29.4 34.1 1.0X -xxHash 64-bit 60 61 1 34.9 28.7 1.2X -HiveHasher 156 156 0 13.4 74.4 0.5X +Murmur3_x86_32 68 69 0 30.6 32.6 1.0X +xxHash 64-bit 57 58 0 36.5 27.4 1.2X +HiveHasher 156 156 1 13.4 74.5 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 287: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 207 207 0 10.1 98.6 1.0X -xxHash 64-bit 105 106 0 19.9 50.3 2.0X -HiveHasher 530 530 0 4.0 252.7 0.4X +Murmur3_x86_32 205 208 1 10.2 97.9 1.0X +xxHash 64-bit 102 102 0 20.6 48.4 2.0X +HiveHasher 529 530 0 4.0 252.5 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 1055: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 743 743 0 2.8 354.3 1.0X -xxHash 64-bit 294 295 0 7.1 140.4 2.5X -HiveHasher 2029 2030 1 1.0 967.6 0.4X +Murmur3_x86_32 713 713 1 2.9 339.8 1.0X +xxHash 64-bit 292 293 0 7.2 139.4 2.4X +HiveHasher 2030 
2030 0 1.0 967.8 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 2079: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 1451 1452 0 1.4 692.1 1.0X -xxHash 64-bit 548 548 1 3.8 261.1 2.7X -HiveHasher 4022 4034 17 0.5 1917.9 0.4X +Murmur3_x86_32 1379 1381 2 1.5 657.7 1.0X +xxHash 64-bit 559 564 9 3.8 266.5 2.5X +HiveHasher 4022 4024 4 0.5 1917.6 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 8223: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 5784 5785 0 0.4 2758.2 1.0X -xxHash 64-bit 2057 2057 0 1.0 980.6 2.8X -HiveHasher 15983 15984 1 0.1 7621.4 0.4X +Murmur3_x86_32 5701 5704 5 0.4 2718.2 1.0X +xxHash 64-bit 2067 2068 1 1.0 985.6 2.8X +HiveHasher 15981 15982 1 0.1 7620.3 0.4X diff --git a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt index 86886ad928bbf..6c649e7b0d42d 100644 --- a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt +++ b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt @@ -2,76 +2,76 @@ Benchmark for MurMurHash 3 and xxHash64 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 8: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ Murmur3_x86_32 11 11 0 184.1 5.4 1.0X -xxHash 64-bit 10 10 0 214.7 4.7 1.2X -HiveHasher 14 14 0 146.2 6.8 0.8X +xxHash 64-bit 10 10 0 214.5 4.7 1.2X +HiveHasher 14 14 0 146.3 6.8 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 16: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 17 17 0 123.4 8.1 1.0X -xxHash 64-bit 12 12 0 176.3 5.7 1.4X -HiveHasher 24 25 0 85.9 11.6 0.7X +Murmur3_x86_32 17 17 0 123.5 8.1 1.0X +xxHash 64-bit 12 12 0 176.5 5.7 1.4X +HiveHasher 24 25 1 85.7 11.7 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 24: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 23 23 0 89.5 11.2 1.0X -xxHash 64-bit 14 14 0 145.9 6.9 1.6X +Murmur3_x86_32 23 24 0 89.5 11.2 1.0X +xxHash 64-bit 14 14 0 146.1 6.8 1.6X HiveHasher 35 35 0 59.8 16.7 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on 
Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 31: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 35 35 0 60.2 16.6 1.0X -xxHash 64-bit 27 27 0 76.9 13.0 1.3X +Murmur3_x86_32 35 36 0 59.1 16.9 1.0X +xxHash 64-bit 27 28 0 76.3 13.1 1.3X HiveHasher 45 45 0 47.0 21.3 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 95: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 87 88 0 24.0 41.7 1.0X -xxHash 64-bit 62 62 0 34.0 29.5 1.4X +Murmur3_x86_32 87 87 0 24.2 41.4 1.0X +xxHash 64-bit 63 64 0 33.0 30.3 1.4X HiveHasher 160 160 0 13.1 76.1 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 287: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 243 243 0 8.6 115.9 1.0X -xxHash 64-bit 107 107 0 19.6 50.9 2.3X -HiveHasher 534 534 0 3.9 254.4 0.5X +Murmur3_x86_32 243 243 0 8.6 115.8 1.0X +xxHash 64-bit 122 122 0 17.2 58.2 2.0X +HiveHasher 533 534 0 3.9 254.4 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 1055: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 873 873 0 2.4 416.1 1.0X -xxHash 64-bit 296 296 1 7.1 141.0 3.0X -HiveHasher 2035 2035 0 1.0 970.5 0.4X +Murmur3_x86_32 872 873 1 2.4 415.9 1.0X +xxHash 64-bit 397 398 1 5.3 189.4 2.2X +HiveHasher 2036 2036 0 1.0 970.7 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 2079: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 1704 1704 0 1.2 812.4 1.0X -xxHash 64-bit 551 551 1 3.8 262.6 3.1X -HiveHasher 4029 4029 0 0.5 1921.1 0.4X +Murmur3_x86_32 1704 1713 14 1.2 812.4 1.0X +xxHash 64-bit 776 778 4 2.7 370.0 2.2X +HiveHasher 4028 4029 1 0.5 1920.9 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Hash byte arrays with length 8223: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Murmur3_x86_32 6704 6705 1 0.3 3196.6 1.0X -xxHash 64-bit 2054 2055 0 1.0 979.6 3.3X -HiveHasher 15993 15996 4 0.1 7626.0 0.4X +Murmur3_x86_32 6698 6699 2 0.3 3194.0 1.0X 
+xxHash 64-bit 3021 3021 0 0.7 1440.4 2.2X +HiveHasher 15982 15984 3 0.1 7620.8 0.4X diff --git a/sql/catalyst/benchmarks/HexBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/HexBenchmark-jdk21-results.txt new file mode 100644 index 0000000000000..afa3efa7a919b --- /dev/null +++ b/sql/catalyst/benchmarks/HexBenchmark-jdk21-results.txt @@ -0,0 +1,14 @@ +================================================================================================ +UnHex Comparison +================================================================================================ + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Cardinality 1000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Common Codecs 4755 4766 13 0.2 4755.0 1.0X +Java 4018 4048 45 0.2 4018.3 1.2X +Spark 3473 3476 3 0.3 3472.8 1.4X +Spark Binary 2625 2628 3 0.4 2624.6 1.8X + + diff --git a/sql/catalyst/benchmarks/HexBenchmark-results.txt b/sql/catalyst/benchmarks/HexBenchmark-results.txt new file mode 100644 index 0000000000000..55a6a07fed406 --- /dev/null +++ b/sql/catalyst/benchmarks/HexBenchmark-results.txt @@ -0,0 +1,14 @@ +================================================================================================ +UnHex Comparison +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1021-azure +AMD EPYC 7763 64-Core Processor +Cardinality 1000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +Common Codecs 4881 4897 25 0.2 4880.8 1.0X +Java 4220 4226 9 0.2 4220.0 1.2X +Spark 3954 3956 2 0.3 3954.5 1.2X +Spark Binary 2738 2750 11 0.4 2737.9 1.8X + + diff --git a/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-jdk21-results.txt b/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-jdk21-results.txt index 92a440cc261b2..650028b464207 100644 --- a/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-jdk21-results.txt +++ b/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-jdk21-results.txt @@ -2,13 +2,13 @@ unsafe projection ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor unsafe projection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -single long 1323 1324 1 202.8 4.9 1.0X -single nullable long 2364 2376 18 113.6 8.8 0.6X -7 primitive types 7098 7104 8 37.8 26.4 0.2X -7 nullable primitive types 9958 9959 1 27.0 37.1 0.1X +single long 1326 1327 1 202.4 4.9 1.0X +single nullable long 2360 2374 19 113.7 8.8 0.6X +7 primitive types 7076 7081 8 37.9 26.4 0.2X +7 nullable primitive types 10618 10621 5 25.3 39.6 0.1X diff --git a/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-results.txt b/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-results.txt index 8bb62ff118481..066c5f9a6f82a 100644 --- a/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-results.txt +++ b/sql/catalyst/benchmarks/UnsafeProjectionBenchmark-results.txt @@ -2,13 +2,13 @@ 
unsafe projection ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor unsafe projection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -single long 1289 1290 1 208.2 4.8 1.0X -single nullable long 2431 2431 1 110.4 9.1 0.5X -7 primitive types 6975 6977 2 38.5 26.0 0.2X -7 nullable primitive types 10329 10331 3 26.0 38.5 0.1X +single long 1287 1290 4 208.5 4.8 1.0X +single nullable long 2432 2433 2 110.4 9.1 0.5X +7 primitive types 6968 6970 3 38.5 26.0 0.2X +7 nullable primitive types 10256 10290 48 26.2 38.2 0.1X diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java index 8fe59cb7fae5d..07a9409bc57a2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java @@ -112,6 +112,32 @@ public static boolean isLuhnNumber(UTF8String numberString) { return checkSum % 10 == 0; } + /** + * Function to validate a given UTF8 string according to Unicode rules. + * + * @param utf8String + * the input string to validate against possible invalid byte sequences + * @return + * the original string if the input string is a valid UTF8String, throw exception otherwise. + */ + public static UTF8String validateUTF8String(UTF8String utf8String) { + if (utf8String.isValid()) return utf8String; + else throw QueryExecutionErrors.invalidUTF8StringError(utf8String); + } + + /** + * Function to try to validate a given UTF8 string according to Unicode rules. + * + * @param utf8String + * the input string to validate against possible invalid byte sequences + * @return + * the original string if the input string is a valid UTF8String, null otherwise. + */ + public static UTF8String tryValidateUTF8String(UTF8String utf8String) { + if (utf8String.isValid()) return utf8String; + else return null; + } + public static byte[] aesEncrypt(byte[] input, byte[] key, UTF8String mode, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/RowBasedKeyValueBatch.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/RowBasedKeyValueBatch.java index 6a74f64d44849..c057c36ca8204 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/RowBasedKeyValueBatch.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/RowBasedKeyValueBatch.java @@ -19,16 +19,16 @@ import java.io.Closeable; import java.io.IOException; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.memory.MemoryConsumer; import org.apache.spark.memory.SparkOutOfMemoryError; import org.apache.spark.memory.TaskMemoryManager; import org.apache.spark.sql.types.*; import org.apache.spark.unsafe.memory.MemoryBlock; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - - /** * RowBasedKeyValueBatch stores key value pairs in contiguous memory region. 
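Regarding the validateUTF8String / tryValidateUTF8String helpers added to ExpressionImplUtils above: both are thin wrappers over UTF8String.isValid(); the strict variant raises a query execution error for malformed input, while the "try" variant degrades to null. A minimal standalone sketch of that distinction (without the Spark error-class plumbing):

    import org.apache.spark.unsafe.types.UTF8String

    // 0xC0 can never start a valid UTF-8 sequence, so isValid() returns false here.
    val invalid = UTF8String.fromBytes(Array[Byte](0x61, 0xC0.toByte))
    val valid   = UTF8String.fromString("abc")

    def tryValidate(s: UTF8String): UTF8String = if (s.isValid) s else null
    assert(tryValidate(valid) eq valid)
    assert(tryValidate(invalid) == null)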
* @@ -48,7 +48,8 @@ * */ public abstract class RowBasedKeyValueBatch extends MemoryConsumer implements Closeable { - protected static final Logger logger = LoggerFactory.getLogger(RowBasedKeyValueBatch.class); + protected static final SparkLogger logger = + SparkLoggerFactory.getLogger(RowBasedKeyValueBatch.class); private static final int DEFAULT_CAPACITY = 1 << 16; @@ -127,7 +128,8 @@ private boolean acquirePage(long requiredSize) { try { page = allocatePage(requiredSize); } catch (SparkOutOfMemoryError e) { - logger.warn("Failed to allocate page ({} bytes).", requiredSize); + logger.warn("Failed to allocate page ({} bytes).", + MDC.of(LogKeys.PAGE_SIZE$.MODULE$, requiredSize)); return false; } base = page.getBaseObject(); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index 6325ba68af5b7..8741c206f2bb4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -21,12 +21,14 @@ import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; +import java.util.Map; import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.KryoSerializable; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; +import org.apache.spark.SparkIllegalArgumentException; import org.apache.spark.SparkUnsupportedOperationException; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.types.*; @@ -155,6 +157,17 @@ public UnsafeRow() {} public void pointTo(Object baseObject, long baseOffset, int sizeInBytes) { assert numFields >= 0 : "numFields (" + numFields + ") should >= 0"; assert sizeInBytes % 8 == 0 : "sizeInBytes (" + sizeInBytes + ") should be a multiple of 8"; + if (baseObject instanceof byte[] bytes) { + int offsetInByteArray = (int) (baseOffset - Platform.BYTE_ARRAY_OFFSET); + if (offsetInByteArray < 0 || sizeInBytes < 0 || + bytes.length < offsetInByteArray + sizeInBytes) { + throw new SparkIllegalArgumentException( + "INTERNAL_ERROR", + Map.of("message", "Invalid byte array backed UnsafeRow: byte array length=" + + bytes.length + ", offset=" + offsetInByteArray + ", byte size=" + sizeInBytes) + ); + } + } this.baseObject = baseObject; this.baseOffset = baseOffset; this.sizeInBytes = sizeInBytes; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java index 8ca4f568b9f18..23f3acc7230fa 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/CatalogPlugin.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java index 65f31229764fe..6606748e6d6f9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/MetadataColumn.java @@ -1,20 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.apache.spark.sql.connector.catalog; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java index e42424268b44d..09cbda2aa1e16 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java @@ -1,20 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.apache.spark.sql.connector.catalog; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/index/SupportsIndex.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/index/SupportsIndex.java index 734b290775581..3417ef7f8e805 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/index/SupportsIndex.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/index/SupportsIndex.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/index/TableIndex.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/index/TableIndex.java index 977ed8d6c7528..b9eba54848023 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/index/TableIndex.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/index/TableIndex.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Cast.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Cast.java index 291f94ec75a8a..25d0c0466aca4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Cast.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Cast.java @@ -29,14 +29,30 @@ @Evolving public class Cast extends ExpressionWithToString { private Expression expression; + + /** + * Original data type of given expression + */ + private DataType expressionDataType; + + /** + * Target data type, i.e. 
data type in which expression will be cast + */ private DataType dataType; + @Deprecated public Cast(Expression expression, DataType dataType) { + this(expression, null, dataType); + } + + public Cast(Expression expression, DataType expressionDataType, DataType targetDataType) { this.expression = expression; - this.dataType = dataType; + this.expressionDataType = expressionDataType; + this.dataType = targetDataType; } public Expression expression() { return expression; } + public DataType expressionDataType() { return expressionDataType; } public DataType dataType() { return dataType; } @Override diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index fd1b8f5dd1eeb..14e2112b7201a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -65,7 +65,6 @@ protected String escapeSpecialCharsForLikePattern(String str) { switch (c) { case '_' -> builder.append("\\_"); case '%' -> builder.append("\\%"); - case '\'' -> builder.append("\\\'"); default -> builder.append(c); } } @@ -79,7 +78,7 @@ public String build(Expression expr) { } else if (expr instanceof NamedReference namedReference) { return visitNamedReference(namedReference); } else if (expr instanceof Cast cast) { - return visitCast(build(cast.expression()), cast.dataType()); + return visitCast(build(cast.expression()), cast.expressionDataType(), cast.dataType()); } else if (expr instanceof Extract extract) { return visitExtract(extract.field(), build(extract.source())); } else if (expr instanceof SortOrder sortOrder) { @@ -212,7 +211,7 @@ protected String visitContains(String l, String r) { return l + " LIKE '%" + escapeSpecialCharsForLikePattern(value) + "%' ESCAPE '\\'"; } - private String inputToSQL(Expression input) { + protected String inputToSQL(Expression input) { if (input.children().length > 1) { return "(" + build(input) + ")"; } else { @@ -231,8 +230,8 @@ protected String visitBinaryArithmetic(String name, String l, String r) { return l + " " + name + " " + r; } - protected String visitCast(String l, DataType dataType) { - return "CAST(" + l + " AS " + dataType.typeName() + ")"; + protected String visitCast(String expr, DataType exprDataType, DataType targetDataType) { + return "CAST(" + expr + " AS " + targetDataType.typeName() + ")"; } protected String visitAnd(String name, String l, String r) { @@ -356,7 +355,7 @@ private String joinListToString( return joiner.toString(); } - private String[] expressionsToStringArray(Expression[] expressions) { + protected String[] expressionsToStringArray(Expression[] expressions) { String[] result = new String[expressions.length]; for (int i = 0; i < expressions.length; i++) { result[i] = build(expressions[i]); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/util/CaseInsensitiveStringMap.java b/sql/catalyst/src/main/java/org/apache/spark/sql/util/CaseInsensitiveStringMap.java index 00a3de692fbf4..ec461f9740019 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/util/CaseInsensitiveStringMap.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/util/CaseInsensitiveStringMap.java @@ -17,12 +17,6 @@ package org.apache.spark.sql.util; -import org.apache.spark.SparkIllegalArgumentException; -import org.apache.spark.SparkUnsupportedOperationException; -import 
org.apache.spark.annotation.Experimental; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -31,6 +25,14 @@ import java.util.Objects; import java.util.Set; +import org.apache.spark.annotation.Experimental; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; +import org.apache.spark.SparkIllegalArgumentException; +import org.apache.spark.SparkUnsupportedOperationException; + /** * Case-insensitive map of string keys to string values. *
<p>
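The logging changes in this file (and in RowBasedKeyValueBatch.java above) migrate from slf4j to Spark's structured SparkLogger, where each message placeholder is paired with an MDC key from LogKeys so structured log backends can index it. A rough Scala rendering of the same pattern; LogKeys.KEY$.MODULE$ in the Java hunk below is simply LogKeys.KEY from Scala, and Scala call sites normally use the logging string interpolator instead:

    import org.apache.spark.internal.{LogKeys, MDC, SparkLogger, SparkLoggerFactory}
    import org.apache.spark.sql.util.CaseInsensitiveStringMap

    val logger: SparkLogger =
      SparkLoggerFactory.getLogger(classOf[CaseInsensitiveStringMap])
    // The {} placeholder is backed by an MDC entry keyed by LogKeys.KEY.
    logger.warn("Converting duplicated key {} into CaseInsensitiveStringMap.",
      MDC.of(LogKeys.KEY, "someDuplicatedKey"))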
      @@ -43,7 +45,8 @@ */ @Experimental public class CaseInsensitiveStringMap implements Map { - private static final Logger logger = LoggerFactory.getLogger(CaseInsensitiveStringMap.class); + private static final SparkLogger logger = + SparkLoggerFactory.getLogger(CaseInsensitiveStringMap.class); public static CaseInsensitiveStringMap empty() { return new CaseInsensitiveStringMap(new HashMap<>(0)); @@ -59,8 +62,8 @@ public CaseInsensitiveStringMap(Map originalMap) { for (Map.Entry entry : originalMap.entrySet()) { String key = toLowerCase(entry.getKey()); if (delegate.containsKey(key)) { - logger.warn("Converting duplicated key " + entry.getKey() + - " into CaseInsensitiveStringMap."); + logger.warn("Converting duplicated key {} into CaseInsensitiveStringMap.", + MDC.of(LogKeys.KEY$.MODULE$, entry.getKey())); } delegate.put(key, entry.getValue()); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java index 4163af9bfda58..721e6a60befe2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java @@ -49,26 +49,43 @@ public int numElements() { return length; } + /** + * Sets all the appropriate null bits in the input UnsafeArrayData. + * + * @param arrayData The UnsafeArrayData to set the null bits for + * @return The UnsafeArrayData with the null bits set + */ + private UnsafeArrayData setNullBits(UnsafeArrayData arrayData) { + if (data.hasNull()) { + for (int i = 0; i < length; i++) { + if (data.isNullAt(i)) { + arrayData.setNullAt(i); + } + } + } + return arrayData; + } + @Override public ArrayData copy() { DataType dt = data.dataType(); if (dt instanceof BooleanType) { - return UnsafeArrayData.fromPrimitiveArray(toBooleanArray()); + return setNullBits(UnsafeArrayData.fromPrimitiveArray(toBooleanArray())); } else if (dt instanceof ByteType) { - return UnsafeArrayData.fromPrimitiveArray(toByteArray()); + return setNullBits(UnsafeArrayData.fromPrimitiveArray(toByteArray())); } else if (dt instanceof ShortType) { - return UnsafeArrayData.fromPrimitiveArray(toShortArray()); + return setNullBits(UnsafeArrayData.fromPrimitiveArray(toShortArray())); } else if (dt instanceof IntegerType || dt instanceof DateType || dt instanceof YearMonthIntervalType) { - return UnsafeArrayData.fromPrimitiveArray(toIntArray()); + return setNullBits(UnsafeArrayData.fromPrimitiveArray(toIntArray())); } else if (dt instanceof LongType || dt instanceof TimestampType || dt instanceof DayTimeIntervalType) { - return UnsafeArrayData.fromPrimitiveArray(toLongArray()); + return setNullBits(UnsafeArrayData.fromPrimitiveArray(toLongArray())); } else if (dt instanceof FloatType) { - return UnsafeArrayData.fromPrimitiveArray(toFloatArray()); + return setNullBits(UnsafeArrayData.fromPrimitiveArray(toFloatArray())); } else if (dt instanceof DoubleType) { - return UnsafeArrayData.fromPrimitiveArray(toDoubleArray()); + return setNullBits(UnsafeArrayData.fromPrimitiveArray(toDoubleArray())); } else { return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the elements are copied. 
} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala index 4ac62b987b151..1b2013d87eedf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/StructFilters.scala @@ -156,6 +156,8 @@ object StructFilters { Some(Literal(true, BooleanType)) case sources.AlwaysFalse() => Some(Literal(false, BooleanType)) + case _: sources.CollatedFilter => + None } translate(filter) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index e741387d7657f..ba6764444bdf3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -254,7 +254,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor TypeCoercion.typeCoercionRules } - override def batches: Seq[Batch] = Seq( + private def earlyBatches: Seq[Batch] = Seq( Batch("Substitution", fixedPoint, new SubstituteExecuteImmediate(catalogManager), // This rule optimizes `UpdateFields` expression chains so looks more like optimization rule. @@ -274,7 +274,10 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor Batch("Simple Sanity Check", Once, LookupFunctions), Batch("Keep Legacy Outputs", Once, - KeepLegacyOutputs), + KeepLegacyOutputs) + ) + + override def batches: Seq[Batch] = earlyBatches ++ Seq( Batch("Resolution", fixedPoint, new ResolveCatalogs(catalogManager) :: ResolveInsertInto :: @@ -319,17 +322,19 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor ResolveTimeZone :: ResolveRandomSeed :: ResolveBinaryArithmetic :: - ResolveIdentifierClause :: + new ResolveIdentifierClause(earlyBatches) :: ResolveUnion :: ResolveRowLevelCommandAssignments :: RewriteDeleteFromTable :: RewriteUpdateTable :: RewriteMergeIntoTable :: + MoveParameterizedQueriesDown :: BindParameters :: typeCoercionRules() ++ Seq( ResolveWithCTE, ExtractDistributedSequenceID) ++ + Seq(ResolveUpdateEventTimeWatermarkColumn) ++ extendedResolutionRules : _*), Batch("Remove TempResolvedColumn", Once, RemoveTempResolvedColumn), Batch("Post-Hoc Resolution", Once, @@ -339,11 +344,16 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor new ResolveHints.RemoveAllHints), Batch("Nondeterministic", Once, PullOutNondeterministic), - Batch("UDF", Once, + Batch("ScalaUDF Null Handling", fixedPoint, + // `HandleNullInputsForUDF` may wrap the `ScalaUDF` with `If` expression to return null for + // null inputs, so the result can be null even if `ScalaUDF#nullable` is false. We need to + // run `UpdateAttributeNullability` to update nullability of the UDF output attribute in + // downstream operators. After updating attribute nullability, `ScalaUDF`s in downstream + // operators may need null handling as well, so we should run these two rules repeatedly. 
HandleNullInputsForUDF, - ResolveEncodersInUDF), - Batch("UpdateNullability", Once, UpdateAttributeNullability), + Batch("UDF", Once, + ResolveEncodersInUDF), Batch("Subquery", Once, UpdateOuterReferences), Batch("Cleanup", fixedPoint, @@ -1659,7 +1669,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor case u: UpdateTable => resolveReferencesInUpdate(u) - case m @ MergeIntoTable(targetTable, sourceTable, _, _, _, _) + case m @ MergeIntoTable(targetTable, sourceTable, _, _, _, _, _) if !m.resolved && targetTable.resolved && sourceTable.resolved => EliminateSubqueryAliases(targetTable) match { @@ -2201,11 +2211,19 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor val alias = SubqueryAlias.generateSubqueryName(s"_${tableArgs.size}") // Propagate the column indexes for TABLE arguments to the PythonUDTF instance. + val f: FunctionTableSubqueryArgumentExpression = tableArgs.head._1 val tvfWithTableColumnIndexes = tvf match { case g @ Generate(pyudtf: PythonUDTF, _, _, _, _, _) - if tableArgs.head._1.partitioningExpressionIndexes.nonEmpty => - val partitionColumnIndexes = - PythonUDTFPartitionColumnIndexes(tableArgs.head._1.partitioningExpressionIndexes) + if f.extraProjectedPartitioningExpressions.nonEmpty => + val partitionColumnIndexes = if (f.selectedInputExpressions.isEmpty) { + PythonUDTFPartitionColumnIndexes(f.partitioningExpressionIndexes) + } else { + // If the UDTF specified 'select' expression(s), we added a projection to compute + // them plus the 'partitionBy' expression(s) afterwards. + PythonUDTFPartitionColumnIndexes( + (0 until f.extraProjectedPartitioningExpressions.length) + .map(_ + f.selectedInputExpressions.length)) + } g.copy(generator = pyudtf.copy( pythonUDTFPartitionColumnIndexes = Some(partitionColumnIndexes))) case _ => tvf @@ -4002,6 +4020,8 @@ object EliminateEventTimeWatermark extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning( _.containsPattern(EVENT_TIME_WATERMARK)) { case EventTimeWatermark(_, _, child) if child.resolved && !child.isStreaming => child + case UpdateEventTimeWatermarkColumn(_, _, child) if child.resolved && !child.isStreaming => + child } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 10bff5e6e59a2..9f3eee5198a16 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import scala.collection.mutable import org.apache.spark.SparkException +import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.ExtendedAnalysisException import org.apache.spark.sql.catalyst.expressions._ @@ -41,7 +42,7 @@ import org.apache.spark.util.Utils /** * Throws user facing errors when passed invalid queries that fail to analyze. 
*/ -trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsBase { +trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsBase with Logging { protected def isView(nameParts: Seq[String]): Boolean @@ -110,9 +111,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB } /** Check and throw exception when a given resolved plan contains LateralColumnAliasReference. */ - private def checkNotContainingLCA(exprSeq: Seq[NamedExpression], plan: LogicalPlan): Unit = { - if (!plan.resolved) return - exprSeq.foreach(_.transformDownWithPruning(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) { + private def checkNotContainingLCA(exprs: Seq[Expression], plan: LogicalPlan): Unit = { + exprs.foreach(_.transformDownWithPruning(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) { case lcaRef: LateralColumnAliasReference => throw SparkException.internalError("Resolved plan should not contain any " + s"LateralColumnAliasReference.\nDebugging information: plan:\n$plan", @@ -143,54 +143,22 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB errorClass, missingCol, orderedCandidates, a.origin) } - private def checkUnreferencedCTERelations( - cteMap: mutable.Map[Long, (CTERelationDef, Int, mutable.Map[Long, Int])], - visited: mutable.Map[Long, Boolean], - cteId: Long): Unit = { - if (visited(cteId)) { - return - } - val (cteDef, _, refMap) = cteMap(cteId) - refMap.foreach { case (id, _) => - checkUnreferencedCTERelations(cteMap, visited, id) - } - checkAnalysis0(cteDef.child) - visited(cteId) = true - } - def checkAnalysis(plan: LogicalPlan): Unit = { - val inlineCTE = InlineCTE(alwaysInline = true) - val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int, mutable.Map[Long, Int])] - inlineCTE.buildCTEMap(plan, cteMap) - val visited: mutable.Map[Long, Boolean] = mutable.Map.empty.withDefaultValue(false) - cteMap.foreach { case (cteId, (relation, refCount, _)) => - // If a CTE relation is never used, it will disappear after inline. Here we explicitly check - // analysis for it, to make sure the entire query plan is valid. - try { - // If a CTE relation ref count is 0, the other CTE relations that reference it - // should also be checked by checkAnalysis0. This code will also guarantee the leaf - // relations that do not reference any others are checked first. - if (refCount == 0) { - checkUnreferencedCTERelations(cteMap, visited, cteId) - } - } catch { - case e: AnalysisException => - throw new ExtendedAnalysisException(e, relation.child) - } - } - // Inline all CTEs in the plan to help check query plan structures in subqueries. - var inlinedPlan: Option[LogicalPlan] = None - try { - inlinedPlan = Some(inlineCTE(plan)) + // We should inline all CTE relations to restore the original plan shape, as the analysis check + // may need to match certain plan shapes. For dangling CTE relations, they will still be kept + // in the original `WithCTE` node, as we need to perform analysis check for them as well. 
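// Illustration of the dangling-CTE behavior described above (hedged sketch, assuming a
// SparkSession `spark` and hypothetical table/column names): the CTE is never referenced,
// so plain inlining would drop it, but keepDanglingRelations = true keeps it in the plan
// and its unresolved column still surfaces as an analysis error:
//   spark.sql("WITH unused AS (SELECT missing_col FROM some_table) SELECT 1")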
+ val inlineCTE = InlineCTE(alwaysInline = true, keepDanglingRelations = true) + val inlinedPlan: LogicalPlan = try { + inlineCTE(plan) } catch { case e: AnalysisException => throw new ExtendedAnalysisException(e, plan) } try { - checkAnalysis0(inlinedPlan.get) + checkAnalysis0(inlinedPlan) } catch { case e: AnalysisException => - throw new ExtendedAnalysisException(e, inlinedPlan.get) + throw new ExtendedAnalysisException(e, inlinedPlan) } plan.setAnalyzed() } @@ -286,6 +254,14 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB hof.invalidFormat(checkRes) } + case hof: HigherOrderFunction + if hof.resolved && hof.functions + .exists(_.exists(_.isInstanceOf[PythonUDF])) => + val u = hof.functions.flatMap(_.find(_.isInstanceOf[PythonUDF])).head + hof.failAnalysis( + errorClass = "UNSUPPORTED_FEATURE.LAMBDA_FUNCTION_WITH_PYTHON_UDF", + messageParameters = Map("funcName" -> toSQLExpr(u))) + // If an attribute can't be resolved as a map key of string type, either the key should be // surrounded with single quotes, or there is a typo in the attribute name. case GetMapValue(map, key: Attribute) if isMapWithStringKey(map) && !key.resolved => @@ -299,6 +275,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB // Early checks for column definitions, to produce better error messages ColumnDefinition.checkColumnDefinitions(operator) + var stagedError: Option[() => Unit] = None getAllExpressions(operator).foreach(_.foreachUp { case a: Attribute if !a.resolved => failUnresolvedAttribute(operator, a, "UNRESOLVED_COLUMN") @@ -337,12 +314,14 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB s"Cannot resolve the runtime replaceable expression ${toSQLExpr(e)}. " + s"The replacement is unresolved: ${toSQLExpr(e.replacement)}.") + // `Grouping` and `GroupingID` are considered as of having lower priority than the other + // nodes which cause errors. case g: Grouping => - g.failAnalysis( - errorClass = "UNSUPPORTED_GROUPING_EXPRESSION", messageParameters = Map.empty) + if (stagedError.isEmpty) stagedError = Some(() => g.failAnalysis( + errorClass = "UNSUPPORTED_GROUPING_EXPRESSION", messageParameters = Map.empty)) case g: GroupingID => - g.failAnalysis( - errorClass = "UNSUPPORTED_GROUPING_EXPRESSION", messageParameters = Map.empty) + if (stagedError.isEmpty) stagedError = Some(() => g.failAnalysis( + errorClass = "UNSUPPORTED_GROUPING_EXPRESSION", messageParameters = Map.empty)) case e: Expression if e.children.exists(_.isInstanceOf[WindowFunction]) && !e.isInstanceOf[WindowExpression] && e.resolved => @@ -401,6 +380,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case _ => }) + if (stagedError.isDefined) stagedError.get.apply() operator match { case RelationTimeTravel(u: UnresolvedRelation, _, _) => @@ -789,17 +769,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB msg = s"Found the unresolved operator: ${o.simpleString(SQLConf.get.maxToStringFields)}", context = o.origin.getQueryContext, summary = o.origin.context.summary) - // If the plan is resolved, the resolved Project, Aggregate or Window should have restored or - // resolved all lateral column alias references. Add check for extra safe. 
- case p @ Project(pList, _) - if pList.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) => - checkNotContainingLCA(pList, p) - case agg @ Aggregate(_, aggList, _) - if aggList.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) => - checkNotContainingLCA(aggList, agg) - case w @ Window(pList, _, _, _) - if pList.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) => - checkNotContainingLCA(pList, w) + // If the plan is resolved, all lateral column alias references should have been either + // restored or resolved. Add check for extra safe. + case o if o.expressions.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE)) => + checkNotContainingLCA(o.expressions, o) case _ => } } @@ -919,13 +892,36 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB // SPARK-18504/SPARK-18814: Block cases where GROUP BY columns // are not part of the correlated columns. + + // Note: groupByCols does not contain outer refs - grouping by an outer ref is always ok val groupByCols = AttributeSet(agg.groupingExpressions.flatMap(_.references)) - // Collect the local references from the correlated predicate in the subquery. - val subqueryColumns = getCorrelatedPredicates(query).flatMap(_.references) - .filterNot(conditions.flatMap(_.references).contains) - val correlatedCols = AttributeSet(subqueryColumns) - val invalidCols = groupByCols -- correlatedCols - // GROUP BY columns must be a subset of columns in the predicates + // Collect the inner query attributes that are guaranteed to have a single value for each + // outer row. See comment on getCorrelatedEquivalentInnerColumns. + val correlatedEquivalentCols = getCorrelatedEquivalentInnerColumns(query) + val nonEquivalentGroupByCols = groupByCols -- correlatedEquivalentCols + + val invalidCols = if (!SQLConf.get.getConf( + SQLConf.LEGACY_SCALAR_SUBQUERY_ALLOW_GROUP_BY_NON_EQUALITY_CORRELATED_PREDICATE)) { + nonEquivalentGroupByCols + } else { + // Legacy incorrect logic for checking for invalid group-by columns (see SPARK-48503). + // Allows any inner attribute that appears in a correlated predicate, even if it is a + // non-equality predicate or under an operator that can change the values of the attribute + // (see comments on getCorrelatedEquivalentInnerColumns for examples). + val subqueryColumns = getCorrelatedPredicates(query).flatMap(_.references) + .filterNot(conditions.flatMap(_.references).contains) + val correlatedCols = AttributeSet(subqueryColumns) + val invalidColsLegacy = groupByCols -- correlatedCols + if (!nonEquivalentGroupByCols.isEmpty && invalidColsLegacy.isEmpty) { + logWarning("Using legacy behavior for " + + s"${SQLConf.LEGACY_SCALAR_SUBQUERY_ALLOW_GROUP_BY_NON_EQUALITY_CORRELATED_PREDICATE + .key}. Query would be rejected with non-legacy behavior but is allowed by " + + s"legacy behavior. Query may be invalid and return wrong results if the scalar " + + s"subquery's group-by outputs multiple rows.") + } + invalidColsLegacy + } + if (invalidCols.nonEmpty) { expr.failAnalysis( errorClass = "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY." + @@ -1387,6 +1383,13 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB aggregated, canContainOuter && SQLConf.get.getConf(SQLConf.DECORRELATE_OFFSET_ENABLED)) + // We always inline CTE relations before analysis check, and only un-referenced CTE + // relations will be kept in the plan. 
Here we should simply skip them and check the + // children, as un-referenced CTE relations won't be executed anyway and doesn't need to + // be restricted by the current subquery correlation limitations. + case _: WithCTE | _: CTERelationDef => + plan.children.foreach(p => checkPlan(p, aggregated, canContainOuter)) + // Category 4: Any other operators not in the above 3 categories // cannot be on a correlation path, that is they are allowed only // under a correlation point but they and their descendant operators @@ -1410,7 +1413,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB if (struct.findNestedField( fieldNames, includeCollections = true, alter.conf.resolver).isDefined) { alter.failAnalysis( - errorClass = "FIELDS_ALREADY_EXISTS", + errorClass = "FIELD_ALREADY_EXISTS", messageParameters = Map( "op" -> op, "fieldNames" -> toSQLId(fieldNames), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala index 795e8a696b017..276062ce211d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CollationTypeCasts.scala @@ -22,10 +22,10 @@ import javax.annotation.Nullable import scala.annotation.tailrec import org.apache.spark.sql.catalyst.analysis.TypeCoercion.{hasStringType, haveSameType} -import org.apache.spark.sql.catalyst.expressions.{ArrayJoin, BinaryExpression, CaseWhen, Cast, Coalesce, Collate, Concat, ConcatWs, CreateArray, Elt, Expression, Greatest, If, In, InSubquery, Least} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{ArrayType, DataType, StringType} +import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StringType} object CollationTypeCasts extends TypeCoercionRule { override val transform: PartialFunction[Expression, Expression] = { @@ -45,12 +45,52 @@ object CollationTypeCasts extends TypeCoercionRule { caseWhenExpr.elseValue.map(e => castStringType(e, outputStringType).getOrElse(e)) CaseWhen(newBranches, newElseValue) + case stringLocate: StringLocate => + stringLocate.withNewChildren(collateToSingleType( + Seq(stringLocate.first, stringLocate.second)) :+ stringLocate.third) + + case substringIndex: SubstringIndex => + substringIndex.withNewChildren( + collateToSingleType( + Seq(substringIndex.first, substringIndex.second)) :+ substringIndex.third) + case eltExpr: Elt => eltExpr.withNewChildren(eltExpr.children.head +: collateToSingleType(eltExpr.children.tail)) + case overlayExpr: Overlay => + overlayExpr.withNewChildren(collateToSingleType(Seq(overlayExpr.input, overlayExpr.replace)) + ++ Seq(overlayExpr.pos, overlayExpr.len)) + + case regExpReplace: RegExpReplace => + val Seq(subject, rep) = collateToSingleType(Seq(regExpReplace.subject, regExpReplace.rep)) + val newChildren = Seq(subject, regExpReplace.regexp, rep, regExpReplace.pos) + regExpReplace.withNewChildren(newChildren) + + case stringPadExpr @ (_: StringRPad | _: StringLPad) => + val Seq(str, len, pad) = stringPadExpr.children + val Seq(newStr, newPad) = collateToSingleType(Seq(str, pad)) + stringPadExpr.withNewChildren(Seq(newStr, len, newPad)) + + case raiseError: RaiseError => + val newErrorParams = raiseError.errorParms.dataType match { + case 
MapType(StringType, StringType, _) => raiseError.errorParms + case _ => Cast(raiseError.errorParms, MapType(StringType, StringType)) + } + raiseError.withNewChildren(Seq(raiseError.errorClass, newErrorParams)) + + case framelessOffsetWindow @ (_: Lag | _: Lead) => + val Seq(input, offset, default) = framelessOffsetWindow.children + val Seq(newInput, newDefault) = collateToSingleType(Seq(input, default)) + framelessOffsetWindow.withNewChildren(Seq(newInput, offset, newDefault)) + case otherExpr @ ( _: In | _: InSubquery | _: CreateArray | _: ArrayJoin | _: Concat | _: Greatest | _: Least | - _: Coalesce | _: BinaryExpression | _: ConcatWs) => + _: Coalesce | _: ArrayContains | _: ArrayExcept | _: ConcatWs | _: Mask | _: StringReplace | + _: StringTranslate | _: StringTrim | _: StringTrimLeft | _: StringTrimRight | + _: ArrayIntersect | _: ArrayPosition | _: ArrayRemove | _: ArrayUnion | _: ArraysOverlap | + _: Contains | _: EndsWith | _: EqualNullSafe | _: EqualTo | _: FindInSet | _: GreaterThan | + _: GreaterThanOrEqual | _: LessThan | _: LessThanOrEqual | _: StartsWith | _: StringInstr | + _: ToNumber | _: TryToNumber) => val newChildren = collateToSingleType(otherExpr.children) otherExpr.withNewChildren(newChildren) } @@ -99,7 +139,10 @@ object CollationTypeCasts extends TypeCoercionRule { * complex DataTypes with collated StringTypes (e.g. ArrayType) */ def getOutputCollation(expr: Seq[Expression]): StringType = { - val explicitTypes = expr.filter(_.isInstanceOf[Collate]) + val explicitTypes = expr.filter { + case _: Collate => true + case _ => false + } .map(_.dataType.asInstanceOf[StringType].collationId) .distinct @@ -114,17 +157,22 @@ object CollationTypeCasts extends TypeCoercionRule { ) // Only implicit or default collations present case 0 => - val implicitTypes = expr.map(_.dataType) + val implicitTypes = expr.filter { + case Literal(_, _: StringType) => false + case cast: Cast if cast.getTagValue(Cast.USER_SPECIFIED_CAST).isEmpty => + cast.child.dataType.isInstanceOf[StringType] + case _ => true + } + .map(_.dataType) .filter(hasStringType) - .map(extractStringType) - .filter(dt => dt.collationId != SQLConf.get.defaultStringType.collationId) - .distinctBy(_.collationId) + .map(extractStringType(_).collationId) + .distinct if (implicitTypes.length > 1) { throw QueryCompilationErrors.implicitCollationMismatchError() } else { - implicitTypes.headOption.getOrElse(SQLConf.get.defaultStringType) + implicitTypes.headOption.map(StringType(_)).getOrElse(SQLConf.get.defaultStringType) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index 6e27192ead328..c10e000a098c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -136,6 +136,9 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { getAttrCandidates: () => Seq[Attribute], throws: Boolean, includeLastResort: Boolean): Expression = { + + val resolver = conf.resolver + def innerResolve(e: Expression, isTopLevel: Boolean): Expression = withOrigin(e.origin) { if (e.resolved) return e val resolved = e match { @@ -149,7 +152,7 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { case GetViewColumnByNameAndOrdinal( viewName, colName, ordinal, expectedNumCandidates, viewDDL) => val 
attrCandidates = getAttrCandidates() - val matched = attrCandidates.filter(a => conf.resolver(a.name, colName)) + val matched = attrCandidates.filter(a => resolver(a.name, colName)) if (matched.length != expectedNumCandidates) { throw QueryCompilationErrors.incompatibleViewSchemaChangeError( viewName, colName, expectedNumCandidates, matched, viewDDL) @@ -183,7 +186,7 @@ trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { case u @ UnresolvedExtractValue(child, fieldName) => val newChild = innerResolve(child, isTopLevel = false) if (newChild.resolved) { - ExtractValue(newChild, fieldName, conf.resolver) + ExtractValue(newChild, fieldName, resolver) } else { u.copy(child = newChild) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala index 9ad8368d007e7..6524ff9b2c57a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala @@ -92,7 +92,7 @@ object DecimalPrecision extends TypeCoercionRule { val resultType = widerDecimalType(p1, s1, p2, s2) val newE1 = if (e1.dataType == resultType) e1 else Cast(e1, resultType) val newE2 = if (e2.dataType == resultType) e2 else Cast(e2, resultType) - b.makeCopy(Array(newE1, newE2)) + b.withNewChildren(Seq(newE1, newE2)) } /** @@ -211,21 +211,21 @@ object DecimalPrecision extends TypeCoercionRule { case (l: Literal, r) if r.dataType.isInstanceOf[DecimalType] && l.dataType.isInstanceOf[IntegralType] && literalPickMinimumPrecision => - b.makeCopy(Array(Cast(l, DataTypeUtils.fromLiteral(l)), r)) + b.withNewChildren(Seq(Cast(l, DataTypeUtils.fromLiteral(l)), r)) case (l, r: Literal) if l.dataType.isInstanceOf[DecimalType] && r.dataType.isInstanceOf[IntegralType] && literalPickMinimumPrecision => - b.makeCopy(Array(l, Cast(r, DataTypeUtils.fromLiteral(r)))) + b.withNewChildren(Seq(l, Cast(r, DataTypeUtils.fromLiteral(r)))) // Promote integers inside a binary expression with fixed-precision decimals to decimals, // and fixed-precision decimals in an expression with floats / doubles to doubles case (l @ IntegralTypeExpression(), r @ DecimalExpression(_, _)) => - b.makeCopy(Array(Cast(l, DecimalType.forType(l.dataType)), r)) + b.withNewChildren(Seq(Cast(l, DecimalType.forType(l.dataType)), r)) case (l @ DecimalExpression(_, _), r @ IntegralTypeExpression()) => - b.makeCopy(Array(l, Cast(r, DecimalType.forType(r.dataType)))) + b.withNewChildren(Seq(l, Cast(r, DecimalType.forType(r.dataType)))) case (l, r @ DecimalExpression(_, _)) if isFloat(l.dataType) => - b.makeCopy(Array(l, Cast(r, DoubleType))) + b.withNewChildren(Seq(l, Cast(r, DoubleType))) case (l @ DecimalExpression(_, _), r) if isFloat(r.dataType) => - b.makeCopy(Array(Cast(l, DoubleType), r)) + b.withNewChildren(Seq(Cast(l, DoubleType), r)) case _ => b } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala index d696ff45b9b7f..0fa11b9c45038 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala @@ -38,28 +38,30 @@ case class RelationWrapper(cls: Class[_], outputAttrIds: Seq[Long]) object DeduplicateRelations 
extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { val newPlan = renewDuplicatedRelations(mutable.HashSet.empty, plan)._1 - if (newPlan.find(p => p.resolved && p.missingInput.nonEmpty).isDefined) { - // Wait for `ResolveMissingReferences` to resolve missing attributes first - return newPlan - } + + // Wait for `ResolveMissingReferences` to resolve missing attributes first + def noMissingInput(p: LogicalPlan) = !p.exists(_.missingInput.nonEmpty) + newPlan.resolveOperatorsUpWithPruning( _.containsAnyPattern(JOIN, LATERAL_JOIN, AS_OF_JOIN, INTERSECT, EXCEPT, UNION, COMMAND), ruleId) { case p: LogicalPlan if !p.childrenResolved => p // To resolve duplicate expression IDs for Join. - case j @ Join(left, right, _, _, _) if !j.duplicateResolved => + case j @ Join(left, right, _, _, _) if !j.duplicateResolved && noMissingInput(right) => j.copy(right = dedupRight(left, right)) // Resolve duplicate output for LateralJoin. - case j @ LateralJoin(left, right, _, _) if right.resolved && !j.duplicateResolved => + case j @ LateralJoin(left, right, _, _) + if right.resolved && !j.duplicateResolved && noMissingInput(right.plan) => j.copy(right = right.withNewPlan(dedupRight(left, right.plan))) // Resolve duplicate output for AsOfJoin. - case j @ AsOfJoin(left, right, _, _, _, _, _) if !j.duplicateResolved => + case j @ AsOfJoin(left, right, _, _, _, _, _) + if !j.duplicateResolved && noMissingInput(right) => j.copy(right = dedupRight(left, right)) // intersect/except will be rewritten to join at the beginning of optimizer. Here we need to // deduplicate the right side plan, so that we won't produce an invalid self-join later. - case i @ Intersect(left, right, _) if !i.duplicateResolved => + case i @ Intersect(left, right, _) if !i.duplicateResolved && noMissingInput(right) => i.copy(right = dedupRight(left, right)) - case e @ Except(left, right, _) if !e.duplicateResolved => + case e @ Except(left, right, _) if !e.duplicateResolved && noMissingInput(right) => e.copy(right = dedupRight(left, right)) // Only after we finish by-name resolution for Union case u: Union if !u.byName && !u.duplicateResolved => @@ -77,7 +79,8 @@ object DeduplicateRelations extends Rule[LogicalPlan] { } } u.copy(children = newChildren) - case merge: MergeIntoTable if !merge.duplicateResolved => + case merge: MergeIntoTable + if !merge.duplicateResolved && noMissingInput(merge.sourceTable) => merge.copy(sourceTable = dedupRight(merge.targetTable, merge.sourceTable)) } } @@ -252,12 +255,18 @@ object DeduplicateRelations extends Rule[LogicalPlan] { val newRightGroup = rewriteAttrs(c.rightGroup, rightAttrMap) val newLeftOrder = rewriteAttrs(c.leftOrder, leftAttrMap) val newRightOrder = rewriteAttrs(c.rightOrder, rightAttrMap) - val newKeyDes = c.keyDeserializer.asInstanceOf[UnresolvedDeserializer] - .copy(inputAttributes = newLeftGroup) - val newLeftDes = c.leftDeserializer.asInstanceOf[UnresolvedDeserializer] - .copy(inputAttributes = newLeftAttr) - val newRightDes = c.rightDeserializer.asInstanceOf[UnresolvedDeserializer] - .copy(inputAttributes = newRightAttr) + val newKeyDes = c.keyDeserializer match { + case u: UnresolvedDeserializer => u.copy(inputAttributes = newLeftGroup) + case e: Expression => e.withNewChildren(rewriteAttrs(e.children, leftAttrMap)) + } + val newLeftDes = c.leftDeserializer match { + case u: UnresolvedDeserializer => u.copy(inputAttributes = newLeftAttr) + case e: Expression => e.withNewChildren(rewriteAttrs(e.children, leftAttrMap)) + } + val newRightDes = c.rightDeserializer 
match { + case u: UnresolvedDeserializer => u.copy(inputAttributes = newRightAttr) + case e: Expression => e.withNewChildren(rewriteAttrs(e.children, rightAttrMap)) + } c.copy(keyDeserializer = newKeyDes, leftDeserializer = newLeftDes, rightDeserializer = newRightDes, leftGroup = newLeftGroup, rightGroup = newRightGroup, leftAttr = newLeftAttr, rightAttr = newRightAttr, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index c56d04b570e53..8a5a32c173bbf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -25,7 +25,7 @@ import scala.reflect.ClassTag import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.FUNCTION_NAME +import org.apache.spark.internal.LogKeys.FUNCTION_NAME import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.expressions._ @@ -381,7 +381,7 @@ object FunctionRegistry { expression[PosExplode]("posexplode"), expressionGeneratorOuter[PosExplode]("posexplode_outer"), expression[Rand]("rand"), - expression[Rand]("random", true), + expression[Rand]("random", true, Some("3.0.0")), expression[Randn]("randn"), expression[Stack]("stack"), expression[CaseWhen]("when"), @@ -416,7 +416,7 @@ object FunctionRegistry { expression[Log1p]("log1p"), expression[Log2]("log2"), expression[Log]("ln"), - expression[Remainder]("mod", true), + expression[Remainder]("mod", true, Some("2.3.0")), expression[UnaryMinus]("negative", true), expression[Pi]("pi"), expression[Pmod]("pmod"), @@ -451,6 +451,7 @@ object FunctionRegistry { // "try_*" function which always return Null instead of runtime error. 
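// [Editor's sketch, not part of the patch] Intended contract of the try_* functions registered
// below, including the newly added try_remainder: return NULL instead of raising a runtime
// error. Shown as an expectation under an assumed local SparkSession, not as a test from this patch.
val spark = org.apache.spark.sql.SparkSession.builder().master("local[1]").getOrCreate()
spark.sql("SELECT try_remainder(5, 0) AS r").show()  // expected: NULL, where `5 % 0` would fail under ANSI mode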
expression[TryAdd]("try_add"), expression[TryDivide]("try_divide"), + expression[TryRemainder]("try_remainder"), expression[TrySubtract]("try_subtract"), expression[TryMultiply]("try_multiply"), expression[TryElementAt]("try_element_at"), @@ -529,8 +530,8 @@ object FunctionRegistry { expressionBuilder("endswith", EndsWithExpressionBuilder), expression[Base64]("base64"), expression[BitLength]("bit_length"), - expression[Length]("char_length", true), - expression[Length]("character_length", true), + expression[Length]("char_length", true, Some("2.3.0")), + expression[Length]("character_length", true, Some("2.3.0")), expression[ConcatWs]("concat_ws"), expression[Decode]("decode"), expression[Elt]("elt"), @@ -558,7 +559,7 @@ object FunctionRegistry { expressionBuilder("lpad", LPadExpressionBuilder), expression[StringTrimLeft]("ltrim"), expression[JsonTuple]("json_tuple"), - expression[StringLocate]("position", true), + expression[StringLocate]("position", true, Some("2.3.0")), expression[FormatString]("printf", true), expression[RegExpExtract]("regexp_extract"), expression[RegExpExtractAll]("regexp_extract_all"), @@ -600,6 +601,10 @@ object FunctionRegistry { expression[RegExpCount]("regexp_count"), expression[RegExpSubStr]("regexp_substr"), expression[RegExpInStr]("regexp_instr"), + expression[IsValidUTF8]("is_valid_utf8"), + expression[MakeValidUTF8]("make_valid_utf8"), + expression[ValidateUTF8]("validate_utf8"), + expression[TryValidateUTF8]("try_validate_utf8"), // url functions expression[UrlEncode]("url_encode"), @@ -700,7 +705,7 @@ object FunctionRegistry { expression[MapConcat]("map_concat"), expression[Size]("size"), expression[Slice]("slice"), - expression[Size]("cardinality", true), + expression[Size]("cardinality", true, Some("2.4.0")), expression[ArraysZip]("arrays_zip"), expression[SortArray]("sort_array"), expression[Shuffle]("shuffle"), @@ -749,11 +754,11 @@ object FunctionRegistry { expression[InputFileBlockLength]("input_file_block_length"), expression[MonotonicallyIncreasingID]("monotonically_increasing_id"), expression[CurrentDatabase]("current_database"), - expression[CurrentDatabase]("current_schema", true), + expression[CurrentDatabase]("current_schema", true, Some("3.4.0")), expression[CurrentCatalog]("current_catalog"), expression[CurrentUser]("current_user"), - expression[CurrentUser]("user", setAlias = true), - expression[CurrentUser]("session_user", setAlias = true), + expression[CurrentUser]("user", true, Some("3.4.0")), + expression[CurrentUser]("session_user", true, Some("4.0.0")), expression[CallMethodViaReflection]("reflect"), expression[CallMethodViaReflection]("java_method", true), expression[SparkVersion]("version"), @@ -799,6 +804,9 @@ object FunctionRegistry { expression[BitwiseNot]("~"), expression[BitwiseOr]("|"), expression[BitwiseXor]("^"), + expression[ShiftLeft]("<<", true, Some("4.0.0")), + expression[ShiftRight](">>", true, Some("4.0.0")), + expression[ShiftRightUnsigned](">>>", true, Some("4.0.0")), expression[BitwiseCount]("bit_count"), expression[BitAndAgg]("bit_and"), expression[BitOrAgg]("bit_or"), @@ -821,7 +829,9 @@ object FunctionRegistry { expression[JsonObjectKeys]("json_object_keys"), // Variant - expression[ParseJson]("parse_json"), + expressionBuilder("parse_json", ParseJsonExpressionBuilder), + expressionBuilder("try_parse_json", TryParseJsonExpressionBuilder), + expression[IsVariantNull]("is_variant_null"), expressionBuilder("variant_get", VariantGetExpressionBuilder), expressionBuilder("try_variant_get", 
TryVariantGetExpressionBuilder), expression[SchemaOfVariant]("schema_of_variant"), @@ -854,7 +864,11 @@ object FunctionRegistry { // Xml expression[XmlToStructs]("from_xml"), expression[SchemaOfXml]("schema_of_xml"), - expression[StructsToXml]("to_xml") + expression[StructsToXml]("to_xml"), + + // Avro + expression[FromAvro]("from_avro"), + expression[ToAvro]("to_avro") ) val builtin: SimpleFunctionRegistry = { @@ -953,7 +967,14 @@ object FunctionRegistry { since: Option[String] = None): (String, (ExpressionInfo, FunctionBuilder)) = { val info = FunctionRegistryBase.expressionInfo[T](name, since) val funcBuilder = (expressions: Seq[Expression]) => { - assert(expressions.forall(_.resolved), "function arguments must be resolved.") + val (lambdas, others) = expressions.partition(_.isInstanceOf[LambdaFunction]) + if (lambdas.nonEmpty && !builder.supportsLambda) { + throw new AnalysisException( + errorClass = "INVALID_LAMBDA_FUNCTION_CALL.NON_HIGHER_ORDER_FUNCTION", + messageParameters = Map( + "class" -> builder.getClass.getCanonicalName)) + } + assert(others.forall(_.resolved), "function arguments must be resolved.") val rearrangedExpressions = rearrangeExpressions(name, builder, expressions) val expr = builder.build(name, rearrangedExpressions) if (setAlias) expr.setTagValue(FUNC_ALIAS, name) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HintErrorLogger.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HintErrorLogger.scala index 7338ef21a713c..68c6ae9c03e3c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HintErrorLogger.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HintErrorLogger.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{QUERY_HINT, RELATION_NAME, UNSUPPORTED_HINT_REASON} +import org.apache.spark.internal.LogKeys.{QUERY_HINT, RELATION_NAME, UNSUPPORTED_HINT_REASON} import org.apache.spark.sql.catalyst.plans.logical.{HintErrorHandler, HintInfo} /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala index ced7123dfcc14..f04b7799e35ea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveIdentifierClause.scala @@ -20,19 +20,24 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.{AliasHelper, EvalHelper, Expression} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} import org.apache.spark.sql.catalyst.trees.TreePattern.UNRESOLVED_IDENTIFIER import org.apache.spark.sql.types.StringType /** * Resolves the identifier expressions and builds the original plans/expressions. 
*/ -object ResolveIdentifierClause extends Rule[LogicalPlan] with AliasHelper with EvalHelper { +class ResolveIdentifierClause(earlyBatches: Seq[RuleExecutor[LogicalPlan]#Batch]) + extends Rule[LogicalPlan] with AliasHelper with EvalHelper { + + private val executor = new RuleExecutor[LogicalPlan] { + override def batches: Seq[Batch] = earlyBatches.asInstanceOf[Seq[Batch]] + } override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( _.containsAnyPattern(UNRESOLVED_IDENTIFIER)) { case p: PlanWithUnresolvedIdentifier if p.identifierExpr.resolved => - p.planBuilder.apply(evalIdentifierExpr(p.identifierExpr)) + executor.execute(p.planBuilder.apply(evalIdentifierExpr(p.identifierExpr))) case other => other.transformExpressionsWithPruning(_.containsAnyPattern(UNRESOLVED_IDENTIFIER)) { case e: ExpressionWithUnresolvedIdentifier if e.identifierExpr.resolved => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala index 4f5a11835c337..7ea90854932e5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper -import org.apache.spark.sql.catalyst.expressions.{AliasHelper, Attribute, Expression, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{AliasHelper, Attribute, Expression, IntegerLiteral, Literal, NamedExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, AppendColumns, LogicalPlan} import org.apache.spark.sql.catalyst.trees.TreePattern.{LATERAL_COLUMN_ALIAS_REFERENCE, UNRESOLVED_ATTRIBUTE} @@ -136,7 +136,19 @@ class ResolveReferencesInAggregate(val catalogManager: CatalogManager) extends S groupExprs } else { // This is a valid GROUP BY ALL aggregate. - expandedGroupExprs.get + expandedGroupExprs.get.zipWithIndex.map { case (expr, index) => + trimAliases(expr) match { + // HACK ALERT: If the expanded grouping expression is an integer literal, don't use it + // but use an integer literal of the index. The reason is we may repeatedly + // analyze the plan, and the original integer literal may cause failures + // with a later GROUP BY ordinal resolution. GROUP BY constant is + // meaningless so whatever value does not matter here. + case IntegerLiteral(_) => + // GROUP BY ordinal uses 1-based index. + Literal(index + 1) + case _ => expr + } + } } } else { groupExprs diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUpdateEventTimeWatermarkColumn.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUpdateEventTimeWatermarkColumn.scala new file mode 100644 index 0000000000000..31c4f068a83eb --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveUpdateEventTimeWatermarkColumn.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, LogicalPlan, UpdateEventTimeWatermarkColumn} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.UPDATE_EVENT_TIME_WATERMARK_COLUMN +import org.apache.spark.sql.errors.QueryCompilationErrors + +/** + * Extracts the watermark delay and adds it to the UpdateEventTimeWatermarkColumn + * logical node (if such a node is present). [[UpdateEventTimeWatermarkColumn]] node updates + * the eventTimeColumn for upstream operators. + * + * If the logical plan contains a [[UpdateEventTimeWatermarkColumn]] node, but no watermark + * has been defined, the query will fail with a compilation error. + */ +object ResolveUpdateEventTimeWatermarkColumn extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( + _.containsPattern(UPDATE_EVENT_TIME_WATERMARK_COLUMN), ruleId) { + case u: UpdateEventTimeWatermarkColumn if u.delay.isEmpty && u.childrenResolved => + val existingWatermarkDelay = u.child.collect { + case EventTimeWatermark(_, delay, _) => delay + } + + if (existingWatermarkDelay.isEmpty) { + // input dataset needs to have a event time column, we transfer the + // watermark delay from this column to user specified eventTimeColumnName + // in the output dataset. + throw QueryCompilationErrors.cannotAssignEventTimeColumn() + } + + val delay = existingWatermarkDelay.head + u.copy(delay = Some(delay)) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteCollationJoin.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteCollationJoin.scala new file mode 100644 index 0000000000000..ae29d21c7a71e --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteCollationJoin.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.util.UnsafeRowUtils +import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.StringType +import org.apache.spark.util.ArrayImplicits.SparkArrayOps + +/** + * This rule rewrites Join conditions to ensure that all types containing non-binary collated + * strings are compared correctly. This is necessary because join conditions are evaluated using + * binary equality, which does not work correctly for non-binary collated strings. However, by + * injecting CollationKey expressions into the join condition, we can ensure that the comparison + * is done correctly, which then allows HashJoin to work properly on this type of data. + */ +object RewriteCollationJoin extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case j @ Join(_, _, _, Some(condition), _) => + val newCondition = condition transform { + case e @ Equality(l: AttributeReference, r: AttributeReference) => + e.withNewChildren(Seq(processExpression(l, l.dataType), processExpression(r, r.dataType))) + } + if (!newCondition.fastEquals(condition)) { + j.copy(condition = Some(newCondition)) + } else { + j + } + } + + /** + * Recursively process the expression in order to replace non-binary collated strings with their + * associated collation keys. This is necessary to ensure that the join condition is evaluated + * correctly for all types containing non-binary collated strings, including structs and arrays. + */ + private def processExpression(expr: Expression, dt: DataType): Expression = { + dt match { + // For binary stable expressions, no special handling is needed. + case _ if UnsafeRowUtils.isBinaryStable(dt) => + expr + + // Inject CollationKey for non-binary collated strings. + case _: StringType => + CollationKey(expr) + + // Recursively process struct fields for non-binary structs. + case StructType(fields) => + processStruct(expr, fields) + + // Recursively process array elements for non-binary arrays. + case ArrayType(et, containsNull) => + processArray(expr, et, containsNull) + + // Joins are not supported on maps, so there's no special handling for MapType. 
+ case _ => + expr + } + } + + private def processStruct(str: Expression, fields: Array[StructField]): Expression = { + val struct = CreateNamedStruct(fields.zipWithIndex.flatMap { case (f, i) => + Seq(Literal(f.name), processExpression(GetStructField(str, i, Some(f.name)), f.dataType)) + }.toImmutableArraySeq) + if (str.nullable) { + If(IsNull(str), Literal(null, struct.dataType), struct) + } else { + struct + } + } + + private def processArray(arr: Expression, et: DataType, containsNull: Boolean): Expression = { + val param: NamedExpression = NamedLambdaVariable("a", et, containsNull) + val funcBody: Expression = processExpression(param, et) + if (!funcBody.fastEquals(param)) { + ArrayTransform(arr, LambdaFunction(funcBody, Seq(param))) + } else { + arr + } + } + +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala index 9e020cb55ed56..dacee70cf1286 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteMergeIntoTable.scala @@ -45,7 +45,7 @@ object RewriteMergeIntoTable extends RewriteRowLevelCommand with PredicateHelper override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case m @ MergeIntoTable(aliasedTable, source, cond, matchedActions, notMatchedActions, - notMatchedBySourceActions) if m.resolved && m.rewritable && m.aligned && + notMatchedBySourceActions, _) if m.resolved && m.rewritable && m.aligned && matchedActions.isEmpty && notMatchedActions.size == 1 && notMatchedBySourceActions.isEmpty => @@ -79,7 +79,7 @@ object RewriteMergeIntoTable extends RewriteRowLevelCommand with PredicateHelper } case m @ MergeIntoTable(aliasedTable, source, cond, matchedActions, notMatchedActions, - notMatchedBySourceActions) if m.resolved && m.rewritable && m.aligned && + notMatchedBySourceActions, _) if m.resolved && m.rewritable && m.aligned && matchedActions.isEmpty && notMatchedBySourceActions.isEmpty => EliminateSubqueryAliases(aliasedTable) match { @@ -120,7 +120,7 @@ object RewriteMergeIntoTable extends RewriteRowLevelCommand with PredicateHelper } case m @ MergeIntoTable(aliasedTable, source, cond, matchedActions, notMatchedActions, - notMatchedBySourceActions) if m.resolved && m.rewritable && m.aligned => + notMatchedBySourceActions, _) if m.resolved && m.rewritable && m.aligned => EliminateSubqueryAliases(aliasedTable) match { case r @ DataSourceV2Relation(tbl: SupportsRowLevelOperations, _, _, _, _) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala index e9c4dd0be7d92..ef425be42f981 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/StreamingJoinHelper.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import scala.util.control.NonFatal import org.apache.spark.internal.Logging -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys @@ -168,7 +168,7 @@ object StreamingJoinHelper extends PredicateHelper 
with Logging { if (constraintTerms.size > 1) { logWarning( log"Failed to extract state constraint terms: multiple time terms in condition\n\t" + - log"${MDC(EXPRESSION_TERMS, terms.mkString("\n\t"))}") + log"${MDC(EXPR_TERMS, terms.mkString("\n\t"))}") return None } if (constraintTerms.isEmpty) { @@ -289,7 +289,7 @@ object StreamingJoinHelper extends PredicateHelper with Logging { logWarning( log"Failed to extract state value watermark from condition " + log"${MDC(JOIN_CONDITION, exprToCollectFrom)} due to " + - log"${MDC(JOIN_CONDITION_SUB_EXPRESSION, a)}") + log"${MDC(JOIN_CONDITION_SUB_EXPR, a)}") invalid = true Seq.empty } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 259e28b62bca7..08c5b3531b4c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.trees.AlwaysProcess import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.types.{AbstractArrayType, AbstractStringType, StringTypeAnyCollation} +import org.apache.spark.sql.internal.types.{AbstractArrayType, AbstractMapType, AbstractStringType, StringTypeAnyCollation} import org.apache.spark.sql.types._ import org.apache.spark.sql.types.UpCastRule.numericPrecedence @@ -998,9 +998,10 @@ object TypeCoercion extends TypeCoercionBase { case (_: StringType, AnyTimestampType) => AnyTimestampType.defaultConcreteType case (_: StringType, BinaryType) => BinaryType // Cast any atomic type to string. - case (any: AtomicType, _: StringType) if !any.isInstanceOf[StringType] => StringType + case (any: AtomicType, st: StringType) if !any.isInstanceOf[StringType] => st case (any: AtomicType, st: AbstractStringType) - if !any.isInstanceOf[StringType] => st.defaultConcreteType + if !any.isInstanceOf[StringType] => + st.defaultConcreteType // When we reach here, input type is not acceptable for any types in this type collection, // try to find the first one we can implicitly cast. @@ -1047,6 +1048,15 @@ object TypeCoercion extends TypeCoercionBase { } } + case (MapType(fromKeyType, fromValueType, fn), AbstractMapType(toKeyType, toValueType)) => + val newKeyType = implicitCast(fromKeyType, toKeyType).orNull + val newValueType = implicitCast(fromValueType, toValueType).orNull + if (newKeyType != null && newValueType != null) { + MapType(newKeyType, newValueType, fn) + } else { + null + } + case _ => null } Option(ret) @@ -1110,22 +1120,22 @@ object TypeCoercion extends TypeCoercionBase { case a @ BinaryArithmetic(left @ StringTypeExpression(), right) if !isIntervalType(right.dataType) => - a.makeCopy(Array(Cast(left, DoubleType), right)) + a.withNewChildren(Seq(Cast(left, DoubleType), right)) case a @ BinaryArithmetic(left, right @ StringTypeExpression()) if !isIntervalType(left.dataType) => - a.makeCopy(Array(left, Cast(right, DoubleType))) + a.withNewChildren(Seq(left, Cast(right, DoubleType))) // For equality between string and timestamp we cast the string to a timestamp // so that things like rounding of subsecond precision does not affect the comparison. 
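// [Editor's sketch, not part of the patch] Example of the comment above: the string side of a
// string/timestamp equality is cast to TIMESTAMP, so sub-second precision is compared exactly.
// Assumes a local SparkSession with the default session time zone.
val spark = org.apache.spark.sql.SparkSession.builder().master("local[1]").getOrCreate()
spark.sql("SELECT timestamp'2024-01-01 10:00:00.123' = '2024-01-01 10:00:00.123' AS eq").show()
// expected: eq = true, because the string literal is cast to TIMESTAMP (not the timestamp to string)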
case p @ Equality(left @ StringTypeExpression(), right @ TimestampTypeExpression()) => - p.makeCopy(Array(Cast(left, TimestampType), right)) + p.withNewChildren(Seq(Cast(left, TimestampType), right)) case p @ Equality(left @ TimestampTypeExpression(), right @ StringTypeExpression()) => - p.makeCopy(Array(left, Cast(right, TimestampType))) + p.withNewChildren(Seq(left, Cast(right, TimestampType))) case p @ BinaryComparison(left, right) if findCommonTypeForBinaryComparison(left.dataType, right.dataType, conf).isDefined => val commonType = findCommonTypeForBinaryComparison(left.dataType, right.dataType, conf).get - p.makeCopy(Array(castExpr(left, commonType), castExpr(right, commonType))) + p.withNewChildren(Seq(castExpr(left, commonType), castExpr(right, commonType))) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala index e39ec267fa612..2366dc4c0eb86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UnsupportedOperationChecker.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ANALYSIS_ERROR, QUERY_PLAN} +import org.apache.spark.internal.LogKeys.{ANALYSIS_ERROR, QUERY_PLAN} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.ExtendedAnalysisException import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, CurrentDate, CurrentTimestampLike, Expression, GroupingSets, LocalTimestamp, MonotonicallyIncreasingID, SessionWindow, WindowExpression} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala index f1cc44b270bc5..5b365a0d49aea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/parameters.scala @@ -19,9 +19,9 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.expressions.{Alias, CreateArray, CreateMap, CreateNamedStruct, Expression, LeafExpression, Literal, MapFromArrays, MapFromEntries, SubqueryExpression, Unevaluable, VariableReference} -import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SupervisingCommand} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.trees.TreePattern.{PARAMETER, PARAMETERIZED_QUERY, TreePattern, UNRESOLVED_WITH} +import org.apache.spark.sql.catalyst.trees.TreePattern.{COMMAND, PARAMETER, PARAMETERIZED_QUERY, TreePattern, UNRESOLVED_WITH} import org.apache.spark.sql.errors.QueryErrorsBase import org.apache.spark.sql.types.DataType @@ -104,12 +104,64 @@ case class PosParameterizedQuery(child: LogicalPlan, args: Seq[Expression]) copy(child = newChild) } +/** + * Base class for rules that process parameterized queries. 
+ */ +abstract class ParameterizedQueryProcessor extends Rule[LogicalPlan] { + def assertUnresolvedPlanHasSingleParameterizedQuery(plan: LogicalPlan): Unit = { + if (plan.containsPattern(PARAMETERIZED_QUERY)) { + val parameterizedQueries = plan.collect { case p: ParameterizedQuery => p } + assert(parameterizedQueries.length == 1) + } + } +} + +/** + * Moves `ParameterizedQuery` inside `SupervisingCommand` for their supervised plans to be + * resolved later by the analyzer. + * + * - Basic case: + * `PosParameterizedQuery(ExplainCommand(SomeQuery(...)))` => + * `ExplainCommand(PosParameterizedQuery(SomeQuery(...)))` + * - Nested `SupervisedCommand`s are handled recursively: + * `PosParameterizedQuery(ExplainCommand(ExplainCommand(SomeQuery(...))))` => + * `ExplainCommand(ExplainCommand(PosParameterizedQuery(SomeQuery(...))))` + */ +object MoveParameterizedQueriesDown extends ParameterizedQueryProcessor { + override def apply(plan: LogicalPlan): LogicalPlan = { + assertUnresolvedPlanHasSingleParameterizedQuery(plan) + + plan.resolveOperatorsWithPruning(_.containsPattern(PARAMETERIZED_QUERY)) { + case pq: ParameterizedQuery if pq.exists(isSupervisingCommand) => + moveParameterizedQueryIntoSupervisingCommand(pq) + } + } + + private def moveParameterizedQueryIntoSupervisingCommand(pq: ParameterizedQuery): LogicalPlan = { + // Moves parameterized query down recursively to handle nested `SupervisingCommand`s + def transformSupervisedPlan: PartialFunction[LogicalPlan, LogicalPlan] = { + case command: SupervisingCommand => + command.withTransformedSupervisedPlan { + transformSupervisedPlan(_) + } + case plan => pq.withNewChildren(Seq(plan)) + } + + pq.child.resolveOperatorsWithPruning(_.containsPattern(COMMAND)) { + case command: SupervisingCommand => transformSupervisedPlan(command) + } + } + + private def isSupervisingCommand(plan: LogicalPlan): Boolean = + plan.containsPattern(COMMAND) && plan.isInstanceOf[SupervisingCommand] +} + /** * Finds all named parameters in `ParameterizedQuery` and substitutes them by literals or * by collection constructor functions such as `map()`, `array()`, `struct()` * from the user-specified arguments. */ -object BindParameters extends Rule[LogicalPlan] with QueryErrorsBase { +object BindParameters extends ParameterizedQueryProcessor with QueryErrorsBase { private def checkArgs(args: Iterable[(String, Expression)]): Unit = { def isNotAllowed(expr: Expression): Boolean = expr.exists { case _: Literal | _: CreateArray | _: CreateNamedStruct | @@ -131,11 +183,7 @@ object BindParameters extends Rule[LogicalPlan] with QueryErrorsBase { } override def apply(plan: LogicalPlan): LogicalPlan = { - if (plan.containsPattern(PARAMETERIZED_QUERY)) { - // One unresolved plan can have at most one ParameterizedQuery. 
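// [Editor's sketch, not part of the patch] As described in the MoveParameterizedQueriesDown
// scaladoc above, the ParameterizedQuery node is pushed below supervising commands such as
// EXPLAIN before parameters are bound, so a parameterized EXPLAIN is expected to work. Assumes
// a local SparkSession and the positional-argument overload of spark.sql (Spark 3.5+).
val spark = org.apache.spark.sql.SparkSession.builder().master("local[1]").getOrCreate()
spark.sql("EXPLAIN SELECT ?", Array(42)).show(truncate = false)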
- val parameterizedQueries = plan.collect { case p: ParameterizedQuery => p } - assert(parameterizedQueries.length == 1) - } + assertUnresolvedPlanHasSingleParameterizedQuery(plan) plan.resolveOperatorsWithPruning(_.containsPattern(PARAMETERIZED_QUERY)) { // We should wait for `CTESubstitution` to resolve CTE before binding parameters, as CTE diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 7a3cc4bc8e83e..a2cab60b392b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -98,7 +98,7 @@ case class ExpressionWithUnresolvedIdentifier( /** * Holds the name of a relation that has yet to be looked up in a catalog. * - * @param multipartIdentifier table name + * @param multipartIdentifier table name, the location of files or Kafka topic name, etc. * @param options options to scan this relation. */ case class UnresolvedRelation( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala index 66a5052d86f0a..7015ee568290b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/view.scala @@ -35,6 +35,54 @@ object EliminateView extends Rule[LogicalPlan] with CastSupport { } } +/** + * ViewBindingMode is used to specify the expected schema binding mode when we want to create or + * replace a view in [[CreateViewStatement]]. + */ +sealed trait ViewSchemaMode + +/** + * SchemaBinding means the view only tolerates minimal changes to the underlying schema. + * It can tolerate extra columns in SELECT * and upcast to more generic types. + */ +object SchemaBinding extends ViewSchemaMode { + override val toString: String = "BINDING" +} + +/** + * SchemaCompensation means the view only tolerates moderate changes to the underlying schema. + * It can tolerate extra columns in SELECT * and explicit casts between view body and view columns. + */ +object SchemaCompensation extends ViewSchemaMode { + override val toString: String = "COMPENSATION" +} + +/** + * SchemaTypeEvolution means the view will adopt changed column types. + * In this mode the view will refresh its metastore data on reference to keep it up to day. + */ +object SchemaTypeEvolution extends ViewSchemaMode { + override val toString: String = "TYPE EVOLUTION" +} + +/** + * SchemaUnsupported means the feature is not enabled. + * This mode is only transient and not persisted + */ +object SchemaUnsupported extends ViewSchemaMode { + override val toString: String = "UNSUPPORTED" +} + +/** + * SchemaEvolution means the view will adopt changed column types and number of columns. + * This is a result of not having a column list and WITH EVOLUTION. + * Without an explicit column list the will also adopt changes to column names. + * In this mode the view will refresh its metastore data on reference to keep it up to day. + */ +object SchemaEvolution extends ViewSchemaMode { + override val toString: String = "EVOLUTION" +} + /** * ViewType is used to specify the expected view type when we want to create or replace a view in * [[CreateViewStatement]]. 
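// [Editor's sketch, not part of the patch] The ViewSchemaMode objects above expose the SQL
// keyword of each mode via toString (BINDING, COMPENSATION, TYPE EVOLUTION, EVOLUTION,
// UNSUPPORTED). A hypothetical helper rendering the clause a mode corresponds to, assuming the
// WITH SCHEMA ... view syntax these modes back:
def schemaClause(mode: ViewSchemaMode): Option[String] = mode match {
  case SchemaUnsupported => None                        // transient only, never persisted
  case other             => Some(s"WITH SCHEMA $other") // e.g. Some("WITH SCHEMA COMPENSATION")
}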
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala index eb649c4d4796a..749c9df40f14f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala @@ -26,6 +26,7 @@ import org.apache.hadoop.util.Shell import org.apache.spark.sql.catalyst.analysis.Resolver import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, BasePredicate, BoundReference, Expression, Predicate} +import org.apache.spark.sql.catalyst.expressions.Hex.unhexDigits import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf @@ -40,7 +41,7 @@ object ExternalCatalogUtils { // The following string escaping code is mainly copied from Hive (o.a.h.h.common.FileUtils). ////////////////////////////////////////////////////////////////////////////////////////////////// - val charToEscape = { + final val (charToEscape, sizeOfCharToEscape) = { val bitSet = new java.util.BitSet(128) /** @@ -60,54 +61,77 @@ object ExternalCatalogUtils { Array(' ', '<', '>', '|').foreach(bitSet.set(_)) } - bitSet + (bitSet, bitSet.size) } - def needsEscaping(c: Char): Boolean = { - c < charToEscape.size() && charToEscape.get(c) + private final val HEX_CHARS = "0123456789ABCDEF".toCharArray + + @inline final def needsEscaping(c: Char): Boolean = { + c < sizeOfCharToEscape && charToEscape.get(c) } def escapePathName(path: String): String = { - val builder = new StringBuilder() - path.foreach { c => - if (needsEscaping(c)) { - builder.append('%') - builder.append(f"${c.asInstanceOf[Int]}%02X") - } else { - builder.append(c) + if (path == null || path.isEmpty) { + return path + } + val length = path.length + var firstIndex = 0 + while (firstIndex < length && !needsEscaping(path.charAt(firstIndex))) { + firstIndex += 1 + } + if (firstIndex == length) { + path + } else { + val sb = new java.lang.StringBuilder(length + 16) + if (firstIndex != 0) sb.append(path, 0, firstIndex) + while(firstIndex < length) { + val c = path.charAt(firstIndex) + if (needsEscaping(c)) { + sb.append('%').append(HEX_CHARS((c & 0xF0) >> 4)).append(HEX_CHARS(c & 0x0F)) + } else { + sb.append(c) + } + firstIndex += 1 } + sb.toString } - - builder.toString() } - def unescapePathName(path: String): String = { - val sb = new StringBuilder - var i = 0 - - while (i < path.length) { - val c = path.charAt(i) - if (c == '%' && i + 2 < path.length) { - val code: Int = try { - Integer.parseInt(path.substring(i + 1, i + 3), 16) - } catch { - case _: Exception => -1 - } - if (code >= 0) { - sb.append(code.asInstanceOf[Char]) - i += 3 + if (path == null || path.isEmpty) { + return path + } + var plaintextEndIdx = path.indexOf('%') + val length = path.length + if (plaintextEndIdx == -1 || plaintextEndIdx + 2 >= length) { + // fast path, no %xx encoding found then return the string identity + path + } else { + val sb = new java.lang.StringBuilder(length) + var plaintextStartIdx = 0 + while(plaintextEndIdx != -1 && plaintextEndIdx + 2 < length) { + if (plaintextEndIdx > plaintextStartIdx) sb.append(path, plaintextStartIdx, plaintextEndIdx) + val high = path.charAt(plaintextEndIdx + 1) + if ((high >>> 8) == 0 && 
unhexDigits(high) != -1) { + val low = path.charAt(plaintextEndIdx + 2) + if ((low >>> 8) == 0 && unhexDigits(low) != -1) { + sb.append((unhexDigits(high) << 4 | unhexDigits(low)).asInstanceOf[Char]) + plaintextStartIdx = plaintextEndIdx + 3 + } else { + sb.append('%') + plaintextStartIdx = plaintextEndIdx + 1 + } } else { - sb.append(c) - i += 1 + sb.append('%') + plaintextStartIdx = plaintextEndIdx + 1 } - } else { - sb.append(c) - i += 1 + plaintextEndIdx = path.indexOf('%', plaintextStartIdx) + } + if (plaintextStartIdx < length) { + sb.append(path, plaintextStartIdx, length) } + sb.toString } - - sb.toString() } def generatePartitionPath( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala index f351993eb1b7a..aeeedebe330dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/GlobalTempViewManager.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.errors.QueryCompilationErrors * * @param database The system preserved virtual database that keeps all the global temporary views. */ -class GlobalTempViewManager(val database: String) { +class GlobalTempViewManager(database: String) { /** List of view definitions, mapping from view name to logical plan. */ @GuardedBy("this") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/RoutineLanguage.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/RoutineLanguage.scala new file mode 100644 index 0000000000000..fc02bf0c606db --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/RoutineLanguage.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +/** + * Supported routine languages for UDFs created via SQL. 
+ */ +sealed trait RoutineLanguage { + def name: String +} + +case object LanguageSQL extends RoutineLanguage { + override def name: String = "SQL" +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index d17d0a97387d9..0e0852d0a550d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -35,7 +35,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst._ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder -import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, ExpressionInfo, UpCast} +import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Expression, ExpressionInfo, NamedExpression, UpCast} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias, View} import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} @@ -44,7 +44,7 @@ import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.GLOBAL_TEMP_DATABASE -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.sql.util.{CaseInsensitiveStringMap, PartitioningUtils} import org.apache.spark.util.ArrayImplicits._ import org.apache.spark.util.Utils @@ -123,6 +123,7 @@ class SessionCatalog( lazy val externalCatalog = externalCatalogBuilder() lazy val globalTempViewManager = globalTempViewManagerBuilder() + val globalTempDatabase: String = SQLConf.get.globalTempDatabase /** List of temporary views, mapping from table name to their logical plan. 
*/ @GuardedBy("this") @@ -273,9 +274,9 @@ class SessionCatalog( def createDatabase(dbDefinition: CatalogDatabase, ignoreIfExists: Boolean): Unit = { val dbName = format(dbDefinition.name) - if (dbName == globalTempViewManager.database) { + if (dbName == globalTempDatabase) { throw QueryCompilationErrors.cannotCreateDatabaseWithSameNameAsPreservedDatabaseError( - globalTempViewManager.database) + globalTempDatabase) } validateName(dbName) externalCatalog.createDatabase( @@ -333,9 +334,9 @@ class SessionCatalog( def setCurrentDatabase(db: String): Unit = { val dbName = format(db) - if (dbName == globalTempViewManager.database) { + if (dbName == globalTempDatabase) { throw QueryCompilationErrors.cannotUsePreservedDatabaseAsCurrentDatabaseError( - globalTempViewManager.database) + globalTempDatabase) } requireDbExists(dbName) synchronized { currentDb = dbName } @@ -479,8 +480,9 @@ class SessionCatalog( val catalogTable = externalCatalog.getTable(db, table) val oldDataSchema = catalogTable.dataSchema // not supporting dropping columns yet + val resolver = conf.resolver val nonExistentColumnNames = - oldDataSchema.map(_.name).filterNot(columnNameResolved(newDataSchema, _)) + oldDataSchema.map(_.name).filterNot(columnNameResolved(resolver, newDataSchema, _)) if (nonExistentColumnNames.nonEmpty) { throw QueryCompilationErrors.dropNonExistentColumnsNotSupportedError(nonExistentColumnNames) } @@ -488,8 +490,11 @@ class SessionCatalog( externalCatalog.alterTableDataSchema(db, table, newDataSchema) } - private def columnNameResolved(schema: StructType, colName: String): Boolean = { - schema.fields.map(_.name).exists(conf.resolver(_, colName)) + private def columnNameResolved( + resolver: Resolver, + schema: StructType, + colName: String): Boolean = { + schema.fields.exists(f => resolver(f.name, colName)) } /** @@ -659,7 +664,7 @@ class SessionCatalog( } else { false } - } else if (format(name.database.get) == globalTempViewManager.database) { + } else if (format(name.database.get) == globalTempDatabase) { globalTempViewManager.update(viewName, viewDefinition) } else { false @@ -767,9 +772,9 @@ class SessionCatalog( val table = format(name.table) if (name.database.isEmpty) { tempViews.get(table).map(_.tableMeta).getOrElse(getTableMetadata(name)) - } else if (format(name.database.get) == globalTempViewManager.database) { + } else if (format(name.database.get) == globalTempDatabase) { globalTempViewManager.get(table).map(_.tableMeta) - .getOrElse(throw new NoSuchTableException(globalTempViewManager.database, table)) + .getOrElse(throw new NoSuchTableException(globalTempDatabase, table)) } else { getTableMetadata(name) } @@ -795,7 +800,7 @@ class SessionCatalog( val oldTableName = qualifiedIdent.table val newTableName = format(newName.table) - if (db == globalTempViewManager.database) { + if (db == globalTempDatabase) { globalTempViewManager.rename(oldTableName, newTableName) } else { requireDbExists(db) @@ -832,10 +837,10 @@ class SessionCatalog( val qualifiedIdent = qualifyIdentifier(name) val db = qualifiedIdent.database.get val table = qualifiedIdent.table - if (db == globalTempViewManager.database) { + if (db == globalTempDatabase) { val viewExists = globalTempViewManager.remove(table) if (!viewExists && !ignoreIfNotExists) { - throw new NoSuchTableException(globalTempViewManager.database, table) + throw new NoSuchTableException(globalTempDatabase, table) } } else { if (name.database.isDefined || !tempViews.contains(table)) { @@ -873,7 +878,7 @@ class SessionCatalog( val qualifiedIdent = 
qualifyIdentifier(name) val db = qualifiedIdent.database.get val table = qualifiedIdent.table - if (db == globalTempViewManager.database) { + if (db == globalTempDatabase) { globalTempViewManager.get(table).map { viewDef => SubqueryAlias(table, db, getTempViewPlan(viewDef)) }.getOrElse(throw new NoSuchTableException(db, table)) @@ -926,6 +931,31 @@ class SessionCatalog( metadata.schema.fieldNames.exists(_.matches("_c[0-9]+")) } + + private def castColToType( + col: Expression, + toField: StructField, + schemaMode: ViewSchemaMode): NamedExpression = { + val cast = schemaMode match { + /* + ** For schema binding, we cast the column to the expected type using safe cast only. + ** For legacy behavior, we cast the column to the expected type using safe cast only. + ** For schema compensation, we cast the column to the expected type using any cast + * in ansi mode. + ** For schema (type) evolution, we take the column as is. + */ + case SchemaBinding => UpCast(col, toField.dataType) + case SchemaUnsupported => if (conf.viewSchemaCompensation) { + Cast(col, toField.dataType, ansiEnabled = true) + } else { + UpCast(col, toField.dataType) + } + case SchemaCompensation => Cast(col, toField.dataType, ansiEnabled = true) + case SchemaTypeEvolution => col + case other => throw SparkException.internalError("Unexpected ViewSchemaMode") + } + Alias(cast, toField.name)(explicitMetadata = Some(toField.metadata)) + } private def fromCatalogTable(metadata: CatalogTable, isTempView: Boolean): View = { val viewText = metadata.viewText.getOrElse { throw SparkException.internalError("Invalid view without text.") @@ -945,58 +975,63 @@ class SessionCatalog( throw QueryCompilationErrors.invalidViewText(viewText, metadata.qualifiedName) } } - val projectList = if (!isHiveCreatedView(metadata)) { - val viewColumnNames = if (metadata.viewQueryColumnNames.isEmpty) { - // For view created before Spark 2.2.0, the view text is already fully qualified, the plan - // output is the same with the view output. - metadata.schema.fieldNames.toImmutableArraySeq - } else { - assert(metadata.viewQueryColumnNames.length == metadata.schema.length) - metadata.viewQueryColumnNames - } + val schemaMode = metadata.viewSchemaMode + if (schemaMode == SchemaEvolution) { + View(desc = metadata, isTempView = isTempView, child = parsedPlan) + } else { + val projectList = if (!isHiveCreatedView(metadata)) { + val viewColumnNames = if (metadata.viewQueryColumnNames.isEmpty) { + // For view created before Spark 2.2.0, the view text is already fully qualified, the plan + // output is the same with the view output. + metadata.schema.fieldNames.toImmutableArraySeq + } else { + assert(metadata.viewQueryColumnNames.length == metadata.schema.length) + metadata.viewQueryColumnNames + } - // For view queries like `SELECT * FROM t`, the schema of the referenced table/view may - // change after the view has been created. We need to add an extra SELECT to pick the columns - // according to the recorded column names (to get the correct view column ordering and omit - // the extra columns that we don't require), with UpCast (to make sure the type change is - // safe) and Alias (to respect user-specified view column names) according to the view schema - // in the catalog. - // Note that, the column names may have duplication, e.g. `CREATE VIEW v(x, y) AS - // SELECT 1 col, 2 col`. We need to make sure that the matching attributes have the same - // number of duplications, and pick the corresponding attribute by ordinal. 
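To make the duplicate-column handling described in the comment above concrete, here is a minimal standalone sketch of the nameToCounts / nameToCurrentOrdinal bookkeeping. It is plain Scala for illustration, not the Catalyst code itself; `normalize` stands in for the case-sensitivity handling driven by the view's captured SQL configs.

object ViewColumnOrdinalSketch {
  // For each recorded view query column name, compute (name, ordinal among duplicates, duplicate
  // count), which is the information GetViewColumnByNameAndOrdinal needs to pick the attribute.
  def ordinals(viewColumnNames: Seq[String], caseSensitive: Boolean): Seq[(String, Int, Int)] = {
    val normalize: String => String =
      if (caseSensitive) identity else _.toLowerCase(java.util.Locale.ROOT)
    val nameToCounts = viewColumnNames.groupBy(normalize).transform((_, v) => v.length)
    val nameToCurrentOrdinal = scala.collection.mutable.HashMap.empty[String, Int]
    viewColumnNames.map { name =>
      val key = normalize(name)
      val ordinal = nameToCurrentOrdinal.getOrElse(key, 0)
      nameToCurrentOrdinal(key) = ordinal + 1
      (name, ordinal, nameToCounts(key))
    }
  }

  def main(args: Array[String]): Unit = {
    // e.g. CREATE VIEW v(x, y) AS SELECT 1 col, 2 col
    println(ordinals(Seq("col", "col"), caseSensitive = false)) // List((col,0,2), (col,1,2))
  }
}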
- val viewConf = View.effectiveSQLConf(metadata.viewSQLConfigs, isTempView) - val normalizeColName: String => String = if (viewConf.caseSensitiveAnalysis) { - identity + // For view queries like `SELECT * FROM t`, the schema of the referenced table/view may + // change after the view has been created. We need to add an extra SELECT to pick the + // columns according to the recorded column names (to get the correct view column ordering + // and omit the extra columns that we don't require), with UpCast (to make sure the type + // change is safe) and Alias (to respect user-specified view column names) according to the + // view schema in the catalog. + // Note that, the column names may have duplication, e.g. `CREATE VIEW v(x, y) AS + // SELECT 1 col, 2 col`. We need to make sure that the matching attributes have the same + // number of duplications, and pick the corresponding attribute by ordinal. + val viewConf = View.effectiveSQLConf(metadata.viewSQLConfigs, isTempView) + val normalizeColName: String => String = if (viewConf.caseSensitiveAnalysis) { + identity + } else { + _.toLowerCase(Locale.ROOT) + } + val nameToCounts = viewColumnNames.groupBy(normalizeColName).transform((_, v) => v.length) + val nameToCurrentOrdinal = scala.collection.mutable.HashMap.empty[String, Int] + val viewDDL = buildViewDDL(metadata, isTempView) + + viewColumnNames.zip(metadata.schema).map { case (name, field) => + val normalizedName = normalizeColName(name) + val count = nameToCounts(normalizedName) + val ordinal = nameToCurrentOrdinal.getOrElse(normalizedName, 0) + nameToCurrentOrdinal(normalizedName) = ordinal + 1 + val col = GetViewColumnByNameAndOrdinal( + metadata.identifier.toString, name, ordinal, count, viewDDL) + castColToType(col, field, schemaMode) + } } else { - _.toLowerCase(Locale.ROOT) - } - val nameToCounts = viewColumnNames.groupBy(normalizeColName).transform((_, v) => v.length) - val nameToCurrentOrdinal = scala.collection.mutable.HashMap.empty[String, Int] - val viewDDL = buildViewDDL(metadata, isTempView) - - viewColumnNames.zip(metadata.schema).map { case (name, field) => - val normalizedName = normalizeColName(name) - val count = nameToCounts(normalizedName) - val ordinal = nameToCurrentOrdinal.getOrElse(normalizedName, 0) - nameToCurrentOrdinal(normalizedName) = ordinal + 1 - val col = GetViewColumnByNameAndOrdinal( - metadata.identifier.toString, name, ordinal, count, viewDDL) - Alias(UpCast(col, field.dataType), field.name)(explicitMetadata = Some(field.metadata)) - } - } else { - // For view created by hive, the parsed view plan may have different output columns with - // the schema stored in metadata. For example: `CREATE VIEW v AS SELECT 1 FROM t` - // the schema in metadata will be `_c0` while the parsed view plan has column named `1` - metadata.schema.zipWithIndex.map { case (field, index) => - val col = GetColumnByOrdinal(index, field.dataType) - Alias(UpCast(col, field.dataType), field.name)(explicitMetadata = Some(field.metadata)) + // For view created by hive, the parsed view plan may have different output columns with + // the schema stored in metadata. 
For example: `CREATE VIEW v AS SELECT 1 FROM t` + // the schema in metadata will be `_c0` while the parsed view plan has column named `1` + metadata.schema.zipWithIndex.map { case (field, index) => + val col = GetColumnByOrdinal(index, field.dataType) + castColToType(col, field, schemaMode) + } } + View(desc = metadata, isTempView = isTempView, child = Project(projectList, parsedPlan)) } - View(desc = metadata, isTempView = isTempView, child = Project(projectList, parsedPlan)) } def isGlobalTempViewDB(dbName: String): Boolean = { - globalTempViewManager.database.equalsIgnoreCase(dbName) + globalTempDatabase.equalsIgnoreCase(dbName) } /** @@ -1055,9 +1090,9 @@ class SessionCatalog( pattern: String, includeLocalTempViews: Boolean): Seq[TableIdentifier] = { val dbName = format(db) - val dbTables = if (dbName == globalTempViewManager.database) { + val dbTables = if (dbName == globalTempDatabase) { globalTempViewManager.listViewNames(pattern).map { name => - TableIdentifier(name, Some(globalTempViewManager.database)) + TableIdentifier(name, Some(globalTempDatabase)) } } else { requireDbExists(dbName) @@ -1078,9 +1113,9 @@ class SessionCatalog( */ def listViews(db: String, pattern: String): Seq[TableIdentifier] = { val dbName = format(db) - val dbViews = if (dbName == globalTempViewManager.database) { + val dbViews = if (dbName == globalTempDatabase) { globalTempViewManager.listViewNames(pattern).map { name => - TableIdentifier(name, Some(globalTempViewManager.database)) + TableIdentifier(name, Some(globalTempDatabase)) } } else { requireDbExists(dbName) @@ -1096,7 +1131,7 @@ class SessionCatalog( * List all matching temp views in the specified database, including global/local temporary views. */ def listTempViews(db: String, pattern: String): Seq[CatalogTable] = { - val globalTempViews = if (format(db) == globalTempViewManager.database) { + val globalTempViews = if (format(db) == globalTempDatabase) { globalTempViewManager.listViewNames(pattern).flatMap { viewName => globalTempViewManager.get(viewName).map(_.tableMeta) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunctionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunctionErrors.scala new file mode 100644 index 0000000000000..a5381669caea8 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunctionErrors.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.spark.SparkException +import org.apache.spark.sql.errors.QueryErrorsBase + +/** + * Errors during registering and executing [[UserDefinedFunction]]s. 
+ */ +object UserDefinedFunctionErrors extends QueryErrorsBase { + def unsupportedUserDefinedFunction(language: RoutineLanguage): Throwable = { + unsupportedUserDefinedFunction(language.name) + } + + def unsupportedUserDefinedFunction(language: String): Throwable = { + SparkException.internalError(s"Unsupported user defined function type: $language") + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala index 4807d886c9f96..d55b9c972697e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala @@ -33,10 +33,10 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{CurrentUserContext, FunctionIdentifier, InternalRow, SQLConfHelper, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, Resolver, UnresolvedLeafNode} +import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, Resolver, SchemaBinding, SchemaCompensation, SchemaEvolution, SchemaTypeEvolution, SchemaUnsupported, UnresolvedLeafNode, ViewSchemaMode} import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_STORING_ANALYZED_PLAN import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, Cast, ExprId, Literal} import org.apache.spark.sql.catalyst.plans.logical._ @@ -401,6 +401,25 @@ case class CatalogTable( ) } + /** + * Return the view schema mode. Defaults to SchemaBinding when the property is missing (e.g. views + * created by older versions); returns SchemaUnsupported when the viewSchemaBindingEnabled conf is disabled. + */ + def viewSchemaMode: ViewSchemaMode = { + if (!SQLConf.get.viewSchemaBindingEnabled) { + SchemaUnsupported + } else { + val schemaMode = properties.getOrElse(VIEW_SCHEMA_MODE, SchemaBinding.toString) + schemaMode match { + case SchemaBinding.toString => SchemaBinding + case SchemaEvolution.toString => SchemaEvolution + case SchemaTypeEvolution.toString => SchemaTypeEvolution + case SchemaCompensation.toString => SchemaCompensation + case other => throw SparkException.internalError(s"Unexpected ViewSchemaMode: $other") + } + } + } + + /** + * Return temporary view names the current view was referred. should be empty if the * CatalogTable is not a Temporary View or created by older versions of Spark(before 3.1.0). 
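A rough standalone model of the viewSchemaMode property lookup above may help; this is plain Scala for illustration, not the CatalogTable code. The boolean stands in for the viewSchemaBindingEnabled conf, and the key parameter stands in for VIEW_SCHEMA_MODE (whose concrete value is not shown in this excerpt).

object ViewSchemaModeSketch {
  sealed trait Mode
  case object SchemaBinding extends Mode
  case object SchemaCompensation extends Mode
  case object SchemaTypeEvolution extends Mode
  case object SchemaEvolution extends Mode
  case object SchemaUnsupported extends Mode

  private val byName: Map[String, Mode] =
    Seq(SchemaBinding, SchemaCompensation, SchemaTypeEvolution, SchemaEvolution)
      .map(m => m.toString -> m).toMap

  // A missing property means the view predates the feature and is treated as SchemaBinding.
  def resolve(
      properties: Map[String, String],
      schemaModeKey: String,
      bindingEnabled: Boolean): Mode = {
    if (!bindingEnabled) {
      SchemaUnsupported
    } else {
      val name = properties.getOrElse(schemaModeKey, SchemaBinding.toString)
      byName.getOrElse(name, throw new IllegalStateException(s"Unexpected ViewSchemaMode: $name"))
    }
  }
}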
@@ -491,6 +510,9 @@ case class CatalogTable( if (tableType == CatalogTableType.VIEW) { viewText.foreach(map.put("View Text", _)) viewOriginalText.foreach(map.put("View Original Text", _)) + if (SQLConf.get.viewSchemaBindingEnabled) { + map.put("View Schema Mode", viewSchemaMode.toString) + } if (viewCatalogAndNamespace.nonEmpty) { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ map.put("View Catalog and Namespace", viewCatalogAndNamespace.quoted) @@ -563,6 +585,8 @@ object CatalogTable { val VIEW_REFERRED_TEMP_FUNCTION_NAMES = VIEW_PREFIX + "referredTempFunctionsNames" val VIEW_REFERRED_TEMP_VARIABLE_NAMES = VIEW_PREFIX + "referredTempVariablesNames" + val VIEW_SCHEMA_MODE = VIEW_PREFIX + "schemaMode" + val VIEW_STORING_ANALYZED_PLAN = VIEW_PREFIX + "storingAnalyzedPlan" val PROP_CLUSTERING_COLUMNS: String = "clusteringColumns" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala index 62638d70dd904..7b6664a4117a2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtils.scala @@ -120,6 +120,11 @@ object CSVExprUtils { * @throws SparkIllegalArgumentException if any of the individual input chunks are illegal */ def toDelimiterStr(str: String): String = { + if (str == null) { + throw new SparkIllegalArgumentException( + errorClass = "INVALID_DELIMITER_VALUE.NULL_VALUE") + } + var idx = 0 var delimiter = "" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala index e69a4552ebff2..47e2e288357e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVHeaderChecker.scala @@ -22,7 +22,7 @@ import com.univocity.parsers.csv.{CsvParser, CsvParserSettings} import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.internal.{Logging, MDC, MessageWithContext} -import org.apache.spark.internal.LogKey.{CSV_HEADER_COLUMN_NAME, CSV_HEADER_COLUMN_NAMES, CSV_HEADER_LENGTH, CSV_SCHEMA_FIELD_NAME, CSV_SCHEMA_FIELD_NAMES, CSV_SOURCE, NUM_COLUMNS} +import org.apache.spark.internal.LogKeys.{CSV_HEADER_COLUMN_NAME, CSV_HEADER_COLUMN_NAMES, CSV_HEADER_LENGTH, CSV_SCHEMA_FIELD_NAME, CSV_SCHEMA_FIELD_NAMES, CSV_SOURCE, NUM_COLUMNS} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index f10a53bde5ddd..e6e964ac90b38 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -22,8 +22,8 @@ import java.io.Writer import com.univocity.parsers.csv.CsvWriter import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.SpecializedGetters -import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, IntervalStringStyles, IntervalUtils, SparkStringUtils, TimestampFormatter} +import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, ToStringBase} +import 
org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, IntervalStringStyles, IntervalUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -65,9 +65,11 @@ class UnivocityGenerator( private val nullAsQuotedEmptyString = SQLConf.get.getConf(SQLConf.LEGACY_NULL_VALUE_WRITTEN_AS_QUOTED_EMPTY_STRING_CSV) + private val binaryFormatter = ToStringBase.getBinaryFormatter + private def makeConverter(dataType: DataType): ValueConverter = dataType match { case BinaryType => - (getter, ordinal) => SparkStringUtils.getHexString(getter.getBinary(ordinal)) + (getter, ordinal) => binaryFormatter(getter.getBinary(ordinal)).toString case DateType => (getter, ordinal) => dateFormatter.format(getter.getInt(ordinal)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index a5158d8a22c6b..61c2f7a5926b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -63,8 +63,7 @@ class UnivocityParser( private type ValueConverter = String => Any // This index is used to reorder parsed tokens - private val tokenIndexArr = - requiredSchema.map(f => java.lang.Integer.valueOf(dataSchema.indexOf(f))).toArray + private val tokenIndexArr = requiredSchema.map(f => dataSchema.indexOf(f)).toArray // True if we should inform the Univocity CSV parser to select which fields to read by their // positions. Generally assigned by input configuration options, except when input column(s) have @@ -81,7 +80,8 @@ class UnivocityParser( // When to-be-parsed schema is shorter than the to-be-read data schema, we let Univocity CSV // parser select a sequence of fields for reading by their positions. if (parsedSchema.length < dataSchema.length) { - parserSetting.selectIndexes(tokenIndexArr: _*) + // Box into Integer here to avoid unboxing where `tokenIndexArr` is used during parsing + parserSetting.selectIndexes(tokenIndexArr.map(java.lang.Integer.valueOf(_)): _*) } new CsvParser(parserSetting) } @@ -316,7 +316,7 @@ class UnivocityParser( throw BadRecordException( () => getCurrentInput, () => Array.empty, - QueryExecutionErrors.malformedCSVRecordError("")) + LazyBadRecordCauseWrapper(() => QueryExecutionErrors.malformedCSVRecordError(""))) } val currentInput = getCurrentInput @@ -326,7 +326,8 @@ class UnivocityParser( // However, we still have chance to parse some of the tokens. It continues to parses the // tokens normally and sets null when `ArrayIndexOutOfBoundsException` occurs for missing // tokens. 
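The LazyBadRecordCauseWrapper used in this hunk is not defined in this excerpt; the following is only a guess at its shape, shown to illustrate the point of the change: the malformed-record error, including its interpolated message, is now built lazily, so it is never constructed for records that end up being handled without reporting the cause.

object LazyCauseSketch {
  // Hypothetical stand-in for LazyBadRecordCauseWrapper: the real cause is only materialized
  // if something actually asks for it (e.g. when the bad record is surfaced in FAILFAST mode).
  final case class LazyCause(buildCause: () => Throwable) {
    lazy val cause: Throwable = buildCause()
  }

  def main(args: Array[String]): Unit = {
    var built = 0
    val wrapped = LazyCause { () => built += 1; new RuntimeException("Malformed CSV record") }
    println(built)                    // 0: nothing has been constructed yet
    println(wrapped.cause.getMessage) // forces construction of the underlying exception
    println(built)                    // 1
  }
}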
- Some(QueryExecutionErrors.malformedCSVRecordError(currentInput.toString)) + Some(LazyBadRecordCauseWrapper( + () => QueryExecutionErrors.malformedCSVRecordError(currentInput.toString))) } else None // When the length of the returned tokens is identical to the length of the parsed schema, // we just need to: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala index 20f86a32c1a1d..81743251bada9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/EncoderUtils.scala @@ -77,6 +77,7 @@ object EncoderUtils { case _: DecimalType => classOf[Decimal] case _: DayTimeIntervalType => classOf[PhysicalLongType.InternalType] case _: YearMonthIntervalType => classOf[PhysicalIntegerType.InternalType] + case _: StringType => classOf[UTF8String] case _: StructType => classOf[InternalRow] case _: ArrayType => classOf[ArrayData] case _: MapType => classOf[MapData] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index aa893ba8110ed..0b5ce65fed6df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.encoders import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag +import org.apache.spark.SparkRuntimeException import org.apache.spark.sql.{Encoder, Row} import org.apache.spark.sql.catalyst.{DeserializerBuildHelper, InternalRow, JavaTypeInference, ScalaReflection, SerializerBuildHelper} import org.apache.spark.sql.catalyst.analysis.{Analyzer, GetColumnByOrdinal, SimpleAnalyzer, UnresolvedAttribute, UnresolvedExtractValue} @@ -187,6 +188,8 @@ object ExpressionEncoder { } constructProjection(row).get(0, anyObjectType).asInstanceOf[T] } catch { + case e: SparkRuntimeException if e.getErrorClass == "NOT_NULL_ASSERT_VIOLATION" => + throw e case e: Exception => throw QueryExecutionErrors.expressionDecodingError(e, expressions) } @@ -213,6 +216,8 @@ object ExpressionEncoder { inputRow(0) = t extractProjection(inputRow) } catch { + case e: SparkRuntimeException if e.getErrorClass == "NOT_NULL_ASSERT_VIOLATION" => + throw e case e: Exception => throw QueryExecutionErrors.expressionEncodingError(e, expressions) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala index c42b54222f171..13ea8c77c41b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CallMethodViaReflection.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase} +import org.apache.spark.sql.internal.SQLConf +import 
org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.ArrayImplicits._ @@ -77,12 +79,12 @@ case class CallMethodViaReflection( ) } else { val unexpectedParameter = children.zipWithIndex.collectFirst { - case (e, 0) if !(e.dataType == StringType && e.foldable) => + case (e, 0) if !(e.dataType.isInstanceOf[StringType] && e.foldable) => DataTypeMismatch( errorSubClass = "NON_FOLDABLE_INPUT", messageParameters = Map( "inputName" -> toSQLId("class"), - "inputType" -> toSQLType(StringType), + "inputType" -> toSQLType(StringTypeAnyCollation), "inputExpr" -> toSQLExpr(children.head) ) ) @@ -90,12 +92,12 @@ case class CallMethodViaReflection( DataTypeMismatch( errorSubClass = "UNEXPECTED_NULL", messageParameters = Map("exprName" -> toSQLId("class"))) - case (e, 1) if !(e.dataType == StringType && e.foldable) => + case (e, 1) if !(e.dataType.isInstanceOf[StringType] && e.foldable) => DataTypeMismatch( errorSubClass = "NON_FOLDABLE_INPUT", messageParameters = Map( "inputName" -> toSQLId("method"), - "inputType" -> toSQLType(StringType), + "inputType" -> toSQLType(StringTypeAnyCollation), "inputExpr" -> toSQLExpr(children(1)) ) ) @@ -103,14 +105,16 @@ case class CallMethodViaReflection( DataTypeMismatch( errorSubClass = "UNEXPECTED_NULL", messageParameters = Map("exprName" -> toSQLId("method"))) - case (e, idx) if idx > 1 && !CallMethodViaReflection.typeMapping.contains(e.dataType) => + case (e, idx) if idx > 1 && + (!CallMethodViaReflection.typeMapping.contains(e.dataType) + && !e.dataType.isInstanceOf[StringType]) => DataTypeMismatch( errorSubClass = "UNEXPECTED_INPUT_TYPE", messageParameters = Map( "paramIndex" -> ordinalNumber(idx), "requiredType" -> toSQLType( TypeCollection(BooleanType, ByteType, ShortType, - IntegerType, LongType, FloatType, DoubleType, StringType)), + IntegerType, LongType, FloatType, DoubleType, StringTypeAnyCollation)), "inputSql" -> toSQLExpr(e), "inputType" -> toSQLType(e.dataType)) ) @@ -134,7 +138,7 @@ case class CallMethodViaReflection( } override def nullable: Boolean = true - override val dataType: DataType = StringType + override val dataType: DataType = SQLConf.get.defaultStringType override protected def initializeInternal(partitionIndex: Int): Unit = {} override protected def evalInternal(input: InternalRow): Any = { @@ -230,7 +234,10 @@ object CallMethodViaReflection { // Argument type must match. That is, either the method's argument type matches one of the // acceptable types defined in typeMapping, or it is a super type of the acceptable types. candidateTypes.zip(argTypes).forall { case (candidateType, argType) => - typeMapping(argType).exists(candidateType.isAssignableFrom) + if (!argType.isInstanceOf[StringType]) { + typeMapping(argType).exists(candidateType.isAssignableFrom) + } + else candidateType.isAssignableFrom(classOf[String]) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e252075c9c1c4..4a2b4b28e690e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -575,8 +575,6 @@ case class Cast( // notation if an exponent is needed. 
override protected def useDecimalPlainString: Boolean = ansiEnabled - override protected def useHexFormatForBinary: Boolean = false - // The class name of `DateTimeUtils` protected def dateTimeUtilsCls: String = DateTimeUtils.getClass.getName.stripSuffix("$") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationKey.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationKey.scala new file mode 100644 index 0000000000000..6e400d026e0ee --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/CollationKey.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.util.CollationFactory +import org.apache.spark.sql.internal.types.StringTypeAnyCollation +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String + +case class CollationKey(expr: Expression) extends UnaryExpression with ExpectsInputTypes { + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) + override def dataType: DataType = BinaryType + + final lazy val collationId: Int = expr.dataType match { + case st: StringType => + st.collationId + } + + override def nullSafeEval(input: Any): Any = + CollationFactory.getCollationKeyBytes(input.asInstanceOf[UTF8String], collationId) + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + defineCodeGen(ctx, ev, c => s"CollationFactory.getCollationKeyBytes($c, $collationId)") + } + + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(expr = newChild) + } + + override def child: Expression = expr +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index 258bc0ed8fe73..fde2093460876 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CharVarcharUtils} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase, QueryExecutionErrors} +import org.apache.spark.sql.internal.types.{AbstractMapType, StringTypeAnyCollation} import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType, VariantType} import 
org.apache.spark.unsafe.types.UTF8String @@ -57,7 +58,7 @@ object ExprUtils extends QueryErrorsBase { def convertToMapData(exp: Expression): Map[String, String] = exp match { case m: CreateMap - if m.dataType.acceptsType(MapType(StringType, StringType, valueContainsNull = false)) => + if AbstractMapType(StringTypeAnyCollation, StringTypeAnyCollation).acceptsType(m.dataType) => val arrayMap = m.eval().asInstanceOf[ArrayBasedMapData] ArrayBasedMapData.toScalaMap(arrayMap).map { case (key, value) => key.toString -> value.toString @@ -77,7 +78,7 @@ object ExprUtils extends QueryErrorsBase { columnNameOfCorruptRecord: String): Unit = { schema.getFieldIndex(columnNameOfCorruptRecord).foreach { corruptFieldIndex => val f = schema(corruptFieldIndex) - if (f.dataType != StringType || !f.nullable) { + if (!f.dataType.isInstanceOf[StringType] || !f.nullable) { throw QueryCompilationErrors.invalidFieldTypeForCorruptRecordError() } } @@ -110,7 +111,7 @@ object ExprUtils extends QueryErrorsBase { */ def checkJsonSchema(schema: DataType): TypeCheckResult = { val isInvalid = schema.existsRecursively { - case MapType(keyType, _, _) if keyType != StringType => true + case MapType(keyType, _, _) if !keyType.isInstanceOf[StringType] => true case _ => false } if (isInvalid) { @@ -133,7 +134,7 @@ object ExprUtils extends QueryErrorsBase { def checkXmlSchema(schema: DataType): TypeCheckResult = { val isInvalid = schema.existsRecursively { // XML field names must be StringType - case MapType(keyType, _, _) if keyType != StringType => true + case MapType(keyType, _, _) if !keyType.isInstanceOf[StringType] => true case _ => false } if (isInvalid) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index fe7d5a4b782b0..de15ec43c4f31 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -376,16 +376,12 @@ abstract class Expression extends TreeNode[Expression] { } } - /** - * An expression that cannot be evaluated. These expressions don't live past analysis or - * optimization time (e.g. Star) and should not be evaluated during query planning and - * execution. + * An expression that cannot be evaluated but is guaranteed to be replaced with a foldable value + * by query optimizer (e.g. CurrentDate). */ -trait Unevaluable extends Expression { - - /** Unevaluable is not foldable because we don't have an eval for it. */ - final override def foldable: Boolean = false +trait FoldableUnevaluable extends Expression { + override def foldable: Boolean = true final override def eval(input: InternalRow = null): Any = throw QueryExecutionErrors.cannotEvaluateExpressionError(this) @@ -394,6 +390,19 @@ trait Unevaluable extends Expression { throw QueryExecutionErrors.cannotGenerateCodeForExpressionError(this) } +/** + * An expression that cannot be evaluated. These expressions don't live past analysis or + * optimization time (e.g. Star) and should not be evaluated during query planning and + * execution. + */ +trait Unevaluable extends Expression with FoldableUnevaluable { + + /** Unevaluable is not foldable by default because we don't have an eval for it. * Exceptions are expressions that will be replaced by a literal by the Optimizer (e.g. CurrentDate). * Hence we allow overriding of this field in special cases. 
+ */ + final override def foldable: Boolean = false +} /** * An expression that gets replaced at runtime (currently by the optimizer) into a different diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/FunctionTableSubqueryArgumentExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/FunctionTableSubqueryArgumentExpression.scala index 94465ccff796e..bfd3bc8051dff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/FunctionTableSubqueryArgumentExpression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/FunctionTableSubqueryArgumentExpression.scala @@ -172,9 +172,12 @@ case class FunctionTableSubqueryArgumentExpression( } } - private lazy val extraProjectedPartitioningExpressions: Seq[Alias] = { + lazy val extraProjectedPartitioningExpressions: Seq[Alias] = { partitionByExpressions.filter { e => - !subqueryOutputs.contains(e) + !subqueryOutputs.contains(e) || + // Skip deduplicating the 'partitionBy' expression(s) against the attributes of the input + // table if the UDTF also specified 'select' expression(s). + selectedInputExpressions.nonEmpty }.zipWithIndex.map { case (expr, index) => Alias(expr, s"partition_by_$index")() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala index fd2e302deb997..673f9397bb03f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala @@ -212,7 +212,7 @@ object TimeWindow { * that we can use `window` in SQL. */ def parseExpression(expr: Expression): Long = expr match { - case NonNullLiteral(s, StringType) => getIntervalInMicroSeconds(s.toString) + case NonNullLiteral(s, _: StringType) => getIntervalInMicroSeconds(s.toString) case IntegerLiteral(i) => i.toLong case NonNullLiteral(l, LongType) => l.toString.toLong case _ => throw QueryCompilationErrors.invalidLiteralForWindowDurationError() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyString.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyString.scala index 8db08dbbcb813..f6fc9b3abd65b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyString.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToPrettyString.scala @@ -49,7 +49,7 @@ case class ToPrettyString(child: Expression, timeZoneId: Option[String] = None) override protected def useDecimalPlainString: Boolean = true - override protected def useHexFormatForBinary: Boolean = true + override protected val binaryFormatter: BinaryFormatter = ToStringBase.getBinaryFormatter private[this] lazy val castFunc: Any => UTF8String = castToString(child.dataType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala index 4f35072c4fc7f..130b4ee4c8cac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ToStringBase.scala @@ -24,6 +24,8 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ 
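The binaryFormatter introduced in the hunks above, and the ToStringBase.getBinaryFormatter factory that follows, pick one rendering per BINARY_OUTPUT_STYLE setting. As a rough illustration only, plain-Scala approximations of the four named styles for the bytes of "Spark"; the exact strings produced by Spark's Hex.hex and the legacy getHexString path may differ.

object BinaryOutputStylesSketch {
  def main(args: Array[String]): Unit = {
    val bytes = "Spark".getBytes("UTF-8")
    // UTF8: pass the bytes through as a UTF-8 string.
    println(new String(bytes, "UTF-8"))                                         // Spark
    // BASIC: list of signed byte values.
    println(bytes.mkString("[", ", ", "]"))                                     // [83, 112, 97, 114, 107]
    // BASE64: unpadded Base64, matching the encoder call used in getBinaryFormatter.
    println(java.util.Base64.getEncoder.withoutPadding().encodeToString(bytes)) // U3Bhcms
    // HEX: two upper-case hex digits per byte.
    println(bytes.map("%02X".format(_)).mkString)                               // 537061726B
  }
}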
import org.apache.spark.sql.catalyst.util.{ArrayData, DateFormatter, IntervalStringStyles, IntervalUtils, MapData, SparkStringUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.IntervalStringStyles.ANSI_STYLE +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.BinaryOutputStyle import org.apache.spark.sql.types._ import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -44,7 +46,7 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression => protected def useDecimalPlainString: Boolean - protected def useHexFormatForBinary: Boolean + protected val binaryFormatter: BinaryFormatter = UTF8String.fromBytes // Makes the function accept Any type input by doing `asInstanceOf[T]`. @inline private def acceptAny[T](func: T => UTF8String): Any => UTF8String = @@ -54,10 +56,7 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression => protected final def castToString(from: DataType): Any => UTF8String = from match { case CalendarIntervalType => acceptAny[CalendarInterval](i => UTF8String.fromString(i.toString)) - case BinaryType if useHexFormatForBinary => - acceptAny[Array[Byte]](binary => UTF8String.fromString(SparkStringUtils.getHexString(binary))) - case BinaryType => - acceptAny[Array[Byte]](UTF8String.fromBytes) + case BinaryType => acceptAny[Array[Byte]](binaryFormatter.apply) case DateType => acceptAny[Int](d => UTF8String.fromString(dateFormatter.format(d))) case TimestampType => @@ -172,12 +171,11 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression => protected final def castToStringCode( from: DataType, ctx: CodegenContext): (ExprValue, ExprValue) => Block = { from match { - case BinaryType if useHexFormatForBinary => - (c, evPrim) => - val utilCls = SparkStringUtils.getClass.getName.stripSuffix("$") - code"$evPrim = UTF8String.fromString($utilCls.getHexString($c));" case BinaryType => - (c, evPrim) => code"$evPrim = UTF8String.fromBytes($c);" + val bf = JavaCode.global( + ctx.addReferenceObj("binaryFormatter", binaryFormatter), + classOf[BinaryFormatter]) + (c, evPrim) => code"$evPrim = $bf.apply($c);" case DateType => val df = JavaCode.global( ctx.addReferenceObj("dateFormatter", dateFormatter), @@ -414,3 +412,25 @@ trait ToStringBase { self: UnaryExpression with TimeZoneAwareExpression => """.stripMargin } } + +object ToStringBase { + def getBinaryFormatter: BinaryFormatter = { + val style = SQLConf.get.getConf(SQLConf.BINARY_OUTPUT_STYLE) + style.map(BinaryOutputStyle.withName) match { + case Some(BinaryOutputStyle.UTF8) => + (array: Array[Byte]) => UTF8String.fromBytes(array) + case Some(BinaryOutputStyle.BASIC) => + (array: Array[Byte]) => UTF8String.fromString(array.mkString("[", ", ", "]")) + case Some(BinaryOutputStyle.BASE64) => + (array: Array[Byte]) => + UTF8String.fromString(java.util.Base64.getEncoder.withoutPadding().encodeToString(array)) + case Some(BinaryOutputStyle.HEX) => + (array: Array[Byte]) => Hex.hex(array) + case _ => + (array: Array[Byte]) => UTF8String.fromString(SparkStringUtils.getHexString(array)) + } + } +} + +trait BinaryFormatter extends (Array[Byte] => UTF8String) with Serializable + diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala index d37c9d9f6452a..9041ed15fc501 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala @@ -17,7 +17,10 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, Reducer, ReducibleFunction} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, Reducer, ReducibleFunction, ScalarFunction} +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types.DataType /** @@ -30,7 +33,7 @@ import org.apache.spark.sql.types.DataType case class TransformExpression( function: BoundFunction, children: Seq[Expression], - numBucketsOpt: Option[Int] = None) extends Expression with Unevaluable { + numBucketsOpt: Option[Int] = None) extends Expression { override def nullable: Boolean = true @@ -113,4 +116,23 @@ case class TransformExpression( override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = copy(children = newChildren) + + private lazy val resolvedFunction: Option[Expression] = this match { + case TransformExpression(scalarFunc: ScalarFunction[_], arguments, Some(numBuckets)) => + Some(V2ExpressionUtils.resolveScalarFunction(scalarFunc, + Seq(Literal(numBuckets)) ++ arguments)) + case TransformExpression(scalarFunc: ScalarFunction[_], arguments, None) => + Some(V2ExpressionUtils.resolveScalarFunction(scalarFunc, arguments)) + case _ => None + } + + override def eval(input: InternalRow): Any = { + resolvedFunction match { + case Some(fn) => fn.eval(input) + case None => throw QueryExecutionErrors.cannotEvaluateExpressionError(this) + } + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = + throw QueryExecutionErrors.cannotGenerateCodeForExpressionError(this) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala index 4eacd3442ed5f..05eafe01906a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala @@ -132,6 +132,43 @@ case class TryDivide(left: Expression, right: Expression, replacement: Expressio } } +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(dividend, divisor) - Returns the remainder after `expr1`/`expr2`. " + + "`dividend` must be a numeric. `divisor` must be a numeric.", + examples = """ + Examples: + > SELECT _FUNC_(3, 2); + 1 + > SELECT _FUNC_(2L, 2L); + 0 + > SELECT _FUNC_(3.0, 2.0); + 1.0 + > SELECT _FUNC_(1, 0); + NULL + """, + since = "4.0.0", + group = "math_funcs") +// scalastyle:on line.size.limit +case class TryRemainder(left: Expression, right: Expression, replacement: Expression) + extends RuntimeReplaceable with InheritAnalysisRules { + def this(left: Expression, right: Expression) = this(left, right, + (left.dataType, right.dataType) match { + case (_: NumericType, _: NumericType) => Remainder(left, right, EvalMode.TRY) + // TODO: support TRY eval mode on datetime arithmetic expressions. 
+ case _ => TryEval(Remainder(left, right, EvalMode.ANSI)) + } + ) + + override def prettyName: String = "try_remainder" + + override def parameters: Seq[Expression] = Seq(left, right) + + override protected def withNewChildInternal(newChild: Expression): Expression = { + copy(replacement = newChild) + } +} + @ExpressionDescription( usage = "_FUNC_(expr1, expr2) - Returns `expr1`-`expr2` and the result is null on overflow. " + "The acceptable input types are the same with the `-` operator.", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala index c6cfccb74c161..220920a5a3198 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import java.lang.reflect.{Method, Modifier} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{FUNCTION_NAME, FUNCTION_PARAMETER} +import org.apache.spark.internal.LogKeys.{FUNCTION_NAME, FUNCTION_PARAM} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException @@ -136,7 +136,7 @@ object V2ExpressionUtils extends SQLConfHelper with Logging { case _: NoSuchFunctionException => val parameterString = args.map(_.dataType.typeName).mkString("(", ", ", ")") logWarning(log"V2 function ${MDC(FUNCTION_NAME, name)} " + - log"with parameter types ${MDC(FUNCTION_PARAMETER, parameterString)} is used in " + + log"with parameter types ${MDC(FUNCTION_PARAM, parameterString)} is used in " + log"partition transforms, but its definition couldn't be found in the function catalog " + log"provided") None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/With.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/With.scala index 2745b663639f8..5f6f9afa5797a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/With.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/With.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.trees.TreePattern.{COMMON_EXPR_REF, TreePattern, WITH_EXPRESSION} import org.apache.spark.sql.types.DataType @@ -27,13 +28,35 @@ import org.apache.spark.sql.types.DataType */ case class With(child: Expression, defs: Seq[CommonExpressionDef]) extends Expression with Unevaluable { + // We do not allow creating a With expression with an AggregateExpression that contains a + // reference to a common expression defined in that scope (note that it can contain another With + // expression with a common expression ref of the inner With). This is to prevent the creation of + // a dangling CommonExpressionRef after rewriting it in RewriteWithExpression. 
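The assertion added just below relies on childContainsUnsupportedAggExpr, defined later in this hunk. A simplified standalone model of the invariant (tiny ad-hoc tree classes, not Catalyst expressions) may help: an aggregate under a With must not reference a common expression defined by that same With, while references that are not in the enclosing scope remain allowed.

object WithInvariantSketch {
  sealed trait Node {
    def children: Seq[Node]
    def exists(p: Node => Boolean): Boolean = p(this) || children.exists(_.exists(p))
  }
  final case class Ref(id: Long) extends Node { val children: Seq[Node] = Nil }
  final case class Agg(child: Node) extends Node { val children: Seq[Node] = Seq(child) }
  final case class Plus(left: Node, right: Node) extends Node { val children: Seq[Node] = Seq(left, right) }

  // Mirrors childContainsUnsupportedAggExpr: reject an aggregate that references a common
  // expression id defined by the enclosing With, since rewriting would leave the ref dangling.
  def containsUnsupportedAgg(child: Node, definedIds: Set[Long]): Boolean =
    child.exists {
      case Agg(c) =>
        c.exists {
          case Ref(id) => definedIds(id)
          case _ => false
        }
      case _ => false
    }

  def main(args: Array[String]): Unit = {
    println(containsUnsupportedAgg(Agg(Plus(Ref(1), Ref(2))), Set(1L))) // true: rejected
    println(containsUnsupportedAgg(Plus(Agg(Ref(3)), Ref(1)), Set(1L))) // false: allowed
  }
}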
+ assert(!With.childContainsUnsupportedAggExpr(this)) + override val nodePatterns: Seq[TreePattern] = Seq(WITH_EXPRESSION) override def dataType: DataType = child.dataType override def nullable: Boolean = child.nullable override def children: Seq[Expression] = child +: defs override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): Expression = { - copy(child = newChildren.head, defs = newChildren.tail.map(_.asInstanceOf[CommonExpressionDef])) + val newDefs = newChildren.tail.map(_.asInstanceOf[CommonExpressionDef]) + // If any `CommonExpressionDef` has been updated (data type or nullability), also update its + // `CommonExpressionRef` in the `child`. + val newChild = newDefs.filter(_.resolved).foldLeft(newChildren.head) { (result, newDef) => + defs.find(_.id == newDef.id).map { oldDef => + if (newDef.dataType != oldDef.dataType || newDef.nullable != oldDef.nullable) { + val newRef = new CommonExpressionRef(newDef) + result.transform { + case oldRef: CommonExpressionRef if oldRef.id == newRef.id => + newRef + } + } else { + result + } + }.getOrElse(result) + } + copy(child = newChild, defs = newDefs) } /** @@ -88,6 +111,21 @@ object With { val commonExprRefs = commonExprDefs.map(new CommonExpressionRef(_)) With(replaced(commonExprRefs), commonExprDefs) } + + private[sql] def childContainsUnsupportedAggExpr(withExpr: With): Boolean = { + lazy val commonExprIds = withExpr.defs.map(_.id).toSet + withExpr.child.exists { + case agg: AggregateExpression => + // Check that the aggregate expression does not contain a reference to a common expression + // in the outer With expression (it is ok if it contains a reference to a common expression + // for a nested With expression). + agg.exists { + case r: CommonExpressionRef => commonExprIds.contains(r.id) + case _ => false + } + case _ => false + } + } } case class CommonExpressionId(id: Long = CommonExpressionId.newId, canonicalized: Boolean = false) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala index d1a9cafdf61fa..5977eff4526da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Mode.scala @@ -18,13 +18,14 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, UnresolvedWithinGroup} +import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, TypeCheckResult, UnresolvedWithinGroup} import org.apache.spark.sql.catalyst.expressions.{Ascending, Descending, Expression, ExpressionDescription, ImplicitCastInputTypes, SortOrder} import org.apache.spark.sql.catalyst.trees.UnaryLike import org.apache.spark.sql.catalyst.types.PhysicalDataType -import org.apache.spark.sql.catalyst.util.GenericArrayData +import org.apache.spark.sql.catalyst.util.{CollationFactory, GenericArrayData, UnsafeRowUtils} import org.apache.spark.sql.errors.QueryCompilationErrors -import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, ArrayType, BooleanType, DataType} +import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, ArrayType, BooleanType, DataType, StringType} +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.collection.OpenHashMap case class Mode( @@ -48,6 +49,21 @@ case class Mode( 
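Before the collation-aware evaluation added below, a plain-Scala sketch of the groupMapReduce step it performs; lower-casing is only a stand-in for CollationFactory.getCollationKey under a case-insensitive collation, and the buffer literal is made up for the example.

object CollationAwareModeSketch {
  def main(args: Array[String]): Unit = {
    // Value -> count pairs, as Mode's OpenHashMap buffer would hold them.
    val buffer = Map("Seattle" -> 2L, "SEATTLE" -> 3L, "Tokyo" -> 4L)
    val collationAware = buffer.toSeq
      .groupMapReduce { case (k, _) => k.toLowerCase }(identity)((x, y) => (x._1, x._2 + y._2))
      .values
    // "Seattle" and "SEATTLE" collapse into one group with count 5, which now wins over "Tokyo".
    println(collationAware.maxBy(_._2)) // (Seattle,5): one representative key of the merged group
  }
}

Grouping by the collation key rather than the raw string is what lets Mode treat values that compare equal under the collation as a single candidate.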
override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType) + override def checkInputDataTypes(): TypeCheckResult = { + if (UnsafeRowUtils.isBinaryStable(child.dataType) || child.dataType.isInstanceOf[StringType]) { + /* + * The Mode class uses collation awareness logic to handle string data. + * Complex types with collated fields are not yet supported. + */ + // TODO: SPARK-48700: Mode expression for complex types (all collations) + super.checkInputDataTypes() + } else { + TypeCheckResult.TypeCheckFailure("The input to the function 'mode' was" + + " a type of binary-unstable type that is " + + s"not currently supported by ${prettyName}.") + } + } + override def prettyName: String = "mode" override def update( @@ -74,7 +90,29 @@ case class Mode( if (buffer.isEmpty) { return null } - + /* + * The Mode class uses special collation awareness logic + * to handle string data types with various collations. + * + * For string types that don't support binary equality, + * we create a new map where the keys are the collation keys of the original strings. + * + * Keys from the original map are aggregated based on the corresponding collation keys. + * The groupMapReduce method groups the entries by collation key and maps each group + * to a single value (the sum of the counts), and finally reduces the groups to a single map. + * + * The new map is then used in the rest of the Mode evaluation logic. + */ + val collationAwareBuffer = child.dataType match { + case c: StringType if + !CollationFactory.fetchCollation(c.collationId).supportsBinaryEquality => + val collationId = c.collationId + val modeMap = buffer.toSeq.groupMapReduce { + case (k, _) => CollationFactory.getCollationKey(k.asInstanceOf[UTF8String], collationId) + }(x => x)((x, y) => (x._1, x._2 + y._2)).values + modeMap + case _ => buffer + } reverseOpt.map { reverse => val defaultKeyOrdering = if (reverse) { PhysicalDataType.ordering(child.dataType).asInstanceOf[Ordering[AnyRef]].reverse @@ -82,8 +120,8 @@ case class Mode( PhysicalDataType.ordering(child.dataType).asInstanceOf[Ordering[AnyRef]] } val ordering = Ordering.Tuple2(Ordering.Long, defaultKeyOrdering) - buffer.maxBy { case (key, count) => (count, key) }(ordering) - }.getOrElse(buffer.maxBy(_._2))._1 + collationAwareBuffer.maxBy { case (key, count) => (count, key) }(ordering) + }.getOrElse(collationAwareBuffer.maxBy(_._2))._1 } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): Mode = @@ -128,6 +166,7 @@ case class Mode( copy(child = newChild) } +// TODO: SPARK-48701: PandasMode (all collations) // scalastyle:off line.size.limit @ExpressionDescription( usage = """ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/V2Aggregator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/V2Aggregator.scala index bb94421bc7d40..49ba2ec8b904e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/V2Aggregator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/V2Aggregator.scala @@ -17,13 +17,12 @@ package org.apache.spark.sql.catalyst.expressions.aggregate -import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} - import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Expression, ImplicitCastInputTypes, UnsafeProjection} import org.apache.spark.sql.connector.catalog.functions.{AggregateFunction => V2AggregateFunction} 
import org.apache.spark.sql.types.{AbstractDataType, DataType} import org.apache.spark.util.ArrayImplicits._ +import org.apache.spark.util.Utils case class V2Aggregator[BUF <: java.io.Serializable, OUT]( aggrFunc: V2AggregateFunction[BUF, OUT], @@ -50,16 +49,11 @@ case class V2Aggregator[BUF <: java.io.Serializable, OUT]( } override def serialize(buffer: BUF): Array[Byte] = { - val bos = new ByteArrayOutputStream() - val out = new ObjectOutputStream(bos) - out.writeObject(buffer) - out.close() - bos.toByteArray + Utils.serialize(buffer) } override def deserialize(bytes: Array[Byte]): BUF = { - val in = new ObjectInputStream(new ByteArrayInputStream(bytes)) - in.readObject().asInstanceOf[BUF] + Utils.deserialize(bytes) } def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): V2Aggregator[BUF, OUT] = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala index 02925f3625d2e..2102428131f64 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/datasketchesAggregates.scala @@ -25,7 +25,9 @@ import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, ExpressionDescription, Literal} import org.apache.spark.sql.catalyst.trees.BinaryLike +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types.{AbstractDataType, BinaryType, BooleanType, DataType, IntegerType, LongType, StringType, TypeCollection} import org.apache.spark.unsafe.types.UTF8String @@ -103,7 +105,7 @@ case class HllSketchAgg( override def prettyName: String = "hll_sketch_agg" override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(IntegerType, LongType, StringType, BinaryType), IntegerType) + Seq(TypeCollection(IntegerType, LongType, StringTypeAnyCollation, BinaryType), IntegerType) override def dataType: DataType = BinaryType @@ -137,7 +139,9 @@ case class HllSketchAgg( // TODO: implement support for decimal/datetime/interval types case IntegerType => sketch.update(v.asInstanceOf[Int]) case LongType => sketch.update(v.asInstanceOf[Long]) - case StringType => sketch.update(v.asInstanceOf[UTF8String].toString) + case st: StringType => + val cKey = CollationFactory.getCollationKey(v.asInstanceOf[UTF8String], st.collationId) + sketch.update(cKey.toString) case BinaryType => sketch.update(v.asInstanceOf[Array[Byte]]) case dataType => throw new SparkUnsupportedOperationException( errorClass = "_LEGACY_ERROR_TEMP_3121", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 9eecf81684cea..f1b192a3e21f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.Cast.{toSQLId, toSQLType} import 
org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.trees.TreePattern.{BINARY_ARITHMETIC, TreePattern, UNARY_POSITIVE} +import org.apache.spark.sql.catalyst.trees.TreePattern.{BINARY_ARITHMETIC, TreePattern} import org.apache.spark.sql.catalyst.types.{PhysicalDecimalType, PhysicalFractionalType, PhysicalIntegerType, PhysicalIntegralType, PhysicalLongType} import org.apache.spark.sql.catalyst.util.{IntervalMathUtils, IntervalUtils, MathUtils, TypeUtils} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} @@ -114,7 +114,7 @@ case class UnaryMinus( since = "1.5.0", group = "math_funcs") case class UnaryPositive(child: Expression) - extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { + extends RuntimeReplaceable with ImplicitCastInputTypes with NullIntolerant { override def prettyName: String = "positive" @@ -122,17 +122,15 @@ case class UnaryPositive(child: Expression) override def dataType: DataType = child.dataType - final override val nodePatterns: Seq[TreePattern] = Seq(UNARY_POSITIVE) - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = - defineCodeGen(ctx, ev, c => c) + override def sql: String = s"(+ ${child.sql})" - protected override def nullSafeEval(input: Any): Any = input + override lazy val replacement: Expression = child - override def sql: String = s"(+ ${child.sql})" + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): UnaryPositive = + copy(newChildren.head) - override protected def withNewChildInternal(newChild: Expression): UnaryPositive = - copy(child = newChild) + override def children: Seq[Expression] = child :: Nil } /** @@ -452,9 +450,8 @@ case class Add( copy(left = newLeft, right = newRight) override lazy val canonicalized: Expression = { - // TODO: do not reorder consecutive `Add`s with different `evalMode` val reorderResult = buildCanonicalizedPlan( - { case Add(l, r, _) => Seq(l, r) }, + { case Add(l, r, em) if em == evalMode => Seq(l, r) }, { case (l: Expression, r: Expression) => Add(l, r, evalMode)}, Some(evalMode) ) @@ -608,10 +605,9 @@ case class Multiply( newLeft: Expression, newRight: Expression): Multiply = copy(left = newLeft, right = newRight) override lazy val canonicalized: Expression = { - // TODO: do not reorder consecutive `Multiply`s with different `evalMode` buildCanonicalizedPlan( - { case Multiply(l, r, _) => Seq(l, r) }, - { case (l: Expression, r: Expression) => Multiply(l, r, evalMode)}, + { case Multiply(l, r, em) if em == evalMode => Seq(l, r) }, + { case (l: Expression, r: Expression) => Multiply(l, r, evalMode) }, Some(evalMode) ) } @@ -888,7 +884,7 @@ case class IntegralDivide( } @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns the remainder after `expr1`/`expr2`.", + usage = "expr1 % expr2, or mod(expr1, expr2) - Returns the remainder after `expr1`/`expr2`.", examples = """ Examples: > SELECT 2 % 1.8; @@ -908,6 +904,10 @@ case class Remainder( override def inputType: AbstractDataType = NumericType + // `try_remainder` has exactly the same behavior as the legacy divide, so here it only executes + // the error code path when `evalMode` is `ANSI`. 
+ protected override def failOnError: Boolean = evalMode == EvalMode.ANSI + override def symbol: String = "%" override def decimalMethod: String = "remainder" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index 89890ea086414..88085636a5ff1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -229,7 +229,7 @@ case class BitwiseCount(child: Expression) override def prettyName: String = "bit_count" override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = child.dataType match { - case BooleanType => defineCodeGen(ctx, ev, c => s"if ($c) 1 else 0") + case BooleanType => defineCodeGen(ctx, ev, c => s"($c) ? 1 : 0") case _ => defineCodeGen(ctx, ev, c => s"java.lang.Long.bitCount($c)") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 5aa766a60c106..a39c10866984e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -32,9 +32,8 @@ import org.codehaus.janino.util.ClassFile import org.apache.spark.{SparkException, SparkIllegalArgumentException, TaskContext, TaskKilledException} import org.apache.spark.executor.InputMetrics -import org.apache.spark.internal.Logging -import org.apache.spark.internal.LogKey._ -import org.apache.spark.internal.MDC +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.metrics.source.CodegenMetrics import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.HashableWeakReference @@ -1547,13 +1546,11 @@ object CodeGenerator extends Logging { updateAndGetCompilationStats(evaluator) } catch { case e: InternalCompilerException => - val msg = QueryExecutionErrors.failedToCompileMsg(e) - logError(msg, e) + logError("Failed to compile the generated Java code.", e) logGeneratedCode(code) throw QueryExecutionErrors.internalCompilerError(e) case e: CompileException => - val msg = QueryExecutionErrors.failedToCompileMsg(e) - logError(msg, e) + logError("Failed to compile the generated Java code.", e) logGeneratedCode(code) throw QueryExecutionErrors.compilerError(e) } @@ -1595,9 +1592,10 @@ object CodeGenerator extends Logging { CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(byteCodeSize) if (byteCodeSize > DEFAULT_JVM_HUGE_METHOD_LIMIT) { - logInfo("Generated method too long to be JIT compiled: " + - log"${MDC(CLASS_NAME, cf.getThisClassName)}.${MDC(METHOD_NAME, method.getName)} " + - log"is ${MDC(BYTECODE_SIZE, byteCodeSize)} bytes") + logInfo(log"Generated method too long to be JIT compiled: " + + log"${MDC(LogKeys.CLASS_NAME, cf.getThisClassName)}." 
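A minimal sketch of the error-handling split noted above (illustrative only; `Option` stands in for a nullable SQL result): legacy `%` and `try_remainder` yield NULL on a zero divisor, so only ANSI mode needs the error path, which is why `failOnError` is tied to `EvalMode.ANSI`.

    object RemainderModeSketch extends App {
      sealed trait EvalMode
      case object Legacy extends EvalMode
      case object Ansi extends EvalMode
      case object Try extends EvalMode

      def remainder(a: Long, b: Long, mode: EvalMode): Option[Long] =
        if (b == 0) {
          if (mode == Ansi) throw new ArithmeticException("Division by zero") // error path, ANSI only
          else None                                                           // SQL NULL
        } else {
          Some(a % b)
        }

      assert(remainder(7, 2, Legacy).contains(1L))
      assert(remainder(7, 0, Try).isEmpty)    // try_remainder: NULL instead of an error
      assert(remainder(7, 0, Legacy).isEmpty) // legacy remainder: NULL as well
    }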
+ + log"${MDC(LogKeys.METHOD_NAME, method.getName)} is " + + log"${MDC(LogKeys.BYTECODE_SIZE, byteCodeSize)} bytes") } byteCodeSize diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala index 6af00e193d94d..c528b523c5e7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collationExpressions.scala @@ -38,8 +38,8 @@ import org.apache.spark.sql.types._ Examples: > SET spark.sql.collation.enabled=true; spark.sql.collation.enabled true - > SELECT COLLATION('Spark SQL' _FUNC_ UTF8_BINARY_LCASE); - UTF8_BINARY_LCASE + > SELECT COLLATION('Spark SQL' _FUNC_ UTF8_LCASE); + UTF8_LCASE > SET spark.sql.collation.enabled=false; spark.sql.collation.enabled false """, @@ -57,14 +57,14 @@ object CollateExpressionBuilder extends ExpressionBuilder { expressions match { case Seq(e: Expression, collationExpr: Expression) => (collationExpr.dataType, collationExpr.foldable) match { - case (StringType, true) => + case (_: StringType, true) => val evalCollation = collationExpr.eval() if (evalCollation == null) { throw QueryCompilationErrors.unexpectedNullError("collation", collationExpr) } else { Collate(e, evalCollation.toString) } - case (StringType, false) => throw QueryCompilationErrors.nonFoldableArgumentError( + case (_: StringType, false) => throw QueryCompilationErrors.nonFoldableArgumentError( funcName, "collationName", StringType) case (_, _) => throw QueryCompilationErrors.unexpectedInputDataTypeError( funcName, 1, StringType, collationExpr) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 39bf6734eb27b..ea117f876550e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -96,9 +96,9 @@ trait BinaryArrayExpressionWithImplicitCast @ExpressionDescription( usage = """ _FUNC_(expr) - Returns the size of an array or a map. - The function returns null for null input if spark.sql.legacy.sizeOfNull is set to false or - spark.sql.ansi.enabled is set to true. Otherwise, the function returns -1 for null input. - With the default settings, the function returns -1 for null input. + This function returns -1 for null input only if spark.sql.ansi.enabled is false and + spark.sql.legacy.sizeOfNull is true. Otherwise, it returns null for null input. + With the default settings, the function returns null for null input. 
""", examples = """ Examples: @@ -713,6 +713,7 @@ case class MapConcat(children: Seq[Expression]) } } + override def stateful: Boolean = true override def nullable: Boolean = children.exists(_.nullable) private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, dataType.valueType) @@ -828,6 +829,8 @@ case class MapFromEntries(child: Expression) override def nullable: Boolean = child.nullable || nullEntries + override def stateful: Boolean = true + @transient override lazy val dataType: MapType = dataTypeDetails.get._1 override def checkInputDataTypes(): TypeCheckResult = dataTypeDetails match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala index 3eb6225b5426e..1bfa11d67af6f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.ArrayImplicits._ @@ -195,7 +196,7 @@ case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) private val defaultElementType: DataType = { if (useStringTypeWhenEmpty) { - StringType + SQLConf.get.defaultStringType } else { NullType } @@ -244,6 +245,8 @@ case class CreateMap(children: Seq[Expression], useStringTypeWhenEmpty: Boolean) private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, dataType.valueType) + override def stateful: Boolean = true + override def eval(input: InternalRow): Any = { var i = 0 while (i < keys.length) { @@ -319,6 +322,8 @@ case class MapFromArrays(left: Expression, right: Expression) valueContainsNull = right.dataType.asInstanceOf[ArrayType].containsNull) } + override def stateful: Boolean = true + private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, dataType.valueType) override def nullSafeEval(keyArray: Any, valueArray: Any): Any = { @@ -349,7 +354,7 @@ case class MapFromArrays(left: Expression, right: Expression) case object NamePlaceholder extends LeafExpression with Unevaluable { override lazy val resolved: Boolean = false override def nullable: Boolean = false - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def prettyName: String = "NamePlaceholder" override def toString: String = prettyName } @@ -373,7 +378,8 @@ object CreateStruct { // We should always use the last part of the column name (`c` in the above example) as the // alias name inside CreateNamedStruct. 
case (u: UnresolvedAttribute, _) => Seq(Literal(u.nameParts.last), u) - case (u @ UnresolvedExtractValue(_, e: Literal), _) if e.dataType == StringType => Seq(e, u) + case (u @ UnresolvedExtractValue(_, e: Literal), _) if e.dataType.isInstanceOf[StringType] => + Seq(e, u) case (a: Alias, _) => Seq(Literal(a.name), a) case (e: NamedExpression, _) if e.resolved => Seq(Literal(e.name), e) case (e: NamedExpression, _) => Seq(NamePlaceholder, e) @@ -464,7 +470,7 @@ case class CreateNamedStruct(children: Seq[Expression]) extends Expression with toSQLId(prettyName), Seq("2n (n > 0)"), children.length ) } else { - val invalidNames = nameExprs.filterNot(e => e.foldable && e.dataType == StringType) + val invalidNames = nameExprs.filterNot(e => e.foldable && e.dataType.isInstanceOf[StringType]) if (invalidNames.nonEmpty) { DataTypeMismatch( errorSubClass = "CREATE_NAMED_STRUCT_WITHOUT_FOLDABLE_STRING", @@ -566,15 +572,18 @@ case class StringToMap(text: Expression, pairDelim: Expression, keyValueDelim: E this(child, Literal(","), Literal(":")) } + override def stateful: Boolean = true + override def first: Expression = text override def second: Expression = pairDelim override def third: Expression = keyValueDelim - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation, StringTypeAnyCollation) - override def dataType: DataType = MapType(StringType, StringType) + override def dataType: DataType = MapType(first.dataType, first.dataType) - private lazy val mapBuilder = new ArrayBasedMapBuilder(StringType, StringType) + private lazy val mapBuilder = new ArrayBasedMapBuilder(first.dataType, first.dataType) override def nullSafeEval( inputString: Any, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index a801d0367080d..ff94322efdaa4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -51,12 +51,12 @@ object ExtractValue { resolver: Resolver): Expression = { (child.dataType, extraction) match { - case (StructType(fields), NonNullLiteral(v, StringType)) => + case (StructType(fields), NonNullLiteral(v, _: StringType)) => val fieldName = v.toString val ordinal = findField(fields, fieldName, resolver) GetStructField(child, ordinal, Some(fieldName)) - case (ArrayType(StructType(fields), containsNull), NonNullLiteral(v, StringType)) => + case (ArrayType(StructType(fields), containsNull), NonNullLiteral(v, _: StringType)) => val fieldName = v.toString val ordinal = findField(fields, fieldName, resolver) GetArrayStructFields(child, fields(ordinal).copy(name = fieldName), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala index 4714fc1ded9cd..cb10440c48328 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/csvExpressions.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.TypeUtils._ import 
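The repeated switch above from `dataType == StringType` and `case StringType` to `_: StringType` follows from collated string types being a family of instances rather than a single object. A toy model of the difference, with a hypothetical `StrType(collation)` standing in for Spark's collated `StringType`:

    object StringTypeMatchSketch extends App {
      sealed trait DType
      case class StrType(collation: String = "UTF8_BINARY") extends DType // stand-in, not Spark's class
      case object IntType extends DType

      val collated: DType = StrType("UTF8_LCASE")

      // Equality against the default instance misses collated strings...
      assert(collated != StrType())

      // ...while a type-based match accepts every collation.
      val isString = collated match {
        case _: StrType => true
        case _          => false
      }
      assert(isString)
    }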
org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -146,7 +147,7 @@ case class CsvToStructs( converter(parser.parse(csv)) } - override def inputTypes: Seq[AbstractDataType] = StringType :: Nil + override def inputTypes: Seq[AbstractDataType] = StringTypeAnyCollation :: Nil override def prettyName: String = "from_csv" @@ -177,7 +178,7 @@ case class SchemaOfCsv( child = child, options = ExprUtils.convertToMapData(options)) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = false @@ -300,7 +301,7 @@ case class StructsToCsv( (row: Any) => UTF8String.fromString(gen.writeToString(row.asInstanceOf[InternalRow])) } - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index a9155e8daf101..808ad54f8ecad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.SIMPLE_DATE_FORMAT import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType.DAY import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -104,7 +105,7 @@ trait TimestampFormatterHelper extends TimeZoneAwareExpression { since = "3.1.0") case class CurrentTimeZone() extends LeafExpression with Unevaluable { override def nullable: Boolean = false - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def prettyName: String = "current_timezone" final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) } @@ -134,7 +135,7 @@ case class CurrentTimeZone() extends LeafExpression with Unevaluable { since = "1.5.0") // scalastyle:on line.size.limit case class CurrentDate(timeZoneId: Option[String] = None) - extends LeafExpression with TimeZoneAwareExpression with Unevaluable { + extends LeafExpression with TimeZoneAwareExpression with FoldableUnevaluable { def this() = this(None) override def nullable: Boolean = false override def dataType: DataType = DateType @@ -169,7 +170,7 @@ object CurDateExpressionBuilder extends ExpressionBuilder { } } -abstract class CurrentTimestampLike() extends LeafExpression with Unevaluable { +abstract class CurrentTimestampLike() extends LeafExpression with FoldableUnevaluable { override def nullable: Boolean = false override def dataType: DataType = TimestampType final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) @@ -235,7 +236,7 @@ case class Now() extends CurrentTimestampLike { group = "datetime_funcs", since = "3.4.0") case 
class LocalTimestamp(timeZoneId: Option[String] = None) extends LeafExpression - with TimeZoneAwareExpression with Unevaluable { + with TimeZoneAwareExpression with FoldableUnevaluable { def this() = this(None) override def nullable: Boolean = false override def dataType: DataType = TimestampNTZType @@ -923,7 +924,7 @@ case class DayName(child: Expression) extends GetDateField { override val funcName = "getDayName" override def inputTypes: Seq[AbstractDataType] = Seq(DateType) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override protected def withNewChildInternal(newChild: Expression): DayName = copy(child = newChild) } @@ -951,9 +952,9 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti def this(left: Expression, right: Expression) = this(left, right, None) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType - override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringTypeAnyCollation) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -1261,7 +1262,8 @@ abstract class ToTimestamp override def forTimestampNTZ: Boolean = left.dataType == TimestampNTZType override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(StringType, DateType, TimestampType, TimestampNTZType), StringType) + Seq(TypeCollection(StringTypeAnyCollation, DateType, TimestampType, TimestampNTZType), + StringTypeAnyCollation) override def dataType: DataType = LongType override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true @@ -1283,7 +1285,7 @@ abstract class ToTimestamp daysToMicros(t.asInstanceOf[Int], zoneId) / downScaleFactor case TimestampType | TimestampNTZType => t.asInstanceOf[Long] / downScaleFactor - case StringType => + case _: StringType => val fmt = right.eval(input) if (fmt == null) { null @@ -1326,7 +1328,7 @@ abstract class ToTimestamp } left.dataType match { - case StringType => formatterOption.map { fmt => + case _: StringType => formatterOption.map { fmt => val df = classOf[TimestampFormatter].getName val formatterName = ctx.addReferenceObj("formatter", fmt, df) nullSafeCodeGen(ctx, ev, (datetimeStr, _) => @@ -1429,10 +1431,10 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[ this(unix, Literal(TimestampFormatter.defaultPattern())) } - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = true - override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringTypeAnyCollation) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) @@ -1540,7 +1542,7 @@ case class NextDay( def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringTypeAnyCollation) override def dataType: DataType = DateType override def nullable: Boolean = true @@ -1751,7 +1753,7 @@ sealed trait UTCTimestamp extends BinaryExpression with ImplicitCastInputTypes w val func: (Long, String) => Long val funcName: String - override def inputTypes: 
Seq[AbstractDataType] = Seq(TimestampType, StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringTypeAnyCollation) override def dataType: DataType = TimestampType override def nullSafeEval(time: Any, timezone: Any): Any = { @@ -2091,8 +2093,8 @@ case class ParseToDate( override def inputTypes: Seq[AbstractDataType] = { // Note: ideally this function should only take string input, but we allow more types here to // be backward compatible. - TypeCollection(StringType, DateType, TimestampType, TimestampNTZType) +: - format.map(_ => StringType).toSeq + TypeCollection(StringTypeAnyCollation, DateType, TimestampType, TimestampNTZType) +: + format.map(_ => StringTypeAnyCollation).toSeq } override protected def withNewChildrenInternal( @@ -2163,10 +2165,10 @@ case class ParseToTimestamp( override def inputTypes: Seq[AbstractDataType] = { // Note: ideally this function should only take string input, but we allow more types here to // be backward compatible. - val types = Seq(StringType, DateType, TimestampType, TimestampNTZType) + val types = Seq(StringTypeAnyCollation, DateType, TimestampType, TimestampNTZType) TypeCollection( (if (dataType.isInstanceOf[TimestampType]) types :+ NumericType else types): _* - ) +: format.map(_ => StringType).toSeq + ) +: format.map(_ => StringTypeAnyCollation).toSeq } override protected def withNewChildrenInternal( @@ -2296,7 +2298,7 @@ case class TruncDate(date: Expression, format: Expression) override def left: Expression = date override def right: Expression = format - override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(DateType, StringTypeAnyCollation) override def dataType: DataType = DateType override def prettyName: String = "trunc" override val instant = date @@ -2365,7 +2367,7 @@ case class TruncTimestamp( override def left: Expression = format override def right: Expression = timestamp - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, TimestampType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation, TimestampType) override def dataType: TimestampType = TimestampType override def prettyName: String = "date_trunc" override val instant = timestamp @@ -2666,7 +2668,7 @@ case class MakeTimestamp( // casted into decimal safely, we use DecimalType(16, 6) which is wider than DecimalType(10, 0). override def inputTypes: Seq[AbstractDataType] = Seq(IntegerType, IntegerType, IntegerType, IntegerType, IntegerType, DecimalType(16, 6)) ++ - timezone.map(_ => StringType) + timezone.map(_ => StringTypeAnyCollation) override def nullable: Boolean = if (failOnError) children.exists(_.nullable) else true override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = @@ -2938,7 +2940,7 @@ case class Extract(field: Expression, source: Expression, replacement: Expressio object Extract { def createExpr(funcName: String, field: Expression, source: Expression): Expression = { // both string and null literals are allowed. 
- if ((field.dataType == StringType || field.dataType == NullType) && field.foldable) { + if ((field.dataType.isInstanceOf[StringType] || field.dataType == NullType) && field.foldable) { val fieldStr = field.eval().asInstanceOf[UTF8String] if (fieldStr == null) { Literal(null, DoubleType) @@ -3113,7 +3115,8 @@ case class ConvertTimezone( override def second: Expression = targetTz override def third: Expression = sourceTs - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, TimestampNTZType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation, + StringTypeAnyCollation, TimestampNTZType) override def dataType: DataType = TimestampNTZType override def nullSafeEval(srcTz: Any, tgtTz: Any, micros: Any): Any = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index 436efa8924165..fa342f6415097 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -63,7 +63,7 @@ import org.apache.spark.util.ArrayImplicits._ case class Md5(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def inputTypes: Seq[DataType] = Seq(BinaryType) @@ -103,7 +103,7 @@ case class Md5(child: Expression) case class Sha2(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant with Serializable { - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = true override def inputTypes: Seq[DataType] = Seq(BinaryType, IntegerType) @@ -169,7 +169,7 @@ case class Sha2(left: Expression, right: Expression) case class Sha1(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def inputTypes: Seq[DataType] = Seq(BinaryType) @@ -271,6 +271,10 @@ abstract class HashExpression[E] extends Expression { dt.existsRecursively(_.isInstanceOf[MapType]) } + private def hasVariantType(dt: DataType): Boolean = { + dt.existsRecursively(_.isInstanceOf[VariantType]) + } + override def checkInputDataTypes(): TypeCheckResult = { if (children.length < 1) { throw QueryCompilationErrors.wrongNumArgsError( @@ -281,6 +285,10 @@ abstract class HashExpression[E] extends Expression { DataTypeMismatch( errorSubClass = "HASH_MAP_TYPE", messageParameters = Map("functionName" -> toSQLId(prettyName))) + } else if (children.exists(child => hasVariantType(child.dataType))) { + DataTypeMismatch( + errorSubClass = "HASH_VARIANT_TYPE", + messageParameters = Map("functionName" -> toSQLId(prettyName))) } else { TypeCheckResult.TypeCheckSuccess } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index 896f3e9774f37..80bcf156133ed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -920,6 +920,8 @@ 
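The new `hasVariantType` guard above is a plain recursive type walk; the same idea over a self-contained toy type algebra:

    object ContainsTypeSketch extends App {
      sealed trait DT
      case object IntT extends DT
      case object VariantT extends DT
      case class ArrayT(element: DT) extends DT
      case class StructT(fields: Seq[DT]) extends DT

      // Mirrors existsRecursively: true if the predicate holds anywhere in the nested type.
      def existsRecursively(dt: DT)(p: DT => Boolean): Boolean = p(dt) || (dt match {
        case ArrayT(e)       => existsRecursively(e)(p)
        case StructT(fields) => fields.exists(existsRecursively(_)(p))
        case _               => false
      })

      assert(existsRecursively(StructT(Seq(IntT, ArrayT(VariantT))))(_ == VariantT))
      assert(!existsRecursively(ArrayT(IntT))(_ == VariantT))
    }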
case class TransformKeys( override def dataType: MapType = MapType(function.dataType, valueType, valueContainsNull) + override def stateful: Boolean = true + override def checkInputDataTypes(): TypeCheckResult = { TypeUtils.checkForMapKeyType(function.dataType) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala index 6cd88367aa9a0..65eb995ff32ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/inputFileBlock.scala @@ -21,7 +21,8 @@ import org.apache.spark.rdd.InputFileBlockHolder import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral} import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.types.{DataType, LongType, StringType} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{DataType, LongType} import org.apache.spark.unsafe.types.UTF8String // scalastyle:off whitespace.end.of.line @@ -39,7 +40,7 @@ case class InputFileName() extends LeafExpression with Nondeterministic { override def nullable: Boolean = false - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def prettyName: String = "input_file_name" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index f35c6da4f8af9..7005d663a3f96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -31,11 +31,13 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, CodegenFallback, ExprCode} import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper +import org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils import org.apache.spark.sql.catalyst.json._ import org.apache.spark.sql.catalyst.trees.TreePattern.{JSON_TO_STRUCT, TreePattern} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{UTF8String, VariantVal} import org.apache.spark.util.Utils @@ -131,8 +133,9 @@ case class GetJsonObject(json: Expression, path: Expression) override def left: Expression = json override def right: Expression = path - override def inputTypes: Seq[DataType] = Seq(StringType, StringType) - override def dataType: DataType = StringType + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation) + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = true override def prettyName: String = "get_json_object" @@ -476,7 +479,7 @@ case class JsonTuple(children: Seq[Expression]) @transient private lazy val 
constantFields: Int = foldableFieldNames.count(_ != null) override def elementSchema: StructType = StructType(fieldExpressions.zipWithIndex.map { - case (_, idx) => StructField(s"c$idx", StringType, nullable = true) + case (_, idx) => StructField(s"c$idx", children.head.dataType, nullable = true) }) override def prettyName: String = "json_tuple" @@ -486,7 +489,7 @@ case class JsonTuple(children: Seq[Expression]) throw QueryCompilationErrors.wrongNumArgsError( toSQLId(prettyName), Seq("> 1"), children.length ) - } else if (children.forall(child => StringType.acceptsType(child.dataType))) { + } else if (children.forall(child => StringTypeAnyCollation.acceptsType(child.dataType))) { TypeCheckResult.TypeCheckSuccess } else { DataTypeMismatch( @@ -664,7 +667,7 @@ case class JsonToStructs( timeZoneId = None) override def checkInputDataTypes(): TypeCheckResult = nullableSchema match { - case _: StructType | _: ArrayType | _: MapType => + case _: StructType | _: ArrayType | _: MapType | _: VariantType => val checkResult = ExprUtils.checkJsonSchema(nullableSchema) if (checkResult.isFailure) checkResult else super.checkInputDataTypes() case _ => @@ -714,11 +717,14 @@ case class JsonToStructs( override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) - override def nullSafeEval(json: Any): Any = { - converter(parser.parse(json.asInstanceOf[UTF8String])) + override def nullSafeEval(json: Any): Any = nullableSchema match { + case _: VariantType => + VariantExpressionEvalUtils.parseJson(json.asInstanceOf[UTF8String]) + case _ => + converter(parser.parse(json.asInstanceOf[UTF8String])) } - override def inputTypes: Seq[AbstractDataType] = StringType :: Nil + override def inputTypes: Seq[AbstractDataType] = StringTypeAnyCollation :: Nil override def sql: String = schema match { case _: MapType => "entries" @@ -820,7 +826,7 @@ case class StructsToJson( } } - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def checkInputDataTypes(): TypeCheckResult = inputSchema match { case dt @ (_: StructType | _: MapType | _: ArrayType | _: VariantType) => @@ -869,7 +875,7 @@ case class SchemaOfJson( child = child, options = ExprUtils.convertToMapData(options)) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = false @@ -915,7 +921,8 @@ case class SchemaOfJson( .map(ArrayType(_, containsNull = at.containsNull)) .getOrElse(ArrayType(StructType(Nil), containsNull = at.containsNull)) case other: DataType => - jsonInferSchema.canonicalizeType(other, jsonOptions).getOrElse(StringType) + jsonInferSchema.canonicalizeType(other, jsonOptions).getOrElse( + SQLConf.get.defaultStringType) } } @@ -953,7 +960,7 @@ case class SchemaOfJson( case class LengthOfJsonArray(child: Expression) extends UnaryExpression with CodegenFallback with ExpectsInputTypes { - override def inputTypes: Seq[DataType] = Seq(StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) override def dataType: DataType = IntegerType override def nullable: Boolean = true override def prettyName: String = "json_array_length" @@ -1026,8 +1033,8 @@ case class LengthOfJsonArray(child: Expression) extends UnaryExpression case class JsonObjectKeys(child: Expression) extends UnaryExpression with CodegenFallback with ExpectsInputTypes { - override def inputTypes: Seq[DataType] = Seq(StringType) - override def dataType: 
DataType = ArrayType(StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) + override def dataType: DataType = ArrayType(SQLConf.get.defaultStringType) override def nullable: Boolean = true override def prettyName: String = "json_object_keys" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 0fad3eff2da52..4cffc7f0b53a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -42,6 +42,7 @@ import org.json4s.JsonAST._ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, ScalaReflection} import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.expressions.variant.VariantExpressionEvalUtils import org.apache.spark.sql.catalyst.trees.TreePattern import org.apache.spark.sql.catalyst.trees.TreePattern.{LITERAL, NULL_LITERAL, TRUE_OR_FALSE_LITERAL} import org.apache.spark.sql.catalyst.types._ @@ -204,6 +205,8 @@ object Literal { create(new GenericInternalRow( struct.fields.map(f => default(f.dataType).value)), struct) case udt: UserDefinedType[_] => Literal(default(udt.sqlType).value, udt) + case VariantType => + create(VariantExpressionEvalUtils.castToVariant(0, IntegerType), VariantType) case other => throw QueryExecutionErrors.noDefaultForDataTypeError(dataType) } @@ -549,6 +552,7 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { s"${Literal(kv._1, mapType.keyType).sql}, ${Literal(kv._2, mapType.valueType).sql}" } s"MAP(${keysAndValues.mkString(", ")})" + case (v: VariantVal, variantType: VariantType) => s"PARSE_JSON('${v.toJson(timeZoneId)}')" case _ => value.toString } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala index e5157685a9a6d..c11357352c79a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/maskExpressions.scala @@ -24,7 +24,9 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.plans.logical.{FunctionSignature, InputParameter} import org.apache.spark.sql.errors.QueryErrorsBase -import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation +import org.apache.spark.sql.types.{AbstractDataType, DataType} import org.apache.spark.unsafe.types.UTF8String // scalastyle:off line.size.limit @@ -79,12 +81,14 @@ import org.apache.spark.unsafe.types.UTF8String object MaskExpressionBuilder extends ExpressionBuilder { override def functionSignature: Option[FunctionSignature] = { val strArg = InputParameter("str") - val upperCharArg = InputParameter("upperChar", Some(Literal(Mask.MASKED_UPPERCASE))) - val lowerCharArg = InputParameter("lowerChar", Some(Literal(Mask.MASKED_LOWERCASE))) - val digitCharArg = InputParameter("digitChar", Some(Literal(Mask.MASKED_DIGIT))) - val otherCharArg = InputParameter( - "otherChar", - Some(Literal(Mask.MASKED_IGNORE, StringType))) + val upperCharArg = 
InputParameter("upperChar", + Some(Literal.create(Mask.MASKED_UPPERCASE, SQLConf.get.defaultStringType))) + val lowerCharArg = InputParameter("lowerChar", + Some(Literal.create(Mask.MASKED_LOWERCASE, SQLConf.get.defaultStringType))) + val digitCharArg = InputParameter("digitChar", + Some(Literal.create(Mask.MASKED_DIGIT, SQLConf.get.defaultStringType))) + val otherCharArg = InputParameter("otherChar", + Some(Literal.create(Mask.MASKED_IGNORE, SQLConf.get.defaultStringType))) val functionSignature: FunctionSignature = FunctionSignature(Seq( strArg, upperCharArg, lowerCharArg, digitCharArg, otherCharArg)) Some(functionSignature) @@ -109,33 +113,34 @@ case class Mask( def this(input: Expression) = this( input, - Literal(Mask.MASKED_UPPERCASE), - Literal(Mask.MASKED_LOWERCASE), - Literal(Mask.MASKED_DIGIT), - Literal(Mask.MASKED_IGNORE, StringType)) + Literal.create(Mask.MASKED_UPPERCASE, SQLConf.get.defaultStringType), + Literal.create(Mask.MASKED_LOWERCASE, SQLConf.get.defaultStringType), + Literal.create(Mask.MASKED_DIGIT, SQLConf.get.defaultStringType), + Literal.create(Mask.MASKED_IGNORE, input.dataType)) def this(input: Expression, upperChar: Expression) = this( input, upperChar, - Literal(Mask.MASKED_LOWERCASE), - Literal(Mask.MASKED_DIGIT), - Literal(Mask.MASKED_IGNORE, StringType)) + Literal.create(Mask.MASKED_LOWERCASE, SQLConf.get.defaultStringType), + Literal.create(Mask.MASKED_DIGIT, SQLConf.get.defaultStringType), + Literal.create(Mask.MASKED_IGNORE, input.dataType)) def this(input: Expression, upperChar: Expression, lowerChar: Expression) = this( input, upperChar, lowerChar, - Literal(Mask.MASKED_DIGIT), - Literal(Mask.MASKED_IGNORE, StringType)) + Literal.create(Mask.MASKED_DIGIT, SQLConf.get.defaultStringType), + Literal.create(Mask.MASKED_IGNORE, input.dataType)) def this( input: Expression, upperChar: Expression, lowerChar: Expression, digitChar: Expression) = - this(input, upperChar, lowerChar, digitChar, Literal(Mask.MASKED_IGNORE, StringType)) + this(input, upperChar, lowerChar, digitChar, + Literal.create(Mask.MASKED_IGNORE, input.dataType)) override def checkInputDataTypes(): TypeCheckResult = { @@ -187,7 +192,8 @@ case class Mask( * NumericType, IntegralType, FractionalType. */ override def inputTypes: Seq[AbstractDataType] = - Seq(StringType, StringType, StringType, StringType, StringType) + Seq(StringTypeAnyCollation, StringTypeAnyCollation, StringTypeAnyCollation, + StringTypeAnyCollation, StringTypeAnyCollation) override def nullable: Boolean = true @@ -276,7 +282,7 @@ case class Mask( * Returns the [[DataType]] of the result of evaluating this expression. It is invalid to query * the dataType of an unresolved expression (i.e., when `resolved` == false). */ - override def dataType: DataType = StringType + override def dataType: DataType = input.dataType /** * Returns a Seq of the children of this node. Children should not change. 
Immutability required diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index 0c09e9be12e94..00274a16b888b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.{lang => jl} +import java.util.HexFormat.fromHexDigit import java.util.Locale import org.apache.spark.QueryContext @@ -30,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.util.{MathUtils, NumberConverter, TypeUtils} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -450,8 +452,9 @@ case class Conv( override def first: Expression = numExpr override def second: Expression = fromBaseExpr override def third: Expression = toBaseExpr - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType, IntegerType) - override def dataType: DataType = StringType + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, IntegerType, IntegerType) + override def dataType: DataType = first.dataType override def nullable: Boolean = true override def nullSafeEval(num: Any, fromBase: Any, toBase: Any): Any = { @@ -1002,23 +1005,21 @@ case class Bin(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant with Serializable { override def inputTypes: Seq[DataType] = Seq(LongType) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType protected override def nullSafeEval(input: Any): Any = - UTF8String.fromString(jl.Long.toBinaryString(input.asInstanceOf[Long])) + UTF8String.toBinaryString(input.asInstanceOf[Long]) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (c) => - s"UTF8String.fromString(java.lang.Long.toBinaryString($c))") + defineCodeGen(ctx, ev, c => s"UTF8String.toBinaryString($c)") } override protected def withNewChildInternal(newChild: Expression): Bin = copy(child = newChild) } object Hex { - val hexDigits = Array[Char]( - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' - ).map(_.toByte) + private final val hexDigits = + Array[Byte]('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F') // lookup table to translate '0' -> 0 ... 
'F'/'f' -> 15 val unhexDigits = { @@ -1031,61 +1032,66 @@ object Hex { def hex(bytes: Array[Byte]): UTF8String = { val length = bytes.length - val value = new Array[Byte](length * 2) + if (length == 0) { + return UTF8String.EMPTY_UTF8 + } + val targetLength = length * 2L + if (targetLength > Int.MaxValue) { + throw QueryExecutionErrors.tooManyArrayElementsError(targetLength, Int.MaxValue) + } + val value = new Array[Byte](targetLength.toInt) var i = 0 while (i < length) { - value(i * 2) = Hex.hexDigits((bytes(i) & 0xF0) >> 4) - value(i * 2 + 1) = Hex.hexDigits(bytes(i) & 0x0F) + value(i * 2) = hexDigits((bytes(i) & 0xF0) >> 4) + value(i * 2 + 1) = hexDigits(bytes(i) & 0x0F) i += 1 } UTF8String.fromBytes(value) } def hex(num: Long): UTF8String = { - // Extract the hex digits of num into value[] from right to left - val value = new Array[Byte](16) + val zeros = jl.Long.numberOfLeadingZeros(num) + if (zeros == jl.Long.SIZE) return UTF8String.ZERO_UTF8 + val len = (jl.Long.SIZE - zeros + 3) / 4 var numBuf = num - var len = 0 - do { - len += 1 - value(value.length - len) = Hex.hexDigits((numBuf & 0xF).toInt) + val value = new Array[Byte](len) + var i = len - 1 + while (i >= 0) { + value(i) = hexDigits((numBuf & 0xF).toInt) numBuf >>>= 4 - } while (numBuf != 0) - UTF8String.fromBytes(java.util.Arrays.copyOfRange(value, value.length - len, value.length)) + i -= 1 + } + UTF8String.fromBytes(value) } def unhex(bytes: Array[Byte]): Array[Byte] = { - val out = new Array[Byte]((bytes.length + 1) >> 1) - var i = 0 - var oddShift = 0 - if ((bytes.length & 0x01) != 0) { - // padding with '0' - if (bytes(0) < 0) { - return null - } - val v = Hex.unhexDigits(bytes(0)) - if (v == -1) { - return null - } - out(0) = v - i += 1 - oddShift = 1 + val length = bytes.length + if (length == 0) { + return Array.emptyByteArray } - // two characters form the hex value. 
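The rewritten `hex(num)` above sizes its output from the count of leading zero bits (four bits per hex digit) and then fills digits right to left; the same arithmetic as a standalone sketch on plain strings:

    object HexNumSketch extends App {
      private val hexDigits = "0123456789ABCDEF".toCharArray

      def hex(num: Long): String = {
        val zeros = java.lang.Long.numberOfLeadingZeros(num)
        if (zeros == java.lang.Long.SIZE) return "0"    // all 64 bits are zero
        val len = (java.lang.Long.SIZE - zeros + 3) / 4 // ceil(significant bits / 4)
        val out = new Array[Char](len)
        var numBuf = num
        var i = len - 1
        while (i >= 0) {                                // lowest nibble goes rightmost
          out(i) = hexDigits((numBuf & 0xF).toInt)
          numBuf >>>= 4
          i -= 1
        }
        new String(out)
      }

      assert(hex(0L) == "0")
      assert(hex(255L) == "FF")
      assert(hex(-1L) == "FFFFFFFFFFFFFFFF")
      println(hex(291L)) // prints 123
    }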
- while (i < bytes.length) { - if (bytes(i) < 0 || bytes(i + 1) < 0) { - return null + if ((length & 0x1) != 0) { + // while length of bytes is odd, loop from the end to beginning w/o the head + val result = new Array[Byte](length / 2 + 1) + var i = result.length - 1 + while (i > 0) { + result(i) = ((fromHexDigit(bytes(i * 2 - 1)) << 4) | fromHexDigit(bytes(i * 2))).toByte + i -= 1 } - val first = Hex.unhexDigits(bytes(i)) - val second = Hex.unhexDigits(bytes(i + 1)) - if (first == -1 || second == -1) { - return null + // add it 'tailing' head + result(0) = fromHexDigit(bytes(0)).toByte + result + } else { + val result = new Array[Byte](length / 2) + var i = 0 + while (i < result.length) { + result(i) = ((fromHexDigit(bytes(2 * i)) << 4) | fromHexDigit(bytes(2 * i + 1))).toByte + i += 1 } - out(i / 2 + oddShift) = (((first << 4) | second) & 0xFF).toByte - i += 2 + result } - out } + + def unhex(str: String): Array[Byte] = unhex(str.getBytes()) } /** @@ -1108,21 +1114,24 @@ case class Hex(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(LongType, BinaryType, StringType)) + Seq(TypeCollection(LongType, BinaryType, StringTypeAnyCollation)) - override def dataType: DataType = StringType + override def dataType: DataType = child.dataType match { + case st: StringType => st + case _ => SQLConf.get.defaultStringType + } protected override def nullSafeEval(num: Any): Any = child.dataType match { case LongType => Hex.hex(num.asInstanceOf[Long]) case BinaryType => Hex.hex(num.asInstanceOf[Array[Byte]]) - case StringType => Hex.hex(num.asInstanceOf[UTF8String].getBytes) + case _: StringType => Hex.hex(num.asInstanceOf[UTF8String].getBytes) } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (c) => { val hex = Hex.getClass.getName.stripSuffix("$") s"${ev.value} = " + (child.dataType match { - case StringType => s"""$hex.hex($c.getBytes());""" + case _: StringType => s"""$hex.hex($c.getBytes());""" case _ => s"""$hex.hex($c);""" }) }) @@ -1149,47 +1158,32 @@ case class Unhex(child: Expression, failOnError: Boolean = false) def this(expr: Expression) = this(expr, false) - override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) override def nullable: Boolean = true override def dataType: DataType = BinaryType protected override def nullSafeEval(num: Any): Any = { - val result = Hex.unhex(num.asInstanceOf[UTF8String].getBytes) - if (failOnError && result == null) { - // The failOnError is set only from `ToBinary` function - hence we might safely set `hint` - // parameter to `try_to_binary`. 
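In the rewritten `Hex.unhex`, an odd-length input decodes its lone leading digit by itself and every remaining pair forms one byte; invalid digits make `java.util.HexFormat.fromHexDigit` throw (a subclass of `IllegalArgumentException`), which the `Unhex` expression maps to null unless `failOnError` is set. A standalone sketch of the same decoding:

    object UnhexSketch extends App {
      import java.util.HexFormat.fromHexDigit

      def unhex(s: String): Array[Byte] = {
        val b = s.getBytes
        if (b.isEmpty) return Array.emptyByteArray
        if ((b.length & 0x1) != 0) {
          val out = new Array[Byte](b.length / 2 + 1)
          out(0) = fromHexDigit(b(0)).toByte // lone leading digit, as if left-padded with '0'
          var i = 1
          while (i < out.length) {
            out(i) = ((fromHexDigit(b(i * 2 - 1)) << 4) | fromHexDigit(b(i * 2))).toByte
            i += 1
          }
          out
        } else {
          val out = new Array[Byte](b.length / 2)
          var i = 0
          while (i < out.length) {
            out(i) = ((fromHexDigit(b(2 * i)) << 4) | fromHexDigit(b(2 * i + 1))).toByte
            i += 1
          }
          out
        }
      }

      assert(unhex("414243").sameElements("ABC".getBytes))
      assert(unhex("F").sameElements(Array(15.toByte)))
      assert(scala.util.Try(unhex("GG")).isFailure) // invalid digit -> IllegalArgumentException subclass
    }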
- throw QueryExecutionErrors.invalidInputInConversionError( - BinaryType, - num.asInstanceOf[UTF8String], - UTF8String.fromString("HEX"), - "try_to_binary") + try { + Hex.unhex(num.asInstanceOf[UTF8String].getBytes) + } catch { + case _: IllegalArgumentException if !failOnError => null + case _: IllegalArgumentException => + throw QueryExecutionErrors.invalidInputInConversionError( + BinaryType, + num.asInstanceOf[UTF8String], + UTF8String.fromString("HEX"), + "try_to_binary") } - result } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, c => { - val hex = Hex.getClass.getName.stripSuffix("$") - val maybeFailOnErrorCode = if (failOnError) { - val binaryType = ctx.addReferenceObj("to", BinaryType, BinaryType.getClass.getName) - s""" - |if (${ev.value} == null) { - | throw QueryExecutionErrors.invalidInputInConversionError( - | $binaryType, - | $c, - | UTF8String.fromString("HEX"), - | "try_to_binary"); - |} - |""".stripMargin - } else { - s"${ev.isNull} = ${ev.value} == null;" - } - + val expr = ctx.addReferenceObj("this", this) + nullSafeCodeGen(ctx, ev, input => { s""" - ${ev.value} = $hex.unhex($c.getBytes()); - $maybeFailOnErrorCode - """ + ${ev.value} = (byte[]) $expr.nullSafeEval($input); + ${ev.isNull} = ${ev.value} == null; + """ }) } @@ -1256,6 +1250,41 @@ case class Pow(left: Expression, right: Expression) newLeft: Expression, newRight: Expression): Expression = copy(left = newLeft, right = newRight) } +sealed trait BitShiftOperation + extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { + + def symbol: String + def shiftInt: (Int, Int) => Int + def shiftLong: (Long, Int) => Long + + override def inputTypes: Seq[AbstractDataType] = + Seq(TypeCollection(IntegerType, LongType), IntegerType) + + override def dataType: DataType = left.dataType + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + defineCodeGen(ctx, ev, (left, right) => s"$left $symbol $right") + } + + override protected def nullSafeEval(input1: Any, input2: Any): Any = input1 match { + case l: jl.Long => shiftLong(l, input2.asInstanceOf[Int]) + case i: jl.Integer => shiftInt(i, input2.asInstanceOf[Int]) + } + + override def toString: String = { + getTagValue(FunctionRegistry.FUNC_ALIAS) match { + case Some(alias) if alias == symbol => s"($left $symbol $right)" + case _ => super.toString + } + } + + override def sql: String = { + getTagValue(FunctionRegistry.FUNC_ALIAS) match { + case Some(alias) if alias == symbol => s"(${left.sql} $symbol ${right.sql})" + case _ => super.sql + } + } +} /** * Bitwise left shift. @@ -1264,38 +1293,28 @@ case class Pow(left: Expression, right: Expression) * @param right number of bits to left shift. */ @ExpressionDescription( - usage = "_FUNC_(base, expr) - Bitwise left shift.", + usage = "base << exp - Bitwise left shift.", examples = """ Examples: - > SELECT _FUNC_(2, 1); + > SELECT shiftleft(2, 1); + 4 + > SELECT 2 << 1; 4 """, + note = """ + `<<` operator is added in Spark 4.0.0 as an alias for `shiftleft`. 
+ """, since = "1.5.0", group = "bitwise_funcs") -case class ShiftLeft(left: Expression, right: Expression) - extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { - - override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(IntegerType, LongType), IntegerType) - - override def dataType: DataType = left.dataType - - protected override def nullSafeEval(input1: Any, input2: Any): Any = { - input1 match { - case l: jl.Long => l << input2.asInstanceOf[jl.Integer] - case i: jl.Integer => i << input2.asInstanceOf[jl.Integer] - } - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (left, right) => s"$left << $right") - } - +case class ShiftLeft(left: Expression, right: Expression) extends BitShiftOperation { + override def symbol: String = "<<" + override def shiftInt: (Int, Int) => Int = (x: Int, y: Int) => x << y + override def shiftLong: (Long, Int) => Long = (x: Long, y: Int) => x << y + val shift: (Number, Int) => Any = (x: Number, y: Int) => x.longValue() << y override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): ShiftLeft = copy(left = newLeft, right = newRight) } - /** * Bitwise (signed) right shift. * @@ -1303,38 +1322,27 @@ case class ShiftLeft(left: Expression, right: Expression) * @param right number of bits to right shift. */ @ExpressionDescription( - usage = "_FUNC_(base, expr) - Bitwise (signed) right shift.", + usage = "base >> expr - Bitwise (signed) right shift.", examples = """ Examples: - > SELECT _FUNC_(4, 1); + > SELECT shiftright(4, 1); + 2 + > SELECT 4 >> 1; 2 """, + note = """ + `>>` operator is added in Spark 4.0.0 as an alias for `shiftright`. + """, since = "1.5.0", group = "bitwise_funcs") -case class ShiftRight(left: Expression, right: Expression) - extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { - - override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(IntegerType, LongType), IntegerType) - - override def dataType: DataType = left.dataType - - protected override def nullSafeEval(input1: Any, input2: Any): Any = { - input1 match { - case l: jl.Long => l >> input2.asInstanceOf[jl.Integer] - case i: jl.Integer => i >> input2.asInstanceOf[jl.Integer] - } - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (left, right) => s"$left >> $right") - } - +case class ShiftRight(left: Expression, right: Expression) extends BitShiftOperation { + override def symbol: String = ">>" + override def shiftInt: (Int, Int) => Int = (x: Int, y: Int) => x >> y + override def shiftLong: (Long, Int) => Long = (x: Long, y: Int) => x >> y override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): ShiftRight = copy(left = newLeft, right = newRight) } - /** * Bitwise unsigned right shift, for integer and long data type. * @@ -1342,33 +1350,23 @@ case class ShiftRight(left: Expression, right: Expression) * @param right the number of bits to right shift. */ @ExpressionDescription( - usage = "_FUNC_(base, expr) - Bitwise unsigned right shift.", + usage = "base >>> expr - Bitwise unsigned right shift.", examples = """ Examples: - > SELECT _FUNC_(4, 1); + > SELECT shiftrightunsigned(4, 1); 2 + > SELECT 4 >>> 1; + 2 + """, + note = """ + `>>>` operator is added in Spark 4.0.0 as an alias for `shiftrightunsigned`. 
""", since = "1.5.0", group = "bitwise_funcs") -case class ShiftRightUnsigned(left: Expression, right: Expression) - extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { - - override def inputTypes: Seq[AbstractDataType] = - Seq(TypeCollection(IntegerType, LongType), IntegerType) - - override def dataType: DataType = left.dataType - - protected override def nullSafeEval(input1: Any, input2: Any): Any = { - input1 match { - case l: jl.Long => l >>> input2.asInstanceOf[jl.Integer] - case i: jl.Integer => i >>> input2.asInstanceOf[jl.Integer] - } - } - - override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (left, right) => s"$left >>> $right") - } - +case class ShiftRightUnsigned(left: Expression, right: Expression) extends BitShiftOperation { + override def symbol: String = ">>>" + override def shiftInt: (Int, Int) => Int = (x: Int, y: Int) => x >>> y + override def shiftLong: (Long, Int) => Long = (x: Long, y: Int) => x >>> y override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): ShiftRightUnsigned = copy(left = newLeft, right = newRight) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index c7281e4e87378..e9fa362de14cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.util.{MapData, RandomUUIDGenerator} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.errors.QueryExecutionErrors.raiseError import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -84,7 +85,7 @@ case class RaiseError(errorClass: Expression, errorParms: Expression, dataType: override def foldable: Boolean = false override def nullable: Boolean = true override def inputTypes: Seq[AbstractDataType] = - Seq(StringType, MapType(StringType, StringType)) + Seq(StringTypeAnyCollation, MapType(StringType, StringType)) override def left: Expression = errorClass override def right: Expression = errorParms @@ -199,7 +200,7 @@ object AssertTrue { since = "1.6.0", group = "misc_funcs") case class CurrentDatabase() extends LeafExpression with Unevaluable { - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = false override def prettyName: String = "current_schema" final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) @@ -218,7 +219,7 @@ case class CurrentDatabase() extends LeafExpression with Unevaluable { since = "3.1.0", group = "misc_funcs") case class CurrentCatalog() extends LeafExpression with Unevaluable { - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = false override def prettyName: String = "current_catalog" final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) @@ -251,7 +252,7 @@ case class Uuid(randomSeed: Option[Long] = None) extends LeafExpression with Non override def nullable: Boolean = false - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType 
override def stateful: Boolean = true @@ -292,7 +293,7 @@ case class SparkVersion() extends LeafExpression with RuntimeReplaceable { override lazy val replacement: Expression = StaticInvoke( classOf[ExpressionImplUtils], - StringType, + SQLConf.get.defaultStringType, "getSparkVersion", returnNullable = false) } @@ -311,7 +312,7 @@ case class SparkVersion() extends LeafExpression with RuntimeReplaceable { case class TypeOf(child: Expression) extends UnaryExpression { override def nullable: Boolean = false override def foldable: Boolean = true - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def eval(input: InternalRow): Any = UTF8String.fromString(child.dataType.catalogString) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -334,7 +335,7 @@ case class TypeOf(child: Expression) extends UnaryExpression { // scalastyle:on line.size.limit case class CurrentUser() extends LeafExpression with Unevaluable { override def nullable: Boolean = false - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_user") final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) @@ -412,7 +413,8 @@ case class AesEncrypt( override def prettyName: String = "aes_encrypt" override def inputTypes: Seq[AbstractDataType] = - Seq(BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType) + Seq(BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, + BinaryType, BinaryType) override def children: Seq[Expression] = Seq(input, key, mode, padding, iv, aad) @@ -485,7 +487,7 @@ case class AesDecrypt( this(input, key, Literal("GCM")) override def inputTypes: Seq[AbstractDataType] = { - Seq(BinaryType, BinaryType, StringType, StringType, BinaryType) + Seq(BinaryType, BinaryType, StringTypeAnyCollation, StringTypeAnyCollation, BinaryType) } override def prettyName: String = "aes_decrypt" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 6bbeba4d2969e..3258a57bb1236 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -205,10 +205,18 @@ case class Alias(child: Expression, name: String)( "" } + /** + * This function is performance-sensitive, so we should avoid `MetadataBuilder` manipulation, + * because it performs heavy operations on maps + */ private def removeNonInheritableMetadata(metadata: Metadata): Metadata = { - val builder = new MetadataBuilder().withMetadata(metadata) - nonInheritableMetadataKeys.foreach(builder.remove) - builder.build() + if (metadata.isEmpty || nonInheritableMetadataKeys.forall(!metadata.contains(_))) { + metadata + } else { + val builder = new MetadataBuilder().withMetadata(metadata) + nonInheritableMetadataKeys.foreach(builder.remove) + builder.build() + } } override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix$delaySuffix" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala index 6d95d7e620a2e..e914190c06456 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala @@ -26,6 +26,8 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGe import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper import org.apache.spark.sql.catalyst.util.ToNumberParser import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType, DatetimeType, Decimal, DecimalType, StringType} import org.apache.spark.unsafe.types.UTF8String @@ -47,7 +49,8 @@ abstract class ToNumberBase(left: Expression, right: Expression, errorOnFail: Bo DecimalType.USER_DEFAULT } - override def inputTypes: Seq[DataType] = Seq(StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation) override def checkInputDataTypes(): TypeCheckResult = { val inputTypeCheck = super.checkInputDataTypes() @@ -247,8 +250,9 @@ object ToCharacterBuilder extends ExpressionBuilder { inputExpr.dataType match { case _: DatetimeType => DateFormatClass(inputExpr, format) case _: BinaryType => - if (!(format.dataType == StringType && format.foldable)) { - throw QueryCompilationErrors.nonFoldableArgumentError(funcName, "format", StringType) + if (!(format.dataType.isInstanceOf[StringType] && format.foldable)) { + throw QueryCompilationErrors.nonFoldableArgumentError(funcName, "format", + format.dataType) } val fmt = format.eval() if (fmt == null) { @@ -279,8 +283,8 @@ case class ToCharacter(left: Expression, right: Expression) } } - override def dataType: DataType = StringType - override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, StringType) + override def dataType: DataType = SQLConf.get.defaultStringType + override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, StringTypeAnyCollation) override def checkInputDataTypes(): TypeCheckResult = { val inputTypeCheck = super.checkInputDataTypes() if (inputTypeCheck.isSuccess) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 462facd180c4e..09d024feccfa6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -360,6 +360,15 @@ case class StaticInvoke( super.stringArgs.toSeq.dropRight(1).iterator } } + + override def toString: String = + s"static_invoke(${ + if (objectName.startsWith("org.apache.spark.")) { + cls.getSimpleName + } else { + objectName + } + }.$functionName(${arguments.mkString(", ")}))" } /** @@ -509,7 +518,8 @@ case class Invoke( ev.copy(code = code) } - override def toString: String = s"$targetObject.$functionName" + override def toString: String = + s"invoke($targetObject.$functionName(${arguments.mkString(", ")}))" override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Invoke = copy(targetObject = newChildren.head, arguments = newChildren.tail) @@ -1917,16 +1927,12 @@ case class AssertNotNull(child: Expression, walkedTypePath: Seq[String] = Nil) override def flatArguments: Iterator[Any] = Iterator(child) - private val 
errMsg = "Null value appeared in non-nullable field:" + - walkedTypePath.mkString("\n", "\n", "\n") + - "If the schema is inferred from a Scala tuple/case class, or a Java bean, " + - "please try to use scala.Option[_] or other nullable types " + - "(e.g. java.lang.Integer instead of int/scala.Int)." + private val errMsg = walkedTypePath.mkString("\n", "\n", "\n") override def eval(input: InternalRow): Any = { val result = child.eval(input) if (result == null) { - throw new NullPointerException(errMsg) + throw QueryExecutionErrors.notNullAssertViolation(errMsg) } result } @@ -1940,7 +1946,7 @@ case class AssertNotNull(child: Expression, walkedTypePath: Seq[String] = Nil) val code = childGen.code + code""" if (${childGen.isNull}) { - throw new NullPointerException($errMsgField); + throw QueryExecutionErrors.notNullAssertViolation($errMsgField); } """ ev.copy(code = code, isNull = FalseLiteral, value = childGen.value) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index b33de303b5d55..297c709c6d7d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -33,8 +33,9 @@ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.catalyst.trees.TreePattern.{LIKE_FAMLIY, REGEXP_EXTRACT_FAMILY, REGEXP_REPLACE, TreePattern} -import org.apache.spark.sql.catalyst.util.{GenericArrayData, StringUtils} +import org.apache.spark.sql.catalyst.util.{CollationSupport, GenericArrayData, StringUtils} import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.internal.types.{StringTypeAnyCollation, StringTypeBinaryLcase} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -44,7 +45,11 @@ abstract class StringRegexExpression extends BinaryExpression def escape(v: String): String def matches(regex: Pattern, str: String): Boolean - override def inputTypes: Seq[DataType] = Seq(StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeBinaryLcase, StringTypeAnyCollation) + + final lazy val collationId: Int = left.dataType.asInstanceOf[StringType].collationId + final lazy val collationRegexFlags: Int = CollationSupport.collationAwareRegexFlags(collationId) // try cache foldable pattern private lazy val cache: Pattern = right match { @@ -58,7 +63,7 @@ abstract class StringRegexExpression extends BinaryExpression } else { // Let it raise exception if couldn't compile the regex string try { - Pattern.compile(escape(str)) + Pattern.compile(escape(str), collationRegexFlags) } catch { case e: PatternSyntaxException => throw QueryExecutionErrors.invalidPatternError(prettyName, e.getPattern, e) @@ -158,7 +163,8 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) val regexStr = StringEscapeUtils.escapeJava(escape(rVal.asInstanceOf[UTF8String].toString())) val pattern = ctx.addMutableState(patternClass, "patternLike", - v => s"""$v = $patternClass.compile("$regexStr");""") + v => + s"""$v = $patternClass.compile("$regexStr", $collationRegexFlags);""".stripMargin) // We don't use nullSafeCodeGen here because we don't want to re-evaluate right again. 
val eval = left.genCode(ctx) @@ -186,7 +192,7 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) s""" String $rightStr = $eval2.toString(); $patternClass $pattern = $patternClass.compile( - $escapeFunc($rightStr, '$escapedEscapeChar')); + $escapeFunc($rightStr, '$escapedEscapeChar'), $collationRegexFlags); ${ev.value} = $pattern.matcher($eval1.toString()).matches(); """ }) @@ -258,7 +264,8 @@ case class ILike( def this(left: Expression, right: Expression) = this(left, right, '\\') - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeBinaryLcase, StringTypeAnyCollation) override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): Expression = { @@ -273,7 +280,9 @@ sealed abstract class MultiLikeBase protected def isNotSpecified: Boolean - override def inputTypes: Seq[DataType] = StringType :: Nil + override def inputTypes: Seq[AbstractDataType] = StringTypeBinaryLcase :: Nil + final lazy val collationId: Int = child.dataType.asInstanceOf[StringType].collationId + final lazy val collationRegexFlags: Int = CollationSupport.collationAwareRegexFlags(collationId) override def nullable: Boolean = true @@ -281,8 +290,8 @@ sealed abstract class MultiLikeBase protected lazy val hasNull: Boolean = patterns.contains(null) - protected lazy val cache = patterns.filterNot(_ == null) - .map(s => Pattern.compile(StringUtils.escapeLikeRegex(s.toString, '\\'))) + protected lazy val cache = patterns.filterNot(_ == null).map(s => + Pattern.compile(StringUtils.escapeLikeRegex(s.toString, '\\'), collationRegexFlags)) protected lazy val matchFunc = if (isNotSpecified) { (p: Pattern, inputValue: String) => !p.matcher(inputValue).matches() @@ -475,7 +484,7 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress val regexStr = StringEscapeUtils.escapeJava(rVal.asInstanceOf[UTF8String].toString()) val pattern = ctx.addMutableState(patternClass, "patternRLike", - v => s"""$v = $patternClass.compile("$regexStr");""") + v => s"""$v = $patternClass.compile("$regexStr", $collationRegexFlags);""".stripMargin) // We don't use nullSafeCodeGen here because we don't want to re-evaluate right again. 
val eval = left.genCode(ctx) @@ -499,7 +508,7 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress nullSafeCodeGen(ctx, ev, (eval1, eval2) => { s""" String $rightStr = $eval2.toString(); - $patternClass $pattern = $patternClass.compile($rightStr); + $patternClass $pattern = $patternClass.compile($rightStr, $collationRegexFlags); ${ev.value} = $pattern.matcher($eval1.toString()).find(0); """ }) @@ -543,17 +552,20 @@ case class RLike(left: Expression, right: Expression) extends StringRegexExpress case class StringSplit(str: Expression, regex: Expression, limit: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { - override def dataType: DataType = ArrayType(StringType, containsNull = false) - override def inputTypes: Seq[DataType] = Seq(StringType, StringType, IntegerType) + override def dataType: DataType = ArrayType(str.dataType, containsNull = false) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeBinaryLcase, StringTypeAnyCollation, IntegerType) override def first: Expression = str override def second: Expression = regex override def third: Expression = limit + final lazy val collationId: Int = str.dataType.asInstanceOf[StringType].collationId + def this(exp: Expression, regex: Expression) = this(exp, regex, Literal(-1)) override def nullSafeEval(string: Any, regex: Any, limit: Any): Any = { - val strings = string.asInstanceOf[UTF8String].split( - regex.asInstanceOf[UTF8String], limit.asInstanceOf[Int]) + val pattern = CollationSupport.collationAwareRegex(regex.asInstanceOf[UTF8String], collationId) + val strings = string.asInstanceOf[UTF8String].split(pattern, limit.asInstanceOf[Int]) new GenericArrayData(strings.asInstanceOf[Array[Any]]) } @@ -561,7 +573,8 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) val arrayClass = classOf[GenericArrayData].getName nullSafeCodeGen(ctx, ev, (str, regex, limit) => { // Array in java is covariant, so we don't need to cast UTF8String[] to Object[]. 
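For `split`, the pattern string itself is rewritten by `CollationSupport.collationAwareRegex` before `UTF8String.split` runs. A plausible reading (an assumption, since the helper's body is not part of this diff) is that it folds the collation into the pattern, for example via inline flags such as `(?ui)` for lowercase-equality collations; the stand-alone sketch below shows why that alone changes the split points.

// Illustrative only; not the CollationSupport implementation.
object SplitSketch {
  def main(args: Array[String]): Unit = {
    val input = "oneXtwoxthree"
    println(input.split("x", -1).mkString(" | "))       // oneXtwo | three
    println(input.split("(?ui)x", -1).mkString(" | "))  // one | two | three
  }
}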
- s"""${ev.value} = new $arrayClass($str.split($regex,$limit));""".stripMargin + s"""${ev.value} = new $arrayClass($str.split( + |CollationSupport.collationAwareRegex($regex, $collationId),$limit));""".stripMargin }) } @@ -658,7 +671,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio override def nullSafeEval(s: Any, p: Any, r: Any, i: Any): Any = { if (!p.equals(lastRegex)) { - val patternAndRegex = RegExpUtils.getPatternAndLastRegex(p, prettyName) + val patternAndRegex = RegExpUtils.getPatternAndLastRegex(p, prettyName, collationId) pattern = patternAndRegex._1 lastRegex = patternAndRegex._2 } @@ -683,9 +696,10 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio } } - override def dataType: DataType = StringType + override def dataType: DataType = subject.dataType override def inputTypes: Seq[AbstractDataType] = - Seq(StringType, StringType, StringType, IntegerType) + Seq(StringTypeBinaryLcase, StringTypeAnyCollation, StringTypeBinaryLcase, IntegerType) + final lazy val collationId: Int = subject.dataType.asInstanceOf[StringType].collationId override def prettyName: String = "regexp_replace" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -708,7 +722,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio nullSafeCodeGen(ctx, ev, (subject, regexp, rep, pos) => { s""" - ${RegExpUtils.initLastMatcherCode(ctx, subject, regexp, matcher, prettyName)} + ${RegExpUtils.initLastMatcherCode(ctx, subject, regexp, matcher, prettyName, collationId)} if (!$rep.equals($termLastReplacementInUTF8)) { // replacement string changed $termLastReplacementInUTF8 = $rep.clone(); @@ -771,15 +785,18 @@ abstract class RegExpExtractBase final override val nodePatterns: Seq[TreePattern] = Seq(REGEXP_EXTRACT_FAMILY) - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, IntegerType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeBinaryLcase, StringTypeAnyCollation, IntegerType) override def first: Expression = subject override def second: Expression = regexp override def third: Expression = idx + final lazy val collationId: Int = subject.dataType.asInstanceOf[StringType].collationId + protected def getLastMatcher(s: Any, p: Any): Matcher = { if (p != lastRegex) { // regex value changed - val patternAndRegex = RegExpUtils.getPatternAndLastRegex(p, prettyName) + val patternAndRegex = RegExpUtils.getPatternAndLastRegex(p, prettyName, collationId) pattern = patternAndRegex._1 lastRegex = patternAndRegex._2 } @@ -848,7 +865,7 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio } } - override def dataType: DataType = StringType + override def dataType: DataType = subject.dataType override def prettyName: String = "regexp_extract" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -863,7 +880,7 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio nullSafeCodeGen(ctx, ev, (subject, regexp, idx) => { s""" - ${RegExpUtils.initLastMatcherCode(ctx, subject, regexp, matcher, prettyName)} + ${RegExpUtils.initLastMatcherCode(ctx, subject, regexp, matcher, prettyName, collationId)} if ($matcher.find()) { java.util.regex.MatchResult $matchResult = $matcher.toMatchResult(); $classNameRegExpExtractBase.checkGroupIndex("$prettyName", $matchResult.groupCount(), $idx); @@ -947,7 +964,7 @@ case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expres 
new GenericArrayData(matchResults.toArray.asInstanceOf[Array[Any]]) } - override def dataType: DataType = ArrayType(StringType) + override def dataType: DataType = ArrayType(subject.dataType) override def prettyName: String = "regexp_extract_all" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -963,7 +980,8 @@ case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expres } nullSafeCodeGen(ctx, ev, (subject, regexp, idx) => { s""" - | ${RegExpUtils.initLastMatcherCode(ctx, subject, regexp, matcher, prettyName)} + | ${RegExpUtils.initLastMatcherCode(ctx, subject, regexp, matcher, prettyName, + collationId)} | java.util.ArrayList $matchResults = new java.util.ArrayList(); | while ($matcher.find()) { | java.util.regex.MatchResult $matchResult = $matcher.toMatchResult(); @@ -1020,7 +1038,8 @@ case class RegExpCount(left: Expression, right: Expression) override def children: Seq[Expression] = Seq(left, right) - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeBinaryLcase, StringTypeAnyCollation) override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): RegExpCount = @@ -1053,13 +1072,14 @@ case class RegExpSubStr(left: Expression, right: Expression) override lazy val replacement: Expression = new NullIf( RegExpExtract(subject = left, regexp = right, idx = Literal(0)), - Literal("")) + Literal.create("", left.dataType)) override def prettyName: String = "regexp_substr" override def children: Seq[Expression] = Seq(left, right) - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeBinaryLcase, StringTypeAnyCollation) override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): RegExpSubStr = @@ -1127,7 +1147,8 @@ case class RegExpInStr(subject: Expression, regexp: Expression, idx: Expression) s""" |try { | $setEvNotNull - | ${RegExpUtils.initLastMatcherCode(ctx, subject, regexp, matcher, prettyName)} + | ${RegExpUtils.initLastMatcherCode(ctx, subject, regexp, matcher, prettyName, + collationId)} | if ($matcher.find()) { | ${ev.value} = $matcher.toMatchResult().start() + 1; | } else { @@ -1151,17 +1172,19 @@ object RegExpUtils { subject: String, regexp: String, matcher: String, - prettyName: String): String = { + prettyName: String, + collationId: Int): String = { val classNamePattern = classOf[Pattern].getCanonicalName val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") val termPattern = ctx.addMutableState(classNamePattern, "pattern") + val collationRegexFlags = CollationSupport.collationAwareRegexFlags(collationId) s""" |if (!$regexp.equals($termLastRegex)) { | // regex value changed | try { | UTF8String r = $regexp.clone(); - | $termPattern = $classNamePattern.compile(r.toString()); + | $termPattern = $classNamePattern.compile(r.toString(), $collationRegexFlags); | $termLastRegex = r; | } catch (java.util.regex.PatternSyntaxException e) { | throw QueryExecutionErrors.invalidPatternError("$prettyName", e.getPattern(), e); @@ -1171,10 +1194,11 @@ object RegExpUtils { |""".stripMargin } - def getPatternAndLastRegex(p: Any, prettyName: String): (Pattern, UTF8String) = { + def getPatternAndLastRegex(p: Any, prettyName: String, collationId: Int): (Pattern, UTF8String) = + { val r = p.asInstanceOf[UTF8String].clone() val pattern = try { - Pattern.compile(r.toString) + Pattern.compile(r.toString, 
CollationSupport.collationAwareRegexFlags(collationId)) } catch { case e: PatternSyntaxException => throw QueryExecutionErrors.invalidPatternError(prettyName, e.getPattern, e) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index b3029302c03df..a0c796274f761 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.catalyst.expressions -import java.io.UnsupportedEncodingException +import java.nio.{ByteBuffer, CharBuffer} +import java.nio.charset.{CharacterCodingException, Charset, CodingErrorAction, IllegalCharsetNameException, UnsupportedCharsetException} import java.text.{BreakIterator, DecimalFormat, DecimalFormatSymbols} import java.util.{Base64 => JBase64} import java.util.{HashMap, Locale, Map => JMap} @@ -25,19 +26,20 @@ import java.util.{HashMap, Locale, Map => JMap} import scala.collection.mutable.ArrayBuffer import org.apache.spark.QueryContext +import org.apache.spark.network.util.JavaUtils import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{ExpressionBuilder, FunctionRegistry, TypeCheckResult} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.Cast._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke -import org.apache.spark.sql.catalyst.trees.BinaryLike +import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke} +import org.apache.spark.sql.catalyst.trees.{BinaryLike, UnaryLike} import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UPPER_OR_LOWER} -import org.apache.spark.sql.catalyst.util.{ArrayData, CollationSupport, GenericArrayData, TypeUtils} +import org.apache.spark.sql.catalyst.util.{ArrayData, CollationFactory, CollationSupport, GenericArrayData, TypeUtils} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.internal.types.{AbstractArrayType, StringTypeAnyCollation} +import org.apache.spark.sql.internal.types.{AbstractArrayType, StringTypeAnyCollation, StringTypeBinaryLcase} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.array.ByteArrayMethods @@ -453,14 +455,18 @@ trait String2StringExpression extends ImplicitCastInputTypes { case class Upper(child: Expression) extends UnaryExpression with String2StringExpression with NullIntolerant { - // scalastyle:off caselocale - override def convert(v: UTF8String): UTF8String = v.toUpperCase - // scalastyle:on caselocale + final lazy val collationId: Int = child.dataType.asInstanceOf[StringType].collationId + + // Flag to indicate whether to use ICU instead of JVM case mappings for UTF8_BINARY collation. 
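The `useICU` flag referenced in the comment above exists because JVM case mapping is locale sensitive and not always one-to-one, so even `UTF8_BINARY` may want to delegate to ICU for stable results. A small JDK-only sketch of the corner cases involved (illustrative, not Spark code):

import java.util.Locale

object CaseMappingSketch {
  def main(args: Array[String]): Unit = {
    println("ß".toUpperCase(Locale.ROOT))                   // SS: one character can map to two
    println("İ".toLowerCase(Locale.ROOT).length)            // 2: lowercases to 'i' plus a combining dot
    println("i".toUpperCase(Locale.forLanguageTag("tr")))   // İ: the mapping is locale sensitive
  }
}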
+ private final lazy val useICU = SQLConf.get.getConf(SQLConf.ICU_CASE_MAPPINGS_ENABLED) + + override def convert(v: UTF8String): UTF8String = + CollationSupport.Upper.exec(v, collationId, useICU) final override val nodePatterns: Seq[TreePattern] = Seq(UPPER_OR_LOWER) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, c => s"($c).toUpperCase()") + defineCodeGen(ctx, ev, c => CollationSupport.Upper.genCode(c, collationId, useICU)) } override protected def withNewChildInternal(newChild: Expression): Upper = copy(child = newChild) @@ -481,14 +487,18 @@ case class Upper(child: Expression) case class Lower(child: Expression) extends UnaryExpression with String2StringExpression with NullIntolerant { - // scalastyle:off caselocale - override def convert(v: UTF8String): UTF8String = v.toLowerCase - // scalastyle:on caselocale + final lazy val collationId: Int = child.dataType.asInstanceOf[StringType].collationId + + // Flag to indicate whether to use ICU instead of JVM case mappings for UTF8_BINARY collation. + private final lazy val useICU = SQLConf.get.getConf(SQLConf.ICU_CASE_MAPPINGS_ENABLED) + + override def convert(v: UTF8String): UTF8String = + CollationSupport.Lower.exec(v, collationId, useICU) final override val nodePatterns: Seq[TreePattern] = Seq(UPPER_OR_LOWER) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, c => s"($c).toLowerCase()") + defineCodeGen(ctx, ev, c => CollationSupport.Lower.genCode(c, collationId, useICU)) } override def prettyName: String = @@ -686,6 +696,188 @@ case class EndsWith(left: Expression, right: Expression) extends StringPredicate newLeft: Expression, newRight: Expression): EndsWith = copy(left = newLeft, right = newRight) } +/** + * A function that checks if a UTF8 string is valid. + */ +@ExpressionDescription( + usage = "_FUNC_(str) - Returns true if `str` is a valid UTF-8 string, otherwise returns false.", + arguments = """ + Arguments: + * str - a string expression + """, + examples = """ + Examples: + > SELECT _FUNC_('Spark'); + true + > SELECT _FUNC_(x'61'); + true + > SELECT _FUNC_(x'80'); + false + > SELECT _FUNC_(x'61C262'); + false + """, + since = "4.0.0", + group = "string_funcs") +case class IsValidUTF8(input: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes + with UnaryLike[Expression] with NullIntolerant { + + override lazy val replacement: Expression = Invoke(input, "isValid", BooleanType) + + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) + + override def nodeName: String = "is_valid_utf8" + + override def nullable: Boolean = true + + override def child: Expression = input + + override protected def withNewChildInternal(newChild: Expression): IsValidUTF8 = { + copy(input = newChild) + } + +} + +/** + * A function that converts an invalid UTF8 string to a valid UTF8 string by replacing invalid + * UTF-8 byte sequences with the Unicode replacement character (U+FFFD), according to the UNICODE + * standard rules (Section 3.9, Paragraph D86, Table 3-7). Valid strings remain unchanged. 
+ */ +// scalastyle:off +@ExpressionDescription( + usage = "_FUNC_(str) - Returns the original string if `str` is a valid UTF-8 string, " + + "otherwise returns a new string whose invalid UTF8 byte sequences are replaced using the " + + "UNICODE replacement character U+FFFD.", + arguments = """ + Arguments: + * str - a string expression + """, + examples = """ + Examples: + > SELECT _FUNC_('Spark'); + Spark + > SELECT _FUNC_(x'61'); + a + > SELECT _FUNC_(x'80'); + � + > SELECT _FUNC_(x'61C262'); + a�b + """, + since = "4.0.0", + group = "string_funcs") +// scalastyle:on +case class MakeValidUTF8(input: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes + with UnaryLike[Expression] with NullIntolerant { + + override lazy val replacement: Expression = Invoke(input, "makeValid", input.dataType) + + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) + + override def nodeName: String = "make_valid_utf8" + + override def nullable: Boolean = true + + override def child: Expression = input + + override protected def withNewChildInternal(newChild: Expression): MakeValidUTF8 = { + copy(input = newChild) + } + +} + +/** + * A function that validates a UTF8 string, throwing an exception if the string is invalid. + */ +// scalastyle:off +@ExpressionDescription( + usage = "_FUNC_(str) - Returns the original string if `str` is a valid UTF-8 string, " + + "otherwise throws an exception.", + arguments = """ + Arguments: + * str - a string expression + """, + examples = """ + Examples: + > SELECT _FUNC_('Spark'); + Spark + > SELECT _FUNC_(x'61'); + a + """, + since = "4.0.0", + group = "string_funcs") +// scalastyle:on +case class ValidateUTF8(input: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes + with UnaryLike[Expression] with NullIntolerant { + + override lazy val replacement: Expression = StaticInvoke( + classOf[ExpressionImplUtils], + input.dataType, + "validateUTF8String", + Seq(input), + inputTypes) + + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) + + override def nodeName: String = "validate_utf8" + + override def nullable: Boolean = true + + override def child: Expression = input + + override protected def withNewChildInternal(newChild: Expression): ValidateUTF8 = { + copy(input = newChild) + } + +} + +/** + * A function that tries to validate a UTF8 string, returning NULL if the string is invalid. 
+ */ +// scalastyle:off +@ExpressionDescription( + usage = "_FUNC_(str) - Returns the original string if `str` is a valid UTF-8 string, " + + "otherwise returns NULL.", + arguments = """ + Arguments: + * str - a string expression + """, + examples = """ + Examples: + > SELECT _FUNC_('Spark'); + Spark + > SELECT _FUNC_(x'61'); + a + > SELECT _FUNC_(x'80'); + NULL + > SELECT _FUNC_(x'61C262'); + NULL + """, + since = "4.0.0", + group = "string_funcs") +// scalastyle:on +case class TryValidateUTF8(input: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes + with UnaryLike[Expression] with NullIntolerant { + + override lazy val replacement: Expression = StaticInvoke( + classOf[ExpressionImplUtils], + input.dataType, + "tryValidateUTF8String", + Seq(input), + inputTypes) + + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) + + override def nodeName: String = "try_validate_utf8" + + override def nullable: Boolean = true + + override def child: Expression = input + + override protected def withNewChildInternal(newChild: Expression): TryValidateUTF8 = { + copy(input = newChild) + } + +} + /** * Replace all occurrences with string. */ @@ -710,23 +902,25 @@ case class EndsWith(left: Expression, right: Expression) extends StringPredicate case class StringReplace(srcExpr: Expression, searchExpr: Expression, replaceExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { + final lazy val collationId: Int = first.dataType.asInstanceOf[StringType].collationId + def this(srcExpr: Expression, searchExpr: Expression) = { this(srcExpr, searchExpr, Literal("")) } override def nullSafeEval(srcEval: Any, searchEval: Any, replaceEval: Any): Any = { - srcEval.asInstanceOf[UTF8String].replace( - searchEval.asInstanceOf[UTF8String], replaceEval.asInstanceOf[UTF8String]) + CollationSupport.StringReplace.exec(srcEval.asInstanceOf[UTF8String], + searchEval.asInstanceOf[UTF8String], replaceEval.asInstanceOf[UTF8String], collationId); } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, (src, search, replace) => { - s"""${ev.value} = $src.replace($search, $replace);""" - }) + defineCodeGen(ctx, ev, (src, search, replace) => + CollationSupport.StringReplace.genCode(src, search, replace, collationId)) } - override def dataType: DataType = StringType - override def inputTypes: Seq[DataType] = Seq(StringType, StringType, StringType) + override def dataType: DataType = srcExpr.dataType + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation, StringTypeAnyCollation) override def first: Expression = srcExpr override def second: Expression = searchExpr override def third: Expression = replaceExpr @@ -804,8 +998,9 @@ case class Overlay(input: Expression, replace: Expression, pos: Expression, len: override def dataType: DataType = input.dataType - override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType), - TypeCollection(StringType, BinaryType), IntegerType, IntegerType) + override def inputTypes: Seq[AbstractDataType] = Seq( + TypeCollection(StringTypeAnyCollation, BinaryType), + TypeCollection(StringTypeAnyCollation, BinaryType), IntegerType, IntegerType) override def checkInputDataTypes(): TypeCheckResult = { val inputTypeCheck = super.checkInputDataTypes() @@ -818,7 +1013,7 @@ case class Overlay(input: Expression, replace: Expression, pos: Expression, len: } private lazy val replaceFunc = input.dataType match { - case 
StringType => + case _: StringType => (inputEval: Any, replaceEval: Any, posEval: Int, lenEval: Int) => { Overlay.calculate( inputEval.asInstanceOf[UTF8String], @@ -856,9 +1051,14 @@ case class Overlay(input: Expression, replace: Expression, pos: Expression, len: object StringTranslate { - def buildDict(matchingString: UTF8String, replaceString: UTF8String) + def buildDict(matchingString: UTF8String, replaceString: UTF8String, collationId: Int) : JMap[String, String] = { - val matching = matchingString.toString() + val matching = if (CollationFactory.fetchCollation(collationId).supportsLowercaseEquality) { + matchingString.toString().toLowerCase() + } else { + matchingString.toString() + } + val replace = replaceString.toString() val dict = new HashMap[String, String]() var i = 0 @@ -909,13 +1109,16 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac @transient private var lastReplace: UTF8String = _ @transient private var dict: JMap[String, String] = _ + final lazy val collationId: Int = first.dataType.asInstanceOf[StringType].collationId + override def nullSafeEval(srcEval: Any, matchingEval: Any, replaceEval: Any): Any = { if (matchingEval != lastMatching || replaceEval != lastReplace) { lastMatching = matchingEval.asInstanceOf[UTF8String].clone() lastReplace = replaceEval.asInstanceOf[UTF8String].clone() - dict = StringTranslate.buildDict(lastMatching, lastReplace) + dict = StringTranslate.buildDict(lastMatching, lastReplace, collationId) } - srcEval.asInstanceOf[UTF8String].translate(dict) + + CollationSupport.StringTranslate.exec(srcEval.asInstanceOf[UTF8String], dict, collationId) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -936,15 +1139,17 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac $termLastMatching = $matching.clone(); $termLastReplace = $replace.clone(); $termDict = org.apache.spark.sql.catalyst.expressions.StringTranslate - .buildDict($termLastMatching, $termLastReplace); + .buildDict($termLastMatching, $termLastReplace, $collationId); } - ${ev.value} = $src.translate($termDict); + ${ev.value} = CollationSupport.StringTranslate. + exec($src, $termDict, $collationId); """ }) } - override def dataType: DataType = StringType - override def inputTypes: Seq[DataType] = Seq(StringType, StringType, StringType) + override def dataType: DataType = srcExpr.dataType + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation, StringTypeAnyCollation) override def first: Expression = srcExpr override def second: Expression = matchingExpr override def third: Expression = replaceExpr @@ -977,15 +1182,19 @@ case class StringTranslate(srcExpr: Expression, matchingExpr: Expression, replac case class FindInSet(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType) + final lazy val collationId: Int = left.dataType.asInstanceOf[StringType].collationId - override protected def nullSafeEval(word: Any, set: Any): Any = - set.asInstanceOf[UTF8String].findInSet(word.asInstanceOf[UTF8String]) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation) + + override protected def nullSafeEval(word: Any, set: Any): Any = { + CollationSupport.FindInSet. 
+ exec(word.asInstanceOf[UTF8String], set.asInstanceOf[UTF8String], collationId) + } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, (word, set) => - s"${ev.value} = $set.findInSet($word);" - ) + defineCodeGen(ctx, ev, (word, set) => CollationSupport.FindInSet. + genCode(word, set, collationId)) } override def dataType: DataType = IntegerType @@ -1003,8 +1212,10 @@ trait String2TrimExpression extends Expression with ImplicitCastInputTypes { protected def direction: String override def children: Seq[Expression] = srcStr +: trimStr.toSeq - override def dataType: DataType = StringType - override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringType) + override def dataType: DataType = srcStr.dataType + override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringTypeBinaryLcase) + + final lazy val collationId: Int = srcStr.dataType.asInstanceOf[StringType].collationId override def nullable: Boolean = children.exists(_.nullable) override def foldable: Boolean = children.forall(_.foldable) @@ -1023,13 +1234,19 @@ trait String2TrimExpression extends Expression with ImplicitCastInputTypes { } } - protected val trimMethod: String - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evals = children.map(_.genCode(ctx)) - val srcString = evals(0) + val srcString = evals.head if (evals.length == 1) { + val stringTrimCode: String = this match { + case _: StringTrim => + CollationSupport.StringTrim.genCode(srcString.value, collationId) + case _: StringTrimLeft => + CollationSupport.StringTrimLeft.genCode(srcString.value, collationId) + case _: StringTrimRight => + CollationSupport.StringTrimRight.genCode(srcString.value, collationId) + } ev.copy(code = code""" |${srcString.code} |boolean ${ev.isNull} = false; @@ -1037,10 +1254,18 @@ trait String2TrimExpression extends Expression with ImplicitCastInputTypes { |if (${srcString.isNull}) { | ${ev.isNull} = true; |} else { - | ${ev.value} = ${srcString.value}.$trimMethod(); + | ${ev.value} = $stringTrimCode; |}""".stripMargin) } else { val trimString = evals(1) + val stringTrimCode: String = this match { + case _: StringTrim => + CollationSupport.StringTrim.genCode(srcString.value, trimString.value, collationId) + case _: StringTrimLeft => + CollationSupport.StringTrimLeft.genCode(srcString.value, trimString.value, collationId) + case _: StringTrimRight => + CollationSupport.StringTrimRight.genCode(srcString.value, trimString.value, collationId) + } ev.copy(code = code""" |${srcString.code} |boolean ${ev.isNull} = false; @@ -1052,7 +1277,7 @@ trait String2TrimExpression extends Expression with ImplicitCastInputTypes { | if (${trimString.isNull}) { | ${ev.isNull} = true; | } else { - | ${ev.value} = ${srcString.value}.$trimMethod(${trimString.value}); + | ${ev.value} = $stringTrimCode; | } |}""".stripMargin) } @@ -1145,12 +1370,11 @@ case class StringTrim(srcStr: Expression, trimStr: Option[Expression] = None) override protected def direction: String = "BOTH" - override def doEval(srcString: UTF8String): UTF8String = srcString.trim() + override def doEval(srcString: UTF8String): UTF8String = + CollationSupport.StringTrim.exec(srcString, collationId) override def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = - srcString.trim(trimString) - - override val trimMethod: String = "trim" + CollationSupport.StringTrim.exec(srcString, trimString, collationId) override protected def withNewChildrenInternal(newChildren: 
IndexedSeq[Expression]): Expression = copy( @@ -1253,12 +1477,11 @@ case class StringTrimLeft(srcStr: Expression, trimStr: Option[Expression] = None override protected def direction: String = "LEADING" - override def doEval(srcString: UTF8String): UTF8String = srcString.trimLeft() + override def doEval(srcString: UTF8String): UTF8String = + CollationSupport.StringTrimLeft.exec(srcString, collationId) override def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = - srcString.trimLeft(trimString) - - override val trimMethod: String = "trimLeft" + CollationSupport.StringTrimLeft.exec(srcString, trimString, collationId) override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): StringTrimLeft = @@ -1314,12 +1537,11 @@ case class StringTrimRight(srcStr: Expression, trimStr: Option[Expression] = Non override protected def direction: String = "TRAILING" - override def doEval(srcString: UTF8String): UTF8String = srcString.trimRight() + override def doEval(srcString: UTF8String): UTF8String = + CollationSupport.StringTrimRight.exec(srcString, collationId) override def doEval(srcString: UTF8String, trimString: UTF8String): UTF8String = - srcString.trimRight(trimString) - - override val trimMethod: String = "trimRight" + CollationSupport.StringTrimRight.exec(srcString, trimString, collationId) override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): StringTrimRight = @@ -1349,20 +1571,24 @@ case class StringTrimRight(srcStr: Expression, trimStr: Option[Expression] = Non case class StringInstr(str: Expression, substr: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { + final lazy val collationId: Int = left.dataType.asInstanceOf[StringType].collationId + override def left: Expression = str override def right: Expression = substr override def dataType: DataType = IntegerType - override def inputTypes: Seq[DataType] = Seq(StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation) override def nullSafeEval(string: Any, sub: Any): Any = { - string.asInstanceOf[UTF8String].indexOf(sub.asInstanceOf[UTF8String], 0) + 1 + CollationSupport.StringInstr. 
+ exec(string.asInstanceOf[UTF8String], sub.asInstanceOf[UTF8String], collationId) + 1 } override def prettyName: String = "instr" override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (l, r) => - s"($l).indexOf($r, 0) + 1") + defineCodeGen(ctx, ev, (string, substring) => + CollationSupport.StringInstr.genCode(string, substring, collationId) + " + 1") } override protected def withNewChildrenInternal( @@ -1395,21 +1621,24 @@ case class StringInstr(str: Expression, substr: Expression) case class SubstringIndex(strExpr: Expression, delimExpr: Expression, countExpr: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { - override def dataType: DataType = StringType - override def inputTypes: Seq[DataType] = Seq(StringType, StringType, IntegerType) + final lazy val collationId: Int = first.dataType.asInstanceOf[StringType].collationId + + override def dataType: DataType = strExpr.dataType + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation, IntegerType) override def first: Expression = strExpr override def second: Expression = delimExpr override def third: Expression = countExpr override def prettyName: String = "substring_index" override def nullSafeEval(str: Any, delim: Any, count: Any): Any = { - str.asInstanceOf[UTF8String].subStringIndex( - delim.asInstanceOf[UTF8String], - count.asInstanceOf[Int]) + CollationSupport.SubstringIndex.exec(str.asInstanceOf[UTF8String], + delim.asInstanceOf[UTF8String], count.asInstanceOf[Int], collationId); } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (str, delim, count) => s"$str.subStringIndex($delim, $count)") + defineCodeGen(ctx, ev, (str, delim, count) => + CollationSupport.SubstringIndex.genCode(str, delim, Integer.parseInt(count, 10), collationId)) } override protected def withNewChildrenInternal( @@ -1446,12 +1675,15 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression) this(substr, str, Literal(1)) } + final lazy val collationId: Int = first.dataType.asInstanceOf[StringType].collationId + override def first: Expression = substr override def second: Expression = str override def third: Expression = start override def nullable: Boolean = substr.nullable || str.nullable override def dataType: DataType = IntegerType - override def inputTypes: Seq[DataType] = Seq(StringType, StringType, IntegerType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation, IntegerType) override def eval(input: InternalRow): Any = { val s = start.eval(input) @@ -1471,9 +1703,8 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression) if (sVal < 1) { 0 } else { - l.asInstanceOf[UTF8String].indexOf( - r.asInstanceOf[UTF8String], - s.asInstanceOf[Int] - 1) + 1 + CollationSupport.StringLocate.exec(l.asInstanceOf[UTF8String], + r.asInstanceOf[UTF8String], s.asInstanceOf[Int] - 1, collationId) + 1; } } } @@ -1494,8 +1725,8 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression) ${strGen.code} if (!${strGen.isNull}) { if (${startGen.value} > 0) { - ${ev.value} = ${strGen.value}.indexOf(${substrGen.value}, - ${startGen.value} - 1) + 1; + ${ev.value} = CollationSupport.StringLocate.exec(${strGen.value}, + ${substrGen.value}, ${startGen.value} - 1, $collationId) + 1; } } else { ${ev.isNull} = true; @@ -1577,7 +1808,8 @@ case class StringLPad(str: Expression, len: Expression, 
pad: Expression) override def third: Expression = pad override def dataType: DataType = str.dataType - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, IntegerType, StringTypeAnyCollation) override def nullSafeEval(string: Any, len: Any, pad: Any): Any = { string.asInstanceOf[UTF8String].lpad(len.asInstanceOf[Int], pad.asInstanceOf[UTF8String]) @@ -1656,7 +1888,8 @@ case class StringRPad(str: Expression, len: Expression, pad: Expression = Litera override def third: Expression = pad override def dataType: DataType = str.dataType - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, IntegerType, StringTypeAnyCollation) override def nullSafeEval(string: Any, len: Any, pad: Any): Any = { string.asInstanceOf[UTF8String].rpad(len.asInstanceOf[Int], pad.asInstanceOf[UTF8String]) @@ -1698,10 +1931,10 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC override def foldable: Boolean = children.forall(_.foldable) override def nullable: Boolean = children(0).nullable - override def dataType: DataType = StringType + override def dataType: DataType = children(0).dataType override def inputTypes: Seq[AbstractDataType] = - StringType :: List.fill(children.size - 1)(AnyDataType) + StringTypeAnyCollation :: List.fill(children.size - 1)(AnyDataType) override def checkInputDataTypes(): TypeCheckResult = { if (children.isEmpty) { @@ -1813,16 +2046,19 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { + final lazy val collationId: Int = child.dataType.asInstanceOf[StringType].collationId + + // Flag to indicate whether to use ICU instead of JVM case mappings for UTF8_BINARY collation. 
+ private final lazy val useICU = SQLConf.get.getConf(SQLConf.ICU_CASE_MAPPINGS_ENABLED) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) override def dataType: DataType = child.dataType override def nullSafeEval(string: Any): Any = { - // scalastyle:off caselocale - string.asInstanceOf[UTF8String].toLowerCase.toTitleCase - // scalastyle:on caselocale + CollationSupport.InitCap.exec(string.asInstanceOf[UTF8String], collationId, useICU) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, str => s"$str.toLowerCase().toTitleCase()") + defineCodeGen(ctx, ev, str => CollationSupport.InitCap.genCode(str, collationId, useICU)) } override protected def withNewChildInternal(newChild: Expression): InitCap = @@ -1878,7 +2114,7 @@ case class StringRepeat(str: Expression, times: Expression) case class StringSpace(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def inputTypes: Seq[DataType] = Seq(IntegerType) override def nullSafeEval(s: Any): Any = { @@ -1990,15 +2226,15 @@ case class Right(str: Expression, len: Expression) extends RuntimeReplaceable override lazy val replacement: Expression = If( IsNull(str), - Literal(null, StringType), + Literal(null, str.dataType), If( LessThanOrEqual(len, Literal(0)), - Literal(UTF8String.EMPTY_UTF8, StringType), + Literal(UTF8String.EMPTY_UTF8, str.dataType), new Substring(str, UnaryMinus(len)) ) ) - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, IntegerType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation, IntegerType) override def left: Expression = str override def right: Expression = len override protected def withNewChildrenInternal( @@ -2029,7 +2265,7 @@ case class Left(str: Expression, len: Expression) extends RuntimeReplaceable override lazy val replacement: Expression = Substring(str, Literal(1), len) override def inputTypes: Seq[AbstractDataType] = { - Seq(TypeCollection(StringType, BinaryType), IntegerType) + Seq(TypeCollection(StringTypeAnyCollation, BinaryType), IntegerType) } override def left: Expression = str @@ -2064,16 +2300,17 @@ case class Left(str: Expression, len: Expression) extends RuntimeReplaceable case class Length(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { override def dataType: DataType = IntegerType - override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType)) + override def inputTypes: Seq[AbstractDataType] = + Seq(TypeCollection(StringTypeAnyCollation, BinaryType)) protected override def nullSafeEval(value: Any): Any = child.dataType match { - case StringType => value.asInstanceOf[UTF8String].numChars + case _: StringType => value.asInstanceOf[UTF8String].numChars case BinaryType => value.asInstanceOf[Array[Byte]].length } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { child.dataType match { - case StringType => defineCodeGen(ctx, ev, c => s"($c).numChars()") + case _: StringType => defineCodeGen(ctx, ev, c => s"($c).numChars()") case BinaryType => defineCodeGen(ctx, ev, c => s"($c).length") } } @@ -2098,16 +2335,17 @@ case class Length(child: Expression) case class BitLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { override def dataType: DataType = IntegerType - override def inputTypes: 
Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType)) + override def inputTypes: Seq[AbstractDataType] = + Seq(TypeCollection(StringTypeAnyCollation, BinaryType)) protected override def nullSafeEval(value: Any): Any = child.dataType match { - case StringType => value.asInstanceOf[UTF8String].numBytes * 8 + case _: StringType => value.asInstanceOf[UTF8String].numBytes * 8 case BinaryType => value.asInstanceOf[Array[Byte]].length * 8 } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { child.dataType match { - case StringType => defineCodeGen(ctx, ev, c => s"($c).numBytes() * 8") + case _: StringType => defineCodeGen(ctx, ev, c => s"($c).numBytes() * 8") case BinaryType => defineCodeGen(ctx, ev, c => s"($c).length * 8") } } @@ -2136,16 +2374,17 @@ case class BitLength(child: Expression) case class OctetLength(child: Expression) extends UnaryExpression with ImplicitCastInputTypes with NullIntolerant { override def dataType: DataType = IntegerType - override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType)) + override def inputTypes: Seq[AbstractDataType] = + Seq(TypeCollection(StringTypeAnyCollation, BinaryType)) protected override def nullSafeEval(value: Any): Any = child.dataType match { - case StringType => value.asInstanceOf[UTF8String].numBytes + case _: StringType => value.asInstanceOf[UTF8String].numBytes case BinaryType => value.asInstanceOf[Array[Byte]].length } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { child.dataType match { - case StringType => defineCodeGen(ctx, ev, c => s"($c).numBytes()") + case _: StringType => defineCodeGen(ctx, ev, c => s"($c).numBytes()") case BinaryType => defineCodeGen(ctx, ev, c => s"($c).length") } } @@ -2196,8 +2435,8 @@ case class Levenshtein( } override def inputTypes: Seq[AbstractDataType] = threshold match { - case Some(_) => Seq(StringType, StringType, IntegerType) - case _ => Seq(StringType, StringType) + case Some(_) => Seq(StringTypeAnyCollation, StringTypeAnyCollation, IntegerType) + case _ => Seq(StringTypeAnyCollation, StringTypeAnyCollation) } override def children: Seq[Expression] = threshold match { @@ -2320,9 +2559,9 @@ case class Levenshtein( case class SoundEx(child: Expression) extends UnaryExpression with ExpectsInputTypes with NullIntolerant { - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType - override def inputTypes: Seq[DataType] = Seq(StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) override def nullSafeEval(input: Any): Any = input.asInstanceOf[UTF8String].soundex() @@ -2586,7 +2825,7 @@ object Decode { val input = params.head val other = params.tail val itr = other.iterator - var default: Expression = Literal.create(null, StringType) + var default: Expression = Literal.create(null, SQLConf.get.defaultStringType) val branches = ArrayBuffer.empty[(Expression, Expression)] while (itr.hasNext) { val search = itr.next() @@ -2605,7 +2844,7 @@ object Decode { // scalastyle:off line.size.limit @ExpressionDescription( usage = """ - _FUNC_(bin, charset) - Decodes the first argument using the second argument character set. + _FUNC_(bin, charset) - Decodes the first argument using the second argument character set. If either argument is null, the result will also be null. _FUNC_(expr, search, result [, search, result ] ... [, default]) - Compares expr to each search value in order. 
If expr is equal to a search value, _FUNC_ returns @@ -2615,7 +2854,7 @@ object Decode { arguments = """ Arguments: * bin - a binary expression to decode - * charset - one of the charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16' to decode `bin` into a STRING. It is case insensitive. + * charset - one of the charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', 'UTF-32' to decode `bin` into a STRING. It is case insensitive. """, examples = """ Examples: @@ -2630,7 +2869,10 @@ object Decode { > SELECT _FUNC_(null, 6, 'Spark', NULL, 'SQL', 4, 'rocks'); SQL """, - since = "3.2.0", + since = "1.5.0", + note = """ + _FUNC_(expr, search, result [, search, result ] ... [, default]) is supported since 3.2.0 + """, group = "string_funcs") // scalastyle:on line.size.limit case class Decode(params: Seq[Expression], replacement: Expression) @@ -2645,81 +2887,69 @@ case class Decode(params: Seq[Expression], replacement: Expression) } } -/** - * Decodes the first argument into a String using the provided character set. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = "_FUNC_(bin, charset) - Decodes the first argument using the second argument character set. If either argument is null, the result will also be null.", - examples = """ - Examples: - > SELECT _FUNC_(encode('abc', 'utf-8'), 'utf-8'); - abc - """, - arguments = """ - Arguments: - * bin - a binary expression to decode - * charset - one of the charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16' to decode `bin` into a STRING. It is case insensitive. - """, - since = "1.5.0", - group = "string_funcs") -// scalastyle:on line.size.limit -case class StringDecode(bin: Expression, charset: Expression, legacyCharsets: Boolean) - extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { +case class StringDecode( + bin: Expression, + charset: Expression, + legacyCharsets: Boolean, + legacyErrorAction: Boolean) + extends RuntimeReplaceable with ImplicitCastInputTypes { def this(bin: Expression, charset: Expression) = - this(bin, charset, SQLConf.get.legacyJavaCharsets) + this(bin, charset, SQLConf.get.legacyJavaCharsets, SQLConf.get.legacyCodingErrorAction) - override def left: Expression = bin - override def right: Expression = charset override def dataType: DataType = SQLConf.get.defaultStringType override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType, StringTypeAnyCollation) + override def prettyName: String = "decode" + override def toString: String = s"$prettyName($bin, $charset)" - private val supportedCharsets = Set( - "US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16BE", "UTF-16LE", "UTF-16") - - protected override def nullSafeEval(input1: Any, input2: Any): Any = { - val fromCharset = input2.asInstanceOf[UTF8String].toString - try { - if (legacyCharsets || supportedCharsets.contains(fromCharset.toUpperCase(Locale.ROOT))) { - UTF8String.fromString(new String(input1.asInstanceOf[Array[Byte]], fromCharset)) - } else throw new UnsupportedEncodingException - } catch { - case _: UnsupportedEncodingException => - throw QueryExecutionErrors.invalidCharsetError(prettyName, fromCharset) - } - } - - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, (bytes, charset) => { - val fromCharset = ctx.freshName("fromCharset") - val sc = JavaCode.global( - ctx.addReferenceObj("supportedCharsets", supportedCharsets), - supportedCharsets.getClass) - s""" - String $fromCharset = $charset.toString(); - 
try { - if ($legacyCharsets || $sc.contains($fromCharset.toUpperCase(java.util.Locale.ROOT))) { - ${ev.value} = UTF8String.fromString(new String($bytes, $fromCharset)); - } else { - throw new java.io.UnsupportedEncodingException(); - } - } catch (java.io.UnsupportedEncodingException e) { - throw QueryExecutionErrors.invalidCharsetError("$prettyName", $fromCharset); - } - """ - }) - } - - override protected def withNewChildrenInternal( - newLeft: Expression, newRight: Expression): StringDecode = - copy(bin = newLeft, charset = newRight) + override def replacement: Expression = StaticInvoke( + classOf[StringDecode], + SQLConf.get.defaultStringType, + "decode", + Seq(bin, charset, Literal(legacyCharsets), Literal(legacyErrorAction)), + Seq(BinaryType, StringTypeAnyCollation, BooleanType, BooleanType)) - override def prettyName: String = "decode" + override def children: Seq[Expression] = Seq(bin, charset) + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = + copy(bin = newChildren(0), charset = newChildren(1)) } object StringDecode { def apply(bin: Expression, charset: Expression): StringDecode = new StringDecode(bin, charset) + def decode( + input: Array[Byte], + charset: UTF8String, + legacyCharsets: Boolean, + legacyErrorAction: Boolean): UTF8String = { + val fromCharset = charset.toString + if (legacyCharsets || Encode.VALID_CHARSETS.contains(fromCharset.toUpperCase(Locale.ROOT))) { + val decoder = try { + val codingErrorAction = if (legacyErrorAction) { + CodingErrorAction.REPLACE + } else { + CodingErrorAction.REPORT + } + Charset.forName(fromCharset) + .newDecoder() + .onMalformedInput(codingErrorAction) + .onUnmappableCharacter(codingErrorAction) + } catch { + case _: IllegalCharsetNameException | + _: UnsupportedCharsetException | + _: IllegalArgumentException => + throw QueryExecutionErrors.invalidCharsetError("decode", fromCharset) + } + try { + val cb = decoder.decode(ByteBuffer.wrap(input)) + UTF8String.fromString(cb.toString) + } catch { + case _: CharacterCodingException => + throw QueryExecutionErrors.malformedCharacterCoding("decode", fromCharset) + } + } else { + throw QueryExecutionErrors.invalidCharsetError("decode", fromCharset) + } + } } /** @@ -2731,7 +2961,7 @@ object StringDecode { arguments = """ Arguments: * str - a string expression - * charset - one of the charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16' to encode `str` into a BINARY. It is case insensitive. + * charset - one of the charsets 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', 'UTF-32' to encode `str` into a BINARY. It is case insensitive. 
""", examples = """ Examples: @@ -2741,59 +2971,79 @@ object StringDecode { since = "1.5.0", group = "string_funcs") // scalastyle:on line.size.limit -case class Encode(str: Expression, charset: Expression, legacyCharsets: Boolean) - extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { +case class Encode( + str: Expression, + charset: Expression, + legacyCharsets: Boolean, + legacyErrorAction: Boolean) + extends RuntimeReplaceable with ImplicitCastInputTypes { def this(value: Expression, charset: Expression) = - this(value, charset, SQLConf.get.legacyJavaCharsets) + this(value, charset, SQLConf.get.legacyJavaCharsets, SQLConf.get.legacyCodingErrorAction) - override def left: Expression = str - override def right: Expression = charset override def dataType: DataType = BinaryType override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation, StringTypeAnyCollation) - private val supportedCharsets = Set( - "US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16BE", "UTF-16LE", "UTF-16") - - protected override def nullSafeEval(input1: Any, input2: Any): Any = { - val toCharset = input2.asInstanceOf[UTF8String].toString - try { - if (legacyCharsets || supportedCharsets.contains(toCharset.toUpperCase(Locale.ROOT))) { - input1.asInstanceOf[UTF8String].toString.getBytes(toCharset) - } else throw new UnsupportedEncodingException - } catch { - case _: UnsupportedEncodingException => - throw QueryExecutionErrors.invalidCharsetError(prettyName, toCharset) - } - } + override val replacement: Expression = StaticInvoke( + classOf[Encode], + BinaryType, + "encode", + Seq( + str, charset, Literal(legacyCharsets, BooleanType), Literal(legacyErrorAction, BooleanType)), + Seq(StringTypeAnyCollation, StringTypeAnyCollation, BooleanType, BooleanType)) - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - nullSafeCodeGen(ctx, ev, (string, charset) => { - val toCharset = ctx.freshName("toCharset") - val sc = JavaCode.global( - ctx.addReferenceObj("supportedCharsets", supportedCharsets), - supportedCharsets.getClass) - s""" - String $toCharset = $charset.toString(); - try { - if ($legacyCharsets || $sc.contains($toCharset.toUpperCase(java.util.Locale.ROOT))) { - ${ev.value} = $string.toString().getBytes($toCharset); - } else { - throw new java.io.UnsupportedEncodingException(); - } - } catch (java.io.UnsupportedEncodingException e) { - throw QueryExecutionErrors.invalidCharsetError("$prettyName", $toCharset); - }""" - }) - } + override def toString: String = s"$prettyName($str, $charset)" - override protected def withNewChildrenInternal( - newLeft: Expression, newRight: Expression): Encode = copy(str = newLeft, charset = newRight) + override def children: Seq[Expression] = Seq(str, charset) + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = + copy(str = newChildren.head, charset = newChildren(1)) } object Encode { def apply(value: Expression, charset: Expression): Encode = new Encode(value, charset) + + private[expressions] final lazy val VALID_CHARSETS = + Set("US-ASCII", "ISO-8859-1", "UTF-8", "UTF-16BE", "UTF-16LE", "UTF-16", "UTF-32") + + def encode( + input: UTF8String, + charset: UTF8String, + legacyCharsets: Boolean, + legacyErrorAction: Boolean): Array[Byte] = { + val toCharset = charset.toString + if (input.numBytes == 0 || "UTF-8".equalsIgnoreCase(toCharset)) { + return input.getBytes + } + if (legacyCharsets || VALID_CHARSETS.contains(toCharset.toUpperCase(Locale.ROOT))) { + val encoder = try { + val 
codingErrorAction = if (legacyErrorAction) { + CodingErrorAction.REPLACE + } else { + CodingErrorAction.REPORT + } + Charset.forName(toCharset) + .newEncoder() + .onMalformedInput(codingErrorAction) + .onUnmappableCharacter(codingErrorAction) + } catch { + case _: IllegalCharsetNameException | + _: UnsupportedCharsetException | + _: IllegalArgumentException => + throw QueryExecutionErrors.invalidCharsetError("encode", toCharset) + } + try { + val bb = encoder.encode(CharBuffer.wrap(input.toString)) + JavaUtils.bufferToArray(bb) + } catch { + case _: CharacterCodingException => + throw QueryExecutionErrors.malformedCharacterCoding("encode", toCharset) + } + } else { + throw QueryExecutionErrors.invalidCharsetError("encode", toCharset) + } + } } /** @@ -3173,13 +3423,14 @@ case class Sentences( case class StringSplitSQL( str: Expression, delimiter: Expression) extends BinaryExpression with NullIntolerant { - override def dataType: DataType = ArrayType(StringType, containsNull = false) + override def dataType: DataType = ArrayType(str.dataType, containsNull = false) + final lazy val collationId: Int = left.dataType.asInstanceOf[StringType].collationId override def left: Expression = str override def right: Expression = delimiter override def nullSafeEval(string: Any, delimiter: Any): Any = { - val strings = string.asInstanceOf[UTF8String].splitSQL( - delimiter.asInstanceOf[UTF8String], -1); + val strings = CollationSupport.StringSplitSQL.exec(string.asInstanceOf[UTF8String], + delimiter.asInstanceOf[UTF8String], collationId) new GenericArrayData(strings.asInstanceOf[Array[Any]]) } @@ -3187,7 +3438,8 @@ case class StringSplitSQL( val arrayClass = classOf[GenericArrayData].getName nullSafeCodeGen(ctx, ev, (str, delimiter) => { // Array in java is covariant, so we don't need to cast UTF8String[] to Object[]. 
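The new Encode.encode / StringDecode.decode helpers above switch between CodingErrorAction.REPLACE (the legacy behavior) and CodingErrorAction.REPORT. As a minimal, self-contained Scala sketch of that distinction (the sample bytes and helper name are illustrative, not Spark APIs): REPORT surfaces malformed input as a CharacterCodingException, while REPLACE silently substitutes U+FFFD.

import java.nio.ByteBuffer
import java.nio.charset.{Charset, CodingErrorAction}

// 0xFF can never appear in well-formed UTF-8 input.
val malformed: Array[Byte] = Array(0x61.toByte, 0xFF.toByte, 0x62.toByte)

def decodeUtf8(action: CodingErrorAction): String = {
  val decoder = Charset.forName("UTF-8")
    .newDecoder()
    .onMalformedInput(action)
    .onUnmappableCharacter(action)
  // decode() throws a CharacterCodingException when the action is REPORT.
  decoder.decode(ByteBuffer.wrap(malformed)).toString
}

decodeUtf8(CodingErrorAction.REPLACE) // "a\uFFFDb" -- what legacyCodingErrorAction preserves
// decodeUtf8(CodingErrorAction.REPORT) would throw MalformedInputException instead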
- s"${ev.value} = new $arrayClass($str.splitSQL($delimiter,-1));" + s"${ev.value} = new $arrayClass(" + + s"${CollationSupport.StringSplitSQL.genCode(str, delimiter, collationId)});" }) } @@ -3225,10 +3477,11 @@ case class SplitPart ( partNum: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes { override lazy val replacement: Expression = - ElementAt(StringSplitSQL(str, delimiter), partNum, Some(Literal.create("", StringType)), + ElementAt(StringSplitSQL(str, delimiter), partNum, Some(Literal.create("", str.dataType)), false) override def nodeName: String = "split_part" - override def inputTypes: Seq[DataType] = Seq(StringType, StringType, IntegerType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation, IntegerType) def children: Seq[Expression] = Seq(str, delimiter, partNum) protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = { copy(str = newChildren.apply(0), delimiter = newChildren.apply(1), @@ -3290,7 +3543,7 @@ case class Luhncheck(input: Expression) extends RuntimeReplaceable with Implicit Seq(input), inputTypes) - override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) override def prettyName: String = "luhn_check" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala index b8d00074bfba3..75ca4930cf8c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala @@ -20,8 +20,9 @@ package org.apache.spark.sql.catalyst.expressions import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.plans.logical.{Filter, HintInfo, LogicalPlan} +import org.apache.spark.sql.catalyst.optimizer.DecorrelateInnerQuery +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf @@ -249,6 +250,79 @@ object SubExprUtils extends PredicateHelper { } } } + + /** + * Returns the inner query attributes that are guaranteed to have a single value for each + * outer row. Therefore, a scalar subquery is allowed to group-by on these attributes. + * We can derive these from correlated equality predicates, though we need to take care about + * propagating this through operators like OUTER JOIN or UNION. + * + * Positive examples: + * - x = outer(a) AND y = outer(b) + * - x = 1 + * - x = outer(a) + 1 + * + * Negative examples: + * - x <= outer(a) + * - x + y = outer(a) + * - x = outer(a) OR y = outer(b) + * - y + outer(b) = 1 (this and similar expressions could be supported, but very carefully) + * - An equality under the right side of a LEFT OUTER JOIN, e.g. + * select *, (select count(*) from y left join + * (select * from z where z1 = x1) sub on y2 = z2 group by z1) from x; + * - An equality under UNION e.g. 
+ * select *, (select count(*) from + * (select * from y where y1 = x1 union all select * from y) group by y1) from x; + */ + def getCorrelatedEquivalentInnerColumns(plan: LogicalPlan): AttributeSet = { + plan match { + case Filter(cond, child) => + val correlated = AttributeSet(splitConjunctivePredicates(cond) + .filter( + SQLConf.get.getConf(SQLConf.SCALAR_SUBQUERY_ALLOW_GROUP_BY_COLUMN_EQUAL_TO_CONSTANT) + || containsOuter(_)) + .filter(DecorrelateInnerQuery.canPullUpOverAgg) + .flatMap(_.references)) + correlated ++ getCorrelatedEquivalentInnerColumns(child) + + case Join(left, right, joinType, _, _) => + joinType match { + case _: InnerLike => + AttributeSet(plan.children.flatMap(child => getCorrelatedEquivalentInnerColumns(child))) + case LeftOuter => getCorrelatedEquivalentInnerColumns(left) + case RightOuter => getCorrelatedEquivalentInnerColumns(right) + case FullOuter => AttributeSet.empty + case LeftSemi => getCorrelatedEquivalentInnerColumns(left) + case LeftAnti => getCorrelatedEquivalentInnerColumns(left) + case _ => AttributeSet.empty + } + + case _: Union => AttributeSet.empty + case Except(left, right, _) => getCorrelatedEquivalentInnerColumns(left) + + case + _: Aggregate | + _: Distinct | + _: Intersect | + _: GlobalLimit | + _: LocalLimit | + _: Offset | + _: Project | + _: Repartition | + _: RepartitionByExpression | + _: RebalancePartitions | + _: Sample | + _: Sort | + _: Window | + _: Tail | + _: WithCTE | + _: Range | + _: SubqueryAlias => + AttributeSet(plan.children.flatMap(child => getCorrelatedEquivalentInnerColumns(child))) + + case _ => AttributeSet.empty + } + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala new file mode 100644 index 0000000000000..ca53058230fb8 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala @@ -0,0 +1,184 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.util.ArrayBasedMapData +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.types.{MapType, NullType, StringType} +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.Utils + +/** + * Converts a binary column of Avro format into its corresponding Catalyst value. + * This is a thin wrapper over the [[AvroDataToCatalyst]] class to create a SQL function. + * + * @param child the Catalyst binary input column. + * @param jsonFormatSchema the Avro schema in JSON string format. 
+ * @param options the options to use when performing the conversion. + * + * @since 4.0.0 + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(child, jsonFormatSchema, options) - Converts a binary Avro value into a Catalyst value. + """, + examples = """ + Examples: + > SELECT _FUNC_(s, '{"type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }]}', map()) IS NULL AS result FROM (SELECT NAMED_STRUCT('u', NAMED_STRUCT('member0', member0, 'member1', member1)) AS s FROM VALUES (1, NULL), (NULL, 'a') tab(member0, member1)); + [false] + """, + note = """ + The specified schema must match actual schema of the read data, otherwise the behavior + is undefined: it may fail or return arbitrary result. + To deserialize the data with a compatible and evolved schema, the expected Avro schema can be + set via the corresponding option. + """, + group = "misc_funcs", + since = "4.0.0" +) +// scalastyle:on line.size.limit +case class FromAvro(child: Expression, jsonFormatSchema: Expression, options: Expression) + extends TernaryExpression with RuntimeReplaceable { + override def first: Expression = child + override def second: Expression = jsonFormatSchema + override def third: Expression = options + + override def withNewChildrenInternal( + newFirst: Expression, newSecond: Expression, newThird: Expression): Expression = { + copy(child = newFirst, jsonFormatSchema = newSecond, options = newThird) + } + + override def checkInputDataTypes(): TypeCheckResult = { + val schemaCheck = jsonFormatSchema.dataType match { + case _: StringType | + _: NullType + if jsonFormatSchema.foldable => + None + case _ => + Some(TypeCheckResult.TypeCheckFailure( + "The second argument of the FROM_AVRO SQL function must be a constant string " + + "containing the JSON representation of the schema to use for converting the value " + + "from AVRO format")) + } + val optionsCheck = options.dataType match { + case MapType(StringType, StringType, _) | + MapType(NullType, NullType, _) | + _: NullType + if options.foldable => + None + case _ => + Some(TypeCheckResult.TypeCheckFailure( + "The third argument of the FROM_AVRO SQL function must be a constant map of strings to " + + "strings containing the options to use for converting the value from AVRO format")) + } + schemaCheck.getOrElse( + optionsCheck.getOrElse( + TypeCheckResult.TypeCheckSuccess)) + } + + override def replacement: Expression = { + val schemaValue: String = jsonFormatSchema.eval() match { + case s: UTF8String => + s.toString + case null => + "" + } + val optionsValue: Map[String, String] = options.eval() match { + case a: ArrayBasedMapData if a.keyArray.array.nonEmpty => + val keys: Array[String] = a.keyArray.array.map(_.toString) + val values: Array[String] = a.valueArray.array.map(_.toString) + keys.zip(values).toMap + case _ => + Map.empty + } + val constructor = try { + Utils.classForName("org.apache.spark.sql.avro.AvroDataToCatalyst").getConstructors().head + } catch { + case _: java.lang.ClassNotFoundException => + throw QueryCompilationErrors.avroNotLoadedSqlFunctionsUnusable(functionName = "FROM_AVRO") + } + val expr = constructor.newInstance(child, schemaValue, optionsValue) + expr.asInstanceOf[Expression] + } +} + +/** + * Converts a Catalyst binary input value into its corresponding Avro format result. + * This is a thin wrapper over the [[CatalystDataToAvro]] class to create a SQL function. + * + * @param child the Catalyst binary input column.
+ * @param jsonFormatSchema the Avro schema in JSON string format. + * + * @since 4.0.0 + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = """ + _FUNC_(child, jsonFormatSchema) - Converts a Catalyst binary input value into its corresponding + Avro format result. + """, + examples = """ + Examples: + > SELECT _FUNC_(s, '{"type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }]}', MAP()) IS NULL FROM (SELECT NULL AS s); + [true] + """, + group = "misc_funcs", + since = "4.0.0" +) +// scalastyle:on line.size.limit +case class ToAvro(child: Expression, jsonFormatSchema: Expression) + extends BinaryExpression with RuntimeReplaceable { + override def left: Expression = child + + override def right: Expression = jsonFormatSchema + + override def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Expression = { + copy(child = newLeft, jsonFormatSchema = newRight) + } + + override def checkInputDataTypes(): TypeCheckResult = { + jsonFormatSchema.dataType match { + case _: StringType if jsonFormatSchema.foldable => + TypeCheckResult.TypeCheckSuccess + case _ => + TypeCheckResult.TypeCheckFailure( + "The second argument of the TO_AVRO SQL function must be a constant string " + + "containing the JSON representation of the schema to use for converting the value " + + "to AVRO format") + } + } + + override def replacement: Expression = { + val schemaValue: Option[String] = jsonFormatSchema.eval() match { + case null => + None + case s: UTF8String => + Some(s.toString) + } + val constructor = try { + Utils.classForName("org.apache.spark.sql.avro.CatalystDataToAvro").getConstructors().head + } catch { + case _: java.lang.ClassNotFoundException => + throw QueryCompilationErrors.avroNotLoadedSqlFunctionsUnusable(functionName = "TO_AVRO") + } + val expr = constructor.newInstance(child, schemaValue) + expr.asInstanceOf[Expression] + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala index 47b37a5edeba8..ef8f2ea96eb0b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala @@ -28,7 +28,8 @@ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.trees.UnaryLike import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.{AbstractDataType, DataType, StringType} +import org.apache.spark.sql.internal.types.StringTypeAnyCollation +import org.apache.spark.sql.types.{AbstractDataType, DataType} import org.apache.spark.unsafe.types.UTF8String // scalastyle:off line.size.limit @@ -54,16 +55,16 @@ case class UrlEncode(child: Expression) override def replacement: Expression = StaticInvoke( UrlCodec.getClass, - StringType, + SQLConf.get.defaultStringType, "encode", Seq(child, Literal("UTF-8")), - Seq(StringType, StringType)) + Seq(StringTypeAnyCollation, StringTypeAnyCollation)) override protected def withNewChildInternal(newChild: Expression): Expression = { copy(child = newChild) } - override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) override def prettyName: String = "url_encode" } @@ -91,16 +92,16 @@ case class 
UrlDecode(child: Expression) override def replacement: Expression = StaticInvoke( UrlCodec.getClass, - StringType, + SQLConf.get.defaultStringType, "decode", Seq(child, Literal("UTF-8")), - Seq(StringType, StringType)) + Seq(StringTypeAnyCollation, StringTypeAnyCollation)) override protected def withNewChildInternal(newChild: Expression): Expression = { copy(child = newChild) } - override def inputTypes: Seq[AbstractDataType] = Seq(StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(StringTypeAnyCollation) override def prettyName: String = "url_decode" } @@ -154,8 +155,8 @@ case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.ge def this(children: Seq[Expression]) = this(children, SQLConf.get.ansiEnabled) override def nullable: Boolean = true - override def inputTypes: Seq[DataType] = Seq.fill(children.size)(StringType) - override def dataType: DataType = StringType + override def inputTypes: Seq[AbstractDataType] = Seq.fill(children.size)(StringTypeAnyCollation) + override def dataType: DataType = SQLConf.get.defaultStringType override def prettyName: String = "parse_url" // If the url is a constant, cache the URL object so that we don't need to convert url diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtils.scala index d55edcd662b9d..f7f7097173bb4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtils.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions.variant import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.util.{ArrayData, BadRecordException, MapData} +import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ import org.apache.spark.types.variant.{Variant, VariantBuilder, VariantSizeLimitException, VariantUtil} @@ -31,16 +31,39 @@ import org.apache.spark.unsafe.types.{UTF8String, VariantVal} */ object VariantExpressionEvalUtils { - def parseJson(input: UTF8String): VariantVal = { + def parseJson(input: UTF8String, failOnError: Boolean = true): VariantVal = { + def parseJsonFailure(exception: Throwable): VariantVal = { + if (failOnError) { + throw exception + } else { + null + } + } try { val v = VariantBuilder.parseJson(input.toString) new VariantVal(v.getValue, v.getMetadata) } catch { case _: VariantSizeLimitException => - throw QueryExecutionErrors.variantSizeLimitError(VariantUtil.SIZE_LIMIT, "parse_json") + parseJsonFailure(QueryExecutionErrors + .variantSizeLimitError(VariantUtil.SIZE_LIMIT, "parse_json")) case NonFatal(e) => - throw QueryExecutionErrors.malformedRecordsDetectedInRecordParsingError( - input.toString, BadRecordException(() => input, cause = e)) + parseJsonFailure(QueryExecutionErrors.malformedRecordsDetectedInRecordParsingError( + input.toString, e)) + } + } + + def isVariantNull(input: VariantVal): Boolean = { + if (input == null) { + // This is a SQL NULL, not a Variant NULL + false + } else { + val variantValue = input.getValue + if (variantValue.isEmpty) { + throw QueryExecutionErrors.malformedVariant() + } else { + // Variant NULL is denoted by basic_type == 0 and val_header == 0 
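A brief usage sketch of the isVariantNull contract implemented above, assuming a SparkSession in scope as `spark` (the expected results restate the documented examples rather than new behavior):

// A variant null (JSON 'null') is reported as true.
spark.sql("SELECT is_variant_null(parse_json('null'))").show()   // true
// A SQL NULL input is not a variant null.
spark.sql("SELECT is_variant_null(parse_json(null))").show()     // false
// A missing path yields SQL NULL, which is likewise not a variant null.
spark.sql("""SELECT is_variant_null(variant_get(parse_json('{"a":null}'), "$.c"))""").show() // false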
+ variantValue(0) == 0 + } } } @@ -65,7 +88,8 @@ object VariantExpressionEvalUtils { case LongType => builder.appendLong(input.asInstanceOf[Long]) case FloatType => builder.appendFloat(input.asInstanceOf[Float]) case DoubleType => builder.appendDouble(input.asInstanceOf[Double]) - case StringType => builder.appendString(input.asInstanceOf[UTF8String].toString) + case _: DecimalType => builder.appendDecimal(input.asInstanceOf[Decimal].toJavaBigDecimal) + case _: StringType => builder.appendString(input.asInstanceOf[UTF8String].toString) case BinaryType => builder.appendBinary(input.asInstanceOf[Array[Byte]]) case DateType => builder.appendDate(input.asInstanceOf[Int]) case TimestampType => builder.appendTimestamp(input.asInstanceOf[Long]) @@ -79,7 +103,8 @@ object VariantExpressionEvalUtils { val offsets = new java.util.ArrayList[java.lang.Integer](data.numElements()) for (i <- 0 until data.numElements()) { offsets.add(builder.getWritePos - start) - buildVariant(builder, data.get(i, elementType), elementType) + val element = if (data.isNullAt(i)) null else data.get(i, elementType) + buildVariant(builder, element, elementType) } builder.finishWritingArray(start, offsets) case MapType(StringType, valueType, _) => @@ -92,7 +117,8 @@ object VariantExpressionEvalUtils { val key = keys.getUTF8String(i).toString val id = builder.addKey(key) fields.add(new VariantBuilder.FieldEntry(key, id, builder.getWritePos - start)) - buildVariant(builder, values.get(i, valueType), valueType) + val value = if (values.isNullAt(i)) null else values.get(i, valueType) + buildVariant(builder, value, valueType) } builder.finishWritingObject(start, fields) case StructType(structFields) => @@ -103,7 +129,8 @@ object VariantExpressionEvalUtils { val key = structFields(i).name val id = builder.addKey(key) fields.add(new VariantBuilder.FieldEntry(key, id, builder.getWritePos - start)) - buildVariant(builder, data.get(i, structFields(i).dataType), structFields(i).dataType) + val value = if (data.isNullAt(i)) null else data.get(i, structFields(i).dataType) + buildVariant(builder, value, structFields(i).dataType) } builder.finishWritingObject(start, fields) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala index cab75e1996abc..b80fb11b6813b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/variant/variantExpressions.scala @@ -37,41 +37,80 @@ import org.apache.spark.sql.catalyst.trees.UnaryLike import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase, QueryExecutionErrors} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.types.variant._ import org.apache.spark.types.variant.VariantUtil.Type import org.apache.spark.unsafe.types._ + +/** + * The implementation for `parse_json` and `try_parse_json` expressions. Parse a JSON string as a + * Variant value. + * @param child The string value to parse as a variant. 
+ * @param failOnError Controls whether the expression should throw an exception or return null if + * the string does not represent a valid JSON value. + */ +case class ParseJson(child: Expression, failOnError: Boolean = true) + extends UnaryExpression with ExpectsInputTypes with RuntimeReplaceable { + + override lazy val replacement: Expression = StaticInvoke( + VariantExpressionEvalUtils.getClass, + VariantType, + "parseJson", + Seq(child, Literal(failOnError, BooleanType)), + inputTypes :+ BooleanType, + returnNullable = !failOnError) + + override def inputTypes: Seq[AbstractDataType] = StringTypeAnyCollation :: Nil + + override def dataType: DataType = VariantType + + override def prettyName: String = if (failOnError) "parse_json" else "try_parse_json" + + override protected def withNewChildInternal(newChild: Expression): ParseJson = + copy(child = newChild) +} + // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(jsonStr) - Parse a JSON string as an Variant value. Throw an exception when the string is not valid JSON value.", + usage = "_FUNC_(expr) - Check if a variant value is a variant null. Returns true if and only if the input is a variant null and false otherwise (including in the case of SQL NULL).", examples = """ Examples: - > SELECT _FUNC_('{"a":1,"b":0.8}'); - {"a":1,"b":0.8} + > SELECT _FUNC_(parse_json('null')); + true + > SELECT _FUNC_(parse_json('"null"')); + false + > SELECT _FUNC_(parse_json('13')); + false + > SELECT _FUNC_(parse_json(null)); + false + > SELECT _FUNC_(variant_get(parse_json('{"a":null, "b":"spark"}'), "$.c")); + false + > SELECT _FUNC_(variant_get(parse_json('{"a":null, "b":"spark"}'), "$.a")); + true """, since = "4.0.0", - group = "variant_funcs" -) + group = "variant_funcs") // scalastyle:on line.size.limit -case class ParseJson(child: Expression) - extends UnaryExpression with ExpectsInputTypes with RuntimeReplaceable { +case class IsVariantNull(child: Expression) extends UnaryExpression + with Predicate with ExpectsInputTypes with RuntimeReplaceable { override lazy val replacement: Expression = StaticInvoke( VariantExpressionEvalUtils.getClass, - VariantType, - "parseJson", + BooleanType, + "isVariantNull", Seq(child), inputTypes, + propagateNull = false, returnNullable = false) - override def inputTypes: Seq[AbstractDataType] = StringType :: Nil - - override def dataType: DataType = VariantType + override def inputTypes: Seq[AbstractDataType] = Seq(VariantType) - override def prettyName: String = "parse_json" + override def prettyName: String = "is_variant_null" - override protected def withNewChildInternal(newChild: Expression): ParseJson = + override protected def withNewChildInternal(newChild: Expression): IsVariantNull = copy(child = newChild) } @@ -162,7 +201,7 @@ case class VariantGet( final override def nodePatternsInternal(): Seq[TreePattern] = Seq(VARIANT_GET) - override def inputTypes: Seq[AbstractDataType] = Seq(VariantType, StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(VariantType, StringTypeAnyCollation) override def prettyName: String = if (failOnError) "variant_get" else "try_variant_get" @@ -223,7 +262,7 @@ case object VariantGet { VariantType => true case ArrayType(elementType, _) => checkDataType(elementType) - case MapType(StringType, valueType, _) => checkDataType(valueType) + case MapType(_: StringType, valueType, _) => checkDataType(valueType) case StructType(fields) => fields.forall(f => checkDataType(f.dataType)) case _ => false } @@ -283,7 +322,13 @@ case object VariantGet { 
} } - if (dataType == VariantType) return new VariantVal(v.getValue, v.getMetadata) + if (dataType == VariantType) { + // Build a new variant, in order to strip off any unnecessary metadata. + val builder = new VariantBuilder + builder.appendVariant(v) + val result = builder.result() + return new VariantVal(result.getValue, result.getMetadata) + } val variantType = v.getType if (variantType == Type.NULL) return null dataType match { @@ -297,11 +342,12 @@ case object VariantGet { } case Type.BOOLEAN => Literal(v.getBoolean, BooleanType) case Type.LONG => Literal(v.getLong, LongType) - case Type.STRING => Literal(UTF8String.fromString(v.getString), StringType) + case Type.STRING => Literal(UTF8String.fromString(v.getString), + SQLConf.get.defaultStringType) case Type.DOUBLE => Literal(v.getDouble, DoubleType) case Type.DECIMAL => val d = Decimal(v.getDecimal) - Literal(Decimal(v.getDecimal), DecimalType(d.precision, d.scale)) + Literal(d, DecimalType(d.precision, d.scale)) case Type.DATE => Literal(v.getLong.toInt, DateType) case Type.TIMESTAMP => Literal(v.getLong, TimestampType) case Type.TIMESTAMP_NTZ => Literal(v.getLong, TimestampNTZType) @@ -350,7 +396,7 @@ case object VariantGet { } else { invalidCast() } - case MapType(StringType, valueType, _) => + case MapType(_: StringType, valueType, _) => if (variantType == Type.OBJECT) { val size = v.objectSize() val keyArray = new Array[Any](size) @@ -384,6 +430,47 @@ case object VariantGet { } } +abstract class ParseJsonExpressionBuilderBase(failOnError: Boolean) extends ExpressionBuilder { + override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val numArgs = expressions.length + if (numArgs == 1) { + ParseJson(expressions.head, failOnError) + } else { + throw QueryCompilationErrors.wrongNumArgsError(funcName, Seq(1), numArgs) + } + } +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(jsonStr) - Parse a JSON string as a Variant value. Throw an exception when the string is not valid JSON value.", + examples = """ + Examples: + > SELECT _FUNC_('{"a":1,"b":0.8}'); + {"a":1,"b":0.8} + """, + since = "4.0.0", + group = "variant_funcs" +) +// scalastyle:on line.size.limit +object ParseJsonExpressionBuilder extends ParseJsonExpressionBuilderBase(true) + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(jsonStr) - Parse a JSON string as a Variant value. 
Return NULL when the string is not valid JSON value.", + examples = """ + Examples: + > SELECT _FUNC_('{"a":1,"b":0.8}'); + {"a":1,"b":0.8} + > SELECT _FUNC_('{"a":1,'); + NULL + """, + since = "4.0.0", + group = "variant_funcs" +) +// scalastyle:on line.size.limit +object TryParseJsonExpressionBuilder extends ParseJsonExpressionBuilderBase(false) + abstract class VariantGetExpressionBuilderBase(failOnError: Boolean) extends ExpressionBuilder { override def build(funcName: String, expressions: Seq[Expression]): Expression = { val numArgs = expressions.length @@ -490,7 +577,7 @@ case class VariantExplode(child: Expression) extends UnaryExpression with Genera override def elementSchema: StructType = { new StructType() .add("pos", IntegerType, nullable = false) - .add("key", StringType, nullable = true) + .add("key", SQLConf.get.defaultStringType, nullable = true) .add("value", VariantType, nullable = false) } } @@ -547,7 +634,7 @@ case class SchemaOfVariant(child: Expression) with ExpectsInputTypes { override lazy val replacement: Expression = StaticInvoke( SchemaOfVariant.getClass, - StringType, + SQLConf.get.defaultStringType, "schemaOfVariant", Seq(child), inputTypes, @@ -555,7 +642,7 @@ case class SchemaOfVariant(child: Expression) override def inputTypes: Seq[AbstractDataType] = Seq(VariantType) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def prettyName: String = "schema_of_variant" @@ -598,11 +685,11 @@ object SchemaOfVariant { case Type.NULL => NullType case Type.BOOLEAN => BooleanType case Type.LONG => LongType - case Type.STRING => StringType + case Type.STRING => SQLConf.get.defaultStringType case Type.DOUBLE => DoubleType case Type.DECIMAL => - val d = v.getDecimal - DecimalType(d.precision(), d.scale()) + val d = Decimal(v.getDecimal) + DecimalType(d.precision, d.scale) case Type.DATE => DateType case Type.TIMESTAMP => TimestampType case Type.TIMESTAMP_NTZ => TimestampNTZType @@ -643,7 +730,7 @@ case class SchemaOfVariantAgg( override def inputTypes: Seq[AbstractDataType] = Seq(VariantType) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index 00711332350cf..5881c456f6e86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -853,7 +853,7 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow // for each partition. 
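Returning to the ParseJson builders defined earlier in this hunk, a usage sketch (assuming a SparkSession `spark`): parse_json raises an error on malformed input, while try_parse_json returns NULL, matching the documented examples.

spark.sql("""SELECT parse_json('{"a":1,"b":0.8}')""").show() // {"a":1,"b":0.8}
spark.sql("""SELECT try_parse_json('{"a":1,')""").show()     // NULL: malformed JSON
// spark.sql("""SELECT parse_json('{"a":1,')""").show()      // would fail instead of returning NULL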
override def checkInputDataTypes(): TypeCheckResult = { if (!buckets.foldable) { - DataTypeMismatch( + return DataTypeMismatch( errorSubClass = "NON_FOLDABLE_INPUT", messageParameters = Map( "inputName" -> toSQLId("buckets"), @@ -864,7 +864,7 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow } if (buckets.dataType != IntegerType) { - DataTypeMismatch( + return DataTypeMismatch( errorSubClass = "UNEXPECTED_INPUT_TYPE", messageParameters = Map( "paramIndex" -> ordinalNumber(0), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala index c3a285178c110..f65061e8d0ea9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/xpath.scala @@ -23,6 +23,8 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.Cast._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.util.GenericArrayData +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -39,7 +41,8 @@ abstract class XPathExtract /** XPath expressions are always nullable, e.g. if the xml string is empty. */ override def nullable: Boolean = true - override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType) + override def inputTypes: Seq[AbstractDataType] = + Seq(StringTypeAnyCollation, StringTypeAnyCollation) override def checkInputDataTypes(): TypeCheckResult = { if (!path.foldable) { @@ -47,7 +50,7 @@ abstract class XPathExtract errorSubClass = "NON_FOLDABLE_INPUT", messageParameters = Map( "inputName" -> toSQLId("path"), - "inputType" -> toSQLType(StringType), + "inputType" -> toSQLType(StringTypeAnyCollation), "inputExpr" -> toSQLExpr(path) ) ) @@ -221,7 +224,7 @@ case class XPathDouble(xml: Expression, path: Expression) extends XPathExtract { // scalastyle:on line.size.limit case class XPathString(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath_string" - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullSafeEval(xml: Any, path: Any): Any = { val ret = xpathUtil.evalString(xml.asInstanceOf[UTF8String].toString, pathString) @@ -245,7 +248,7 @@ case class XPathString(xml: Expression, path: Expression) extends XPathExtract { // scalastyle:on line.size.limit case class XPathList(xml: Expression, path: Expression) extends XPathExtract { override def prettyName: String = "xpath" - override def dataType: DataType = ArrayType(StringType, containsNull = false) + override def dataType: DataType = ArrayType(SQLConf.get.defaultStringType, containsNull = false) override def nullSafeEval(xml: Any, path: Any): Any = { val nodeList = xpathUtil.evalNodeList(xml.asInstanceOf[UTF8String].toString, pathString) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala index 415d55d19ded2..48a87db291a8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala @@ -21,12 +21,13 @@ import java.io.CharArrayWriter import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} -import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback -import org.apache.spark.sql.catalyst.util.{ArrayData, DropMalformedMode, FailFastMode, FailureSafeParser, GenericArrayData, PermissiveMode} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode} +import org.apache.spark.sql.catalyst.util.{DropMalformedMode, FailFastMode, FailureSafeParser, PermissiveMode} import org.apache.spark.sql.catalyst.util.TypeUtils._ import org.apache.spark.sql.catalyst.xml.{StaxXmlGenerator, StaxXmlParser, ValidatorUtil, XmlInferSchema, XmlOptions} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -51,13 +52,12 @@ import org.apache.spark.unsafe.types.UTF8String since = "4.0.0") // scalastyle:on line.size.limit case class XmlToStructs( - schema: DataType, + schema: StructType, options: Map[String, String], child: Expression, timeZoneId: Option[String] = None) extends UnaryExpression with TimeZoneAwareExpression - with CodegenFallback with ExpectsInputTypes with NullIntolerant with QueryErrorsBase { @@ -73,7 +73,7 @@ case class XmlToStructs( // The XML input data might be missing certain fields. We force the nullability // of the user-provided schema to avoid data corruptions. - val nullableSchema = schema.asNullable + private val nullableSchema = schema.asNullable def this(child: Expression, schema: Expression) = this(child, schema, Map.empty[String, String]) @@ -86,42 +86,27 @@ case class XmlToStructs( // This converts parsed rows to the desired output by the given schema. 
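Given that XmlToStructs now takes a StructType rather than an arbitrary DataType, a usage sketch of the SQL surface (assuming a SparkSession `spark`, and assuming from_xml accepts a DDL-formatted schema string the same way from_json does):

// Parses one XML record into a struct<a:int> column.
spark.sql("SELECT from_xml('<p><a>1</a></p>', 'a INT') AS parsed").show()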
@transient - lazy val converter = nullableSchema match { - case _: StructType => - (rows: Iterator[InternalRow]) => if (rows.hasNext) rows.next() else null - case _: ArrayType => - (rows: Iterator[InternalRow]) => if (rows.hasNext) rows.next().getArray(0) else null - case _: MapType => - (rows: Iterator[InternalRow]) => if (rows.hasNext) rows.next().getMap(0) else null - } + private lazy val converter = + (rows: Iterator[InternalRow]) => if (rows.hasNext) rows.next() else null - val nameOfCorruptRecord = SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD) + private val nameOfCorruptRecord = SQLConf.get.getConf(SQLConf.COLUMN_NAME_OF_CORRUPT_RECORD) - @transient lazy val parser = { + @transient + private lazy val parser = { val parsedOptions = new XmlOptions(options, timeZoneId.get, nameOfCorruptRecord) val mode = parsedOptions.parseMode if (mode != PermissiveMode && mode != FailFastMode) { throw QueryCompilationErrors.parseModeUnsupportedError("from_xml", mode) } - val (parserSchema, actualSchema) = nullableSchema match { - case s: StructType => - ExprUtils.verifyColumnNameOfCorruptRecord(s, parsedOptions.columnNameOfCorruptRecord) - (s, StructType(s.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord))) - case other => - (StructType(Array(StructField("value", other))), other) - } - - val rowSchema: StructType = schema match { - case st: StructType => st - case ArrayType(st: StructType, _) => st - } - val rawParser = new StaxXmlParser(rowSchema, parsedOptions) + ExprUtils.verifyColumnNameOfCorruptRecord( + nullableSchema, parsedOptions.columnNameOfCorruptRecord) + val rawParser = new StaxXmlParser(schema, parsedOptions) val xsdSchema = Option(parsedOptions.rowValidationXSDPath).map(ValidatorUtil.getSchema) new FailureSafeParser[String]( input => rawParser.doParseColumn(input, mode, xsdSchema), mode, - parserSchema, + nullableSchema, parsedOptions.columnNameOfCorruptRecord) } @@ -130,23 +115,17 @@ case class XmlToStructs( override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = { copy(timeZoneId = Option(timeZoneId)) } - override def nullSafeEval(xml: Any): Any = xml match { - case arr: GenericArrayData => - new GenericArrayData(arr.array.map(s => converter(parser.parse(s.toString)))) - case arr: ArrayData => - new GenericArrayData(arr.array.map(s => converter(parser.parse(s.toString)))) - case _ => - val str = xml.asInstanceOf[UTF8String].toString - converter(parser.parse(str)) - } - override def inputTypes: Seq[AbstractDataType] = StringType :: Nil + override def nullSafeEval(xml: Any): Any = + converter(parser.parse(xml.asInstanceOf[UTF8String].toString)) - override def sql: String = schema match { - case _: MapType => "entries" - case _ => super.sql + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val expr = ctx.addReferenceObj("this", this) + defineCodeGen(ctx, ev, input => s"(InternalRow) $expr.nullSafeEval($input)") } + override def inputTypes: Seq[AbstractDataType] = StringTypeAnyCollation :: Nil + override def prettyName: String = "from_xml" protected def withNewChildInternal(newChild: Expression): XmlToStructs = @@ -178,16 +157,13 @@ case class SchemaOfXml( child = child, options = ExprUtils.convertToMapData(options)) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def nullable: Boolean = false @transient private lazy val xmlOptions = new XmlOptions(options, "UTC") - @transient - private lazy val xmlFactory = xmlOptions.buildXmlFactory() - 
@transient private lazy val xmlInferSchema = { if (xmlOptions.parseMode == DropMalformedMode) { @@ -226,7 +202,7 @@ case class SchemaOfXml( .map(ArrayType(_, containsNull = at.containsNull)) .getOrElse(ArrayType(StructType(Nil), containsNull = at.containsNull)) case other: DataType => - xmlInferSchema.canonicalizeType(other).getOrElse(StringType) + xmlInferSchema.canonicalizeType(other).getOrElse(SQLConf.get.defaultStringType) } UTF8String.fromString(dataType.sql) @@ -265,7 +241,6 @@ case class StructsToXml( timeZoneId: Option[String] = None) extends UnaryExpression with TimeZoneAwareExpression - with CodegenFallback with ExpectsInputTypes with NullIntolerant { override def nullable: Boolean = true @@ -320,13 +295,18 @@ case class StructsToXml( getAndReset() } - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) override def nullSafeEval(value: Any): Any = converter(value) + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val expr = ctx.addReferenceObj("this", this) + defineCodeGen(ctx, ev, input => s"(UTF8String) $expr.nullSafeEval($input)") + } + override def inputTypes: Seq[AbstractDataType] = StructType :: Nil override def prettyName: String = "to_xml" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala index e5aa0bb6d2c06..945b6e7de8b7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala @@ -188,6 +188,11 @@ class JSONOptions( val writeNonAsciiCharacterAsCodePoint: Boolean = parameters.get(WRITE_NON_ASCII_CHARACTER_AS_CODEPOINT).map(_.toBoolean).getOrElse(false) + // This option takes in a column name and specifies that the entire JSON record should be stored + // as a single VARIANT type column in the table with the given column name. + // E.g. spark.read.format("json").option("singleVariantColumn", "colName") + val singleVariantColumn: Option[String] = parameters.get(SINGLE_VARIANT_COLUMN) + /** Build a Jackson [[JsonFactory]] using JSON options. 
*/ def buildJsonFactory(): JsonFactory = { val streamReadConstraints = StreamReadConstraints @@ -282,6 +287,7 @@ object JSONOptions extends DataSourceOptions { val COLUMN_NAME_OF_CORRUPTED_RECORD = newOption("columnNameOfCorruptRecord") val TIME_ZONE = newOption("timeZone") val WRITE_NON_ASCII_CHARACTER_AS_CODEPOINT = newOption("writeNonAsciiCharacterAsCodePoint") + val SINGLE_VARIANT_COLUMN = newOption("singleVariantColumn") // Options with alternative val ENCODING = "encoding" val CHARSET = "charset" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index d3f33a70323fc..b2c302fbbbe31 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -24,6 +24,7 @@ import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal import com.fasterxml.jackson.core._ +import org.apache.hadoop.fs.PositionedReadable import org.apache.spark.SparkUpgradeException import org.apache.spark.internal.Logging @@ -36,7 +37,8 @@ import org.apache.spark.sql.errors.{ExecutionErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.types.variant._ +import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String, VariantVal} import org.apache.spark.util.Utils /** @@ -105,12 +107,31 @@ class JacksonParser( */ private def makeRootConverter(dt: DataType): JsonParser => Iterable[InternalRow] = { dt match { + case _: StructType if options.singleVariantColumn.isDefined => (parser: JsonParser) => { + Some(InternalRow(parseVariant(parser))) + } case st: StructType => makeStructRootConverter(st) case mt: MapType => makeMapRootConverter(mt) case at: ArrayType => makeArrayRootConverter(at) } } + protected final def parseVariant(parser: JsonParser): VariantVal = { + // Skips `FIELD_NAME` at the beginning. This check is adapted from `parseJsonToken`, but we + // cannot directly use the function here because it also handles the `VALUE_NULL` token and + // returns null (representing a SQL NULL). Instead, we want to return a variant null. 
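A usage sketch of the new singleVariantColumn option described above (assuming a SparkSession `spark`; the input path and column name are placeholders): each JSON record is loaded as one VARIANT column instead of being schema-inferred.

val df = spark.read
  .format("json")
  .option("singleVariantColumn", "record") // caller-chosen column name
  .load("/path/to/input.json")             // placeholder path

df.printSchema() // expected: a single `record` column of VARIANT type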
+ if (parser.getCurrentToken == FIELD_NAME) { + parser.nextToken() + } + try { + val v = VariantBuilder.parseJson(parser) + new VariantVal(v.getValue, v.getMetadata) + } catch { + case _: VariantSizeLimitException => + throw QueryExecutionErrors.variantSizeLimitError(VariantUtil.SIZE_LIMIT, "JacksonParser") + } + } + private def makeStructRootConverter(st: StructType): JsonParser => Iterable[InternalRow] = { val elementConverter = makeConverter(st) val fieldConverters = st.map(_.dataType).map(makeConverter).toArray @@ -255,19 +276,63 @@ class JacksonParser( } } - case StringType => - (parser: JsonParser) => parseJsonToken[UTF8String](parser, dataType) { + case _: StringType => (parser: JsonParser) => { + // This must be enabled if we will retrieve the bytes directly from the raw content: + val includeSourceInLocation = JsonParser.Feature.INCLUDE_SOURCE_IN_LOCATION + val originalMask = if (includeSourceInLocation.enabledIn(parser.getFeatureMask)) { + 1 + } else { + 0 + } + parser.overrideStdFeatures(includeSourceInLocation.getMask, includeSourceInLocation.getMask) + val result = parseJsonToken[UTF8String](parser, dataType) { case VALUE_STRING => UTF8String.fromString(parser.getText) - case _ => + case other => // Note that it always tries to convert the data as string without the case of failure. - val writer = new ByteArrayOutputStream() - Utils.tryWithResource(factory.createGenerator(writer, JsonEncoding.UTF8)) { - generator => generator.copyCurrentStructure(parser) + val startLocation = parser.currentTokenLocation() + def skipAhead(): Unit = { + other match { + case START_OBJECT => + parser.skipChildren() + case START_ARRAY => + parser.skipChildren() + case _ => + // Do nothing in this case; we've already read the token + } } - UTF8String.fromBytes(writer.toByteArray) - } + + // PositionedReadable + startLocation.contentReference().getRawContent match { + case byteArray: Array[Byte] if exactStringParsing => + skipAhead() + val endLocation = parser.currentLocation.getByteOffset + + UTF8String.fromBytes( + byteArray, + startLocation.getByteOffset.toInt, + endLocation.toInt - (startLocation.getByteOffset.toInt)) + case positionedReadable: PositionedReadable if exactStringParsing => + skipAhead() + val endLocation = parser.currentLocation.getByteOffset + + val size = endLocation.toInt - (startLocation.getByteOffset.toInt) + val buffer = new Array[Byte](size) + positionedReadable.read(startLocation.getByteOffset, buffer, 0, size) + UTF8String.fromBytes(buffer, 0, size) + case _ => + val writer = new ByteArrayOutputStream() + Utils.tryWithResource(factory.createGenerator(writer, JsonEncoding.UTF8)) { + generator => generator.copyCurrentStructure(parser) + } + UTF8String.fromBytes(writer.toByteArray) + } + } + // Reset back to the original configuration: + parser.overrideStdFeatures(includeSourceInLocation.getMask, originalMask) + result + } case TimestampType => (parser: JsonParser) => parseJsonToken[java.lang.Long](parser, dataType) { @@ -380,6 +445,8 @@ class JacksonParser( case _ => null } + case _: VariantType => parseVariant + // We don't actually hit this exception though, we keep it for understandability case _ => throw ExecutionErrors.unsupportedDataTypeError(dataType) } @@ -407,6 +474,8 @@ class JacksonParser( private val allowEmptyString = SQLConf.get.getConf(SQLConf.LEGACY_ALLOW_EMPTY_STRING_IN_JSON) + private val exactStringParsing = SQLConf.get.getConf(SQLConf.JSON_EXACT_STRING_PARSING) + /** * This function throws an exception for failed conversion. 
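A rough sketch of the string-preservation behavior this parser change affects, assuming exact string parsing is enabled (the schema and sample record below are invented): a field declared as `StringType` whose JSON value is an object or array keeps its raw text, sliced directly from the underlying bytes when they are available rather than re-serialized through a Jackson generator.

    import org.apache.spark.sql.{Encoders, SparkSession}
    import org.apache.spark.sql.types.{StringType, StructField, StructType}

    val spark = SparkSession.builder().getOrCreate()

    val schema = StructType(Seq(StructField("payload", StringType)))
    val records =
      spark.createDataset(Seq("""{"payload": {"a": 1, "b": [2, 3]}}"""))(Encoders.STRING)

    // The payload column holds the nested object as text instead of failing the parse.
    spark.read.schema(schema).json(records).show(truncate = false)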
For empty string on data types * except for string and binary types, this also throws an exception. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 7ee522226e3ec..d982e1f19da0c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -372,6 +372,11 @@ object JsonInferSchema { case (DoubleType, _: DecimalType) | (_: DecimalType, DoubleType) => DoubleType + // This branch is only used by `SchemaOfVariant.mergeSchema` because `JsonInferSchema` never + // produces `FloatType`. + case (FloatType, _: DecimalType) | (_: DecimalType, FloatType) => + DoubleType + case (t1: DecimalType, t2: DecimalType) => val scale = math.max(t1.scale, t2.scale) val range = math.max(t1.precision - t1.scale, t2.precision - t2.scale) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala index 9c150f1f3308f..176e927b2d212 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala @@ -120,33 +120,52 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J hasHitSelectiveFilter = hasHitSelectiveFilter || isLikelySelective(condition), currentPlan, targetKey) - case ExtractEquiJoinKeys(_, lkeys, rkeys, _, _, left, right, _) => + case ExtractEquiJoinKeys(joinType, lkeys, rkeys, _, _, left, right, _) => // Runtime filters use one side of the [[Join]] to build a set of join key values and prune // the other side of the [[Join]]. It's also OK to use a superset of the join key values - // (ignore null values) to do the pruning. + // (ignore null values) to do the pruning. We can also extract from the other side if the + // join keys are transitive, and the other side always produces a superset output of join + // key values. Any join side always produce a superset output of its corresponding + // join keys, but for transitive join keys we need to check the join type. // We assume other rules have already pushed predicates through join if possible. // So the predicate references won't pass on anymore. if (left.output.exists(_.semanticEquals(targetKey))) { extract(left, AttributeSet.empty, hasHitFilter = false, hasHitSelectiveFilter = false, currentPlan = left, targetKey = targetKey).orElse { - // We can also extract from the right side if the join keys are transitive. - lkeys.zip(rkeys).find(_._1.semanticEquals(targetKey)).map(_._2) - .flatMap { newTargetKey => - extract(right, AttributeSet.empty, - hasHitFilter = false, hasHitSelectiveFilter = false, currentPlan = right, - targetKey = newTargetKey) - } + // An example that extract from the right side if the join keys are transitive. 
+ // left table: 1, 2, 3 + // right table, 3, 4 + // right outer join output: (3, 3), (null, 4) + // right key output: 3, 4 + if (canPruneLeft(joinType)) { + lkeys.zip(rkeys).find(_._1.semanticEquals(targetKey)).map(_._2) + .flatMap { newTargetKey => + extract(right, AttributeSet.empty, + hasHitFilter = false, hasHitSelectiveFilter = false, currentPlan = right, + targetKey = newTargetKey) + } + } else { + None + } } } else if (right.output.exists(_.semanticEquals(targetKey))) { extract(right, AttributeSet.empty, hasHitFilter = false, hasHitSelectiveFilter = false, currentPlan = right, targetKey = targetKey).orElse { - // We can also extract from the left side if the join keys are transitive. - rkeys.zip(lkeys).find(_._1.semanticEquals(targetKey)).map(_._2) - .flatMap { newTargetKey => - extract(left, AttributeSet.empty, - hasHitFilter = false, hasHitSelectiveFilter = false, currentPlan = left, - targetKey = newTargetKey) - } + // An example that extract from the left side if the join keys are transitive. + // left table: 1, 2, 3 + // right table, 3, 4 + // left outer join output: (1, null), (2, null), (3, 3) + // left key output: 1, 2, 3 + if (canPruneRight(joinType)) { + rkeys.zip(lkeys).find(_._1.semanticEquals(targetKey)).map(_._2) + .flatMap { newTargetKey => + extract(left, AttributeSet.empty, + hasHitFilter = false, hasHitSelectiveFilter = false, currentPlan = left, + targetKey = newTargetKey) + } + } else { + None + } } } else { None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala index 8d7ff4cbf163d..8cc25328ce70b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala @@ -37,23 +37,19 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{CTE, PLAN_EXPRESSION} * query level. * * @param alwaysInline if true, inline all CTEs in the query plan. + * @param keepDanglingRelations if true, dangling CTE relations will be kept in the original + * `WithCTE` node. */ -case class InlineCTE(alwaysInline: Boolean = false) extends Rule[LogicalPlan] { +case class InlineCTE( + alwaysInline: Boolean = false, + keepDanglingRelations: Boolean = false) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { if (!plan.isInstanceOf[Subquery] && plan.containsPattern(CTE)) { - val cteMap = mutable.SortedMap.empty[Long, (CTERelationDef, Int, mutable.Map[Long, Int])] + val cteMap = mutable.SortedMap.empty[Long, CTEReferenceInfo] buildCTEMap(plan, cteMap) cleanCTEMap(cteMap) - val notInlined = mutable.ArrayBuffer.empty[CTERelationDef] - val inlined = inlineCTE(plan, cteMap, notInlined) - // CTEs in SQL Commands have been inlined by `CTESubstitution` already, so it is safe to add - // WithCTE as top node here. - if (notInlined.isEmpty) { - inlined - } else { - WithCTE(inlined, notInlined.toSeq) - } + inlineCTE(plan, cteMap) } else { plan } @@ -74,22 +70,23 @@ case class InlineCTE(alwaysInline: Boolean = false) extends Rule[LogicalPlan] { * * @param plan The plan to collect the CTEs from * @param cteMap A mutable map that accumulates the CTEs and their reference information by CTE - * ids. The value of the map is tuple whose elements are: - * - The CTE definition - * - The number of incoming references to the CTE. This includes references from - * other CTEs and regular places. 
- * - A mutable inner map that tracks outgoing references (counts) to other CTEs. + * ids. * @param outerCTEId While collecting the map we use this optional CTE id to identify the * current outer CTE. */ - def buildCTEMap( + private def buildCTEMap( plan: LogicalPlan, - cteMap: mutable.Map[Long, (CTERelationDef, Int, mutable.Map[Long, Int])], + cteMap: mutable.Map[Long, CTEReferenceInfo], outerCTEId: Option[Long] = None): Unit = { plan match { case WithCTE(child, cteDefs) => cteDefs.foreach { cteDef => - cteMap(cteDef.id) = (cteDef, 0, mutable.Map.empty.withDefaultValue(0)) + cteMap(cteDef.id) = CTEReferenceInfo( + cteDef = cteDef, + refCount = 0, + outgoingRefs = mutable.Map.empty.withDefaultValue(0), + shouldInline = true + ) } cteDefs.foreach { cteDef => buildCTEMap(cteDef, cteMap, Some(cteDef.id)) @@ -97,11 +94,9 @@ case class InlineCTE(alwaysInline: Boolean = false) extends Rule[LogicalPlan] { buildCTEMap(child, cteMap, outerCTEId) case ref: CTERelationRef => - val (cteDef, refCount, refMap) = cteMap(ref.cteId) - cteMap(ref.cteId) = (cteDef, refCount + 1, refMap) + cteMap(ref.cteId) = cteMap(ref.cteId).withRefCountIncreased(1) outerCTEId.foreach { cteId => - val (_, _, outerRefMap) = cteMap(cteId) - outerRefMap(ref.cteId) += 1 + cteMap(cteId).increaseOutgoingRefCount(ref.cteId, 1) } case _ => @@ -129,15 +124,12 @@ case class InlineCTE(alwaysInline: Boolean = false) extends Rule[LogicalPlan] { * @param cteMap A mutable map that accumulates the CTEs and their reference information by CTE * ids. Needs to be sorted to speed up cleaning. */ - private def cleanCTEMap( - cteMap: mutable.SortedMap[Long, (CTERelationDef, Int, mutable.Map[Long, Int])] - ) = { + private def cleanCTEMap(cteMap: mutable.SortedMap[Long, CTEReferenceInfo]): Unit = { cteMap.keys.toSeq.reverse.foreach { currentCTEId => - val (_, currentRefCount, refMap) = cteMap(currentCTEId) - if (currentRefCount == 0) { - refMap.foreach { case (referencedCTEId, uselessRefCount) => - val (cteDef, refCount, refMap) = cteMap(referencedCTEId) - cteMap(referencedCTEId) = (cteDef, refCount - uselessRefCount, refMap) + val refInfo = cteMap(currentCTEId) + if (refInfo.refCount == 0) { + refInfo.outgoingRefs.foreach { case (referencedCTEId, uselessRefCount) => + cteMap(referencedCTEId) = cteMap(referencedCTEId).withRefCountDecreased(uselessRefCount) } } } @@ -145,30 +137,46 @@ case class InlineCTE(alwaysInline: Boolean = false) extends Rule[LogicalPlan] { private def inlineCTE( plan: LogicalPlan, - cteMap: mutable.Map[Long, (CTERelationDef, Int, mutable.Map[Long, Int])], - notInlined: mutable.ArrayBuffer[CTERelationDef]): LogicalPlan = { + cteMap: mutable.Map[Long, CTEReferenceInfo]): LogicalPlan = { plan match { case WithCTE(child, cteDefs) => + val notInlined = mutable.ArrayBuffer.empty[CTERelationDef] cteDefs.foreach { cteDef => - val (cte, refCount, refMap) = cteMap(cteDef.id) - if (refCount > 0) { - val inlined = cte.copy(child = inlineCTE(cte.child, cteMap, notInlined)) - cteMap(cteDef.id) = (inlined, refCount, refMap) - if (!shouldInline(inlined, refCount)) { - notInlined.append(inlined) - } + val refInfo = cteMap(cteDef.id) + if (refInfo.refCount > 0) { + val newDef = refInfo.cteDef.copy(child = inlineCTE(refInfo.cteDef.child, cteMap)) + val inlineDecision = shouldInline(newDef, refInfo.refCount) + cteMap(cteDef.id) = cteMap(cteDef.id).copy( + cteDef = newDef, shouldInline = inlineDecision + ) + if (!inlineDecision) notInlined += newDef + } else if (keepDanglingRelations) { + notInlined += refInfo.cteDef } } - inlineCTE(child, 
cteMap, notInlined) + val inlined = inlineCTE(child, cteMap) + if (notInlined.isEmpty) { + inlined + } else { + // Retain the not-inlined CTE relations in place. + WithCTE(inlined, notInlined.toSeq) + } case ref: CTERelationRef => - val (cteDef, refCount, _) = cteMap(ref.cteId) - if (shouldInline(cteDef, refCount)) { - if (ref.outputSet == cteDef.outputSet) { - cteDef.child + val refInfo = cteMap(ref.cteId) + if (refInfo.shouldInline) { + if (ref.outputSet == refInfo.cteDef.outputSet) { + refInfo.cteDef.child } else { val ctePlan = DeduplicateRelations( - Join(cteDef.child, cteDef.child, Inner, None, JoinHint(None, None))).children(1) + Join( + refInfo.cteDef.child, + refInfo.cteDef.child, + Inner, + None, + JoinHint(None, None) + ) + ).children(1) val projectList = ref.output.zip(ctePlan.output).map { case (tgtAttr, srcAttr) => if (srcAttr.semanticEquals(tgtAttr)) { tgtAttr @@ -184,13 +192,41 @@ case class InlineCTE(alwaysInline: Boolean = false) extends Rule[LogicalPlan] { case _ if plan.containsPattern(CTE) => plan - .withNewChildren(plan.children.map(child => inlineCTE(child, cteMap, notInlined))) + .withNewChildren(plan.children.map(child => inlineCTE(child, cteMap))) .transformExpressionsWithPruning(_.containsAllPatterns(PLAN_EXPRESSION, CTE)) { case e: SubqueryExpression => - e.withNewPlan(inlineCTE(e.plan, cteMap, notInlined)) + e.withNewPlan(inlineCTE(e.plan, cteMap)) } case _ => plan } } } + +/** + * The bookkeeping information for tracking CTE relation references. + * + * @param cteDef The CTE relation definition + * @param refCount The number of incoming references to this CTE relation. This includes references + * from other CTE relations and regular places. + * @param outgoingRefs A mutable map that tracks outgoing reference counts to other CTE relations. + * @param shouldInline If true, this CTE relation should be inlined in the places that reference it. 
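A simplified, self-contained mirror of this bookkeeping, to make the cleaning step concrete; `RefInfo` and the two CTE ids are stand-ins invented for the sketch, not the classes in the patch:

    import scala.collection.mutable

    final case class RefInfo(refCount: Int, outgoingRefs: mutable.Map[Long, Int])

    // CTE 1 is dangling (never referenced) but references CTE 2 once.
    val cteMap = mutable.SortedMap(
      1L -> RefInfo(0, mutable.Map(2L -> 1)),
      2L -> RefInfo(1, mutable.Map.empty[Long, Int]))

    // Mirror of cleanCTEMap: walk ids in reverse and discount references held by dangling CTEs.
    cteMap.keys.toSeq.reverse.foreach { id =>
      val info = cteMap(id)
      if (info.refCount == 0) {
        info.outgoingRefs.foreach { case (refId, n) =>
          cteMap(refId) = cteMap(refId).copy(refCount = cteMap(refId).refCount - n)
        }
      }
    }
    // CTE 2 now also has refCount 0; it is dropped, or kept in the original WithCTE node
    // when keepDanglingRelations is set.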
+ */ +case class CTEReferenceInfo( + cteDef: CTERelationDef, + refCount: Int, + outgoingRefs: mutable.Map[Long, Int], + shouldInline: Boolean) { + + def withRefCountIncreased(count: Int): CTEReferenceInfo = { + copy(refCount = refCount + count) + } + + def withRefCountDecreased(count: Int): CTEReferenceInfo = { + copy(refCount = refCount - count) + } + + def increaseOutgoingRefCount(cteDefId: Long, count: Int): Unit = { + outgoingRefs(cteDefId) += count + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index ca3c14177e6bd..8de2663a98094 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -218,6 +218,11 @@ object NestedColumnAliasing { case _ => false } + private def canAlias(ev: Expression): Boolean = { + // we can not alias the attr from lambda variable whose expr id is not available + !ev.exists(_.isInstanceOf[NamedLambdaVariable]) && ev.references.size == 1 + } + /** * Returns two types of expressions: * - Root references that are individually accessed @@ -226,11 +231,11 @@ object NestedColumnAliasing { */ private def collectRootReferenceAndExtractValue(e: Expression): Seq[Expression] = e match { case _: AttributeReference => Seq(e) - case GetStructField(_: ExtractValue | _: AttributeReference, _, _) => Seq(e) + case GetStructField(_: ExtractValue | _: AttributeReference, _, _) if canAlias(e) => Seq(e) case GetArrayStructFields(_: MapValues | _: MapKeys | _: ExtractValue | - _: AttributeReference, _, _, _, _) => Seq(e) + _: AttributeReference, _, _, _, _) if canAlias(e) => Seq(e) case es if es.children.nonEmpty => es.children.flatMap(collectRootReferenceAndExtractValue) case _ => Seq.empty } @@ -249,13 +254,8 @@ object NestedColumnAliasing { val otherRootReferences = new mutable.ArrayBuffer[AttributeReference]() exprList.foreach { e => extractor(e).foreach { - // we can not alias the attr from lambda variable whose expr id is not available - case ev: ExtractValue if !ev.exists(_.isInstanceOf[NamedLambdaVariable]) => - if (ev.references.size == 1) { - nestedFieldReferences.append(ev) - } + case ev: ExtractValue => nestedFieldReferences.append(ev) case ar: AttributeReference => otherRootReferences.append(ar) - case _ => // ignore } } val exclusiveAttrSet = AttributeSet(exclusiveAttrs ++ otherRootReferences) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJoinCondition.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJoinCondition.scala new file mode 100644 index 0000000000000..7c41ebea050be --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJoinCondition.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions.{And, EqualNullSafe, EqualTo, IsNull, Or, PredicateHelper} +import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.{JOIN, OR} + +/** + * Replaces `t1.id is null and t2.id is null or t1.id = t2.id` to `t1.id <=> t2.id` + * in join condition for better performance. + */ +object OptimizeJoinCondition extends Rule[LogicalPlan] with PredicateHelper { + override def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( + _.containsPattern(JOIN), ruleId) { + case j @ Join(_, _, _, condition, _) if condition.nonEmpty => + val newCondition = condition.map(_.transformWithPruning(_.containsPattern(OR), ruleId) { + case Or(EqualTo(l, r), And(IsNull(c1), IsNull(c2))) + if (l.semanticEquals(c1) && r.semanticEquals(c2)) + || (l.semanticEquals(c2) && r.semanticEquals(c1)) => + EqualNullSafe(l, r) + case Or(And(IsNull(c1), IsNull(c2)), EqualTo(l, r)) + if (l.semanticEquals(c1) && r.semanticEquals(c2)) + || (l.semanticEquals(c2) && r.semanticEquals(c1)) => + EqualNullSafe(l, r) + }) + j.copy(condition = newCondition) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlan.scala index 83646611578cb..61c08eb8f8b6f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeOneRowPlan.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.trees.TreePattern._ +import org.apache.spark.sql.internal.SQLConf /** * The rule is applied both normal and AQE Optimizer. It optimizes plan using max rows: @@ -31,19 +32,37 @@ import org.apache.spark.sql.catalyst.trees.TreePattern._ * it's grouping only(include the rewritten distinct plan), convert aggregate to project * - if the max rows of the child of aggregate is less than or equal to 1, * set distinct to false in all aggregate expression + * + * Note: the rule should not be applied to streaming source, since the number of rows it sees is + * just for current microbatch. It does not mean the streaming source will ever produce max 1 + * rows during lifetime of the query. Suppose the case: the streaming query has a case where + * batch 0 runs with empty data in streaming source A which triggers the rule with Aggregate, + * and batch 1 runs with several data in streaming source A which no longer trigger the rule. + * In the above scenario, this could fail the query as stateful operator is expected to be planned + * for every batches whereas here it is planned "selectively". 
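The streaming caveat above boils down to a single guard, restated here as a standalone predicate; the parameter names are invented, and the escape hatch is the `SQLConf.STREAMING_OPTIMIZE_ONE_ROW_PLAN_ENABLED` flag read below:

    // Only rewrite when the child is not a streaming source, unless explicitly allowed.
    def isEligible(childIsStreaming: Boolean, enableForStreaming: Boolean): Boolean =
      enableForStreaming || !childIsStreaming

    // Batch 0 of a streaming query may satisfy maxRows <= 1 while batch 1 does not, so a
    // per-microbatch rewrite would plan the stateful operator only for some batches.
    assert(isEligible(childIsStreaming = false, enableForStreaming = false))  // batch query
    assert(!isEligible(childIsStreaming = true, enableForStreaming = false))  // streaming source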
*/ object OptimizeOneRowPlan extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { + val enableForStreaming = conf.getConf(SQLConf.STREAMING_OPTIMIZE_ONE_ROW_PLAN_ENABLED) + plan.transformUpWithPruning(_.containsAnyPattern(SORT, AGGREGATE), ruleId) { - case Sort(_, _, child) if child.maxRows.exists(_ <= 1L) => child - case Sort(_, false, child) if child.maxRowsPerPartition.exists(_ <= 1L) => child - case agg @ Aggregate(_, _, child) if agg.groupOnly && child.maxRows.exists(_ <= 1L) => + case Sort(_, _, child) if child.maxRows.exists(_ <= 1L) && + isChildEligible(child, enableForStreaming) => child + case Sort(_, false, child) if child.maxRowsPerPartition.exists(_ <= 1L) && + isChildEligible(child, enableForStreaming) => child + case agg @ Aggregate(_, _, child) if agg.groupOnly && child.maxRows.exists(_ <= 1L) && + isChildEligible(child, enableForStreaming) => Project(agg.aggregateExpressions, child) - case agg: Aggregate if agg.child.maxRows.exists(_ <= 1L) => + case agg: Aggregate if agg.child.maxRows.exists(_ <= 1L) && + isChildEligible(agg.child, enableForStreaming) => agg.transformExpressions { case aggExpr: AggregateExpression if aggExpr.isDistinct => aggExpr.copy(isDistinct = false) } } } + + private def isChildEligible(child: LogicalPlan, enableForStreaming: Boolean): Boolean = { + enableForStreaming || !child.isStreaming + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index cacde9f5a7122..95923a1419513 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -20,15 +20,14 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.mutable import org.apache.spark.SparkException -import org.apache.spark.internal.LogKey._ -import org.apache.spark.internal.MDC +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{RepartitionOperation, _} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.trees.AlwaysProcess import org.apache.spark.sql.catalyst.trees.TreePattern._ @@ -84,6 +83,7 @@ abstract class Optimizer(catalogManager: CatalogManager) PushDownPredicates, PushDownLeftSemiAntiJoin, PushLeftSemiLeftAntiThroughJoin, + OptimizeJoinCondition, LimitPushDown, LimitPushDownThroughWindow, ColumnPruning, @@ -116,7 +116,6 @@ abstract class Optimizer(catalogManager: CatalogManager) BooleanSimplification, SimplifyConditionals, PushFoldableIntoBranches, - RemoveDispensableExpressions, SimplifyBinaryComparison, ReplaceNullWithFalseInPredicate, PruneFilters, @@ -445,7 +444,7 @@ abstract class Optimizer(catalogManager: CatalogManager) val excludedRules = excludedRulesConf.filter { ruleName => val nonExcludable = nonExcludableRules.contains(ruleName) if (nonExcludable) { - logWarning(log"Optimization rule '${MDC(RULE_NAME, ruleName)}' " + + logWarning(log"Optimization rule '${MDC(LogKeys.RULE_NAME, ruleName)}' " + log"was not 
excluded from the optimizer because this rule is a non-excludable rule.") } !nonExcludable @@ -457,7 +456,7 @@ abstract class Optimizer(catalogManager: CatalogManager) val filteredRules = batch.rules.filter { rule => val exclude = excludedRules.contains(rule.ruleName) if (exclude) { - logInfo(log"Optimization rule '${MDC(RULE_NAME, rule.ruleName)}' " + + logInfo(log"Optimization rule '${MDC(LogKeys.RULE_NAME, rule.ruleName)}' " + log"is excluded from the optimizer.") } !exclude @@ -467,7 +466,7 @@ abstract class Optimizer(catalogManager: CatalogManager) } else if (filteredRules.nonEmpty) { Some(Batch(batch.name, batch.strategy, filteredRules: _*)) } else { - logInfo(log"Optimization batch '${MDC(RULE_BATCH_NAME, batch.name)}' " + + logInfo(log"Optimization batch '${MDC(LogKeys.BATCH_NAME, batch.name)}' " + log"is excluded from the optimizer as all enclosed rules have been excluded.") None } @@ -1238,7 +1237,12 @@ object CollapseProject extends Rule[LogicalPlan] with AliasHelper { case _: Attribute | _: OuterReference => true case _ if e.foldable => true // PythonUDF is handled by the rule ExtractPythonUDFs - case _: PythonUDF => true + case _: PythonUDF => + if (conf.getConf(SQLConf.AVOID_COLLAPSE_UDF_WITH_EXPENSIVE_EXPR)) { + e.children.forall(isCheap) + } else { + true + } // Alias and ExtractValue are very cheap. case _: Alias | _: ExtractValue => e.children.forall(isCheap) case _ => false @@ -1768,6 +1772,10 @@ object PushPredicateThroughNonJoin extends Rule[LogicalPlan] with PredicateHelpe val aliasMap = getAliasMap(project) project.copy(child = Filter(replaceAlias(condition, aliasMap), grandChild)) + // We can push down deterministic predicate through Aggregate, including throwable predicate. + // If we can push down a filter through Aggregate, it means the filter only references the + // grouping keys or constants. The Aggregate operator can't reduce distinct values of grouping + // keys so the filter won't see any new data after push down. case filter @ Filter(condition, aggregate: Aggregate) if aggregate.aggregateExpressions.forall(_.deterministic) && aggregate.groupingExpressions.nonEmpty => @@ -1777,8 +1785,8 @@ object PushPredicateThroughNonJoin extends Rule[LogicalPlan] with PredicateHelpe // attributes produced by the aggregate operator's child operator. val (pushDown, stayUp) = splitConjunctivePredicates(condition).partition { cond => val replaced = replaceAlias(cond, aliasMap) - cond.deterministic && !cond.throwable && - cond.references.nonEmpty && replaced.references.subsetOf(aggregate.child.outputSet) + cond.deterministic && cond.references.nonEmpty && + replaced.references.subsetOf(aggregate.child.outputSet) } if (pushDown.nonEmpty) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala index fd7a87087ddd2..832af340c3397 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala @@ -58,13 +58,15 @@ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSup case _ => false } - protected def empty(plan: LogicalPlan): LocalRelation = + protected def empty(plan: LogicalPlan): LogicalPlan = LocalRelation(plan.output, data = Seq.empty, isStreaming = plan.isStreaming) // Construct a project list from plan's output, while the value is always NULL. 
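A small end-to-end illustration of the Aggregate push-down case documented above; the DataFrame and column names are made up for the sketch:

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().getOrCreate()
    import spark.implicits._

    val sales = spark.range(100).selectExpr("id % 10 AS store", "id AS amount")

    // The predicate only references the grouping key, so it can be evaluated below the
    // Aggregate: grouping never produces key values that were absent from its input.
    val filteredAfter  = sales.groupBy("store").sum("amount").filter($"store" > 5)
    val filteredBefore = sales.filter($"store" > 5).groupBy("store").sum("amount")
    // After PushPredicateThroughNonJoin both queries optimize to the same plan shape.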
private def nullValueProjectList(plan: LogicalPlan): Seq[NamedExpression] = plan.output.map{ a => Alias(cast(Literal(null), a.dataType), a.name)(a.exprId) } + protected def canExecuteWithoutJoin(plan: LogicalPlan): Boolean = true + protected def commonApplyFunc: PartialFunction[LogicalPlan, LogicalPlan] = { case p: Union if p.children.exists(isEmpty) => val newChildren = p.children.filterNot(isEmpty) @@ -111,18 +113,19 @@ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSup case LeftSemi if isRightEmpty | isFalseCondition => empty(p) case LeftAnti if isRightEmpty | isFalseCondition => p.left case FullOuter if isLeftEmpty && isRightEmpty => empty(p) - case LeftOuter | FullOuter if isRightEmpty => + case LeftOuter | FullOuter if isRightEmpty && canExecuteWithoutJoin(p.left) => Project(p.left.output ++ nullValueProjectList(p.right), p.left) case RightOuter if isRightEmpty => empty(p) - case RightOuter | FullOuter if isLeftEmpty => + case RightOuter | FullOuter if isLeftEmpty && canExecuteWithoutJoin(p.right) => Project(nullValueProjectList(p.left) ++ p.right.output, p.right) - case LeftOuter if isFalseCondition => + case LeftOuter if isFalseCondition && canExecuteWithoutJoin(p.left) => Project(p.left.output ++ nullValueProjectList(p.right), p.left) - case RightOuter if isFalseCondition => + case RightOuter if isFalseCondition && canExecuteWithoutJoin(p.right) => Project(nullValueProjectList(p.left) ++ p.right.output, p.right) case _ => p } - } else if (joinType == LeftSemi && conditionOpt.isEmpty && nonEmpty(p.right)) { + } else if (joinType == LeftSemi && conditionOpt.isEmpty && + nonEmpty(p.right) && canExecuteWithoutJoin(p.left)) { p.left } else if (joinType == LeftAnti && conditionOpt.isEmpty && nonEmpty(p.right)) { empty(p) @@ -130,8 +133,10 @@ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSup p } - // the only case can be matched here is that LogicalQueryStage is empty - case p: LeafNode if !p.isInstanceOf[LocalRelation] && isEmpty(p) => empty(p) + // Only replace a query stage if it would lead to a reduction of operators. !p.isDirectStage + // means the physical node it contains is partial aggregate instead of QueryStageExec, which + // is exactly what we want to propagate empty relation. 
+ case p: LogicalQueryStage if isEmpty(p) && !p.isDirectStage => empty(p) case p: UnaryNode if p.children.nonEmpty && p.children.forall(isEmpty) => p match { case _: Project => empty(p) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 772382f5f1e12..2cda1142299ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.SparkIllegalArgumentException -import org.apache.spark.internal.LogKey.{SQL_TEXT, UNSUPPORTED_EXPRESSION} +import org.apache.spark.internal.LogKeys.{SQL_TEXT, UNSUPPORTED_EXPR} import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, CaseWhen, EqualNullSafe, Expression, If, In, InSet, LambdaFunction, Literal, MapFilter, Not, Or} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} @@ -141,7 +141,7 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { "expr" -> e.sql)) } else { val message = log"Expected a Boolean type expression in replaceNullWithFalse, " + - log"but got the type `${MDC(UNSUPPORTED_EXPRESSION, e.dataType.catalogString)}` " + + log"but got the type `${MDC(UNSUPPORTED_EXPR, e.dataType.catalogString)}` " + log"in `${MDC(SQL_TEXT, e.sql)}`." logWarning(message) e diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala index 934eadbcee551..393a66f7c1e4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpression.scala @@ -21,36 +21,65 @@ import scala.collection.mutable import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, PlanHelper, Project} +import org.apache.spark.sql.catalyst.planning.PhysicalAggregation +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, PlanHelper, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{COMMON_EXPR_REF, WITH_EXPRESSION} +import org.apache.spark.sql.internal.SQLConf /** * Rewrites the `With` expressions by adding a `Project` to pre-evaluate the common expressions, or * just inline them if they are cheap. * + * Since this rule can introduce new `Project` operators, it is advised to run [[CollapseProject]] + * after this rule. + * * Note: For now we only use `With` in a few `RuntimeReplaceable` expressions. If we expand its * usage, we should support aggregate/window functions as well. 
*/ object RewriteWithExpression extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = { - plan.transformDownWithSubqueriesAndPruning(_.containsPattern(WITH_EXPRESSION)) { + plan.transformUpWithSubqueriesAndPruning(_.containsPattern(WITH_EXPRESSION)) { + // For aggregates, separate the computation of the aggregations themselves from the final + // result by moving the final result computation into a projection above it. This prevents + // this rule from producing an invalid Aggregate operator. + case p @ PhysicalAggregation( + groupingExpressions, aggregateExpressions, resultExpressions, child) + if p.expressions.exists(_.containsPattern(WITH_EXPRESSION)) => + // PhysicalAggregation returns aggregateExpressions as attribute references, which we change + // to aliases so that they can be referred to by resultExpressions. + val aggExprs = aggregateExpressions.map( + ae => Alias(ae, "_aggregateexpression")(ae.resultId)) + val aggExprIds = aggExprs.map(_.exprId).toSet + val resExprs = resultExpressions.map(_.transform { + case a: AttributeReference if aggExprIds.contains(a.exprId) => + a.withName("_aggregateexpression") + }.asInstanceOf[NamedExpression]) + // Rewrite the projection and the aggregate separately and then piece them together. + val agg = Aggregate(groupingExpressions, groupingExpressions ++ aggExprs, child) + val rewrittenAgg = applyInternal(agg) + val proj = Project(resExprs, rewrittenAgg) + applyInternal(proj) case p if p.expressions.exists(_.containsPattern(WITH_EXPRESSION)) => - val inputPlans = p.children.toArray - var newPlan: LogicalPlan = p.mapExpressions { expr => - rewriteWithExprAndInputPlans(expr, inputPlans) - } - newPlan = newPlan.withNewChildren(inputPlans.toIndexedSeq) - // Since we add extra Projects with extra columns to pre-evaluate the common expressions, - // the current operator may have extra columns if it inherits the output columns from its - // child, and we need to project away the extra columns to keep the plan schema unchanged. - assert(p.output.length <= newPlan.output.length) - if (p.output.length < newPlan.output.length) { - assert(p.outputSet.subsetOf(newPlan.outputSet)) - Project(p.output, newPlan) - } else { - newPlan - } + applyInternal(p) + } + } + + private def applyInternal(p: LogicalPlan): LogicalPlan = { + val inputPlans = p.children.toArray + var newPlan: LogicalPlan = p.mapExpressions { expr => + rewriteWithExprAndInputPlans(expr, inputPlans) + } + newPlan = newPlan.withNewChildren(inputPlans.toIndexedSeq) + // Since we add extra Projects with extra columns to pre-evaluate the common expressions, + // the current operator may have extra columns if it inherits the output columns from its + // child, and we need to project away the extra columns to keep the plan schema unchanged. + assert(p.output.length <= newPlan.output.length) + if (p.output.length < newPlan.output.length) { + assert(p.outputSet.subsetOf(newPlan.outputSet)) + Project(p.output, newPlan) + } else { + newPlan } } @@ -93,7 +122,12 @@ object RewriteWithExpression extends Rule[LogicalPlan] { // if it's ref count is 1. 
refToExpr(id) = child } else { - val alias = Alias(child, s"_common_expr_$index")() + val aliasName = if (SQLConf.get.getConf(SQLConf.USE_COMMON_EXPR_ID_FOR_ALIAS)) { + s"_common_expr_${id.id}" + } else { + s"_common_expr_$index" + } + val alias = Alias(child, aliasName)() val fakeProj = Project(Seq(alias), inputPlans(childProjectionIndex)) if (PlanHelper.specialExpressionsInUnsupportedOperator(fakeProj).nonEmpty) { // We have to inline the common expression if it cannot be put in a Project. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 32700f176f25a..1750a0e275732 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -738,18 +738,19 @@ object LikeSimplification extends Rule[LogicalPlan] with PredicateHelper { } else { pattern match { case startsWith(prefix) => - Some(StartsWith(input, Literal(prefix))) + Some(StartsWith(input, Literal.create(prefix, input.dataType))) case endsWith(postfix) => - Some(EndsWith(input, Literal(postfix))) + Some(EndsWith(input, Literal.create(postfix, input.dataType))) // 'a%a' pattern is basically same with 'a%' && '%a'. // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. - case startsAndEndsWith(prefix, postfix) => - Some(And(GreaterThanOrEqual(Length(input), Literal(prefix.length + postfix.length)), - And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix))))) + case startsAndEndsWith(prefix, postfix) => Some( + And(GreaterThanOrEqual(Length(input), Literal.create(prefix.length + postfix.length)), + And(StartsWith(input, Literal.create(prefix, input.dataType)), + EndsWith(input, Literal.create(postfix, input.dataType))))) case contains(infix) => - Some(Contains(input, Literal(infix))) + Some(Contains(input, Literal.create(infix, input.dataType))) case equalTo(str) => - Some(EqualTo(input, Literal(str))) + Some(EqualTo(input, Literal.create(str, input.dataType))) case _ => None } } @@ -785,7 +786,7 @@ object LikeSimplification extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( _.containsPattern(LIKE_FAMLIY), ruleId) { - case l @ Like(input, Literal(pattern, StringType), escapeChar) => + case l @ Like(input, Literal(pattern, _: StringType), escapeChar) => if (pattern == null) { // If pattern is null, return null value directly, since "col like null" == null. Literal(null, BooleanType) @@ -1023,7 +1024,7 @@ object FoldablePropagation extends Rule[LogicalPlan] { plan } else { plan transformExpressions { - case a: AttributeReference if foldableMap.contains(a) => foldableMap(a) + case a: AttributeReference if foldableMap.contains(a) => foldableMap(a).withName(a.name) } } } @@ -1088,17 +1089,6 @@ object SimplifyCasts extends Rule[LogicalPlan] { } -/** - * Removes nodes that are not necessary. - */ -object RemoveDispensableExpressions extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.transformAllExpressionsWithPruning( - _.containsPattern(UNARY_POSITIVE), ruleId) { - case UnaryPositive(child) => child - } -} - - /** * Removes the inner case conversion expressions that are unnecessary because * the inner conversion is overwritten by the outer one. 
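To make the `LikeSimplification` change concrete, a minimal sketch using the catalyst classes touched by the patch; the helper and the attribute below are invented for illustration:

    import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal, StartsWith}
    import org.apache.spark.sql.types.StringType

    // 'col LIKE "abc%"' simplifies to StartsWith, and the pattern literal now inherits the
    // input's exact string type (relevant for collated strings) instead of plain StringType.
    def simplifyPrefixLike(input: Expression, prefix: String): Expression =
      StartsWith(input, Literal.create(prefix, input.dataType))

    val c = AttributeReference("c", StringType)()
    val simplified = simplifyPrefixLike(c, "abc")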
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 92ac7599a8ff7..48753fbfe3267 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, instantToMicros, localDateTimeToMicros} import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLExpr import org.apache.spark.sql.connector.catalog.CatalogManager +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -151,11 +152,11 @@ case class ReplaceCurrentLike(catalogManager: CatalogManager) extends Rule[Logic plan.transformAllExpressionsWithPruning(_.containsPattern(CURRENT_LIKE)) { case CurrentDatabase() => - Literal.create(currentNamespace, StringType) + Literal.create(currentNamespace, SQLConf.get.defaultStringType) case CurrentCatalog() => - Literal.create(currentCatalog, StringType) + Literal.create(currentCatalog, SQLConf.get.defaultStringType) case CurrentUser() => - Literal.create(currentUser, StringType) + Literal.create(currentUser, SQLConf.get.defaultStringType) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala index 655b7c3455b1e..9fc4873c248b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala @@ -20,15 +20,16 @@ package org.apache.spark.sql.catalyst.optimizer import scala.annotation.tailrec import scala.util.control.NonFatal -import org.apache.spark.internal.LogKey.JOIN_CONDITION -import org.apache.spark.internal.MDC +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{HASH_JOIN_KEYS, JOIN_CONDITION} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.planning.{ExtractEquiJoinKeys, ExtractFiltersAndInnerJoins} +import org.apache.spark.sql.catalyst.planning.{ExtractEquiJoinKeys, ExtractFiltersAndInnerJoins, ExtractSingleColumnNullAwareAntiJoin} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.trees.TreePattern._ +import org.apache.spark.sql.catalyst.util.UnsafeRowUtils import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.Utils @@ -286,61 +287,55 @@ case object BuildRight extends BuildSide case object BuildLeft extends BuildSide -trait JoinSelectionHelper { +trait JoinSelectionHelper extends Logging { def getBroadcastBuildSide( - left: LogicalPlan, - right: LogicalPlan, - joinType: JoinType, - hint: JoinHint, + join: Join, hintOnly: Boolean, conf: SQLConf): Option[BuildSide] = { val buildLeft = if (hintOnly) { - hintToBroadcastLeft(hint) + hintToBroadcastLeft(join.hint) } else { - canBroadcastBySize(left, conf) && !hintToNotBroadcastLeft(hint) + canBroadcastBySize(join.left, conf) && 
!hintToNotBroadcastLeft(join.hint) } val buildRight = if (hintOnly) { - hintToBroadcastRight(hint) + hintToBroadcastRight(join.hint) } else { - canBroadcastBySize(right, conf) && !hintToNotBroadcastRight(hint) + canBroadcastBySize(join.right, conf) && !hintToNotBroadcastRight(join.hint) } getBuildSide( - canBuildBroadcastLeft(joinType) && buildLeft, - canBuildBroadcastRight(joinType) && buildRight, - left, - right + canBuildBroadcastLeft(join.joinType) && buildLeft, + canBuildBroadcastRight(join.joinType) && buildRight, + join.left, + join.right ) } def getShuffleHashJoinBuildSide( - left: LogicalPlan, - right: LogicalPlan, - joinType: JoinType, - hint: JoinHint, + join: Join, hintOnly: Boolean, conf: SQLConf): Option[BuildSide] = { val buildLeft = if (hintOnly) { - hintToShuffleHashJoinLeft(hint) + hintToShuffleHashJoinLeft(join.hint) } else { - hintToPreferShuffleHashJoinLeft(hint) || - (!conf.preferSortMergeJoin && canBuildLocalHashMapBySize(left, conf) && - muchSmaller(left, right, conf)) || + hintToPreferShuffleHashJoinLeft(join.hint) || + (!conf.preferSortMergeJoin && canBuildLocalHashMapBySize(join.left, conf) && + muchSmaller(join.left, join.right, conf)) || forceApplyShuffledHashJoin(conf) } val buildRight = if (hintOnly) { - hintToShuffleHashJoinRight(hint) + hintToShuffleHashJoinRight(join.hint) } else { - hintToPreferShuffleHashJoinRight(hint) || - (!conf.preferSortMergeJoin && canBuildLocalHashMapBySize(right, conf) && - muchSmaller(right, left, conf)) || + hintToPreferShuffleHashJoinRight(join.hint) || + (!conf.preferSortMergeJoin && canBuildLocalHashMapBySize(join.right, conf) && + muchSmaller(join.right, join.left, conf)) || forceApplyShuffledHashJoin(conf) } getBuildSide( - canBuildShuffledHashJoinLeft(joinType) && buildLeft, - canBuildShuffledHashJoinRight(joinType) && buildRight, - left, - right + canBuildShuffledHashJoinLeft(join.joinType) && buildLeft, + canBuildShuffledHashJoinRight(join.joinType) && buildRight, + join.left, + join.right ) } @@ -400,11 +395,32 @@ trait JoinSelectionHelper { } } - def canPlanAsBroadcastHashJoin(join: Join, conf: SQLConf): Boolean = { - getBroadcastBuildSide(join.left, join.right, join.joinType, - join.hint, hintOnly = true, conf).isDefined || - getBroadcastBuildSide(join.left, join.right, join.joinType, - join.hint, hintOnly = false, conf).isDefined + protected def hashJoinSupported + (leftKeys: Seq[Expression], rightKeys: Seq[Expression]): Boolean = { + val result = leftKeys.concat(rightKeys).forall(e => UnsafeRowUtils.isBinaryStable(e.dataType)) + if (!result) { + val keysNotSupportingHashJoin = leftKeys.concat(rightKeys).filterNot( + e => UnsafeRowUtils.isBinaryStable(e.dataType)) + logWarning(log"Hash based joins are not supported due to joining on keys that don't " + + log"support binary equality. 
Keys not supporting hash joins: " + + log"${ + MDC(HASH_JOIN_KEYS, keysNotSupportingHashJoin.map( + e => e.toString + " due to DataType: " + e.dataType.typeName).mkString(", ")) + }") + } + result + } + + def canPlanAsBroadcastHashJoin(join: Join, conf: SQLConf): Boolean = join match { + case ExtractEquiJoinKeys(_, leftKeys, rightKeys, _, _, _, _, _) => + val hashJoinSupport = hashJoinSupported(leftKeys, rightKeys) + val noShufflePlannedBefore = + !hashJoinSupport || getShuffleHashJoinBuildSide(join, hintOnly = true, conf).isEmpty + getBroadcastBuildSide(join, hintOnly = true, conf).isDefined || + (noShufflePlannedBefore && + getBroadcastBuildSide(join, hintOnly = false, conf).isDefined) + case ExtractSingleColumnNullAwareAntiJoin(_, _) => true + case _ => false } def canPruneLeft(joinType: JoinType): Boolean = joinType match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala index 2d6fabaaef68a..1c477964a6890 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AbstractSqlParser.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.sql.catalyst.parser +import org.antlr.v4.runtime.ParserRuleContext + import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.parser.ParserUtils.withOrigin @@ -30,44 +32,56 @@ abstract class AbstractSqlParser extends AbstractParser with ParserInterface { override def astBuilder: AstBuilder /** Creates Expression for a given SQL string. */ - override def parseExpression(sqlText: String): Expression = parse(sqlText) { parser => - val ctx = parser.singleExpression() - withOrigin(ctx, Some(sqlText)) { - astBuilder.visitSingleExpression(ctx) + override def parseExpression(sqlText: String): Expression = + parse(sqlText) { parser => + val ctx = parser.singleExpression() + withErrorHandling(ctx, Some(sqlText)) { + astBuilder.visitSingleExpression(ctx) + } } - } /** Creates TableIdentifier for a given SQL string. */ - override def parseTableIdentifier(sqlText: String): TableIdentifier = parse(sqlText) { parser => - astBuilder.visitSingleTableIdentifier(parser.singleTableIdentifier()) - } + override def parseTableIdentifier(sqlText: String): TableIdentifier = + parse(sqlText) { parser => + val ctx = parser.singleTableIdentifier() + withErrorHandling(ctx, Some(sqlText)) { + astBuilder.visitSingleTableIdentifier(ctx) + } + } /** Creates FunctionIdentifier for a given SQL string. */ override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = { parse(sqlText) { parser => - astBuilder.visitSingleFunctionIdentifier(parser.singleFunctionIdentifier()) + val ctx = parser.singleFunctionIdentifier() + withErrorHandling(ctx, Some(sqlText)) { + astBuilder.visitSingleFunctionIdentifier(ctx) + } } } /** Creates a multi-part identifier for a given SQL string */ override def parseMultipartIdentifier(sqlText: String): Seq[String] = { parse(sqlText) { parser => - astBuilder.visitSingleMultipartIdentifier(parser.singleMultipartIdentifier()) + val ctx = parser.singleMultipartIdentifier() + withErrorHandling(ctx, Some(sqlText)) { + astBuilder.visitSingleMultipartIdentifier(ctx) + } } } /** Creates LogicalPlan for a given SQL string of query. 
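Going back to the join-selection change above, the essence of the new guard can be checked in isolation; `UnsafeRowUtils.isBinaryStable` is the helper used in the patch, while the key types here are only examples:

    import org.apache.spark.sql.catalyst.util.UnsafeRowUtils
    import org.apache.spark.sql.types.{DataType, IntegerType, StringType}

    // Hash-based joins need join keys whose binary form is stable under equality; a
    // collation-aware string type, for instance, can compare equal with different bytes.
    val keyTypes: Seq[DataType] = Seq(IntegerType, StringType)
    val hashJoinOk = keyTypes.forall(UnsafeRowUtils.isBinaryStable)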
*/ - override def parseQuery(sqlText: String): LogicalPlan = parse(sqlText) { parser => - val ctx = parser.query() - withOrigin(ctx, Some(sqlText)) { - astBuilder.visitQuery(ctx) + override def parseQuery(sqlText: String): LogicalPlan = + parse(sqlText) { parser => + val ctx = parser.query() + withErrorHandling(ctx, Some(sqlText)) { + astBuilder.visitQuery(ctx) + } } - } /** Creates LogicalPlan for a given SQL string. */ override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser => val ctx = parser.singleStatement() - withOrigin(ctx, Some(sqlText)) { + withErrorHandling(ctx, Some(sqlText)) { astBuilder.visitSingleStatement(ctx) match { case plan: LogicalPlan => plan case _ => @@ -76,4 +90,28 @@ abstract class AbstractSqlParser extends AbstractParser with ParserInterface { } } } + + /** Creates [[CompoundBody]] for a given SQL script string. */ + override def parseScript(sqlScriptText: String): CompoundBody = parse(sqlScriptText) { parser => + val ctx = parser.compoundOrSingleStatement() + withErrorHandling(ctx, Some(sqlScriptText)) { + astBuilder.visitCompoundOrSingleStatement(ctx) match { + case body: CompoundBody => body + case _ => + val position = Origin(None, None) + throw QueryParsingErrors.sqlStatementUnsupportedError(sqlScriptText, position) + } + } + } + + def withErrorHandling[T](ctx: ParserRuleContext, sqlText: Option[String])(toResult: => T): T = { + withOrigin(ctx, sqlText) { + try { + toResult + } catch { + case so: StackOverflowError => + throw QueryParsingErrors.parserStackOverflow(ctx) + } + } + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 34672485ddc9b..dc43bd1636594 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -20,21 +20,20 @@ package org.apache.spark.sql.catalyst.parser import java.util.Locale import java.util.concurrent.TimeUnit -import scala.collection.mutable.{ArrayBuffer, Set} +import scala.collection.mutable.{ArrayBuffer, ListBuffer, Set} import scala.jdk.CollectionConverters._ import scala.util.{Left, Right} import org.antlr.v4.runtime.{ParserRuleContext, Token} import org.antlr.v4.runtime.misc.Interval import org.antlr.v4.runtime.tree.{ParseTree, RuleNode, TerminalNode} -import org.apache.commons.codec.DecoderException -import org.apache.commons.codec.binary.Hex import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkThrowable} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PARTITION_SPECIFICATION +import org.apache.spark.internal.LogKeys.PARTITION_SPECIFICATION import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FUNC_ALIAS import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, ClusterBySpec} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{AnyValue, First, Last} @@ -50,7 +49,9 @@ import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => 
V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryParsingErrors} +import org.apache.spark.sql.errors.DataTypeErrors.toSQLStmt import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.LEGACY_BANG_EQUALS_NOT import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} import org.apache.spark.util.ArrayImplicits._ @@ -114,6 +115,45 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } } + override def visitCompoundOrSingleStatement( + ctx: CompoundOrSingleStatementContext): CompoundBody = withOrigin(ctx) { + Option(ctx.singleCompoundStatement()).map { s => + visit(s).asInstanceOf[CompoundBody] + }.getOrElse { + val logicalPlan = visitSingleStatement(ctx.singleStatement()) + CompoundBody(Seq(SingleStatement(parsedPlan = logicalPlan))) + } + } + + override def visitSingleCompoundStatement(ctx: SingleCompoundStatementContext): CompoundBody = { + visit(ctx.beginEndCompoundBlock()).asInstanceOf[CompoundBody] + } + + private def visitCompoundBodyImpl(ctx: CompoundBodyContext): CompoundBody = { + val buff = ListBuffer[CompoundPlanStatement]() + ctx.compoundStatements.forEach(compoundStatement => { + buff += visit(compoundStatement).asInstanceOf[CompoundPlanStatement] + }) + CompoundBody(buff.toSeq) + } + + override def visitBeginEndCompoundBlock(ctx: BeginEndCompoundBlockContext): CompoundBody = { + visitCompoundBodyImpl(ctx.compoundBody()) + } + + override def visitCompoundBody(ctx: CompoundBodyContext): CompoundBody = { + visitCompoundBodyImpl(ctx) + } + + override def visitCompoundStatement(ctx: CompoundStatementContext): CompoundPlanStatement = + withOrigin(ctx) { + Option(ctx.statement()).map {s => + SingleStatement(parsedPlan = visit(s).asInstanceOf[LogicalPlan]) + }.getOrElse { + visit(ctx.beginEndCompoundBlock()).asInstanceOf[CompoundPlanStatement] + } + } + override def visitSingleStatement(ctx: SingleStatementContext): LogicalPlan = withOrigin(ctx) { visit(ctx.statement).asInstanceOf[LogicalPlan] } @@ -365,6 +405,8 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { val cols = Option(ctx.identifierList()).map(visitIdentifierList).getOrElse(Nil) val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) + blockBang(ctx.errorCapturingNot()) + if (ctx.EXISTS != null) { invalidStatement("INSERT INTO ... 
IF NOT EXISTS", ctx) } @@ -381,6 +423,8 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { val cols = Option(ctx.identifierList()).map(visitIdentifierList).getOrElse(Nil) val partitionKeys = Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty) + blockBang(ctx.errorCapturingNot()) + val dynamicPartitionKeys: Map[String, Option[String]] = partitionKeys.filter(_._2.isEmpty) if (ctx.EXISTS != null && dynamicPartitionKeys.nonEmpty) { operationNotAllowed("IF NOT EXISTS with dynamic partitions: " + @@ -455,6 +499,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } override def visitMergeIntoTable(ctx: MergeIntoTableContext): LogicalPlan = withOrigin(ctx) { + val withSchemaEvolution = ctx.EVOLUTION() != null val targetTable = createUnresolvedRelation(ctx.target) val targetTableAlias = getTableAliasWithoutColumnAlias(ctx.targetAlias, "MERGE") val aliasedTarget = targetTableAlias.map(SubqueryAlias(_, targetTable)).getOrElse(targetTable) @@ -549,7 +594,8 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { mergeCondition, matchedActions.toSeq, notMatchedActions.toSeq, - notMatchedBySourceActions.toSeq) + notMatchedBySourceActions.toSeq, + withSchemaEvolution) } /** @@ -843,7 +889,9 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { // Create the attributes. val (attributes, schemaLess) = if (transformClause.colTypeList != null) { // Typed return columns. - (DataTypeUtils.toAttributes(createSchema(transformClause.colTypeList)), false) + val schema = createSchema(transformClause.colTypeList) + val replacedSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema) + (DataTypeUtils.toAttributes(replacedSchema), false) } else if (transformClause.identifierSeq != null) { // Untyped return columns. val attrs = visitIdentifierSeq(transformClause.identifierSeq).map { name => @@ -1630,6 +1678,20 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } partitionByExpressions = p.partition.asScala.map(expression).toSeq orderByExpressions = p.sortItem.asScala.map(visitSortItem).toSeq + def invalidPartitionOrOrderingExpression(clause: String): String = { + "The table function call includes a table argument with an invalid " + + s"partitioning/ordering specification: the $clause clause included multiple " + + "expressions without parentheses surrounding them; please add parentheses around " + + "these expressions and then retry the query again" + } + validate( + Option(p.invalidMultiPartitionExpression).isEmpty, + message = invalidPartitionOrOrderingExpression("PARTITION BY"), + ctx = p.invalidMultiPartitionExpression) + validate( + Option(p.invalidMultiSortItem).isEmpty, + message = invalidPartitionOrOrderingExpression("ORDER BY"), + ctx = p.invalidMultiSortItem) } validate( !(withSinglePartition && partitionByExpressions.nonEmpty), @@ -1864,6 +1926,25 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { exceptCols.toSeq) } + /** + * Check for the inappropriate usage of the '!' token. + * '!' used to be a synonym for 'NOT' in the lexer, but that was too general. + * '!' should only be a synonym for 'NOT' when used as a prefix in a logical operation. + * We do that now explicitly. 
+ */ + def blockBang(ctx: ErrorCapturingNotContext): ErrorCapturingNotContext = { + val tolerateBang = conf.getConf(LEGACY_BANG_EQUALS_NOT) + if (ctx != null && ctx.BANG() != null && !tolerateBang) { + withOrigin(ctx) { + throw new ParseException( + errorClass = "SYNTAX_DISCONTINUED.BANG_EQUALS_NOT", + messageParameters = Map("clause" -> toSQLStmt("!")), + ctx) + } + } + ctx + } + /** * Create an aliased expression if an alias is specified. Both single and multi-aliases are * supported. @@ -2003,9 +2084,12 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { */ private def withPredicate(e: Expression, ctx: PredicateContext): Expression = withOrigin(ctx) { // Invert a predicate if it has a valid NOT clause. - def invertIfNotDefined(e: Expression): Expression = ctx.NOT match { - case null => e - case not => Not(e) + def invertIfNotDefined(e: Expression): Expression = { + val withNot = blockBang(ctx.errorCapturingNot) + withNot match { + case null => e + case _ => Not(e) + } } def getValueExpressions(e: Expression): Seq[Expression] = e match { @@ -2027,6 +2111,8 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { case _ => new Like(expr, pattern) } + val withNot = blockBang(ctx.errorCapturingNot) + // Create the predicate. ctx.kind.getType match { case SqlBaseParser.BETWEEN => @@ -2046,7 +2132,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { // So we use LikeAny or NotLikeAny instead. val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) val (expr, pat) = lowerLikeArgsIfNeeded(e, patterns) - ctx.NOT match { + withNot match { case null => LikeAny(expr, pat) case _ => NotLikeAny(expr, pat) } @@ -2062,7 +2148,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { // So we use LikeAll or NotLikeAll instead. 
val patterns = expressions.map(_.eval(EmptyRow).asInstanceOf[UTF8String]) val (expr, pat) = lowerLikeArgsIfNeeded(e, patterns) - ctx.NOT match { + withNot match { case null => LikeAll(expr, pat) case _ => NotLikeAll(expr, pat) } @@ -2086,23 +2172,23 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } case SqlBaseParser.RLIKE => invertIfNotDefined(RLike(e, expression(ctx.pattern))) - case SqlBaseParser.NULL if ctx.NOT != null => + case SqlBaseParser.NULL if withNot != null => IsNotNull(e) case SqlBaseParser.NULL => IsNull(e) - case SqlBaseParser.TRUE => ctx.NOT match { + case SqlBaseParser.TRUE => withNot match { case null => EqualNullSafe(e, Literal(true)) case _ => Not(EqualNullSafe(e, Literal(true))) } - case SqlBaseParser.FALSE => ctx.NOT match { + case SqlBaseParser.FALSE => withNot match { case null => EqualNullSafe(e, Literal(false)) case _ => Not(EqualNullSafe(e, Literal(false))) } - case SqlBaseParser.UNKNOWN => ctx.NOT match { + case SqlBaseParser.UNKNOWN => withNot match { case null => IsUnknown(e) case _ => IsNotUnknown(e) } - case SqlBaseParser.DISTINCT if ctx.NOT != null => + case SqlBaseParser.DISTINCT if withNot != null => EqualNullSafe(e, expression(ctx.right)) case SqlBaseParser.DISTINCT => Not(EqualNullSafe(e, expression(ctx.right))) @@ -2148,6 +2234,19 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } } + override def visitShiftExpression(ctx: ShiftExpressionContext): Expression = withOrigin(ctx) { + val left = expression(ctx.left) + val right = expression(ctx.right) + val operator = ctx.shiftOperator().getChild(0).asInstanceOf[TerminalNode] + val shift = operator.getSymbol.getType match { + case SqlBaseParser.SHIFT_LEFT => ShiftLeft(left, right) + case SqlBaseParser.SHIFT_RIGHT => ShiftRight(left, right) + case SqlBaseParser.SHIFT_RIGHT_UNSIGNED => ShiftRightUnsigned(left, right) + } + shift.setTagValue(FUNC_ALIAS, operator.getText) + shift + } + /** * Create a unary arithmetic expression. 
The following arithmetic operators are supported: * - Plus: '+' @@ -2203,6 +2302,20 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { */ override def visitCast(ctx: CastContext): Expression = withOrigin(ctx) { val rawDataType = typedVisit[DataType](ctx.dataType()) + ctx.dataType() match { + case context: PrimitiveDataTypeContext => + val typeCtx = context.`type`() + if (typeCtx.start.getType == STRING) { + typeCtx.children.asScala.toSeq match { + case Seq(_, cctx: CollateClauseContext) => + throw QueryParsingErrors.dataTypeUnsupportedError( + rawDataType.typeName, + ctx.dataType().asInstanceOf[PrimitiveDataTypeContext]) + case _ => + } + } + case _ => + } val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) ctx.name.getType match { case SqlBaseParser.CAST => @@ -2222,6 +2335,20 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { */ override def visitCastByColon(ctx: CastByColonContext): Expression = withOrigin(ctx) { val rawDataType = typedVisit[DataType](ctx.dataType()) + ctx.dataType() match { + case context: PrimitiveDataTypeContext => + val typeCtx = context.`type`() + if (typeCtx.start.getType == STRING) { + typeCtx.children.asScala.toSeq match { + case Seq(_, cctx: CollateClauseContext) => + throw QueryParsingErrors.dataTypeUnsupportedError( + rawDataType.typeName, + ctx.dataType().asInstanceOf[PrimitiveDataTypeContext]) + case _ => + } + } + case _ => + } val dataType = CharVarcharUtils.replaceCharVarcharWithStringForCast(rawDataType) val cast = Cast(expression(ctx.primaryExpression), dataType) cast.setTagValue(Cast.USER_SPECIFIED_CAST, ()) @@ -2698,11 +2825,10 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { Literal(interval, CalendarIntervalType) } case BINARY_HEX => - val padding = if (value.length % 2 != 0) "0" else "" try { - Literal(Hex.decodeHex(padding + value)) + Literal(Hex.unhex(value), BinaryType) } catch { - case e: DecoderException => + case e: IllegalArgumentException => val ex = QueryParsingErrors.cannotParseValueTypeError("X", value, ctx) ex.setStackTrace(e.getStackTrace) throw ex @@ -3143,24 +3269,24 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { /** * Create top level table schema. */ - protected def createSchema(ctx: CreateOrReplaceTableColTypeListContext): StructType = { - val columns = Option(ctx).toArray.flatMap(visitCreateOrReplaceTableColTypeList) + protected def createSchema(ctx: ColDefinitionListContext): StructType = { + val columns = Option(ctx).toArray.flatMap(visitColDefinitionList) StructType(columns.map(_.toV1Column)) } /** * Get CREATE TABLE column definitions. */ - override def visitCreateOrReplaceTableColTypeList( - ctx: CreateOrReplaceTableColTypeListContext): Seq[ColumnDefinition] = withOrigin(ctx) { - ctx.createOrReplaceTableColType().asScala.map(visitCreateOrReplaceTableColType).toSeq + override def visitColDefinitionList( + ctx: ColDefinitionListContext): Seq[ColumnDefinition] = withOrigin(ctx) { + ctx.colDefinition().asScala.map(visitColDefinition).toSeq } /** * Get a CREATE TABLE column definition. 
*/ - override def visitCreateOrReplaceTableColType( - ctx: CreateOrReplaceTableColTypeContext): ColumnDefinition = withOrigin(ctx) { + override def visitColDefinition( + ctx: ColDefinitionContext): ColumnDefinition = withOrigin(ctx) { import ctx._ val name: String = colName.getText @@ -3171,6 +3297,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { var commentSpec: Option[CommentSpecContext] = None ctx.colDefinitionOption().asScala.foreach { option => if (option.NULL != null) { + blockBang(option.errorCapturingNot) if (!nullable) { throw QueryParsingErrors.duplicateTableColumnDescriptor( option, name, "NOT NULL") @@ -3424,6 +3551,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { */ override def visitCreateTableHeader( ctx: CreateTableHeaderContext): TableHeader = withOrigin(ctx) { + blockBang(ctx.errorCapturingNot) val temporary = ctx.TEMPORARY != null val ifNotExists = ctx.EXISTS != null if (temporary && ifNotExists) { @@ -3543,7 +3671,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } } - private def cleanNamespaceProperties( + protected def cleanNamespaceProperties( properties: Map[String, String], ctx: ParserRuleContext): Map[String, String] = withOrigin(ctx) { import SupportsNamespaces._ @@ -3600,6 +3728,8 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { properties += PROP_LOCATION -> _ } + blockBang(ctx.errorCapturingNot) + CreateNamespace( withIdentClause(ctx.identifierReference, UnresolvedNamespace(_)), ctx.EXISTS != null, @@ -3983,8 +4113,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { val (identifierContext, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) - val columns = Option(ctx.createOrReplaceTableColTypeList()) - .map(visitCreateOrReplaceTableColTypeList).getOrElse(Nil) + val columns = Option(ctx.colDefinitionList()).map(visitColDefinitionList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo, clusterBySpec) = visitCreateTableClauses(ctx.createTableClauses()) @@ -4065,8 +4194,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { val orCreate = ctx.replaceTableHeader().CREATE() != null val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo, clusterBySpec) = visitCreateTableClauses(ctx.createTableClauses()) - val columns = Option(ctx.createOrReplaceTableColTypeList()) - .map(visitCreateOrReplaceTableColTypeList).getOrElse(Nil) + val columns = Option(ctx.colDefinitionList()).map(visitColDefinitionList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) if (provider.isDefined && serdeInfo.isDefined) { @@ -4207,7 +4335,10 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { var colPosition: Option[ColPositionContext] = None val columnName = name.last ctx.colDefinitionDescriptorWithPosition.asScala.foreach { option => + blockBang(option.errorCapturingNot) + if (option.NULL != null) { + blockBang(option.errorCapturingNot) if (!nullable) { throw QueryParsingErrors.duplicateTableColumnDescriptor( option, columnName, "NOT NULL", isCreate = false) @@ -4411,6 +4542,8 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } var commentSpec: Option[CommentSpecContext] = None 
colType.colDefinitionDescriptorWithPosition.asScala.foreach { opt => + blockBang(opt.errorCapturingNot) + + if (opt.NULL != null) { throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError( "NOT NULL", "REPLACE COLUMNS", ctx) @@ -4862,6 +4995,7 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { val location = Option(splCtx.locationSpec).map(visitLocationSpec) UnresolvedPartitionSpec(spec, location) } + blockBang(ctx.errorCapturingNot) AddPartitions( createUnresolvedTable( ctx.identifierReference, @@ -4954,6 +5088,62 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { query = plan(ctx.query)) } + + /** + * Define the schema binding mode during CREATE or ALTER VIEW. + * The method also accepts a NULL context, in which case it returns the session default. + * + * {{{ + * WITH SCHEMA [ BINDING | COMPENSATION | TYPE EVOLUTION | EVOLUTION ] + * }}} + */ + override def visitSchemaBinding(ctx: SchemaBindingContext): ViewSchemaMode = { + if (ctx == null) { + // No schema binding specified, return the session default + if (conf.viewSchemaBindingEnabled) { + if (conf.viewSchemaCompensation) { + SchemaCompensation + } else { + SchemaBinding + } + } else { + SchemaUnsupported + } + } else if (!conf.viewSchemaBindingEnabled) { + // If the feature is disabled, throw an exception + withOrigin(ctx) { + throw new ParseException( + errorClass = "FEATURE_NOT_ENABLED", + messageParameters = Map("featureName" -> "VIEW ... WITH SCHEMA ...", + "configKey" -> "spark.sql.legacy.viewSchemaBindingMode", + "configValue" -> "true"), + ctx) + } + } else if (ctx.COMPENSATION != null) { + SchemaCompensation + } else if (ctx.TYPE != null) { + SchemaTypeEvolution + } else if (ctx.EVOLUTION != null) { + SchemaEvolution + } else { + SchemaBinding + } + } + + /** + * Alter the schema binding of a view. This creates an [[AlterViewSchemaBinding]]. + * + * For example: + * {{{ + * ALTER VIEW multi_part_name WITH SCHEMA ...; + * }}} + */ + override def visitAlterViewSchemaBinding(ctx: AlterViewSchemaBindingContext): LogicalPlan + = withOrigin(ctx) { + AlterViewSchemaBinding( + createUnresolvedView(ctx.identifierReference, "ALTER VIEW ... WITH SCHEMA ..."), + viewSchemaMode = visitSchemaBinding(ctx.schemaBinding)) + } + /** * Create a [[RenameTable]] command. * @@ -5106,6 +5296,8 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { .map(x => (Option(x.options).map(visitPropertyKeyValues).getOrElse(Map.empty))).toSeq val options = Option(ctx.options).map(visitPropertyKeyValues).getOrElse(Map.empty) + blockBang(ctx.errorCapturingNot) + CreateIndex( createUnresolvedTable(ctx.identifierReference, "CREATE INDEX"), indexName, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala index 3aec1dd431138..04edb0f75c463 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala @@ -62,4 +62,10 @@ trait ParserInterface extends DataTypeParserInterface { */ @throws[ParseException]("Text cannot be parsed to a LogicalPlan") def parseQuery(sqlText: String): LogicalPlan + + /** + * Parse a SQL script string to a [[CompoundBody]].
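 * For example (an illustrative sketch, not part of this change; `parser` stands for any
 * implementation of this trait):
 * {{{
 *   val body: CompoundBody = parser.parseScript("BEGIN SELECT 1; SELECT 2; END")
 *   // body.collection holds one SingleStatement per parsed SELECT
 * }}}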
+ */ + @throws[ParseException]("Text cannot be parsed to a CompoundBody") + def parseScript(sqlScriptText: String): CompoundBody } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingLogicalOperators.scala new file mode 100644 index 0000000000000..816ef82a3d8e6 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingLogicalOperators.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.parser + +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin, WithOrigin} + +/** + * Trait for all SQL Scripting logical operators that are product of parsing phase. + * These operators will be used by the SQL Scripting interpreter to generate execution nodes. + */ +sealed trait CompoundPlanStatement + +/** + * Logical operator representing result of parsing a single SQL statement + * that is supposed to be executed against Spark. + * @param parsedPlan Result of SQL statement parsing. + */ +case class SingleStatement(parsedPlan: LogicalPlan) + extends CompoundPlanStatement + with WithOrigin { + + override val origin: Origin = CurrentOrigin.get + + def getText(sqlScriptText: String): String = { + if (origin.startIndex.isEmpty || origin.stopIndex.isEmpty) { + return null + } + sqlScriptText.substring(origin.startIndex.get, origin.stopIndex.get + 1) + } +} + +/** + * Logical operator for a compound body. Contains all statements within the compound body. + * @param collection Collection of statements within the compound body. + */ +case class CompoundBody(collection: Seq[CompoundPlanStatement]) extends CompoundPlanStatement diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 0f049103542ec..3f417644082c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.plans +import java.util.IdentityHashMap + import scala.collection.mutable import org.apache.spark.sql.AnalysisException @@ -75,8 +77,13 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] /** * The set of all attributes that are input to this operator by its children. 
*/ - def inputSet: AttributeSet = - AttributeSet(children.flatMap(_.asInstanceOf[QueryPlan[PlanType]].output)) + def inputSet: AttributeSet = { + children match { + case Seq() => AttributeSet.empty + case Seq(c) => c.outputSet + case _ => AttributeSet.fromAttributeSets(children.map(_.outputSet)) + } + } /** * The set of all attributes that are produced by this node. @@ -221,12 +228,14 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] } } + @scala.annotation.nowarn("cat=deprecation") def recursiveTransform(arg: Any): AnyRef = arg match { case e: Expression => transformExpression(e) case Some(value) => Some(recursiveTransform(value)) case m: Map[_, _] => m case d: DataType => d // Avoid unpacking Structs - case stream: LazyList[_] => stream.map(recursiveTransform).force + case stream: Stream[_] => stream.map(recursiveTransform).force + case lazyList: LazyList[_] => lazyList.map(recursiveTransform).force case seq: Iterable[_] => seq.map(recursiveTransform) case other: AnyRef => other case null => null @@ -438,7 +447,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] override def verboseString(maxFields: Int): String = simpleString(maxFields) override def simpleStringWithNodeId(): String = { - val operatorId = getTagValue(QueryPlan.OP_ID_TAG).map(id => s"$id").getOrElse("unknown") + val operatorId = Option(QueryPlan.localIdMap.get().get(this)).map(id => s"$id") + .getOrElse("unknown") s"$nodeName ($operatorId)".trim } @@ -458,7 +468,8 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] } protected def formattedNodeName: String = { - val opId = getTagValue(QueryPlan.OP_ID_TAG).map(id => s"$id").getOrElse("unknown") + val opId = Option(QueryPlan.localIdMap.get().get(this)).map(id => s"$id") + .getOrElse("unknown") val codegenId = getTagValue(QueryPlan.CODEGEN_ID_TAG).map(id => s" [codegen id : $id]").getOrElse("") s"($opId) $nodeName$codegenId" @@ -517,6 +528,30 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] transformDownWithSubqueriesAndPruning(AlwaysProcess.fn, UnknownRuleId)(f) } + /** + * Same as `transformUpWithSubqueries` except allows for pruning opportunities. + */ + def transformUpWithSubqueriesAndPruning( + cond: TreePatternBits => Boolean, + ruleId: RuleId = UnknownRuleId) + (f: PartialFunction[PlanType, PlanType]): PlanType = { + val g: PartialFunction[PlanType, PlanType] = new PartialFunction[PlanType, PlanType] { + override def isDefinedAt(x: PlanType): Boolean = true + + override def apply(plan: PlanType): PlanType = { + val transformed = plan.transformExpressionsUpWithPruning(t => + t.containsPattern(PLAN_EXPRESSION) && cond(t)) { + case planExpression: PlanExpression[PlanType@unchecked] => + val newPlan = planExpression.plan.transformUpWithSubqueriesAndPruning(cond, ruleId)(f) + planExpression.withNewPlan(newPlan) + } + f.applyOrElse[PlanType, PlanType](transformed, identity) + } + } + + transformUpWithPruning(cond, ruleId)(g) + } + /** * This method is the top-down (pre-order) counterpart of transformUpWithSubqueries. * Returns a copy of this node where the given partial function has been recursively applied @@ -646,9 +681,17 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] } object QueryPlan extends PredicateHelper { - val OP_ID_TAG = TreeNodeTag[Int]("operatorId") val CODEGEN_ID_TAG = new TreeNodeTag[Int]("wholeStageCodegenId") + /** + * A thread local map to store the mapping between the query plan and the query plan id. + * The scope of this thread local is within ExplainUtils.processPlan. 
The reason we define it here + * is because [[ QueryPlan ]] also needs this, and it doesn't have access to `execution` package + * from `catalyst`. + */ + val localIdMap: ThreadLocal[java.util.Map[QueryPlan[_], Int]] = ThreadLocal.withInitial(() => + new IdentityHashMap[QueryPlan[_], Int]()) + /** * Normalize the exprIds in the given expression, by updating the exprId in `AttributeReference` * with its referenced ordinal from input attributes. It's similar to `BindReferences` but we diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala index f123258683ec3..d9da255eccc9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/joinTypes.scala @@ -19,10 +19,22 @@ package org.apache.spark.sql.catalyst.plans import java.util.Locale -import org.apache.spark.{SparkIllegalArgumentException, SparkUnsupportedOperationException} +import org.apache.spark.SparkUnsupportedOperationException +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.Attribute object JoinType { + + val supported = Seq( + "inner", + "outer", "full", "fullouter", "full_outer", + "leftouter", "left", "left_outer", + "rightouter", "right", "right_outer", + "leftsemi", "left_semi", "semi", + "leftanti", "left_anti", "anti", + "cross" + ) + def apply(typ: String): JoinType = typ.toLowerCase(Locale.ROOT).replace("_", "") match { case "inner" => Inner case "outer" | "full" | "fullouter" => FullOuter @@ -32,20 +44,12 @@ object JoinType { case "leftanti" | "anti" => LeftAnti case "cross" => Cross case _ => - val supported = Seq( - "inner", - "outer", "full", "fullouter", "full_outer", - "leftouter", "left", "left_outer", - "rightouter", "right", "right_outer", - "leftsemi", "left_semi", "semi", - "leftanti", "left_anti", "anti", - "cross") - - throw new SparkIllegalArgumentException( - errorClass = "_LEGACY_ERROR_TEMP_3216", + throw new AnalysisException( + errorClass = "UNSUPPORTED_JOIN_TYPE", messageParameters = Map( "typ" -> typ, - "supported" -> supported.mkString("'", "', '", "'"))) + "supported" -> supported.mkString("'", "', '", "'")) + ) } } @@ -129,15 +133,16 @@ object LeftSemiOrAnti { object AsOfJoinDirection { + val supported = Seq("forward", "backward", "nearest") + def apply(direction: String): AsOfJoinDirection = { direction.toLowerCase(Locale.ROOT) match { case "forward" => Forward case "backward" => Backward case "nearest" => Nearest case _ => - val supported = Seq("forward", "backward", "nearest") - throw new SparkIllegalArgumentException( - errorClass = "_LEGACY_ERROR_TEMP_3217", + throw new AnalysisException( + errorClass = "AS_OF_JOIN.UNSUPPORTED_DIRECTION", messageParameters = Map( "direction" -> direction, "supported" -> supported.mkString("'", "', '", "'"))) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Command.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Command.scala index fc9eb5d03e49f..bd277e92d11d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Command.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/Command.scala @@ -63,3 +63,14 @@ trait AnalysisOnlyCommand extends Command { // on the `AnalysisContext` def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan } + +/** + * A logical node that 
does not expose its sub-nodes as children, but rather supervises them + * in an implementation-defined manner. + */ +trait SupervisingCommand extends LeafCommand { + /** + * Transforms its supervised plan using `transformer` and returns a copy of `SupervisingCommand` + */ + def withTransformedSupervisedPlan(transformer: LogicalPlan => LogicalPlan): LogicalPlan +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EmptyRelation.scala new file mode 100644 index 0000000000000..9e055ae7f3bd8 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EmptyRelation.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.plans.logical + +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.expressions.SortOrder + +case class EmptyRelation(logical: LogicalPlan) extends LeafNode { + override val isStreaming: Boolean = logical.isStreaming + + override val outputOrdering: Seq[SortOrder] = logical.outputOrdering + + override def output: Seq[Attribute] = logical.output + + override def computeStats(): Statistics = Statistics(sizeInBytes = 0, rowCount = Some(0)) + + override def maxRows: Option[Long] = Some(0) + + override def maxRowsPerPartition: Option[Long] = Some(0) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala index 32a9030ff62b1..8cfc939755ef7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/EventTimeWatermark.scala @@ -20,7 +20,8 @@ package org.apache.spark.sql.catalyst.plans.logical import java.util.concurrent.TimeUnit import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.trees.TreePattern.{EVENT_TIME_WATERMARK, TreePattern} +import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark.updateEventTimeColumn +import org.apache.spark.sql.catalyst.trees.TreePattern.{EVENT_TIME_WATERMARK, TreePattern, UPDATE_EVENT_TIME_WATERMARK_COLUMN} import org.apache.spark.sql.catalyst.util.IntervalUtils import org.apache.spark.sql.types.MetadataBuilder import org.apache.spark.unsafe.types.CalendarInterval @@ -32,6 +33,36 @@ object EventTimeWatermark { def getDelayMs(delay: CalendarInterval): Long = { IntervalUtils.getDuration(delay, TimeUnit.MILLISECONDS) } + + /** + * Adds watermark delay to the metadata for newEventTime in provided attributes. 
+ * + * If any other existing attributes have watermark delay present in their metadata, watermark + * delay will be removed from their metadata. + */ + def updateEventTimeColumn( + attributes: Seq[Attribute], + delayMs: Long, + newEventTime: Attribute): Seq[Attribute] = { + attributes.map { a => + if (a semanticEquals newEventTime) { + val updatedMetadata = new MetadataBuilder() + .withMetadata(a.metadata) + .putLong(EventTimeWatermark.delayKey, delayMs) + .build() + a.withMetadata(updatedMetadata) + } else if (a.metadata.contains(EventTimeWatermark.delayKey)) { + // Remove existing columns tagged as eventTime for watermark + val updatedMetadata = new MetadataBuilder() + .withMetadata(a.metadata) + .remove(EventTimeWatermark.delayKey) + .build() + a.withMetadata(updatedMetadata) + } else { + a + } + } + } } /** @@ -49,26 +80,38 @@ case class EventTimeWatermark( // logic here because we also maintain the compatibility flag. (See // SQLConf.STATEFUL_OPERATOR_ALLOW_MULTIPLE for details.) // TODO: Disallow updating the metadata once we remove the compatibility flag. - override val output: Seq[Attribute] = child.output.map { a => - if (a semanticEquals eventTime) { - val delayMs = EventTimeWatermark.getDelayMs(delay) - val updatedMetadata = new MetadataBuilder() - .withMetadata(a.metadata) - .putLong(EventTimeWatermark.delayKey, delayMs) - .build() - a.withMetadata(updatedMetadata) - } else if (a.metadata.contains(EventTimeWatermark.delayKey)) { - // Remove existing watermark - val updatedMetadata = new MetadataBuilder() - .withMetadata(a.metadata) - .remove(EventTimeWatermark.delayKey) - .build() - a.withMetadata(updatedMetadata) + override val output: Seq[Attribute] = { + val delayMs = EventTimeWatermark.getDelayMs(delay) + updateEventTimeColumn(child.output, delayMs, eventTime) + } + + override protected def withNewChildInternal(newChild: LogicalPlan): EventTimeWatermark = + copy(child = newChild) +} + +/** + * Updates the event time column to [[eventTime]] in the child output. + * + * Any watermark calculations performed after this node will use the + * updated eventTimeColumn. 
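 * For example (an illustrative sketch, not part of this change), if the child's output is
 * `[oldEventTime, newEventTime]` and `oldEventTime` currently carries the watermark delay
 * metadata:
 * {{{
 *   UpdateEventTimeWatermarkColumn(newEventTime, Some(delay), child)
 *   // output: the delay metadata is attached to newEventTime and removed from oldEventTime
 * }}}
 * When `delay` is None the child output is passed through unchanged here; the delay is
 * presumably filled in later during analysis (see ResolveUpdateEventTimeWatermarkColumn in
 * RuleIdCollection further below).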
+ */ +case class UpdateEventTimeWatermarkColumn( + eventTime: Attribute, + delay: Option[CalendarInterval], + child: LogicalPlan) extends UnaryNode { + + final override val nodePatterns: Seq[TreePattern] = Seq(UPDATE_EVENT_TIME_WATERMARK_COLUMN) + + override def output: Seq[Attribute] = { + if (delay.isDefined) { + val delayMs = EventTimeWatermark.getDelayMs(delay.get) + updateEventTimeColumn(child.output, delayMs, eventTime) } else { - a + child.output } } - override protected def withNewChildInternal(newChild: LogicalPlan): EventTimeWatermark = + override protected def withNewChildInternal( + newChild: LogicalPlan): UpdateEventTimeWatermarkColumn = copy(child = newChild) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala index 7e04af190e4aa..0aa73f1939e10 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/FunctionBuilderBase.scala @@ -70,6 +70,8 @@ trait FunctionBuilderBase[T] { } def build(funcName: String, expressions: Seq[Expression]): T + + def supportsLambda: Boolean = false } object NamedParametersSupport { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index b989233da6740..938a8ffe9e446 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.{AliasAwareQueryOutputOrdering, QueryPlan} import org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats import org.apache.spark.sql.catalyst.trees.{BinaryLike, LeafLike, TreeNodeTag, UnaryLike} +import org.apache.spark.sql.catalyst.trees.TreePattern.{LOGICAL_QUERY_STAGE, TreePattern} import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.MetadataColumnHelper import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} @@ -103,7 +104,20 @@ abstract class LogicalPlan */ lazy val resolved: Boolean = expressions.forall(_.resolved) && childrenResolved - override protected def statePrefix = if (!resolved) "'" else super.statePrefix + override protected def statePrefix = { + if (!resolved) { + "'" + } else { + val prefixFromSuper = super.statePrefix + // Ancestor class could mark something on the prefix, including 'invalid'. Add a marker for + // `streaming` only when there is no marker from ancestor class. + if (prefixFromSuper.isEmpty && isStreaming) { + "~" + } else { + prefixFromSuper + } + } + } /** * Returns true if all its children of this query plan have been resolved. @@ -118,7 +132,9 @@ abstract class LogicalPlan def resolve(schema: StructType, resolver: Resolver): Seq[Attribute] = { schema.map { field => resolve(field.name :: Nil, resolver).map { - case a: AttributeReference => a + case a: AttributeReference => + // Keep the metadata in given schema. 
+ a.withMetadata(field.metadata) case _ => throw QueryExecutionErrors.resolveCannotHandleNestedSchema(this) }.getOrElse { throw QueryCompilationErrors.cannotResolveAttributeError( @@ -212,6 +228,33 @@ trait LeafNode extends LogicalPlan with LeafLike[LogicalPlan] { throw new SparkUnsupportedOperationException("_LEGACY_ERROR_TEMP_3114") } +/** + * An abstract class for LogicalQueryStage that is visible in logical rewrites. + */ +abstract class LogicalQueryStage extends LeafNode { + override protected val nodePatterns: Seq[TreePattern] = Seq(LOGICAL_QUERY_STAGE) + + /** + * Returns the logical plan that is included in this query stage. + */ + def logicalPlan: LogicalPlan + + /** + * Returns the physical plan. + */ + def physicalPlan: QueryPlan[_] + + /** + * Returns true if the physical stage is materialized. + */ + def isMaterialized: Boolean + + /** + * Returns true if the physical plan corresponds directly to a stage. + */ + def isDirectStage: Boolean +} + /** * A logical plan node with single child. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 1c8f7a97dd7fe..0135fcfb3cc8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -783,6 +783,7 @@ object View { "spark.sql.hive.convertMetastoreParquet", "spark.sql.hive.convertMetastoreOrc", "spark.sql.hive.convertInsertingPartitionedTable", + "spark.sql.hive.convertInsertingUnpartitionedTable", "spark.sql.hive.convertMetastoreCtas" ).contains(key) || key.startsWith("spark.sql.catalog.") } @@ -910,6 +911,10 @@ case class WithCTE(plan: LogicalPlan, cteDefs: Seq[CTERelationDef]) extends Logi def withNewPlan(newPlan: LogicalPlan): WithCTE = { withNewChildren(children.init :+ newPlan).asInstanceOf[WithCTE] } + + override def maxRows: Option[Long] = plan.maxRows + + override def maxRowsPerPartition: Option[Long] = plan.maxRowsPerPartition } /** @@ -1070,7 +1075,8 @@ case class Range( override def newInstance(): Range = copy(output = output.map(_.newInstance())) override def simpleString(maxFields: Int): String = { - s"Range ($start, $end, step=$step, splits=$numSlices)" + val splits = if (numSlices.isDefined) { s", splits=$numSlices" } else { "" } + s"Range ($start, $end, step=$step$splits)" } override def maxRows: Option[Long] = { @@ -2056,6 +2062,8 @@ case class LateralJoin( joinType: JoinType, condition: Option[Expression]) extends UnaryNode { + override lazy val allAttributes: AttributeSeq = left.output ++ right.plan.output + require(Seq(Inner, LeftOuter, Cross).contains(joinType), s"Unsupported lateral join type $joinType") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/object.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/object.scala index 28d52d39093b3..07423b612c301 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/object.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/object.scala @@ -579,7 +579,7 @@ object TransformWithState { child: LogicalPlan): LogicalPlan = { val keyEncoder = encoderFor[K] val mapped = new TransformWithState( - UnresolvedDeserializer(encoderFor[K].deserializer, groupingAttributes), + UnresolvedDeserializer(keyEncoder.deserializer,
groupingAttributes), UnresolvedDeserializer(encoderFor[V].deserializer, dataAttributes), groupingAttributes, dataAttributes, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 37e751ea9884b..6339a18796fa0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.{SparkIllegalArgumentException, SparkUnsupportedOperationException} import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, AssignmentUtils, EliminateSubqueryAliases, FieldName, NamedRelation, PartitionSpec, ResolvedIdentifier, UnresolvedException} +import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, AssignmentUtils, EliminateSubqueryAliases, FieldName, NamedRelation, PartitionSpec, ResolvedIdentifier, UnresolvedException, ViewSchemaMode} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.catalog.FunctionResource import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, MetadataAttribute, NamedExpression, UnaryExpression, Unevaluable, V2ExpressionUtils} @@ -64,10 +64,11 @@ trait V2WriteCommand extends UnaryCommand with KeepAnalyzedQuery with CTEInChild table.skipSchemaResolution || (query.output.size == table.output.size && query.output.zip(table.output).forall { case (inAttr, outAttr) => + val inType = CharVarcharUtils.getRawType(inAttr.metadata).getOrElse(inAttr.dataType) val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType) // names and types must match, nullability must be compatible inAttr.name == outAttr.name && - DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outType) && + DataType.equalsIgnoreCompatibleNullability(inType, outType) && (outAttr.nullable || !inAttr.nullable) }) } @@ -754,7 +755,8 @@ case class MergeIntoTable( mergeCondition: Expression, matchedActions: Seq[MergeAction], notMatchedActions: Seq[MergeAction], - notMatchedBySourceActions: Seq[MergeAction]) extends BinaryCommand with SupportsSubquery { + notMatchedBySourceActions: Seq[MergeAction], + withSchemaEvolution: Boolean) extends BinaryCommand with SupportsSubquery { lazy val aligned: Boolean = { val actions = matchedActions ++ notMatchedActions ++ notMatchedBySourceActions @@ -1292,6 +1294,17 @@ case class AlterViewAs( } } +/** + * The logical plan of the ALTER VIEW ... WITH SCHEMA command. + */ +case class AlterViewSchemaBinding( + child: LogicalPlan, + viewSchemaMode: ViewSchemaMode) + extends UnaryCommand { + override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = + copy(child = newChild) +} + /** * The logical plan of the CREATE VIEW ... command. 
*/ @@ -1303,7 +1316,8 @@ case class CreateView( originalText: Option[String], query: LogicalPlan, allowExisting: Boolean, - replace: Boolean) extends BinaryCommand with CTEInChildren { + replace: Boolean, + viewSchemaMode: ViewSchemaMode) extends BinaryCommand with CTEInChildren { override def left: LogicalPlan = child override def right: LogicalPlan = query override protected def withNewChildrenInternal( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 2364130f79e4c..19595eef10b34 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -385,8 +385,9 @@ case class KeyGroupedPartitioning( val attributes = expressions.flatMap(_.collectLeaves()) if (SQLConf.get.v2BucketingAllowJoinKeysSubsetOfPartitionKeys) { - // check that all join keys (required clustering keys) contained in partitioning - requiredClustering.forall(x => attributes.exists(_.semanticEquals(x))) && + // check that join keys (required clustering keys) + // overlap with partition keys (KeyGroupedPartitioning attributes) + requiredClustering.exists(x => attributes.exists(_.semanticEquals(x))) && expressions.forall(_.collectLeaves().size == 1) } else { attributes.forall(x => requiredClustering.exists(_.semanticEquals(x))) @@ -870,12 +871,30 @@ case class KeyGroupedShuffleSpec( if (results.forall(p => p.isEmpty)) None else Some(results) } - override def canCreatePartitioning: Boolean = SQLConf.get.v2BucketingShuffleEnabled && - // Only support partition expressions are AttributeReference for now - partitioning.expressions.forall(_.isInstanceOf[AttributeReference]) + override def canCreatePartitioning: Boolean = { + // Allow one side shuffle for SPJ for now only if partially-clustered is not enabled + // and for join keys less than partition keys only if transforms are not enabled. 
+ val checkExprType = if (SQLConf.get.v2BucketingAllowJoinKeysSubsetOfPartitionKeys) { + e: Expression => e.isInstanceOf[AttributeReference] + } else { + e: Expression => e.isInstanceOf[AttributeReference] || e.isInstanceOf[TransformExpression] + } + SQLConf.get.v2BucketingShuffleEnabled && + !SQLConf.get.v2BucketingPartiallyClusteredDistributionEnabled && + partitioning.expressions.forall(checkExprType) + } + + override def createPartitioning(clustering: Seq[Expression]): Partitioning = { - KeyGroupedPartitioning(clustering, partitioning.numPartitions, partitioning.partitionValues) + val newExpressions: Seq[Expression] = clustering.zip(partitioning.expressions).map { + case (c, e: TransformExpression) => TransformExpression( + e.function, Seq(c), e.numBucketsOpt) + case (c, _) => c + } + KeyGroupedPartitioning(newExpressions, + partitioning.numPartitions, + partitioning.partitionValues) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala index 476ace2662f8b..c8b3f224a3129 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.rules import org.apache.spark.SparkException import org.apache.spark.internal.{Logging, MessageWithContext} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.QueryPlanningTracker import org.apache.spark.sql.catalyst.trees.TreeNode @@ -75,11 +75,11 @@ class PlanChangeLogger[TreeType <: TreeNode[_]] extends Logging { def message(): MessageWithContext = { if (!oldPlan.fastEquals(newPlan)) { log""" - |=== Result of Batch ${MDC(RULE_BATCH_NAME, batchName)} === + |=== Result of Batch ${MDC(BATCH_NAME, batchName)} === |${MDC(QUERY_PLAN, sideBySide(oldPlan.treeString, newPlan.treeString).mkString("\n"))} """.stripMargin } else { - log"Batch ${MDC(RULE_BATCH_NAME, batchName)} has no effect." + log"Batch ${MDC(BATCH_NAME, batchName)} has no effect." } } @@ -90,14 +90,16 @@ class PlanChangeLogger[TreeType <: TreeNode[_]] extends Logging { def logMetrics(metrics: QueryExecutionMetrics): Unit = { val totalTime = metrics.time / NANOS_PER_MILLIS.toDouble val totalTimeEffective = metrics.timeEffective / NANOS_PER_MILLIS.toDouble + // scalastyle:off line.size.limit val message: MessageWithContext = log""" |=== Metrics of Executed Rules === - |Total number of runs: ${MDC(RULE_NUMBER_OF_RUNS, metrics.numRuns)} + |Total number of runs: ${MDC(NUM_RULE_OF_RUNS, metrics.numRuns)} |Total time: ${MDC(TOTAL_TIME, totalTime)} ms - |Total number of effective runs: ${MDC(RULE_NUMBER_OF_RUNS, metrics.numEffectiveRuns)} + |Total number of effective runs: ${MDC(NUM_EFFECTIVE_RULE_OF_RUNS, metrics.numEffectiveRuns)} |Total time of effective runs: ${MDC(TOTAL_EFFECTIVE_TIME, totalTimeEffective)} ms """.stripMargin + // scalastyle:on line.size.limit logBasedOnLevel(message) } @@ -145,7 +147,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { override val maxIterationsSetting: String = null) extends Strategy /** A batch of rules. 
*/ - protected case class Batch(name: String, strategy: Strategy, rules: Rule[TreeType]*) + protected[catalyst] case class Batch(name: String, strategy: Strategy, rules: Rule[TreeType]*) /** Defines a sequence of rule batches, to be overridden by the implementation. */ protected def batches: Seq[Batch] @@ -263,7 +265,7 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { log"to a larger value." } val log = log"Max iterations (${MDC(NUM_ITERATIONS, iteration - 1)}) " + - log"reached for batch ${MDC(RULE_BATCH_NAME, batch.name)}" + + log"reached for batch ${MDC(BATCH_NAME, batch.name)}" + endingMsg if (Utils.isTesting || batch.strategy.errorOnExceed) { throw new RuntimeException(log.message) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index 778d56788e89e..d36ce37406063 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -104,6 +104,7 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.analysis.TypeCoercionBase$CombinedTypeCoercionRule" :: "org.apache.spark.sql.catalyst.analysis.UpdateOuterReferences" :: "org.apache.spark.sql.catalyst.analysis.UpdateAttributeNullability" :: + "org.apache.spark.sql.catalyst.analysis.ResolveUpdateEventTimeWatermarkColumn" :: // Catalyst Optimizer rules "org.apache.spark.sql.catalyst.optimizer.BooleanSimplification" :: "org.apache.spark.sql.catalyst.optimizer.CollapseProject" :: @@ -135,6 +136,7 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.optimizer.ObjectSerializerPruning" :: "org.apache.spark.sql.catalyst.optimizer.OptimizeCsvJsonExprs" :: "org.apache.spark.sql.catalyst.optimizer.OptimizeIn" :: + "org.apache.spark.sql.catalyst.optimizer.OptimizeJoinCondition" :: "org.apache.spark.sql.catalyst.optimizer.OptimizeRand" :: "org.apache.spark.sql.catalyst.optimizer.OptimizeOneRowPlan" :: "org.apache.spark.sql.catalyst.optimizer.Optimizer$OptimizeSubqueries" :: @@ -148,7 +150,6 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.optimizer.PushFoldableIntoBranches" :: "org.apache.spark.sql.catalyst.optimizer.PushLeftSemiLeftAntiThroughJoin" :: "org.apache.spark.sql.catalyst.optimizer.ReassignLambdaVariableID" :: - "org.apache.spark.sql.catalyst.optimizer.RemoveDispensableExpressions" :: "org.apache.spark.sql.catalyst.optimizer.RemoveLiteralFromGroupExpressions" :: "org.apache.spark.sql.catalyst.optimizer.GenerateOptimization" :: "org.apache.spark.sql.catalyst.optimizer.RemoveNoopOperators" :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 94e893d468b39..6683f2dbfb392 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.trees import java.util.UUID +import scala.annotation.nowarn import scala.collection.{mutable, Map} import scala.jdk.CollectionConverters._ import scala.reflect.ClassTag @@ -78,8 +79,16 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] /** * A mutable map for holding auxiliary information of this tree node. 
It will be carried over * when this node is copied via `makeCopy`, or transformed via `transformUp`/`transformDown`. + * We lazily evaluate the `tags` since the default size of a `mutable.Map` is nonzero. This + * will reduce unnecessary memory pressure. */ - private val tags: mutable.Map[TreeNodeTag[_], Any] = mutable.Map.empty + private[this] var _tags: mutable.Map[TreeNodeTag[_], Any] = null + private def tags: mutable.Map[TreeNodeTag[_], Any] = { + if (_tags eq null) { + _tags = mutable.Map.empty + } + _tags + } /** * Default tree pattern [[BitSet] for a [[TreeNode]]. @@ -112,7 +121,14 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] * ineffective for subsequent apply calls on this tree because query plan structures are * immutable. */ - private val ineffectiveRules: BitSet = new BitSet(RuleIdCollection.NumRules) + private[this] var _ineffectiveRules: BitSet = null + private def ineffectiveRules: BitSet = { + if (_ineffectiveRules eq null) { + _ineffectiveRules = new BitSet(RuleIdCollection.NumRules) + } + _ineffectiveRules + } + private def isIneffectiveRulesEmpty = _ineffectiveRules eq null /** * @return a sequence of tree pattern enums in a TreeNode T. It does not include propagated @@ -141,17 +157,19 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] * UnknownId, it returns false. */ protected def isRuleIneffective(ruleId : RuleId): Boolean = { - if (ruleId eq UnknownRuleId) { + if (isIneffectiveRulesEmpty || (ruleId eq UnknownRuleId)) { return false } ineffectiveRules.get(ruleId.id) } + def isTagsEmpty: Boolean = (_tags eq null) || _tags.isEmpty + def copyTagsFrom(other: BaseType): Unit = { // SPARK-32753: it only makes sense to copy tags to a new node // but it's too expensive to detect other cases likes node removal // so we make a compromise here to copy tags to node with no tags - if (tags.isEmpty) { + if (isTagsEmpty && !other.isTagsEmpty) { tags ++= other.tags } } @@ -161,11 +179,17 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] } def getTagValue[T](tag: TreeNodeTag[T]): Option[T] = { - tags.get(tag).map(_.asInstanceOf[T]) + if (isTagsEmpty) { + None + } else { + tags.get(tag).map(_.asInstanceOf[T]) + } } def unsetTagValue[T](tag: TreeNodeTag[T]): Unit = { - tags -= tag + if (!isTagsEmpty) { + tags -= tag + } } /** @@ -355,12 +379,16 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] case nonChild: AnyRef => nonChild case null => null } + @nowarn("cat=deprecation") val newArgs = mapProductIterator { case s: StructType => s // Don't convert struct types to some other type of Seq[StructField] // Handle Seq[TreeNode] in TreeNode parameters. 
- case s: LazyList[_] => - // LazyList is lazy so we need to force materialization + case s: Stream[_] => + // Stream is lazy so we need to force materialization s.map(mapChild).force + case l: LazyList[_] => + // LazyList is lazy so we need to force materialization + l.map(mapChild).force case s: Seq[_] => s.map(mapChild) case m: Map[_, _] => @@ -778,6 +806,7 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] case other => other } + @nowarn("cat=deprecation") val newArgs = mapProductIterator { case arg: TreeNode[_] if containsChild(arg) => arg.asInstanceOf[BaseType].clone() @@ -790,7 +819,8 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] case (_, other) => other } case d: DataType => d // Avoid unpacking Structs - case args: LazyList[_] => args.map(mapChild).force // Force materialization on stream + case args: Stream[_] => args.map(mapChild).force // Force materialization on stream + case args: LazyList[_] => args.map(mapChild).force // Force materialization on LazyList case args: Iterable[_] => args.map(mapChild) case nonChild: AnyRef => nonChild case null => null diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index 4ab075db5709a..c5cc1eaf8f05d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -96,7 +96,6 @@ object TreePattern extends Enumeration { val VARIANT_GET: Value = Value val WINDOW_EXPRESSION: Value = Value val WINDOW_TIME: Value = Value - val UNARY_POSITIVE: Value = Value val UNPIVOT: Value = Value val UPDATE_FIELDS: Value = Value val UPPER_OR_LOWER: Value = Value @@ -133,6 +132,7 @@ object TreePattern extends Enumeration { val UNION: Value = Value val UNRESOLVED_RELATION: Value = Value val UNRESOLVED_WITH: Value = Value + val UPDATE_EVENT_TIME_WATERMARK_COLUMN: Value = Value val TEMP_RESOLVED_COLUMN: Value = Value val TYPED_FILTER: Value = Value val WINDOW: Value = Value diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala index cf8e903f03a34..f8bb1077a080f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/DataTypeUtils.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy.{ANSI, STRICT} -import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, Decimal, DecimalType, MapType, NullType, StructField, StructType, UserDefinedType} +import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, Decimal, DecimalType, MapType, NullType, StringType, StructField, StructType, UserDefinedType} import org.apache.spark.sql.types.DecimalType.{forType, fromDecimal} object DataTypeUtils { @@ -47,6 +47,31 @@ object DataTypeUtils { DataType.equalsIgnoreCaseAndNullability(from, to) } + /** + * Compares two types, ignoring nullability of ArrayType, MapType, StructType, ignoring case + * sensitivity of field names in StructType as well as differences in collation for String types. 
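 * For example (an illustrative sketch, not part of this change):
 * {{{
 *   equalsIgnoreCaseNullabilityAndCollation(
 *     new StructType().add("a", StringType, nullable = true),
 *     new StructType().add("A", StringType, nullable = false))   // true
 * }}}
 * Field-name case and nullability differences are ignored, and any two string types compare
 * equal even if their collations differ.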
+ */ + def equalsIgnoreCaseNullabilityAndCollation(from: DataType, to: DataType): Boolean = { + (from, to) match { + case (ArrayType(fromElement, _), ArrayType(toElement, _)) => + equalsIgnoreCaseNullabilityAndCollation(fromElement, toElement) + + case (MapType(fromKey, fromValue, _), MapType(toKey, toValue, _)) => + equalsIgnoreCaseNullabilityAndCollation(fromKey, toKey) && + equalsIgnoreCaseNullabilityAndCollation(fromValue, toValue) + + case (StructType(fromFields), StructType(toFields)) => + fromFields.length == toFields.length && + fromFields.zip(toFields).forall { case (l, r) => + l.name.equalsIgnoreCase(r.name) && + equalsIgnoreCaseNullabilityAndCollation(l.dataType, r.dataType) + } + + case (_: StringType, _: StringType) => true + case (fromDataType, toDataType) => fromDataType == toDataType + } + } + private val SparkGeneratedName = """col\d+""".r private def isSparkGeneratedName(name: String): Boolean = name match { case SparkGeneratedName(_*) => true diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala index 65a56c1064e45..4fa6a2275e743 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala @@ -67,16 +67,32 @@ case class PartialResultArrayException( extends Exception(cause) /** - * Exception thrown when the underlying parser meet a bad record and can't parse it. + * Exception thrown when the underlying parser meets a bad record and can't parse it. + * The stacktrace is not collected for better performance, and thus, this exception should + * not be used in a user-facing context. * @param record a function to return the record that cause the parser to fail * @param partialResults a function that returns an row array, which is the partial results of * parsing this bad record. - * @param cause the actual exception about why the record is bad and can't be parsed. + * @param cause the actual exception about why the record is bad and can't be parsed. It's better + * to use `LazyBadRecordCauseWrapper` here to delay heavy cause construction + * until it's needed. */ case class BadRecordException( @transient record: () => UTF8String, @transient partialResults: () => Array[InternalRow] = () => Array.empty[InternalRow], - cause: Throwable) extends Exception(cause) + cause: Throwable) extends Exception(cause) { + override def getStackTrace(): Array[StackTraceElement] = new Array[StackTraceElement](0) + override def fillInStackTrace(): Throwable = this +} + +/** + * Exception to use as `BadRecordException` cause to delay heavy user-facing exception construction. + * It does not contain a stacktrace and is used only for control flow. + */ +case class LazyBadRecordCauseWrapper(cause: () => Throwable) extends Exception() { + override def getStackTrace(): Array[StackTraceElement] = new Array[StackTraceElement](0) + override def fillInStackTrace(): Throwable = this +} /** * Exception thrown when the underlying parser parses a JSON array as a struct.
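For context, a minimal standalone sketch of the two ideas introduced above for BadRecordException: suppressing stack-trace collection for a control-flow exception, and deferring construction of the heavy user-facing cause behind a thunk. The class and member names below are illustrative, not the patch's API.

object LazyCauseSketch {
  // A control-flow exception that skips stack traces and defers its cause.
  class ControlFlowParseError(buildCause: () => Throwable) extends Exception {
    override def fillInStackTrace(): Throwable = this            // skip expensive stack capture
    override def getStackTrace(): Array[StackTraceElement] = Array.empty
    def userFacingCause: Throwable = buildCause()                // built only when actually needed
  }

  // The heavy cause is never constructed on paths that swallow the error (e.g. DROPMALFORMED).
  val err = new ControlFlowParseError(() => new IllegalArgumentException("expensive message"))
}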
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index 06a88b5d7b51b..011e385043d30 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.util import scala.collection.mutable import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke import org.apache.spark.sql.catalyst.parser.CatalystSqlParser diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index e31ccdb818259..f1c36f2f5c28f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -698,7 +698,7 @@ object DateTimeUtils extends SparkDateTimeUtils { } } catch { case _: scala.MatchError => - throw SparkException.internalError(s"Got the unexpected unit '$unit'.") + throw QueryExecutionErrors.invalidDatetimeUnitError("TIMESTAMPADD", unit) case _: ArithmeticException | _: DateTimeException => throw QueryExecutionErrors.timestampAddOverflowError(micros, quantity, unit) case e: Throwable => @@ -736,7 +736,7 @@ object DateTimeUtils extends SparkDateTimeUtils { val endLocalTs = getLocalDateTime(endTs, zoneId) timestampDiffMap(unitInUpperCase)(startLocalTs, endLocalTs) } else { - throw SparkException.internalError(s"Got the unexpected unit '$unit'.") + throw QueryExecutionErrors.invalidDatetimeUnitError("TIMESTAMPDIFF", unit) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala index 10cd159c769b2..d9946d1b12ec3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala @@ -78,10 +78,17 @@ class FailureSafeParser[IN]( case StringAsDataTypeException(fieldName, fieldValue, dataType) => throw QueryExecutionErrors.cannotParseStringAsDataTypeError(e.record().toString, fieldName, fieldValue, dataType) - case other => throw QueryExecutionErrors.malformedRecordsDetectedInRecordParsingError( - toResultRow(e.partialResults().headOption, e.record).toString, other) + case causeWrapper: LazyBadRecordCauseWrapper => + throwMalformedRecordsDetectedInRecordParsingError(e, causeWrapper.cause()) + case cause => throwMalformedRecordsDetectedInRecordParsingError(e, cause) } } } } + + private def throwMalformedRecordsDetectedInRecordParsingError( + e: BadRecordException, cause: Throwable): Nothing = { + throw QueryExecutionErrors.malformedRecordsDetectedInRecordParsingError( + toResultRow(e.partialResults().headOption, e.record).toString, cause) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala index deb817a0cdb70..46f14876be363 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala @@ -163,9 +163,9 @@ object GeneratedColumn { s"generation expression data type ${analyzed.dataType.simpleString} " + s"is incompatible with column data type ${dataType.simpleString}") } - if (analyzed.exists(e => SchemaUtils.hasNonBinarySortableCollatedString(e.dataType))) { + if (analyzed.exists(e => SchemaUtils.hasNonUTF8BinaryCollation(e.dataType))) { throw unsupportedExpressionError( - "generation expression cannot contain non-binary orderable collated string type") + "generation expression cannot contain non utf8 binary collated string type") } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapper.scala index 9a0bdc6bcfd11..90e3bdcd082cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapper.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, Murmur3HashFunctio import org.apache.spark.sql.catalyst.plans.physical.KeyGroupedPartitioning import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition} import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.util.NonFateSharingCache /** * Wraps the [[InternalRow]] with the corresponding [[DataType]] to make it comparable with @@ -34,9 +35,10 @@ import org.apache.spark.sql.types.{DataType, StructField, StructType} * @param dataTypes the data types for the row */ class InternalRowComparableWrapper(val row: InternalRow, val dataTypes: Seq[DataType]) { + import InternalRowComparableWrapper._ - private val structType = StructType(dataTypes.map(t => StructField("f", t))) - private val ordering = RowOrdering.createNaturalAscendingOrdering(dataTypes) + private val structType = structTypeCache.get(dataTypes) + private val ordering = orderingCache.get(dataTypes) override def hashCode(): Int = Murmur3HashFunction.hash(row, structType, 42L).toInt @@ -53,6 +55,21 @@ class InternalRowComparableWrapper(val row: InternalRow, val dataTypes: Seq[Data } object InternalRowComparableWrapper { + private final val MAX_CACHE_ENTRIES = 1024 + + private val orderingCache = { + val loadFunc = (dataTypes: Seq[DataType]) => { + RowOrdering.createNaturalAscendingOrdering(dataTypes) + } + NonFateSharingCache(loadFunc, MAX_CACHE_ENTRIES) + } + + private val structTypeCache = { + val loadFunc = (dataTypes: Seq[DataType]) => { + StructType(dataTypes.map(t => StructField("f", t))) + } + NonFateSharingCache(loadFunc, MAX_CACHE_ENTRIES) + } def apply( partition: InputPartition with HasPartitionKey, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala index dd1e466d1b38f..cc1a01083af41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ParseMode.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.util import java.util.Locale import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PARSE_MODE +import org.apache.spark.internal.LogKeys.PARSE_MODE sealed 
trait ParseMode { /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala index db9adef8ef3b2..6b4f29bea7579 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala @@ -21,14 +21,14 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SparkThrowable, SparkUnsupportedOperationException} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.{Literal => ExprLiteral} -import org.apache.spark.sql.catalyst.optimizer.ConstantFolding +import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, ReplaceExpressions} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION @@ -169,7 +169,7 @@ object ResolveDefaultColumns extends QueryErrorsBase def resolveColumnDefaultInAssignmentValue( key: Expression, value: Expression, - invalidColumnDefaultException: Throwable): Expression = { + invalidColumnDefaultException: => Throwable): Expression = { key match { case attr: AttributeReference => value match { @@ -284,12 +284,13 @@ object ResolveDefaultColumns extends QueryErrorsBase throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions( statementType, colName, defaultSQL) } + // Analyze the parse result. val plan = try { val analyzer: Analyzer = DefaultColumnAnalyzer val analyzed = analyzer.execute(Project(Seq(Alias(parsed, colName)()), OneRowRelation())) analyzer.checkAnalysis(analyzed) - ConstantFolding(analyzed) + ConstantFolding(ReplaceExpressions(analyzed)) } catch { case ex: AnalysisException => throw QueryCompilationErrors.defaultValuesUnresolvedExprError( @@ -298,6 +299,21 @@ object ResolveDefaultColumns extends QueryErrorsBase val analyzed: Expression = plan.collectFirst { case Project(Seq(a: Alias), OneRowRelation()) => a.child }.get + + if (!analyzed.foldable) { + throw QueryCompilationErrors.defaultValueNotConstantError(statementType, colName, defaultSQL) + } + + // Another extra check, expressions should already be resolved if AnalysisException is not + // thrown in the code block above + if (!analyzed.resolved) { + throw QueryCompilationErrors.defaultValuesUnresolvedExprError( + statementType, + colName, + defaultSQL, + cause = null) + } + // Perform implicit coercion from the provided expression type to the required column type. 
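// Illustration of the `invalidColumnDefaultException: => Throwable` signature change above:
// with a by-name parameter the exception (and its message) is only constructed when the check
// actually fails. Names below are hypothetical, not the patch's API.
object ByNameErrorSketch {
  def requireConstant(isFoldable: Boolean, error: => Throwable): Unit = {
    if (!isFoldable) throw error
  }

  def expensiveMessage(): String = {
    // imagine formatting a large plan or SQL text here
    "DEFAULT value must be a constant expression"
  }

  // Passing check: expensiveMessage() is never evaluated because `error` is by-name.
  requireConstant(isFoldable = true, new IllegalArgumentException(expensiveMessage()))
}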
coerceDefaultValue(analyzed, dataType, statementType, colName, defaultSQL) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala index 04df3635d4754..e2a5319cbe1ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -22,7 +22,7 @@ import java.util.regex.{Pattern, PatternSyntaxException} import org.apache.commons.text.similarity.LevenshteinDistance import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf @@ -66,12 +66,6 @@ object StringUtils extends Logging { "(?s)" + out.result() // (?s) enables dotall mode, causing "." to match new lines } - /** - * Returns a pretty string of the byte array which prints each byte as a hex digit and add spaces - * between them. For example, [1A C0]. - */ - def getHexString(bytes: Array[Byte]): String = bytes.map("%02X".format(_)).mkString("[", " ", "]") - private[this] val trueStrings = Set("t", "true", "y", "yes", "1").map(UTF8String.fromString) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala index d2c708b380cf5..a0d578c66e736 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TypeUtils.scala @@ -58,7 +58,7 @@ object TypeUtils extends QueryErrorsBase { } def checkForMapKeyType(keyType: DataType): TypeCheckResult = { - if (keyType.existsRecursively(_.isInstanceOf[MapType])) { + if (keyType.existsRecursively(dt => dt.isInstanceOf[MapType] || dt.isInstanceOf[VariantType])) { DataTypeMismatch( errorSubClass = "INVALID_MAP_KEY_TYPE", messageParameters = Map( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala index 87f0b50b9af22..514138ab7508a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala @@ -169,8 +169,8 @@ class StaxXmlGenerator( def writeElement(dt: DataType, v: Any, options: XmlOptions): Unit = (dt, v) match { case (_, null) | (NullType, _) => gen.writeCharacters(options.nullValue) - case (StringType, v: UTF8String) => gen.writeCharacters(v.toString) - case (StringType, v: String) => gen.writeCharacters(v) + case (_: StringType, v: UTF8String) => gen.writeCharacters(v.toString) + case (_: StringType, v: String) => gen.writeCharacters(v) case (TimestampType, v: Timestamp) => gen.writeCharacters(timestampFormatter.format(v.toInstant())) case (TimestampType, v: Long) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala index 725ef8fe79f79..9a0528468842c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParser.scala @@ 
-397,8 +397,7 @@ class StaxXmlParser( row(anyIndex) = values :+ newValue } } else { - StaxXmlParserUtils.skipChildren(parser) - StaxXmlParserUtils.skipNextEndElement(parser, field, options) + StaxXmlParserUtils.skipChildren(parser, field, options) } } } catch { @@ -802,19 +801,6 @@ class XmlTokenizer( commentIdx = 0 } - if (c == cdataStart(cdataIdx)) { - if (cdataIdx >= cdataStart.length - 1) { - // If a CDATA beigns we must ignore everything until its end - buffer.setLength(buffer.length - cdataStart.length) - cdataIdx = 0 - readUntilMatch(cdataEnd) - } else { - cdataIdx += 1 - } - } else { - cdataIdx = 0 - } - if (c == '>' && prevC != '/') { canSelfClose = false } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParserUtils.scala index a59ea6f460dee..5d267143b06c9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlParserUtils.scala @@ -165,24 +165,27 @@ object StaxXmlParserUtils { /** * Skip the children of the current XML element. + * Before this function is called, the 'startElement' of the object has already been consumed. + * Upon completion, this function consumes the 'endElement' of the object, + * which effectively skipping the entire object enclosed within these elements. */ - def skipChildren(parser: XMLEventReader): Unit = { - var shouldStop = checkEndElement(parser) + def skipChildren( + parser: XMLEventReader, + expectedNextEndElementName: String, + options: XmlOptions): Unit = { + var shouldStop = false while (!shouldStop) { parser.nextEvent match { - case _: StartElement => - val e = parser.peek - if (e.isCharacters && e.asCharacters.isWhiteSpace) { - // There can be a `Characters` event between `StartElement`s. - // So, we need to check further to decide if this is a data or just - // a whitespace between them. 
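// A self-contained sketch of the recursive skip strategy that the new skipChildren above
// implements, using only javax.xml.stream. It assumes the StartElement of the element being
// skipped has already been consumed, mirroring the contract described in the new scaladoc;
// it is not the patch's code.
import javax.xml.stream.XMLEventReader
import javax.xml.stream.events.{EndElement, StartElement, XMLEvent}

object SkipSketch {
  def skipElement(reader: XMLEventReader): Unit = {
    var done = false
    while (!done) {
      reader.nextEvent() match {
        case _: StartElement => skipElement(reader) // recurse into nested children
        case _: EndElement   => done = true         // matching end tag: whole element consumed
        case _: XMLEvent     =>                     // characters, comments, etc. are ignored
      }
    }
  }
}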
- parser.next - } - if (parser.peek.isStartElement) { - skipChildren(parser) - } - case _: EndElement => - shouldStop = checkEndElement(parser) + case startElement: StartElement => + val childField = StaxXmlParserUtils.getName(startElement.asStartElement.getName, options) + skipChildren(parser, childField, options) + case endElement: EndElement => + val endElementName = getName(endElement.getName, options) + assert( + endElementName == expectedNextEndElementName, + s"Expected EndElement </$expectedNextEndElementName>, but found </$endElementName>" + ) + shouldStop = true case _: XMLEvent => // do nothing } } @@ -197,9 +200,10 @@ object StaxXmlParserUtils { case c: Characters if c.isWhiteSpace => skipNextEndElement(parser, expectedNextEndElementName, options) case endElement: EndElement => + val endElementName = getName(endElement.getName, options) assert( - getName(endElement.getName, options) == expectedNextEndElementName, - s"Expected EndElement </$expectedNextEndElementName>") + endElementName == expectedNextEndElementName, + s"Expected EndElement </$expectedNextEndElementName>, but found </$endElementName>") case _ => throw new IllegalStateException( s"Expected EndElement </$expectedNextEndElementName>") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala index 3d93c4e8742ab..8a5291d0bac74 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/ValidatorUtil.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkFiles import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.MDC /** @@ -42,7 +42,7 @@ object ValidatorUtil extends Logging { val in = openSchemaFile(new Path(key)) try { val schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI) - schemaFactory.newSchema(new StreamSource(in)) + schemaFactory.newSchema(new StreamSource(in, key)) } finally { in.close() } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index 5485f5255b6e7..f36310e8ad899 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -525,10 +525,15 @@ private[sql] object CatalogV2Util { } if (isDefaultColumn) { - val e = analyze(f, EXISTS_DEFAULT_COLUMN_METADATA_KEY) + val e = analyze( + f, + statementType = "Column analysis", + metadataKey = EXISTS_DEFAULT_COLUMN_METADATA_KEY) + assert(e.resolved && e.foldable, "The existence default value must be a simple SQL string that is resolved and foldable, " + "but got: " + f.getExistenceDefaultValue().get) + val defaultValue = new ColumnDefaultValue( f.getCurrentDefaultValue().get, LiteralValue(e.eval(), f.dataType)) val cleanedMetadata = metadataWithKeysRemoved( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala index fc41d5a98e4aa..b43e627c0eece 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/expressions/expressions.scala @@ -17,6 +17,8 @@ package
org.apache.spark.sql.connector.expressions +import org.apache.commons.lang3.StringUtils + import org.apache.spark.SparkException import org.apache.spark.sql.catalyst import org.apache.spark.sql.catalyst.parser.CatalystSqlParser @@ -388,7 +390,7 @@ private[sql] object HoursTransform { private[sql] final case class LiteralValue[T](value: T, dataType: DataType) extends Literal[T] { override def toString: String = { if (dataType.isInstanceOf[StringType]) { - s"'$value'" + s"'${StringUtils.replace(s"$value", "'", "''")}'" } else { s"$value" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 6ad5624d4730a..d3bd265d0459e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -175,6 +175,13 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "functionName" -> toSQLId(funcName))) } + def nullDataSourceOption(option: String): Throwable = { + new AnalysisException( + errorClass = "NULL_DATA_SOURCE_OPTION", + messageParameters = Map("option" -> option) + ) + } + def unorderablePivotColError(pivotCol: Expression): Throwable = { new AnalysisException( errorClass = "INCOMPARABLE_PIVOT_COLUMN", @@ -2687,6 +2694,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat Map("tableName" -> toSQLId(tableName), "columnName" -> toSQLId(columnName)) ) } + + def cannotAlterCollationBucketColumn(tableName: String, columnName: String): Throwable = { + new AnalysisException( + errorClass = "CANNOT_ALTER_COLLATION_BUCKET_COLUMN", + messageParameters = + Map("tableName" -> toSQLId(tableName), "columnName" -> toSQLId(columnName)) + ) + } + def cannotFindColumnError(name: String, fieldNames: Array[String]): Throwable = { new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_1246", @@ -2970,6 +2986,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat "dataColumns" -> query.output.map(c => toSQLId(c.name)).mkString(", "))) } + def cannotAlterTempViewWithSchemaBindingError() : Throwable = { + new AnalysisException( + errorClass = "UNSUPPORTED_FEATURE.TEMPORARY_VIEW_WITH_SCHEMA_BINDING_MODE", + messageParameters = Map.empty) + } + def unsupportedCreateOrReplaceViewOnTableError( name: TableIdentifier, replace: Boolean): Throwable = { if (replace) { @@ -3227,6 +3249,12 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat ) } + def invalidSingleVariantColumn(): Throwable = { + new AnalysisException( + errorClass = "INVALID_SINGLE_VARIANT_COLUMN", + messageParameters = Map.empty) + } + def writeWithSaveModeUnsupportedBySourceError(source: String, createMode: String): Throwable = { new AnalysisException( errorClass = "UNSUPPORTED_DATA_SOURCE_SAVE_MODE", @@ -3295,7 +3323,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat def invalidJoinTypeInJoinWithError(joinType: JoinType): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1319", + errorClass = "INVALID_JOIN_TYPE_FOR_JOINWITH", messageParameters = Map("joinType" -> joinType.sql)) } @@ -4055,4 +4083,18 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat callDeprecatedMethodError("createTable(..., StructType, ...)", "createTable(..., Array[Column], ...)") } + + def 
cannotAssignEventTimeColumn(): Throwable = { + new AnalysisException( + errorClass = "CANNOT_ASSIGN_EVENT_TIME_COLUMN_WITHOUT_WATERMARK", + messageParameters = Map() + ) + } + + def avroNotLoadedSqlFunctionsUnusable(functionName: String): Throwable = { + new AnalysisException( + errorClass = "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", + messageParameters = Map("functionName" -> functionName) + ) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index ceb90fe6bea50..6fb09bdeffc51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -211,6 +211,15 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE summary = getSummary(context)) } + def invalidUTF8StringError(str: UTF8String): SparkIllegalArgumentException = { + new SparkIllegalArgumentException( + errorClass = "INVALID_UTF8_STRING", + messageParameters = Map( + "str" -> str.getBytes.map(byte => f"\\x$byte%02X").mkString + ) + ) + } + def invalidArrayIndexError( index: Int, numElements: Int, @@ -629,7 +638,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE } def failedToCompileMsg(e: Exception): String = { - s"failed to compile: $e" + s"Failed to compile: $e" } def internalCompilerError(e: InternalCompilerException): Throwable = { @@ -2727,6 +2736,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE messageParameters = Map("path" -> path, "functionName" -> toSQLId(functionName))) } + def malformedVariant(): Throwable = new SparkRuntimeException( + "MALFORMED_VARIANT", + Map.empty + ) + def invalidCharsetError(functionName: String, charset: String): RuntimeException = { new SparkIllegalArgumentException( errorClass = "INVALID_PARAMETER_VALUE.CHARSET", @@ -2736,6 +2750,14 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE "charset" -> charset)) } + def malformedCharacterCoding(functionName: String, charset: String): RuntimeException = { + new SparkRuntimeException( + errorClass = "MALFORMED_CHARACTER_CODING", + messageParameters = Map( + "function" -> toSQLId(functionName), + "charset" -> charset)) + } + def invalidWriterCommitMessageError(details: String): Throwable = { new SparkRuntimeException( errorClass = "INVALID_WRITER_COMMIT_MESSAGE", @@ -2757,4 +2779,35 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE "numFields" -> numFields.toString, "schemaLen" -> schemaLen.toString)) } + + def emittedRowsAreOlderThanWatermark( + currentWatermark: Long, emittedRowEventTime: Long): SparkRuntimeException = { + new SparkRuntimeException( + errorClass = "EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED", + messageParameters = Map( + "currentWatermark" -> currentWatermark.toString, + "emittedRowEventTime" -> emittedRowEventTime.toString + ) + ) + } + + def notNullAssertViolation(walkedTypePath: String): SparkRuntimeException = { + new SparkRuntimeException( + errorClass = "NOT_NULL_ASSERT_VIOLATION", + messageParameters = Map( + "walkedTypePath" -> walkedTypePath + ) + ) + } + + def invalidDatetimeUnitError( + functionName: String, + invalidValue: String): Throwable = { + new SparkIllegalArgumentException( + errorClass = "INVALID_PARAMETER_VALUE.DATETIME_UNIT", + messageParameters = Map( + "functionName" -> 
toSQLId(functionName), + "parameter" -> toSQLId("unit"), + "invalidValue" -> s"'$invalidValue'")) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index e5ba1be0f5f4f..4e7c6c180e9a2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -535,8 +535,7 @@ object SQLConf { val COLUMN_VECTOR_OFFHEAP_ENABLED = buildConf("spark.sql.columnVector.offheap.enabled") .internal() - .doc("When true, use OffHeapColumnVector in ColumnarBatch. " + - s"Defaults to $MEMORY_OFFHEAP_ENABLED.") + .doc("When true, use OffHeapColumnVector in ColumnarBatch.") .version("2.3.0") .fallbackConf(MEMORY_OFFHEAP_ENABLED) @@ -772,14 +771,28 @@ " produced by a builtin function such as to_char or CAST") .version("4.0.0") .stringConf - .checkValue(CollationFactory.isValidCollation, + .checkValue( + collationName => { + try { + CollationFactory.fetchCollation(collationName) + true + } catch { + case e: SparkException if e.getErrorClass == "COLLATION_INVALID_NAME" => false + } + }, "DEFAULT_COLLATION", - name => - Map( - "proposal" -> CollationFactory.getClosestCollation(name) - )) + collationName => Map( + "proposals" -> CollationFactory.getClosestSuggestionsOnInvalidName(collationName, 3))) .createWithDefault("UTF8_BINARY") + val ICU_CASE_MAPPINGS_ENABLED = + buildConf("spark.sql.icu.caseMappings.enabled") + .doc("When enabled we use the ICU library (instead of the JVM) to implement case mappings" + + " for strings under UTF8_BINARY collation.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + val FETCH_SHUFFLE_BLOCKS_IN_BATCH = buildConf("spark.sql.adaptive.fetchShuffleBlocksInBatch") .internal() @@ -1495,6 +1508,48 @@ .booleanConf .createWithDefault(true) + /** + * Output style for binary data. + */ + object BinaryOutputStyle extends Enumeration { + type BinaryOutputStyle = Value + val + /** + * Output as UTF-8 string. + * [83, 112, 97, 114, 107] -> "Spark" + */ + UTF8, + /** + * Output as comma separated byte array string. + * [83, 112, 97, 114, 107] -> [83, 112, 97, 114, 107] + */ + BASIC, + /** + * Output as base64 encoded string. + * [83, 112, 97, 114, 107] -> U3Bhcms= + */ + BASE64, + /** + * Output as hex string. + * [83, 112, 97, 114, 107] -> 537061726b + */ + HEX, + /** + * Output as discrete hex string. + * [83, 112, 97, 114, 107] -> [53 70 61 72 6b] + */ + HEX_DISCRETE = Value + } + + val BINARY_OUTPUT_STYLE = buildConf("spark.sql.binaryOutputStyle") + .doc("The output style used to display binary data. Valid values are 'UTF8', " + + "'BASIC', 'BASE64', 'HEX', and 'HEX_DISCRETE'.") + .version("4.0.0") + .stringConf + .transform(_.toUpperCase(Locale.ROOT)) + .checkValues(BinaryOutputStyle.values.map(_.toString)) + .createOptional + val PARTITION_COLUMN_TYPE_INFERENCE = buildConf("spark.sql.sources.partitionColumnTypeInference.enabled") .doc("When true, automatically infer the data types for partitioned columns.") @@ -1527,7 +1582,7 @@ "side.
This could help to eliminate unnecessary shuffles") .version("3.4.0") .booleanConf - .createWithDefault(false) + .createWithDefault(true) val V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED = buildConf("spark.sql.sources.v2.bucketing.partiallyClusteredDistribution.enabled") @@ -1658,6 +1713,22 @@ .booleanConf .createWithDefault(true) + val VIEW_SCHEMA_BINDING_ENABLED = buildConf("spark.sql.legacy.viewSchemaBindingMode") + .internal() + .doc("Set to false to disable the WITH SCHEMA clause for view DDL and suppress the line in " + + "DESCRIBE EXTENDED and SHOW CREATE TABLE.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + + val VIEW_SCHEMA_COMPENSATION = buildConf("spark.sql.legacy.viewSchemaCompensation") + .internal() + .doc("Set to false to revert default view schema binding mode from WITH SCHEMA COMPENSATION " + + "to WITH SCHEMA BINDING.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + // The output committer class used by data sources. The specified class needs to be a // subclass of org.apache.hadoop.mapreduce.OutputCommitter. val OUTPUT_COMMITTER_CLASS = buildConf("spark.sql.sources.outputCommitterClass") @@ -1914,6 +1985,14 @@ .booleanConf .createWithDefault(false) + val IGNORE_INVALID_PARTITION_PATHS = buildConf("spark.sql.files.ignoreInvalidPartitionPaths") + .doc("Whether to ignore invalid partition paths that do not match <column>=<value>. When " + + "the option is enabled, table with two partition directories 'table/invalid' and " + + "'table/col=1' will only load the latter directory and ignore the invalid partition") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val MAX_RECORDS_PER_FILE = buildConf("spark.sql.files.maxRecordsPerFile") .doc("Maximum number of records to write out to a single file. " + "If this value is zero or negative, there is no limit.") @@ -2065,6 +2144,13 @@ .timeConf(TimeUnit.MILLISECONDS) .createWithDefault(TimeUnit.MINUTES.toMillis(1)) // 1 minute + val STREAMING_TRANSFORM_WITH_STATE_OP_STATE_SCHEMA_VERSION = + buildConf("spark.sql.streaming.transformWithState.stateSchemaVersion") + .doc("The version of the state schema used by the transformWithState operator") + .version("4.0.0") + .intConf + .createWithDefault(3) + val STATE_STORE_COMPRESSION_CODEC = buildConf("spark.sql.streaming.stateStore.compression.codec") .internal() @@ -2238,7 +2324,9 @@ buildConf("spark.sql.streaming.stateStore.skipNullsForStreamStreamJoins.enabled") .internal() .doc("When true, this config will skip null values in hash based stream-stream joins. " + - "The number of skipped null values will be shown as custom metric of stream join operator.") + "The number of skipped null values will be shown as custom metric of stream join operator. " + + "If the streaming query was started with Spark 3.5 or above, please exercise caution " + + "before enabling this config since it may hide potential data loss/corruption issues.") .version("3.3.0") .booleanConf .createWithDefault(false) @@ -2271,6 +2359,17 @@ .booleanConf .createWithDefault(false) + val STREAMING_OPTIMIZE_ONE_ROW_PLAN_ENABLED = + buildConf("spark.sql.streaming.optimizeOneRowPlan.enabled") + .internal() + .doc("When true, enable OptimizeOneRowPlan rule for the case where the child is a " + + "streaming Dataset. This is a fallback flag to revert the 'incorrect' behavior, hence " + + "this configuration must not be used without understanding in depth.
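// Sketch of the directory-name check behind spark.sql.files.ignoreInvalidPartitionPaths above:
// only directories shaped like <column>=<value> are treated as partitions, so 'table/invalid'
// would be ignored while 'table/col=1' is loaded. The regex and helper are illustrative only,
// not Spark's actual partition-discovery code.
object PartitionPathSketch {
  private val partitionDir = "([^=/]+)=([^=/]*)".r

  def looksLikePartitionDir(name: String): Boolean = name match {
    case partitionDir(_, _) => true
    case _ => false
  }

  assert(looksLikePartitionDir("col=1"))
  assert(!looksLikePartitionDir("invalid"))
}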
Use this only to " + + "quickly recover failure in existing query!") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val VARIABLE_SUBSTITUTE_ENABLED = buildConf("spark.sql.variable.substitute") .doc("This enables substitution using syntax like `${var}`, `${system:var}`, " + @@ -2524,6 +2623,14 @@ object SQLConf { .booleanConf .createWithDefault(false) + val AVOID_COLLAPSE_UDF_WITH_EXPENSIVE_EXPR = + buildConf("spark.sql.optimizer.avoidCollapseUDFWithExpensiveExpr") + .doc("Whether to avoid collapsing projections that would duplicate expensive expressions " + + "in UDFs.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + val FILE_SINK_LOG_DELETION = buildConf("spark.sql.streaming.fileSink.log.deletion") .internal() .doc("Whether to delete the expired log files in file stream sink.") @@ -2874,6 +2981,22 @@ object SQLConf { .intConf .createWithDefault(SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_THRESHOLD.defaultValue.get) + val SHUFFLE_DEPENDENCY_SKIP_MIGRATION_ENABLED = + buildConf("spark.sql.shuffleDependency.skipMigration.enabled") + .doc("When enabled, shuffle dependencies for a Spark Connect SQL execution are marked at " + + "the end of the execution, and they will not be migrated during decommissions.") + .version("4.0.0") + .booleanConf + .createWithDefault(Utils.isTesting) + + val SHUFFLE_DEPENDENCY_FILE_CLEANUP_ENABLED = + buildConf("spark.sql.shuffleDependency.fileCleanup.enabled") + .doc("When enabled, shuffle files will be cleaned up at the end of Spark Connect " + + "SQL executions.") + .version("4.0.0") + .booleanConf + .createWithDefault(Utils.isTesting) + val SORT_MERGE_JOIN_EXEC_BUFFER_IN_MEMORY_THRESHOLD = buildConf("spark.sql.sortMergeJoinExec.buffer.in.memory.threshold") .internal() @@ -3371,7 +3494,7 @@ object SQLConf { "standard directly, but their behaviors align with ANSI SQL's style") .version("3.0.0") .booleanConf - .createWithDefault(sys.env.get("SPARK_ANSI_SQL_MODE").contains("true")) + .createWithDefault(!sys.env.get("SPARK_ANSI_SQL_MODE").contains("false")) val ENFORCE_RESERVED_KEYWORDS = buildConf("spark.sql.ansi.enforceReservedKeywords") .doc(s"When true and '${ANSI_ENABLED.key}' is true, the Spark SQL parser enforces the ANSI " + @@ -3443,6 +3566,17 @@ object SQLConf { .booleanConf .createWithDefault(false) + val USE_COMMON_EXPR_ID_FOR_ALIAS = + buildConf("spark.sql.useCommonExprIdForAlias") + .internal() + .doc("When true, use the common expression ID for the alias when rewriting With " + + "expressions. Otherwise, use the index of the common expression definition. When true " + + "this avoids duplicate alias names, but is helpful to set to false for testing to ensure" + + "that alias names are consistent.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + val USE_NULLS_FOR_MISSING_DEFAULT_COLUMN_VALUES = buildConf("spark.sql.defaultColumn.useNullsForMissingDefaultValues") .internal() @@ -3736,7 +3870,7 @@ object SQLConf { .checkValues((1 to 9).toSet + Deflater.DEFAULT_COMPRESSION) .createOptional - val AVRO_XZ_LEVEL = buildConf("spark.sql.avro.zx.level") + val AVRO_XZ_LEVEL = buildConf("spark.sql.avro.xz.level") .doc("Compression level for the xz codec used in writing of AVRO files. 
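// The ANSI_ENABLED default change above flips the SPARK_ANSI_SQL_MODE environment variable from
// opt-in to opt-out. A small sketch of the new semantics (the helper is illustrative only):
object AnsiDefaultSketch {
  def ansiDefault(env: Map[String, String]): Boolean =
    !env.get("SPARK_ANSI_SQL_MODE").contains("false")

  assert(ansiDefault(Map.empty))                              // unset   -> ANSI on
  assert(ansiDefault(Map("SPARK_ANSI_SQL_MODE" -> "true")))   // "true"  -> ANSI on
  assert(!ansiDefault(Map("SPARK_ANSI_SQL_MODE" -> "false"))) // "false" -> explicit opt-out
}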
" + "Valid value must be in the range of from 1 to 9 inclusive " + "The default value is 6.") @@ -4122,11 +4256,20 @@ object SQLConf { val LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED = buildConf("spark.sql.legacy.mssqlserver.numericMapping.enabled") .internal() - .doc("When true, use legacy MsSqlServer SMALLINT and REAL type mapping.") + .doc("When true, use legacy MsSqlServer TINYINT, SMALLINT and REAL type mapping.") .version("2.4.5") .booleanConf .createWithDefault(false) + val LEGACY_MSSQLSERVER_DATETIMEOFFSET_MAPPING_ENABLED = + buildConf("spark.sql.legacy.mssqlserver.datetimeoffsetMapping.enabled") + .internal() + .doc("When true, DATETIMEOFFSET is mapped to StringType; otherwise, it is mapped to " + + "TimestampType.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val LEGACY_MYSQL_BIT_ARRAY_MAPPING_ENABLED = buildConf("spark.sql.legacy.mysql.bitArrayMapping.enabled") .internal() @@ -4135,6 +4278,17 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_MYSQL_TIMESTAMPNTZ_MAPPING_ENABLED = + buildConf("spark.sql.legacy.mysql.timestampNTZMapping.enabled") + .internal() + .doc("When true, TimestampNTZType and MySQL TIMESTAMP can be converted bidirectionally. " + + "For reading, MySQL TIMESTAMP is converted to TimestampNTZType when JDBC read option " + + "preferTimestampNTZ is true. For writing, TimestampNTZType is converted to MySQL " + + "TIMESTAMP; otherwise, DATETIME") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val LEGACY_ORACLE_TIMESTAMP_MAPPING_ENABLED = buildConf("spark.sql.legacy.oracle.timestampMapping.enabled") .internal() @@ -4144,6 +4298,33 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_DB2_TIMESTAMP_MAPPING_ENABLED = + buildConf("spark.sql.legacy.db2.numericMapping.enabled") + .internal() + .doc("When true, SMALLINT maps to IntegerType in DB2; otherwise, ShortType" ) + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + val LEGACY_DB2_BOOLEAN_MAPPING_ENABLED = + buildConf("spark.sql.legacy.db2.booleanMapping.enabled") + .internal() + .doc("When true, BooleanType maps to CHAR(1) in DB2; otherwise, BOOLEAN" ) + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + val LEGACY_POSTGRES_DATETIME_MAPPING_ENABLED = + buildConf("spark.sql.legacy.postgres.datetimeMapping.enabled") + .internal() + .doc("When true, TimestampType maps to TIMESTAMP WITHOUT TIME ZONE in PostgreSQL for " + + "writing; otherwise, TIMESTAMP WITH TIME ZONE. 
When true, TIMESTAMP WITH TIME ZONE " + "can be converted to TimestampNTZType when JDBC read option preferTimestampNTZ is " + "true; otherwise, converted to TimestampType regardless of preferTimestampNTZ.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val CSV_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.csv.filterPushdown.enabled") .doc("When true, enable filter pushdown to CSV datasource.") .version("3.0.0") @@ -4171,6 +4352,15 @@ .booleanConf .createWithDefault(true) + val JSON_EXACT_STRING_PARSING = + buildConf("spark.sql.json.enableExactStringParsing") + .internal() + .doc("When set to true, string columns extracted from JSON objects will be extracted " + + "exactly as they appear in the input string, with no changes") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + val LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK = buildConf("spark.sql.legacy.csv.enableDateTimeParsingFallback") .internal() @@ -4421,7 +4611,7 @@ s"instead of the value of ${DEFAULT_DATA_SOURCE_NAME.key} as the table provider.") .version("3.1.0") .booleanConf - .createWithDefault(true) + .createWithDefault(sys.env.get("SPARK_SQL_LEGACY_CREATE_HIVE_TABLE").contains("true")) val LEGACY_CHAR_VARCHAR_AS_STRING = buildConf("spark.sql.legacy.charVarcharAsString") @@ -4449,6 +4639,14 @@ .booleanConf .createWithDefault(true) + val LEGACY_NO_CHAR_PADDING_IN_PREDICATE = buildConf("spark.sql.legacy.noCharPaddingInPredicate") + .internal() + .doc("When true, Spark will not apply char type padding for CHAR type columns in string " + + s"comparison predicates, when '${READ_SIDE_CHAR_PADDING.key}' is false.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val CLI_PRINT_HEADER = buildConf("spark.sql.cli.print.header") .doc("When set to true, spark-sql CLI prints the names of the columns in query output.") @@ -4508,6 +4706,7 @@ val LEGACY_INFER_ARRAY_TYPE_FROM_FIRST_ELEMENT = buildConf("spark.sql.pyspark.legacy.inferArrayTypeFromFirstElement.enabled") + .internal() .doc("PySpark's SparkSession.createDataFrame infers the element type of an array from all " + "values in the array by default. If this config is set to true, it restores the legacy " + "behavior of only inferring the type from the first array element.") @@ -4515,6 +4714,16 @@ .booleanConf .createWithDefault(false) + val LEGACY_INFER_MAP_STRUCT_TYPE_FROM_FIRST_ITEM = + buildConf("spark.sql.pyspark.legacy.inferMapTypeFromFirstPair.enabled") + .internal() + .doc("PySpark's SparkSession.createDataFrame infers the key/value types of a map from all " + + "pairs in the map by default. If this config is set to true, it restores the legacy " + + "behavior of only inferring the type from the first non-null pair.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val LEGACY_USE_V1_COMMAND = buildConf("spark.sql.legacy.useV1Command") .internal() @@ -4745,6 +4954,24 @@ .booleanConf .createWithDefault(false) + val LEGACY_SCALAR_SUBQUERY_ALLOW_GROUP_BY_NON_EQUALITY_CORRELATED_PREDICATE = + buildConf("spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate") + .internal() + .doc("When set to true, use incorrect legacy behavior for checking whether a scalar " + + "subquery with a group-by on correlated columns is allowed.
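// Sketch of the inference difference behind the inferMapTypeFromFirstPair legacy flag above:
// the default now merges over all key/value pairs instead of trusting only the first one.
// Using runtime classes as a stand-in for Spark types is purely illustrative.
object InferenceSketch {
  def inferFromFirst(values: Seq[Any]): Class[_] = values.head.getClass

  def inferFromAll(values: Seq[Any]): Class[_] = {
    val classes = values.map(_.getClass).distinct
    if (classes.size == 1) classes.head else classOf[AnyRef] // widen when the pairs disagree
  }

  private val mapValues: Seq[Any] = Seq(1, "a")
  assert(inferFromFirst(mapValues) == classOf[java.lang.Integer]) // legacy: first pair wins
  assert(inferFromAll(mapValues) == classOf[AnyRef])              // default: widened
}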
See SPARK-48503") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + + val SCALAR_SUBQUERY_ALLOW_GROUP_BY_COLUMN_EQUAL_TO_CONSTANT = + buildConf("spark.sql.analyzer.scalarSubqueryAllowGroupByColumnEqualToConstant") + .internal() + .doc("When set to true, allow scalar subqueries with group-by on a column that also " + + " has an equality filter with a constant (SPARK-48557).") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + val ALLOW_SUBQUERY_EXPRESSIONS_IN_LAMBDAS_AND_HIGHER_ORDER_FUNCTIONS = buildConf("spark.sql.analyzer.allowSubqueryExpressionsInLambdasOrHigherOrderFunctions") .internal() @@ -4798,6 +5025,14 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_CODING_ERROR_ACTION = buildConf("spark.sql.legacy.codingErrorAction") + .internal() + .doc("When set to true, encode/decode functions replace unmappable characters with mojibake " + + "instead of reporting coding errors.") + .version("4.0.0") + .booleanConf + .createWithDefault(false) + val LEGACY_EVAL_CURRENT_TIME = buildConf("spark.sql.legacy.earlyEvalCurrentTime") .internal() .doc("When set to true, evaluation and constant folding will happen for now() and " + @@ -4810,6 +5045,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_BANG_EQUALS_NOT = buildConf("spark.sql.legacy.bangEqualsNot") + .internal() + .doc("When set to true, '!' is a lexical equivalent for 'NOT'. That is '!' can be used " + + "outside of the documented prefix usage in a logical expression." + + "Examples are: `expr ! IN (1, 2)` and `expr ! BETWEEN 1 AND 2`, but also `IF ! EXISTS`." + ) + .version("4.0.0") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * @@ -5061,6 +5306,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def ignoreMissingFiles: Boolean = getConf(IGNORE_MISSING_FILES) + def ignoreInvalidPartitionPaths: Boolean = getConf(IGNORE_INVALID_PARTITION_PATHS) + def maxRecordsPerFile: Long = getConf(MAX_RECORDS_PER_FILE) def useCompression: Boolean = getConf(COMPRESS_CACHED) @@ -5241,12 +5488,27 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def legacyMsSqlServerNumericMappingEnabled: Boolean = getConf(LEGACY_MSSQLSERVER_NUMERIC_MAPPING_ENABLED) + def legacyMsSqlServerDatetimeOffsetMappingEnabled: Boolean = + getConf(LEGACY_MSSQLSERVER_DATETIMEOFFSET_MAPPING_ENABLED) + def legacyMySqlBitArrayMappingEnabled: Boolean = getConf(LEGACY_MYSQL_BIT_ARRAY_MAPPING_ENABLED) + def legacyMySqlTimestampNTZMappingEnabled: Boolean = + getConf(LEGACY_MYSQL_TIMESTAMPNTZ_MAPPING_ENABLED) + def legacyOracleTimestampMappingEnabled: Boolean = getConf(LEGACY_ORACLE_TIMESTAMP_MAPPING_ENABLED) + def legacyDB2numericMappingEnabled: Boolean = + getConf(LEGACY_DB2_TIMESTAMP_MAPPING_ENABLED) + + def legacyDB2BooleanMappingEnabled: Boolean = + getConf(LEGACY_DB2_BOOLEAN_MAPPING_ENABLED) + + def legacyPostgresDatetimeMappingEnabled: Boolean = + getConf(LEGACY_POSTGRES_DATETIME_MAPPING_ENABLED) + override def legacyTimeParserPolicy: LegacyBehaviorPolicy.Value = { LegacyBehaviorPolicy.withName(getConf(SQLConf.LEGACY_TIME_PARSER_POLICY)) } @@ -5392,6 +5654,10 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def groupByAliases: Boolean = getConf(GROUP_BY_ALIASES) + def viewSchemaBindingEnabled: Boolean = getConf(VIEW_SCHEMA_BINDING_ENABLED) + + def viewSchemaCompensation: Boolean = getConf(VIEW_SCHEMA_COMPENSATION) + def defaultCacheStorageLevel: StorageLevel = 
StorageLevel.fromString(getConf(DEFAULT_CACHE_STORAGE_LEVEL)) @@ -5702,6 +5968,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def legacyInferArrayTypeFromFirstElement: Boolean = getConf( SQLConf.LEGACY_INFER_ARRAY_TYPE_FROM_FIRST_ELEMENT) + def legacyInferMapStructTypeFromFirstItem: Boolean = getConf( + SQLConf.LEGACY_INFER_MAP_STRUCT_TYPE_FROM_FIRST_ITEM) + def parquetFieldIdReadEnabled: Boolean = getConf(SQLConf.PARQUET_FIELD_ID_READ_ENABLED) def parquetFieldIdWriteEnabled: Boolean = getConf(SQLConf.PARQUET_FIELD_ID_WRITE_ENABLED) @@ -5722,6 +5991,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def defaultDatabase: String = getConf(StaticSQLConf.CATALOG_DEFAULT_DATABASE) + def globalTempDatabase: String = getConf(StaticSQLConf.GLOBAL_TEMP_DATABASE) + def allowsTempViewCreationWithMultipleNameparts: Boolean = getConf(SQLConf.ALLOW_TEMP_VIEW_CREATION_WITH_MULTIPLE_NAME_PARTS) @@ -5738,6 +6009,8 @@ class SQLConf extends Serializable with Logging with SqlApiConf { def legacyJavaCharsets: Boolean = getConf(SQLConf.LEGACY_JAVA_CHARSETS) + def legacyCodingErrorAction: Boolean = getConf(SQLConf.LEGACY_CODING_ERROR_ACTION) + def legacyEvalCurrentTime: Boolean = getConf(SQLConf.LEGACY_EVAL_CURRENT_TIME) /** ********************** SQLConf functionality methods ************ */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala index a52bca1066059..88f556130bfe6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.parseColumnPath import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue, NamedReference} import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse => V2AlwaysFalse, AlwaysTrue => V2AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate} -import org.apache.spark.sql.types.StringType +import org.apache.spark.sql.types.{DataType, StringType} import org.apache.spark.unsafe.types.UTF8String //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -381,3 +381,87 @@ case class AlwaysFalse() extends Filter { @Evolving object AlwaysFalse extends AlwaysFalse { } + +/** + * Base class for collation aware string filters. + */ +@Evolving +abstract class CollatedFilter() extends Filter { + + /** The corresponding non-collation aware filter. */ + def correspondingFilter: Filter + def dataType: DataType + + override def references: Array[String] = correspondingFilter.references + override def toV2: Predicate = correspondingFilter.toV2 +} + +/** Collation aware equivalent of [[EqualTo]]. */ +@Evolving +case class CollatedEqualTo(attribute: String, value: Any, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = EqualTo(attribute, value) +} + +/** Collation aware equivalent of [[EqualNullSafe]]. */ +@Evolving +case class CollatedEqualNullSafe(attribute: String, value: Any, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = EqualNullSafe(attribute, value) +} + +/** Collation aware equivalent of [[GreaterThan]]. 
*/ +@Evolving +case class CollatedGreaterThan(attribute: String, value: Any, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = GreaterThan(attribute, value) +} + +/** Collation aware equivalent of [[GreaterThanOrEqual]]. */ +@Evolving +case class CollatedGreaterThanOrEqual(attribute: String, value: Any, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = GreaterThanOrEqual(attribute, value) +} + +/** Collation aware equivalent of [[LessThan]]. */ +@Evolving +case class CollatedLessThan(attribute: String, value: Any, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = LessThan(attribute, value) +} + +/** Collation aware equivalent of [[LessThanOrEqual]]. */ +@Evolving +case class CollatedLessThanOrEqual(attribute: String, value: Any, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = LessThanOrEqual(attribute, value) +} + +/** Collation aware equivalent of [[In]]. */ +@Evolving +case class CollatedIn(attribute: String, values: Array[Any], dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = In(attribute, values) +} + +/** Collation aware equivalent of [[StringStartsWith]]. */ +@Evolving +case class CollatedStringStartsWith(attribute: String, value: String, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = StringStartsWith(attribute, value) +} + +/** Collation aware equivalent of [[StringEndsWith]]. */ +@Evolving +case class CollatedStringEndsWith(attribute: String, value: String, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = StringEndsWith(attribute, value) +} + +/** Collation aware equivalent of [[StringContains]]. */ +@Evolving +case class CollatedStringContains(attribute: String, value: String, dataType: DataType) + extends CollatedFilter { + override def correspondingFilter: Filter = StringContains(attribute, value) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeExpression.scala index 026272a0f2d85..fd942ba60de4b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeExpression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataTypeExpression.scala @@ -30,7 +30,18 @@ private[sql] abstract class DataTypeExpression(val dataType: DataType) { } private[sql] case object BooleanTypeExpression extends DataTypeExpression(BooleanType) -private[sql] case object StringTypeExpression extends DataTypeExpression(StringType) +private[sql] case object StringTypeExpression { + /** + * Enables matching against StringType for expressions: + * {{{ + * case Cast(child @ StringType(collationId), NumericType) => + * ... 
+ * }}} + */ + def unapply(e: Expression): Boolean = { + e.dataType.isInstanceOf[StringType] + } +} private[sql] case object TimestampTypeExpression extends DataTypeExpression(TimestampType) private[sql] case object DateTypeExpression extends DataTypeExpression(DateType) private[sql] case object ByteTypeExpression extends DataTypeExpression(ByteType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala index d459d2dd12272..1e0bac331dc75 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SchemaUtils.scala @@ -295,12 +295,29 @@ private[spark] object SchemaUtils { def escapeMetaCharacters(str: String): String = SparkSchemaUtils.escapeMetaCharacters(str) /** - * Checks if a given data type has a non-default collation string type. + * Checks if a given data type has a non utf8 binary (implicit) collation type. */ - def hasNonBinarySortableCollatedString(dt: DataType): Boolean = { + def hasNonUTF8BinaryCollation(dt: DataType): Boolean = { dt.existsRecursively { - case st: StringType => !st.supportsBinaryOrdering + case st: StringType => !st.isUTF8BinaryCollation case _ => false } } + + /** + * Replaces any collated string type with non collated StringType + * recursively in the given data type. + */ + def replaceCollatedStringWithString(dt: DataType): DataType = dt match { + case ArrayType(et, nullable) => + ArrayType(replaceCollatedStringWithString(et), nullable) + case MapType(kt, vt, nullable) => + MapType(replaceCollatedStringWithString(kt), replaceCollatedStringWithString(vt), nullable) + case StructType(fields) => + StructType(fields.map { field => + field.copy(dataType = replaceCollatedStringWithString(field.dataType)) + }) + case _: StringType => StringType + case _ => dt + } } diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala index 4b33f9bc52785..40e6182e587b3 100644 --- a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala +++ b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtilsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.commons.lang3.{JavaVersion, SystemUtils} -import org.apache.spark.{SparkFunSuite, SparkRuntimeException} +import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException, SparkRuntimeException} import org.apache.spark.unsafe.types.UTF8String class ExpressionImplUtilsSuite extends SparkFunSuite { @@ -353,4 +353,67 @@ class ExpressionImplUtilsSuite extends SparkFunSuite { parameters = t.errorParamsMap ) } + + test("Validate UTF8 string") { + def validateUTF8(str: UTF8String, expected: UTF8String, except: Boolean): Unit = { + if (except) { + checkError( + exception = intercept[SparkIllegalArgumentException] { + ExpressionImplUtils.validateUTF8String(str) + }, + errorClass = "INVALID_UTF8_STRING", + parameters = Map( + "str" -> str.getBytes.map(byte => f"\\x$byte%02X").mkString + ) + ) + } else { + assert(ExpressionImplUtils.validateUTF8String(str)== expected) + } + } + validateUTF8(UTF8String.EMPTY_UTF8, + UTF8String.fromString(""), except = false) + validateUTF8(UTF8String.fromString(""), + UTF8String.fromString(""), except = false) + 
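// A JDK-only sketch of what "validate UTF8" means in the tests above: a strict CharsetDecoder
// rejects ill-formed byte sequences such as a lone 0x80 or 0xFF byte. This is an independent
// illustration, not the ExpressionImplUtils implementation under test.
import java.nio.ByteBuffer
import java.nio.charset.{CharacterCodingException, CodingErrorAction, StandardCharsets}

object Utf8ValidationSketch {
  def isValidUtf8(bytes: Array[Byte]): Boolean = {
    val decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT)
    try {
      decoder.decode(ByteBuffer.wrap(bytes))
      true
    } catch {
      case _: CharacterCodingException => false
    }
  }

  assert(isValidUtf8("Spark".getBytes(StandardCharsets.UTF_8)))
  assert(!isValidUtf8(Array(0x80.toByte))) // stray continuation byte
  assert(!isValidUtf8(Array(0xFF.toByte))) // never valid in UTF-8
}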
validateUTF8(UTF8String.fromString("aa"), + UTF8String.fromString("aa"), except = false) + validateUTF8(UTF8String.fromString("\u0061"), + UTF8String.fromString("\u0061"), except = false) + validateUTF8(UTF8String.fromString(""), + UTF8String.fromString(""), except = false) + validateUTF8(UTF8String.fromString("abc"), + UTF8String.fromString("abc"), except = false) + validateUTF8(UTF8String.fromString("hello"), + UTF8String.fromString("hello"), except = false) + validateUTF8(UTF8String.fromBytes(Array.empty[Byte]), + UTF8String.fromString(""), except = false) + validateUTF8(UTF8String.fromBytes(Array[Byte](0x41)), + UTF8String.fromString("A"), except = false) + validateUTF8(UTF8String.fromBytes(Array[Byte](0x61)), + UTF8String.fromString("a"), except = false) + // scalastyle:off nonascii + validateUTF8(UTF8String.fromBytes(Array[Byte](0x80.toByte)), + UTF8String.fromString("\uFFFD"), except = true) + validateUTF8(UTF8String.fromBytes(Array[Byte](0xFF.toByte)), + UTF8String.fromString("\uFFFD"), except = true) + // scalastyle:on nonascii + } + + test("TryValidate UTF8 string") { + def tryValidateUTF8(str: UTF8String, expected: UTF8String): Unit = { + assert(ExpressionImplUtils.tryValidateUTF8String(str) == expected) + } + tryValidateUTF8(UTF8String.fromString(""), UTF8String.fromString("")) + tryValidateUTF8(UTF8String.fromString("aa"), UTF8String.fromString("aa")) + tryValidateUTF8(UTF8String.fromString("\u0061"), UTF8String.fromString("\u0061")) + tryValidateUTF8(UTF8String.EMPTY_UTF8, UTF8String.fromString("")) + tryValidateUTF8(UTF8String.fromString(""), UTF8String.fromString("")) + tryValidateUTF8(UTF8String.fromString("abc"), UTF8String.fromString("abc")) + tryValidateUTF8(UTF8String.fromString("hello"), UTF8String.fromString("hello")) + tryValidateUTF8(UTF8String.fromBytes(Array.empty[Byte]), UTF8String.fromString("")) + tryValidateUTF8(UTF8String.fromBytes(Array[Byte](0x41)), UTF8String.fromString("A")) + tryValidateUTF8(UTF8String.fromBytes(Array[Byte](0x61)), UTF8String.fromString("a")) + tryValidateUTF8(UTF8String.fromBytes(Array[Byte](0x80.toByte)), null) + tryValidateUTF8(UTF8String.fromBytes(Array[Byte](0xFF.toByte)), null) + } + } diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java index 238b8ac04e7e6..0db155e88aea5 100644 --- a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java +++ b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala index 22f24d8266177..088f0e21710d2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala @@ -53,9 +53,12 @@ object RandomDataGenerator { */ private val PROBABILITY_OF_NULL: Float = 0.1f - final val MAX_STR_LEN: Int = 1024 - final val MAX_ARR_SIZE: Int = 128 - final val MAX_MAP_SIZE: Int = 128 + final val MAX_STR_LEN: Int = + System.getProperty("spark.sql.test.randomDataGenerator.maxStrLen", "1024").toInt + final val MAX_ARR_SIZE: Int = + System.getProperty("spark.sql.test.randomDataGenerator.maxArraySize", "128").toInt + final val MAX_MAP_SIZE: Int = + System.getProperty("spark.sql.test.randomDataGenerator.maxMapSize", "128").toInt /** * Helper function for constructing a biased random number generator which returns "interesting" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala new file mode 100644 index 0000000000000..4cbffff184cd9 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/EscapePathBenchmark.scala @@ -0,0 +1,124 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst + +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils + +/** + * Benchmark for path escaping/unescaping + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class --jars + * 2. build/sbt "catalyst/Test/runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/Test/runMain " + * Results will be written to "benchmarks/EscapePathBenchmark-results.txt". 
+ * }}} + */ +object EscapePathBenchmark extends BenchmarkBase { + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + val N = 1000000 + runBenchmark("Escape") { + val benchmark = new Benchmark("Escape Tests", N, 10, output = output) + val paths = Seq( + "https://issues.apache.org/jira/browse/SPARK-48551", + "https...issues.apache.org/jira/browse/SPARK-48551", + "https...issues.apache.org.jira/browse/SPARK-48551", + "https...issues.apache.org.jira.browse/SPARK-48551", + "https...issues.apache.org.jira.browse.SPARK-48551") + benchmark.addCase("Legacy") { _ => + (1 to N).foreach(_ => paths.foreach(escapePathNameLegacy)) + } + + benchmark.addCase("New") { _ => + (1 to N).foreach(_ => { + paths.foreach(ExternalCatalogUtils.escapePathName) + }) + } + benchmark.run() + } + + runBenchmark("Unescape") { + val benchmark = new Benchmark("Unescape Tests", N, 10, output = output) + val paths = Seq( + "https%3A%2F%2Fissues.apache.org%2Fjira%2Fbrowse%2FSPARK-48551", + "https:%2F%2Fissues.apache.org%2Fjira%2Fbrowse%2FSPARK-48551", + "https:/%2Fissues.apache.org%2Fjira%2Fbrowse%2FSPARK-48551", + "https://issues.apache.org%2Fjira%2Fbrowse%2FSPARK-48551", + "https://issues.apache.org/jira%2Fbrowse%2FSPARK-48551", + "https://issues.apache.org/jira%2Fbrowse%2FSPARK-48551", + "https://issues.apache.org/jira/browse%2FSPARK-48551", + "https://issues.apache.org/jira/browse%2SPARK-48551", + "https://issues.apache.org/jira/browse/SPARK-48551") + benchmark.addCase("Legacy") { _ => + (1 to N).foreach(_ => paths.foreach(unescapePathNameLegacy)) + } + + benchmark.addCase("New") { _ => + (1 to N).foreach(_ => { + paths.foreach(ExternalCatalogUtils.unescapePathName) + }) + } + benchmark.run() + } + } + + /** + * Legacy implementation of escapePathName before Spark 4.0 + */ + def escapePathNameLegacy(path: String): String = { + val builder = new StringBuilder() + path.foreach { c => + if (ExternalCatalogUtils.needsEscaping(c)) { + builder.append('%') + builder.append(f"${c.asInstanceOf[Int]}%02X") + } else { + builder.append(c) + } + } + + builder.toString() + } + + def unescapePathNameLegacy(path: String): String = { + val sb = new StringBuilder + var i = 0 + while (i < path.length) { + val c = path.charAt(i) + if (c == '%' && i + 2 < path.length) { + val code: Int = try { + Integer.parseInt(path.substring(i + 1, i + 3), 16) + } catch { + case _: Exception => -1 + } + if (code >= 0) { + sb.append(code.asInstanceOf[Char]) + i += 3 + } else { + sb.append(c) + i += 1 + } + } else { + sb.append(c) + i += 1 + } + } + sb.toString() + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala index 8806431ab4395..9977dcd83d6af 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/SQLKeywordSuite.scala @@ -98,7 +98,7 @@ trait SQLKeywordUtils extends SparkFunSuite with SQLHelper { } (symbol, literals) :: Nil } else { - val literal = literalDef.replaceAll("'", "").trim + val literal = literalDef.split("\\{")(0).replaceAll("'", "").trim // The case where a symbol string and its literal string are different, // e.g., `SETMINUS: 'MINUS';`. 
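// --- Illustrative aside (not part of the patch above) ---
// The EscapePathBenchmark above measures ExternalCatalogUtils.escapePathName, which
// percent-encodes characters that are unsafe in partition directory names. A minimal
// sketch of that scheme over a hypothetical, illustrative subset of unsafe characters
// (the real set lives in ExternalCatalogUtils.charToEscape):
object PathEscapeSketch {
  private val unsafe: Set[Char] = Set(':', '/', '%', '\n') // illustrative subset only

  def escape(path: String): String =
    path.map(c => if (unsafe.contains(c)) f"%%${c.toInt}%02X" else c.toString).mkString

  def main(args: Array[String]): Unit = {
    assert(escape("a:b") == "a%3Ab")
    assert(escape("a/b") == "a%2Fb")
    assert(escape("a%b") == "a%25b")
    assert(escape("a b") == "a b") // spaces are left as-is
  }
}
// --- end aside ---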
if (symbol != literal) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index f12d224096917..19eb3a418543d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -360,6 +360,40 @@ class AnalysisErrorSuite extends AnalysisTest with DataTypeErrorsBase { "inputType" -> "\"BOOLEAN\"", "requiredType" -> "\"INT\"")) + errorClassTest( + "the buckets of ntile window function is not foldable", + testRelation2.select( + WindowExpression( + NTile(Literal(99.9f)), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + UnspecifiedFrame)).as("window")), + errorClass = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE", + messageParameters = Map( + "sqlExpr" -> "\"ntile(99.9)\"", + "paramIndex" -> "first", + "inputSql" -> "\"99.9\"", + "inputType" -> "\"FLOAT\"", + "requiredType" -> "\"INT\"")) + + + errorClassTest( + "the buckets of ntile window function is not int literal", + testRelation2.select( + WindowExpression( + NTile(AttributeReference("b", IntegerType)()), + WindowSpecDefinition( + UnresolvedAttribute("a") :: Nil, + SortOrder(UnresolvedAttribute("b"), Ascending) :: Nil, + UnspecifiedFrame)).as("window")), + errorClass = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", + messageParameters = Map( + "sqlExpr" -> "\"ntile(b)\"", + "inputName" -> "`buckets`", + "inputExpr" -> "\"b\"", + "inputType" -> "\"INT\"")) + errorClassTest( "unresolved attributes", testRelation.select($"abcd"), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 3c628d35dcdb8..62856a96f7ee8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -759,9 +759,10 @@ class AnalysisSuite extends AnalysisTest with Matchers { testRelation, testRelation, cond, - UpdateAction(Some(cond), Assignment($"a", $"a") :: Nil) :: Nil, - Nil, - Nil + matchedActions = UpdateAction(Some(cond), Assignment($"a", $"a") :: Nil) :: Nil, + notMatchedActions = Nil, + notMatchedBySourceActions = Nil, + withSchemaEvolution = false ), "AMBIGUOUS_REFERENCE", Map("name" -> "`a`", "referenceNames" -> "[`a`, `a`]")) @@ -1795,4 +1796,15 @@ class AnalysisSuite extends AnalysisTest with Matchers { assert(refs.head.resolved) assert(refs.head.isStreaming) } + + test("SPARK-47927: ScalaUDF output nullability") { + val udf = ScalaUDF( + function = (i: Int) => i + 1, + dataType = IntegerType, + children = $"a" :: Nil, + nullable = false, + inputEncoders = Seq(Some(ExpressionEncoder[Int]().resolveAndBind()))) + val plan = testRelation.select(udf.as("u")).select($"u").analyze + assert(plan.output.head.nullable) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 4b58755e13ef6..4367cbbd24a89 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -747,6 +747,17 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper with Quer ) } + test("hash expressions are prohibited on VariantType elements") { + val argument = Literal.create(null, VariantType) + val murmur3Hash = new Murmur3Hash(Seq(argument)) + assert(murmur3Hash.checkInputDataTypes() == + DataTypeMismatch( + errorSubClass = "HASH_VARIANT_TYPE", + messageParameters = Map("functionName" -> toSQLId(murmur3Hash.prettyName)) + ) + ) + } + test("check types for Lag") { val lag = Lag(Literal(1), NonFoldableLiteral(10), Literal(null), true) assert(lag.checkInputDataTypes() == @@ -800,4 +811,9 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper with Quer "This should have been converted during analysis.")) ) } + + test("check that current time is foldable") { + val rnd = Rand(Month(CurrentDate())) + assert(rnd.checkInputDataTypes().isSuccess) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala index 953b2c8bb1011..39cf298aec434 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala @@ -86,4 +86,22 @@ class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest { testRelationWithData.groupBy(Literal(1))(Literal(100).as("a")) ) } + + test("SPARK-47895: group by all repeated analysis") { + val plan = testRelation.groupBy($"all")(Literal(100).as("a")).analyze + comparePlans( + plan, + testRelation.groupBy(Literal(1))(Literal(100).as("a")) + ) + + val testRelationWithData = testRelation.copy(data = Seq(new GenericInternalRow(Array(1: Any)))) + // Copy the plan to reset its `analyzed` flag, so that analyzer rules will re-apply. + val copiedPlan = plan.transform { + case _: LocalRelation => testRelationWithData + } + comparePlans( + copiedPlan.analyze, // repeated analysis + testRelationWithData.groupBy(Literal(1))(Literal(100).as("a")) + ) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala new file mode 100644 index 0000000000000..4cdbda5494196 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtilsSuite.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.catalog.ExternalCatalogUtils.{escapePathName, unescapePathName} + +class ExternalCatalogUtilsSuite extends SparkFunSuite { + + test("SPARK-48551: escapePathName") { + ExternalCatalogUtils.charToEscape.stream().toArray.map(_.asInstanceOf[Char]).foreach { c => + // Check parity with old conversion technique: + assert(escapePathName(c.toString) === "%" + f"$c%02X", + s"wrong escaping for $c") + } + assert(escapePathName("") === "") + assert(escapePathName(" ") === " ") + assert(escapePathName("\n") === "%0A") + assert(escapePathName("a b") === "a b") + assert(escapePathName("a:b") === "a%3Ab") + assert(escapePathName(":ab") === "%3Aab") + assert(escapePathName("ab:") === "ab%3A") + assert(escapePathName("a%b") === "a%25b") + assert(escapePathName("a,b") === "a,b") + assert(escapePathName("a/b") === "a%2Fb") + } + + test("SPARK-48551: unescapePathName") { + ExternalCatalogUtils.charToEscape.stream().toArray.map(_.asInstanceOf[Char]).foreach { c => + // Check parity with old conversion technique: + assert(unescapePathName("%" + f"$c%02X") === c.toString, + s"wrong unescaping for $c") + } + assert(unescapePathName(null) === null) + assert(unescapePathName("") === "") + assert(unescapePathName(" ") === " ") + assert(unescapePathName("%0A") === "\n") + assert(unescapePathName("a b") === "a b") + assert(unescapePathName("a%3Ab") === "a:b") + assert(unescapePathName("%3Aab") === ":ab") + assert(unescapePathName("ab%3A") === "ab:") + assert(unescapePathName("a%25b") === "a%b") + assert(unescapePathName("a,b") === "a,b") + assert(unescapePathName("a%2Fb") === "a/b") + assert(unescapePathName("a%2") === "a%2") + assert(unescapePathName("a%F ") === "a%F ") + assert(unescapePathName("%0") === "%0") + assert(unescapePathName("0%") === "0%") + // scalastyle:off nonascii + assert(unescapePathName("a\u00FF") === "a\u00FF") + // scalastyle:on nonascii + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index e9a60ff17fc1b..48f829548bb65 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -933,17 +933,17 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { createTempView(catalog, "temp_view4", tempTable, overrideIfExists = false) createGlobalTempView(catalog, "global_temp_view1", tempTable, overrideIfExists = false) createGlobalTempView(catalog, "global_temp_view2", tempTable, overrideIfExists = false) - assert(catalog.listTables(catalog.globalTempViewManager.database, "*").toSet == + assert(catalog.listTables(catalog.globalTempDatabase, "*").toSet == Set(TableIdentifier("temp_view1"), TableIdentifier("temp_view4"), - TableIdentifier("global_temp_view1", Some(catalog.globalTempViewManager.database)), - TableIdentifier("global_temp_view2", Some(catalog.globalTempViewManager.database)))) - assert(catalog.listTables(catalog.globalTempViewManager.database, "*temp_view1").toSet == + TableIdentifier("global_temp_view1", Some(catalog.globalTempDatabase)), + TableIdentifier("global_temp_view2", Some(catalog.globalTempDatabase)))) + assert(catalog.listTables(catalog.globalTempDatabase, "*temp_view1").toSet == Set(TableIdentifier("temp_view1"), - 
TableIdentifier("global_temp_view1", Some(catalog.globalTempViewManager.database)))) - assert(catalog.listTables(catalog.globalTempViewManager.database, "global*").toSet == - Set(TableIdentifier("global_temp_view1", Some(catalog.globalTempViewManager.database)), - TableIdentifier("global_temp_view2", Some(catalog.globalTempViewManager.database)))) + TableIdentifier("global_temp_view1", Some(catalog.globalTempDatabase)))) + assert(catalog.listTables(catalog.globalTempDatabase, "global*").toSet == + Set(TableIdentifier("global_temp_view1", Some(catalog.globalTempDatabase)), + TableIdentifier("global_temp_view2", Some(catalog.globalTempDatabase)))) } } @@ -1906,9 +1906,9 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { assert(catalog.getCachedTable(qualifiedName1) != null) createGlobalTempView(catalog, "tbl2", Range(2, 10, 1, 10), false) - val qualifiedName2 = QualifiedTableName(catalog.globalTempViewManager.database, "tbl2") + val qualifiedName2 = QualifiedTableName(catalog.globalTempDatabase, "tbl2") catalog.cacheTable(qualifiedName2, Range(2, 10, 1, 10)) - catalog.refreshTable(TableIdentifier("tbl2", Some(catalog.globalTempViewManager.database))) + catalog.refreshTable(TableIdentifier("tbl2", Some(catalog.globalTempDatabase))) assert(catalog.getCachedTable(qualifiedName2) != null) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala index 2e94c723a6f26..d4b68500e0789 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVExprUtilsSuite.scala @@ -33,6 +33,15 @@ class CSVExprUtilsSuite extends SparkFunSuite { assert(CSVExprUtils.toChar("""\\""") === '\\') } + test("Does not accept null delimiter") { + checkError( + exception = intercept[SparkIllegalArgumentException]{ + CSVExprUtils.toDelimiterStr(null) + }, + errorClass = "INVALID_DELIMITER_VALUE.NULL_VALUE", + parameters = Map.empty) + } + test("Does not accept delimiter larger than one character") { checkError( exception = intercept[SparkIllegalArgumentException]{ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index 82238de31f9fb..e29609c741633 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.encoders import scala.reflect.runtime.universe.TypeTag +import org.apache.spark.SparkRuntimeException import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -169,10 +170,10 @@ class EncoderResolutionSuite extends PlanTest { fromRow(InternalRow(new GenericArrayData(Array(1, 2)))) // If there is null value, it should throw runtime exception - val e = intercept[RuntimeException] { + val exception = intercept[SparkRuntimeException] { fromRow(InternalRow(new GenericArrayData(Array(1, null)))) } - assert(e.getCause.getMessage.contains("Null value appeared in non-nullable field")) + assert(exception.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } test("the real number of fields doesn't match encoder schema: tuple 
encoder") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala index df73d50fdcd6b..01a3daa77d38d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.encoders import scala.collection.mutable import scala.util.Random +import org.apache.spark.SparkRuntimeException import org.apache.spark.sql.{RandomDataGenerator, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.plans.CodegenInterpretedPlanTest @@ -275,9 +276,10 @@ class RowEncoderSuite extends CodegenInterpretedPlanTest { test("RowEncoder should throw RuntimeException if input row object is null") { val schema = new StructType().add("int", IntegerType) val encoder = ExpressionEncoder(schema) - val e = intercept[RuntimeException](toRow(encoder, null)) - assert(e.getCause.getMessage.contains("Null value appeared in non-nullable field")) - assert(e.getCause.getMessage.contains("top level Product or row object")) + // Check the error class only since the parameters may change depending on how we are running + // this test case. + val exception = intercept[SparkRuntimeException](toRow(encoder, null)) + assert(exception.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } test("RowEncoder should validate external type") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala index 9089c6f17d408..63602d04b5c79 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/BitwiseExpressionsSuite.scala @@ -134,6 +134,47 @@ class BitwiseExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("BitCount") { + // null + val nullLongLiteral = Literal.create(null, LongType) + val nullIntLiteral = Literal.create(null, IntegerType) + val nullBooleanLiteral = Literal.create(null, BooleanType) + checkEvaluation(BitwiseCount(nullLongLiteral), null) + checkEvaluation(BitwiseCount(nullIntLiteral), null) + checkEvaluation(BitwiseCount(nullBooleanLiteral), null) + + // boolean + checkEvaluation(BitwiseCount(Literal(true)), 1) + checkEvaluation(BitwiseCount(Literal(false)), 0) + + // byte/tinyint + checkEvaluation(BitwiseCount(Literal(1.toByte)), 1) + checkEvaluation(BitwiseCount(Literal(2.toByte)), 1) + checkEvaluation(BitwiseCount(Literal(3.toByte)), 2) + + // short/smallint + checkEvaluation(BitwiseCount(Literal(1.toShort)), 1) + checkEvaluation(BitwiseCount(Literal(2.toShort)), 1) + checkEvaluation(BitwiseCount(Literal(3.toShort)), 2) + + // int + checkEvaluation(BitwiseCount(Literal(1)), 1) + checkEvaluation(BitwiseCount(Literal(2)), 1) + checkEvaluation(BitwiseCount(Literal(3)), 2) + + // long/bigint + checkEvaluation(BitwiseCount(Literal(1L)), 1) + checkEvaluation(BitwiseCount(Literal(2L)), 1) + checkEvaluation(BitwiseCount(Literal(3L)), 2) + + // negative num + checkEvaluation(BitwiseCount(Literal(-1L)), 64) + + // edge value + checkEvaluation(BitwiseCount(Literal(9223372036854775807L)), 63) + checkEvaluation(BitwiseCount(Literal(-9223372036854775808L)), 1) + } + test("BitGet") { 
val nullLongLiteral = Literal.create(null, LongType) val nullIntLiteral = Literal.create(null, IntegerType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index 3366d99dd75e1..7e545d3321054 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -454,4 +454,29 @@ class CanonicalizeSuite extends SparkFunSuite { // different. assert(common3.canonicalized != common4.canonicalized) } + + test("SPARK-48035: Add/Multiply operator canonicalization should take into account the" + + "evaluation mode of the operands before operand reordering") { + Seq(1, 10) map { multiCommutativeOpOptThreshold => + val default = SQLConf.get.getConf(MULTI_COMMUTATIVE_OP_OPT_THRESHOLD) + SQLConf.get.setConfString(MULTI_COMMUTATIVE_OP_OPT_THRESHOLD.key, + multiCommutativeOpOptThreshold.toString) + try { + val l1 = Literal(1) + val l2 = Literal(2) + val l3 = Literal(3) + + val expr1 = Add(Add(l1, l2), l3) + val expr2 = Add(Add(l2, l1, EvalMode.TRY), l3) + assert(!expr1.semanticEquals(expr2)) + + val expr3 = Multiply(Multiply(l1, l2), l3) + val expr4 = Multiply(Multiply(l2, l1, EvalMode.TRY), l3) + assert(!expr3.semanticEquals(expr4)) + } finally { + SQLConf.get.setConfString(MULTI_COMMUTATIVE_OP_OPT_THRESHOLD.key, + default.toString) + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala index 4df8d87074fc2..4c045f9fda731 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala @@ -104,7 +104,7 @@ class CodeGenerationSuite extends SparkFunSuite with ExpressionEvalHelper { test("SPARK-22543: split large if expressions into blocks due to JVM code size limit") { var strExpr: Expression = Literal("abc") for (_ <- 1 to 150) { - strExpr = StringDecode(Encode(strExpr, "utf-8"), "utf-8") + strExpr = StringTrimRight(StringTrimLeft(strExpr)) } val expressions = Seq(If(EqualTo(strExpr, strExpr), strExpr, strExpr)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallbackSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallbackSuite.scala index da5bddb0c09fd..a843d43ae83b6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallbackSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGeneratorWithInterpretedFallbackSuite.scala @@ -89,7 +89,7 @@ class CodeGeneratorWithInterpretedFallbackSuite extends SparkFunSuite with PlanT FailedCodegenProjection.createObject(input) } }.getMessage - assert(errMsg.contains("failed to compile: org.codehaus.commons.compiler.CompileException:")) + assert(errMsg.contains("Failed to compile: org.codehaus.commons.compiler.CompileException:")) } test("SPARK-25358 Correctly handles NoOp in MutableProjection") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala index 537bac9aae9b4..a4651c6c4c7e9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationExpressionSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { test("validate default collation") { @@ -32,8 +33,8 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { } test("collate against literal") { - val collateExpr = Collate(Literal("abc"), "UTF8_BINARY_LCASE") - val collationId = CollationFactory.collationNameToId("UTF8_BINARY_LCASE") + val collateExpr = Collate(Literal("abc"), "UTF8_LCASE") + val collationId = CollationFactory.collationNameToId("UTF8_LCASE") assert(collateExpr.dataType == StringType(collationId)) checkEvaluation(collateExpr, "abc") } @@ -62,7 +63,7 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { exception = intercept[SparkException] { Collate(Literal("abc"), "UTF8_BS") }, errorClass = "COLLATION_INVALID_NAME", sqlState = "42704", - parameters = Map("proposal" -> "UTF8_BINARY", "collationName" -> "UTF8_BS")) + parameters = Map("collationName" -> "UTF8_BS", "proposals" -> "UTF8_LCASE")) } test("collation on non-explicit default collation") { @@ -71,11 +72,12 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { test("collation on explicitly collated string") { checkEvaluation( - Collation(Literal.create("abc", StringType(1))).replacement, - "UTF8_BINARY_LCASE") + Collation(Literal.create("abc", + StringType(CollationFactory.UTF8_LCASE_COLLATION_ID))).replacement, + "UTF8_LCASE") checkEvaluation( - Collation(Collate(Literal("abc"), "UTF8_BINARY_LCASE")).replacement, - "UTF8_BINARY_LCASE") + Collation(Collate(Literal("abc"), "UTF8_LCASE")).replacement, + "UTF8_LCASE") } test("Array operations on arrays of collated strings") { @@ -90,8 +92,8 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { (Seq("a"), Seq("a"), true, "UTF8_BINARY"), (Seq("a"), Seq("b"), false, "UTF8_BINARY"), (Seq("a"), Seq("A"), false, "UTF8_BINARY"), - (Seq("a"), Seq("A"), true, "UTF8_BINARY_LCASE"), - (Seq("a", "B"), Seq("A", "b"), true, "UTF8_BINARY_LCASE"), + (Seq("a"), Seq("A"), true, "UTF8_LCASE"), + (Seq("a", "B"), Seq("A", "b"), true, "UTF8_LCASE"), (Seq("a"), Seq("A"), false, "UNICODE"), (Seq("a", "B"), Seq("A", "b"), true, "UNICODE_CI") ) @@ -106,8 +108,8 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { (Seq("a", "b", "c"), Seq("a", "b", "c"), "UTF8_BINARY"), (Seq("a", "a", "a"), Seq("a"), "UTF8_BINARY"), (Seq("aaa", "AAA", "Aaa", "aAa"), Seq("aaa", "AAA", "Aaa", "aAa"), "UTF8_BINARY"), - (Seq("aaa", "AAA", "Aaa", "aAa"), Seq("aaa"), "UTF8_BINARY_LCASE"), - (Seq("aaa", "AAA", "Aaa", "aAa", "b"), Seq("aaa", "b"), "UTF8_BINARY_LCASE"), + (Seq("aaa", "AAA", "Aaa", "aAa"), Seq("aaa"), "UTF8_LCASE"), + (Seq("aaa", "AAA", "Aaa", "aAa", "b"), Seq("aaa", "b"), "UTF8_LCASE"), (Seq("aaa", "AAA", "Aaa", "aAa"), Seq("aaa"), "UNICODE_CI") ) for ((in, out, collName) <- distinct) @@ -118,8 +120,8 @@ class 
CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { (Seq("a"), Seq("a"), Seq("a"), "UTF8_BINARY"), (Seq("a"), Seq("b"), Seq("a", "b"), "UTF8_BINARY"), (Seq("a"), Seq("A"), Seq("a", "A"), "UTF8_BINARY"), - (Seq("a"), Seq("A"), Seq("a"), "UTF8_BINARY_LCASE"), - (Seq("a", "B"), Seq("A", "b"), Seq("a", "B"), "UTF8_BINARY_LCASE"), + (Seq("a"), Seq("A"), Seq("a"), "UTF8_LCASE"), + (Seq("a", "B"), Seq("A", "b"), Seq("a", "B"), "UTF8_LCASE"), (Seq("a"), Seq("A"), Seq("a", "A"), "UNICODE"), (Seq("a", "B"), Seq("A", "b"), Seq("a", "B"), "UNICODE_CI") ) @@ -134,8 +136,8 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { (Seq("a"), Seq("a"), Seq("a"), "UTF8_BINARY"), (Seq("a"), Seq("b"), Seq.empty, "UTF8_BINARY"), (Seq("a"), Seq("A"), Seq.empty, "UTF8_BINARY"), - (Seq("a"), Seq("A"), Seq("a"), "UTF8_BINARY_LCASE"), - (Seq("a", "B"), Seq("A", "b"), Seq("a", "B"), "UTF8_BINARY_LCASE"), + (Seq("a"), Seq("A"), Seq("a"), "UTF8_LCASE"), + (Seq("a", "B"), Seq("A", "b"), Seq("a", "B"), "UTF8_LCASE"), (Seq("a"), Seq("A"), Seq.empty, "UNICODE"), (Seq("a", "B"), Seq("A", "b"), Seq("a", "B"), "UNICODE_CI") ) @@ -150,8 +152,8 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { (Seq("a"), Seq("a"), Seq.empty, "UTF8_BINARY"), (Seq("a"), Seq("b"), Seq("a"), "UTF8_BINARY"), (Seq("a"), Seq("A"), Seq("a"), "UTF8_BINARY"), - (Seq("a"), Seq("A"), Seq.empty, "UTF8_BINARY_LCASE"), - (Seq("a", "B"), Seq("A", "b"), Seq.empty, "UTF8_BINARY_LCASE"), + (Seq("a"), Seq("A"), Seq.empty, "UTF8_LCASE"), + (Seq("a", "B"), Seq("A", "b"), Seq.empty, "UTF8_LCASE"), (Seq("a"), Seq("A"), Seq("a"), "UNICODE"), (Seq("a", "B"), Seq("A", "b"), Seq.empty, "UNICODE_CI") ) @@ -161,4 +163,57 @@ class CollationExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(ArrayExcept(left, right), out) } } + + test("CollationKey generates correct collation key for collated string") { + val testCases = Seq( + ("", "UTF8_BINARY", UTF8String.fromString("").getBytes), + ("aa", "UTF8_BINARY", UTF8String.fromString("aa").getBytes), + ("AA", "UTF8_BINARY", UTF8String.fromString("AA").getBytes), + ("aA", "UTF8_BINARY", UTF8String.fromString("aA").getBytes), + ("", "UTF8_LCASE", UTF8String.fromString("").getBytes), + ("aa", "UTF8_LCASE", UTF8String.fromString("aa").getBytes), + ("AA", "UTF8_LCASE", UTF8String.fromString("aa").getBytes), + ("aA", "UTF8_LCASE", UTF8String.fromString("aa").getBytes), + ("", "UNICODE", Array[Byte](1, 1, 0)), + ("aa", "UNICODE", Array[Byte](42, 42, 1, 6, 1, 6, 0)), + ("AA", "UNICODE", Array[Byte](42, 42, 1, 6, 1, -36, -36, 0)), + ("aA", "UNICODE", Array[Byte](42, 42, 1, 6, 1, -59, -36, 0)), + ("", "UNICODE_CI", Array[Byte](1, 0)), + ("aa", "UNICODE_CI", Array[Byte](42, 42, 1, 6, 0)), + ("AA", "UNICODE_CI", Array[Byte](42, 42, 1, 6, 0)), + ("aA", "UNICODE_CI", Array[Byte](42, 42, 1, 6, 0)) + ) + for ((input, collation, expected) <- testCases) { + val str = Literal.create(input, StringType(collation)) + checkEvaluation(CollationKey(str), expected) + } + } + + test("collation name normalization in collation expression") { + Seq( + ("en_USA", "en_USA"), + ("en_CS", "en"), + ("en_AS", "en"), + ("en_CS_AS", "en"), + ("en_AS_CS", "en"), + ("en_CI", "en_CI"), + ("en_AI", "en_AI"), + ("en_AI_CI", "en_CI_AI"), + ("en_CI_AI", "en_CI_AI"), + ("en_CS_AI", "en_AI"), + ("en_AI_CS", "en_AI"), + ("en_CI_AS", "en_CI"), + ("en_AS_CI", "en_CI"), + ("en_USA_AI_CI", "en_USA_CI_AI"), + // randomized case + ("EN_USA", "en_USA"), + ("SR_CYRL", 
"sr_Cyrl"), + ("sr_cyrl_srb", "sr_Cyrl_SRB"), + ("sR_cYRl_sRb", "sr_Cyrl_SRB") + ).foreach { + case (collation, normalized) => + checkEvaluation(Collation(Literal.create("abc", StringType(collation))).replacement, + normalized) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala new file mode 100644 index 0000000000000..6f0d0c13b32a3 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollationRegexpExpressionsSuite.scala @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.util.CollationFactory +import org.apache.spark.sql.types._ + +class CollationRegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { + + test("Like/ILike/RLike expressions with collated strings") { + case class LikeTestCase[R](l: String, regexLike: String, regexRLike: String, collation: String, + expectedLike: R, expectedILike: R, expectedRLike: R) + val testCases = Seq( + LikeTestCase("AbC", "%AbC%", ".b.", "UTF8_BINARY", true, true, true), + LikeTestCase("AbC", "%ABC%", ".B.", "UTF8_BINARY", false, true, false), + LikeTestCase("AbC", "%abc%", ".b.", "UTF8_LCASE", true, true, true), + LikeTestCase("", "", "", "UTF8_LCASE", true, true, true), + LikeTestCase("Foo", "", "", "UTF8_LCASE", false, false, true), + LikeTestCase("", "%foo%", ".o.", "UTF8_LCASE", false, false, false), + LikeTestCase("AbC", "%ABC%", ".B.", "UTF8_BINARY", false, true, false), + LikeTestCase(null, "%foo%", ".o.", "UTF8_BINARY", null, null, null), + LikeTestCase("Foo", null, null, "UTF8_BINARY", null, null, null), + LikeTestCase(null, null, null, "UTF8_BINARY", null, null, null) + ) + testCases.foreach(t => { + // Like + checkEvaluation(Like( + Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))), + Literal.create(t.regexLike, StringType), '\\'), t.expectedLike) + // ILike + checkEvaluation(ILike( + Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))), + Literal.create(t.regexLike, StringType), '\\').replacement, t.expectedILike) + // RLike + checkEvaluation(RLike( + Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))), + Literal.create(t.regexRLike, StringType)), t.expectedRLike) + }) + } + + test("StringSplit expression with collated strings") { + case class StringSplitTestCase[R](s: String, r: String, collation: String, expected: R) + val testCases = Seq( + 
StringSplitTestCase("1A2B3C", "[ABC]", "UTF8_BINARY", Seq("1", "2", "3", "")), + StringSplitTestCase("1A2B3C", "[abc]", "UTF8_BINARY", Seq("1A2B3C")), + StringSplitTestCase("1A2B3C", "[ABC]", "UTF8_LCASE", Seq("1", "2", "3", "")), + StringSplitTestCase("1A2B3C", "[abc]", "UTF8_LCASE", Seq("1", "2", "3", "")), + StringSplitTestCase("1A2B3C", "[1-9]+", "UTF8_BINARY", Seq("", "A", "B", "C")), + StringSplitTestCase("", "", "UTF8_BINARY", Seq("")), + StringSplitTestCase("1A2B3C", "", "UTF8_BINARY", Seq("1", "A", "2", "B", "3", "C")), + StringSplitTestCase("", "[1-9]+", "UTF8_BINARY", Seq("")), + StringSplitTestCase(null, "[1-9]+", "UTF8_BINARY", null), + StringSplitTestCase("1A2B3C", null, "UTF8_BINARY", null), + StringSplitTestCase(null, null, "UTF8_BINARY", null) + ) + testCases.foreach(t => { + // StringSplit + checkEvaluation(StringSplit( + Literal.create(t.s, StringType(CollationFactory.collationNameToId(t.collation))), + Literal.create(t.r, StringType), -1), t.expected) + }) + } + + test("Regexp expressions with collated strings") { + case class RegexpTestCase[R](l: String, r: String, collation: String, + expectedExtract: R, expectedExtractAll: R, expectedCount: R) + val testCases = Seq( + RegexpTestCase("AbC-aBc", ".b.", "UTF8_BINARY", "AbC", Seq("AbC"), 1), + RegexpTestCase("AbC-abc", ".b.", "UTF8_BINARY", "AbC", Seq("AbC", "abc"), 2), + RegexpTestCase("AbC-aBc", ".b.", "UTF8_LCASE", "AbC", Seq("AbC", "aBc"), 2), + RegexpTestCase("ABC-abc", ".b.", "UTF8_LCASE", "ABC", Seq("ABC", "abc"), 2), + RegexpTestCase("", "", "UTF8_LCASE", "", Seq(""), 1), + RegexpTestCase("Foo", "", "UTF8_LCASE", "", Seq("", "", "", ""), 4), + RegexpTestCase("", ".o.", "UTF8_LCASE", "", Seq(), 0), + RegexpTestCase("Foo", ".O.", "UTF8_BINARY", "", Seq(), 0), + RegexpTestCase(null, ".O.", "UTF8_BINARY", null, null, null), + RegexpTestCase("Foo", null, "UTF8_BINARY", null, null, null), + RegexpTestCase(null, null, "UTF8_BINARY", null, null, null) + ) + testCases.foreach(t => { + // RegExpExtract + checkEvaluation(RegExpExtract( + Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))), + Literal.create(t.r, StringType), 0), t.expectedExtract) + // RegExpExtractAll + checkEvaluation(RegExpExtractAll( + Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))), + Literal.create(t.r, StringType), 0), t.expectedExtractAll) + // RegExpCount + checkEvaluation(RegExpCount( + Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))), + Literal.create(t.r, StringType)).replacement, t.expectedCount) + // RegExpInStr + def expectedInStr(count: Any): Any = count match { + case null => null + case 0 => 0 + case n: Int if n >= 1 => 1 + } + checkEvaluation(RegExpInStr( + Literal.create(t.l, StringType(CollationFactory.collationNameToId(t.collation))), + Literal.create(t.r, StringType), 0), expectedInStr(t.expectedCount)) + }) + } + + test("MultiLikeBase regexp expressions with collated strings") { + val nullStr = Literal.create(null, StringType) + // Supported collations (StringTypeBinaryLcase) + val binaryCollation = StringType(CollationFactory.collationNameToId("UTF8_BINARY")) + val lowercaseCollation = StringType(CollationFactory.collationNameToId("UTF8_LCASE")) + // LikeAll + checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", "%oo"), true) + checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", "%bar%"), false) + checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAll("%foo%", "%oo"), true) + 
checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAll("%foo%", "%bar%"), false) + checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", "%oo"), true) + checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", "%bar%"), false) + checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%foo%", nullStr), null) + checkEvaluation(Literal.create("foo", binaryCollation).likeAll("%feo%", nullStr), false) + checkEvaluation(Literal.create(null, binaryCollation).likeAll("%foo%", "%oo"), null) + // NotLikeAll + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", "%oo"), false) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%goo%", "%bar%"), true) + checkEvaluation(Literal.create("Foo", lowercaseCollation).notLikeAll("%foo%", "%oo"), false) + checkEvaluation(Literal.create("Foo", lowercaseCollation).notLikeAll("%goo%", "%bar%"), true) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", "%oo"), false) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%goo%", "%bar%"), true) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%foo%", nullStr), false) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAll("%feo%", nullStr), null) + checkEvaluation(Literal.create(null, binaryCollation).notLikeAll("%foo%", "%oo"), null) + // LikeAny + checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%goo%", "%hoo"), false) + checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", "%bar%"), true) + checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAny("%goo%", "%hoo"), false) + checkEvaluation(Literal.create("Foo", lowercaseCollation).likeAny("%foo%", "%bar%"), true) + checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%goo%", "%hoo"), false) + checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", "%bar%"), true) + checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%foo%", nullStr), true) + checkEvaluation(Literal.create("foo", binaryCollation).likeAny("%feo%", nullStr), null) + checkEvaluation(Literal.create(null, binaryCollation).likeAny("%foo%", "%oo"), null) + // NotLikeAny + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", "%hoo"), true) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", "%oo%"), false) + checkEvaluation(Literal.create("Foo", lowercaseCollation).notLikeAny("%Foo%", "%hoo"), true) + checkEvaluation(Literal.create("Foo", lowercaseCollation).notLikeAny("%foo%", "%oo%"), false) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%Foo%", "%hoo"), true) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", "%oo%"), false) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%foo%", nullStr), null) + checkEvaluation(Literal.create("foo", binaryCollation).notLikeAny("%feo%", nullStr), true) + checkEvaluation(Literal.create(null, binaryCollation).notLikeAny("%foo%", "%oo"), null) + } + +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 5f135e46a3775..497b335289b11 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -28,7 +28,7 @@ import 
org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.TypeUtils.ordinalNumber import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.unsafe.types.{UTF8String, VariantVal} class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -359,6 +359,38 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { ) } + // map key can't be variant + val map6 = CreateMap(Seq( + Literal.create(new VariantVal(Array[Byte](), Array[Byte]())), + Literal.create(1) + )) + map6.checkInputDataTypes() match { + case TypeCheckResult.TypeCheckSuccess => fail("should not allow variant as a part of map key") + case TypeCheckResult.DataTypeMismatch(errorSubClass, messageParameters) => + assert(errorSubClass == "INVALID_MAP_KEY_TYPE") + assert(messageParameters === Map("keyType" -> "\"VARIANT\"")) + } + + // map key can't contain variant + val map7 = CreateMap( + Seq( + CreateStruct( + Seq(Literal.create(1), Literal.create(new VariantVal(Array[Byte](), Array[Byte]()))) + ), + Literal.create(1) + ) + ) + map7.checkInputDataTypes() match { + case TypeCheckResult.TypeCheckSuccess => fail("should not allow variant as a part of map key") + case TypeCheckResult.DataTypeMismatch(errorSubClass, messageParameters) => + assert(errorSubClass == "INVALID_MAP_KEY_TYPE") + assert( + messageParameters === Map( + "keyType" -> "\"STRUCT\"" + ) + ) + } + test("MapFromArrays") { val intSeq = Seq(5, 10, 15, 20, 25) val longSeq = intSeq.map(_.toLong) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala index 21e6b8692911d..a063e53486ad8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvalHelper.scala @@ -71,10 +71,15 @@ trait ExpressionEvalHelper extends ScalaCheckDrivenPropertyChecks with PlanTestB new ArrayBasedMapData(keyArray, valueArray) } + protected def replace(expr: Expression): Expression = expr match { + case r: RuntimeReplaceable => replace(r.replacement) + case _ => expr.mapChildren(replace) + } + private def prepareEvaluation(expression: Expression): Expression = { val serializer = new JavaSerializer(new SparkConf()).newInstance() val resolver = ResolveTimeZone - val expr = resolver.resolveTimeZones(expression) + val expr = resolver.resolveTimeZones(replace(expression)) assert(expr.resolved) serializer.deserialize(serializer.serialize(expr)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HexBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HexBenchmark.scala new file mode 100644 index 0000000000000..df3fcbb83906c --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HexBenchmark.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import java.util.Locale + +import org.apache.commons.codec.binary.{Hex => ApacheHex} + +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.unsafe.types.UTF8String + +/** + * Benchmark for hex + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class --jars + * 2. build/sbt "catalyst/Test/runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/Test/runMain " + * Results will be written to "benchmarks/HexBenchmark-results.txt". + * }}} + */ +object HexBenchmark extends BenchmarkBase { + + private val hexStrings = { + var tmp = Seq("", "A", "AB", "ABC", "ABCD", "123ABCDEF") + tmp = tmp ++ tmp.map(_.toLowerCase(Locale.ROOT)) + (2 to 4).foreach { i => tmp = tmp ++ tmp.map(x => x * i) } + tmp.map(UTF8String.fromString(_).toString) + } + + private val hexBin = hexStrings.map(_.getBytes) + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + runBenchmark("UnHex Comparison") { + val N = 1_000_000 + val benchmark = new Benchmark(s"Cardinality $N", N, 3, output = output) + benchmark.addCase("Common Codecs") { _ => + (1 to N).foreach(_ => hexStrings.foreach(y => apacheDecodeHex(y))) + } + + benchmark.addCase("Java") { _ => + (1 to N).foreach(_ => hexStrings.foreach(y => javaUnhex(y))) + } + + benchmark.addCase("Spark") { _ => + (1 to N).foreach(_ => hexStrings.foreach(y => builtinUnHex(y))) + } + + benchmark.addCase("Spark Binary") { _ => + (1 to N).foreach(_ => hexBin.foreach(y => builtinUnHex(y))) + } + benchmark.run() + } + } + + def apacheDecodeHex(value: String): Array[Byte] = { + val padding = if (value.length % 2 != 0) "0" else "" + ApacheHex.decodeHex(padding + value) + } + + def builtinUnHex(value: String): Array[Byte] = { + Hex.unhex(value) + } + + def builtinUnHex(value: Array[Byte]): Array[Byte] = { + Hex.unhex(value) + } + + def javaUnhex(value: String) : Array[Byte] = { + val padding = if ((value.length & 0x1) != 0) "0" else "" + java.util.HexFormat.of().parseHex(padding + value) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HexSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HexSuite.scala new file mode 100644 index 0000000000000..a3f963538f447 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HexSuite.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite + +class HexSuite extends SparkFunSuite { + test("SPARK-48596: hex long values") { + assert(Hex.hex(0).toString === "0") + assert(Hex.hex(1).toString === "1") + assert(Hex.hex(15).toString === "F") + assert(Hex.hex(16).toString === "10") + assert(Hex.hex(255).toString === "FF") + assert(Hex.hex(256).toString === "100") + assert(Hex.hex(4095).toString === "FFF") + assert(Hex.hex(4096).toString === "1000") + assert(Hex.hex(65535).toString === "FFFF") + assert(Hex.hex(65536).toString === "10000") + assert(Hex.hex(1048575).toString === "FFFFF") + assert(Hex.hex(1048576).toString === "100000") + assert(Hex.hex(-1).toString === "FFFFFFFFFFFFFFFF") + assert(Hex.hex(Long.MinValue).toString === "8000000000000000") + assert(Hex.hex(Long.MaxValue).toString === "7FFFFFFFFFFFFFFF") + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala index da8e11c0433eb..ace017b1cddc3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NullExpressionsSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkFunSuite, SparkRuntimeException} import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull @@ -53,10 +53,13 @@ class NullExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("AssertNotNUll") { - val ex = intercept[RuntimeException] { - evaluateWithoutCodegen(AssertNotNull(Literal(null))) - }.getMessage - assert(ex.contains("Null value appeared in non-nullable field")) + checkError( + exception = intercept[SparkRuntimeException] { + evaluateWithoutCodegen(AssertNotNull(Literal(null))) + }, + errorClass = "NOT_NULL_ASSERT_VIOLATION", + sqlState = "42000", + parameters = Map("walkedTypePath" -> "\n\n")) } test("IsNaN") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 0fcceef392389..ebd7245434819 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -489,6 +489,8 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // non ascii characters are not allowed in the code, so we disable the scalastyle here. 
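// --- Illustrative aside (not part of the patch above) ---
// The encode/decode checks below are charset round trips; the same property holds
// for the plain JDK charsets (UTF-32 is available on typical JDKs):
import java.nio.charset.Charset

object EncodeDecodeRoundTripSketch {
  def roundTrip(s: String, charsetName: String): String = {
    val cs = Charset.forName(charsetName)
    new String(s.getBytes(cs), cs)
  }

  def main(args: Array[String]): Unit = {
    assert(roundTrip("hello", "UTF-16LE") == "hello")
    assert(roundTrip("hello", "UTF-32") == "hello")
  }
}
// --- end aside ---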
checkEvaluation( StringDecode(Encode(Literal("大千世界"), Literal("UTF-16LE")), Literal("UTF-16LE")), "大千世界") + checkEvaluation( + StringDecode(Encode(Literal("大千世界"), Literal("UTF-32")), Literal("UTF-32")), "大千世界") checkEvaluation( StringDecode(Encode(a, Literal("utf-8")), Literal("utf-8")), "大千世界", create_row("大千世界")) checkEvaluation( @@ -503,8 +505,8 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(StringDecode(b, Literal.create(null, StringType)), null, create_row(null)) // Test escaping of charset - GenerateUnsafeProjection.generate(Encode(a, Literal("\"quote")) :: Nil) - GenerateUnsafeProjection.generate(StringDecode(b, Literal("\"quote")) :: Nil) + GenerateUnsafeProjection.generate(Encode(a, Literal("\"quote")).replacement :: Nil) + GenerateUnsafeProjection.generate(StringDecode(b, Literal("\"quote")).replacement :: Nil) } test("initcap unit test") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala index 780a2692e87f7..e082f2e3acccd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryEvalSuite.scala @@ -46,6 +46,19 @@ class TryEvalSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("try_remainder") { + Seq( + (3.0, 2.0, 1.0), + (1.0, 0.0, null), + (-1.0, 0.0, null) + ).foreach { case (a, b, expected) => + val left = Literal(a) + val right = Literal(b) + val input = Remainder(left, right, EvalMode.TRY) + checkEvaluation(input, expected) + } + } + test("try_subtract") { Seq( (1, 1, 0), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala index 574d5daa361e9..8fc72caa47860 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionEvalUtilsSuite.scala @@ -25,9 +25,13 @@ class VariantExpressionEvalUtilsSuite extends SparkFunSuite { test("parseJson type coercion") { def check(json: String, expectedValue: Array[Byte], expectedMetadata: Array[Byte]): Unit = { + // parse_json val actual = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json)) + // try_parse_json + val tryActual = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json), + failOnError = false) val expected = new VariantVal(expectedValue, expectedMetadata) - assert(actual === expected) + assert(actual === expected && tryActual === expected) } // Dictionary size is `0` for value 0. 
An empty dictionary contains one offset `0` for the @@ -104,6 +108,8 @@ class VariantExpressionEvalUtilsSuite extends SparkFunSuite { test("parseJson negative") { def checkException(json: String, errorClass: String, parameters: Map[String, String]): Unit = { + val try_parse_json_output = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json), + failOnError = false) checkError( exception = intercept[SparkThrowable] { VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json)) @@ -111,6 +117,7 @@ class VariantExpressionEvalUtilsSuite extends SparkFunSuite { errorClass = errorClass, parameters = parameters ) + assert(try_parse_json_output === null) } for (json <- Seq("", "[", "+1", "1a", """{"a": 1, "b": 2, "a": "3"}""")) { checkException(json, "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", @@ -122,4 +129,42 @@ class VariantExpressionEvalUtilsSuite extends SparkFunSuite { Map("sizeLimit" -> "16.0 MiB", "functionName" -> "`parse_json`")) } } + + test("isVariantNull") { + def check(json: String, expected: Boolean): Unit = { + if (json != null) { + val parsedVariant = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(json)) + val actual = VariantExpressionEvalUtils.isVariantNull(parsedVariant) + assert(actual == expected) + } else { + val actual = VariantExpressionEvalUtils.isVariantNull(null) + assert(actual == expected) + } + } + + // Primitive types + check("null", expected = true) + check(null, expected = false) + check("0", expected = false) + check("13", expected = false) + check("-54", expected = false) + check("2147483647", expected = false) + check("2147483648", expected = false) + check("238457328534848", expected = false) + check("342.769", expected = false) + check("true", expected = false) + check("false", expected = false) + check("false", expected = false) + check("65.43", expected = false) + check("\"" + "spark" * 100 + "\"", expected = false) + // Short String + check("\"\"", expected = false) + check("\"null\"", expected = false) + // Array + check("[]", expected = false) + check("[null, null]", expected = false) + check("[{\"a\" : 13}, \"spark\"]", expected = false) + // Object + check("[{\"a\" : 13, \"b\" : null}]", expected = false) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala index 9aa1dcd2ef952..a758fa84f6fca 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/variant/VariantExpressionSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.variant import java.time.{LocalDateTime, ZoneId, ZoneOffset} +import scala.collection.mutable import scala.reflect.runtime.universe.TypeTag import org.apache.spark.{SparkFunSuite, SparkRuntimeException} @@ -58,6 +59,9 @@ class VariantExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { check(Array(primitiveHeader(INT8), 0, 0, 0, 0, 0, 0, 0), emptyMetadata) // DECIMAL16 only has 15 byte content. check(Array(primitiveHeader(DECIMAL16)) ++ Array.fill(16)(0.toByte), emptyMetadata) + // 1e38 has a precision of 39. Even if it still fits into 16 bytes, it is not a valid decimal. + check(Array[Byte](primitiveHeader(DECIMAL16), 0) ++ + BigDecimal(1e38).toBigInt.toByteArray.reverse, emptyMetadata) // Short string content too short. 
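// Aside, not part of the patch: the parseJson(..., failOnError = false) calls added above back
// try_parse_json, which returns null instead of raising MALFORMED_RECORD_IN_PARSING on bad
// input. A Spark-free sketch of that contract; the object and helper names are hypothetical:
import scala.util.control.NonFatal

object TryParseSketch {
  // Evaluate the parse thunk; swallow recoverable failures and surface null instead.
  def tryOrNull[T >: Null](parse: => T): T =
    try parse catch { case NonFatal(_) => null }

  def main(args: Array[String]): Unit = {
    assert(tryOrNull(java.time.LocalDate.parse("2024-06-01")) != null) // well-formed input parses
    assert(tryOrNull(java.time.LocalDate.parse("[")) == null)          // malformed input yields null
  }
}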
check(Array(shortStrHeader(2), 'x'), emptyMetadata) // Long string length too short (requires 4 bytes). @@ -239,6 +243,13 @@ class VariantExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { check(expectedResult4, smallObject, smallMetadata) } + test("is_variant_null invalid input") { + checkErrorInExpression[SparkRuntimeException]( + IsVariantNull(Literal(new VariantVal(Array(), Array(1, 2, 3)))), + "MALFORMED_VARIANT" + ) + } + private def parseJson(input: String): VariantVal = VariantExpressionEvalUtils.parseJson(UTF8String.fromString(input)) @@ -800,6 +811,15 @@ class VariantExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { "Hello") } + test("SPARK-48150: ParseJson expression nullability") { + assert(!ParseJson(Literal("["), failOnError = true).replacement.nullable) + assert(ParseJson(Literal("["), failOnError = false).replacement.nullable) + checkEvaluation( + ParseJson(Literal("["), failOnError = false).replacement, + null + ) + } + test("cast to variant") { def check[T : TypeTag](input: T, expectedJson: String): Unit = { val cast = Cast(Literal.create(input), VariantType, evalMode = EvalMode.ANSI) @@ -807,9 +827,27 @@ class VariantExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { } check(null.asInstanceOf[String], null) + // The following tests cover all allowed scalar types. for (input <- Seq[Any](false, true, 0.toByte, 1.toShort, 2, 3L, 4.0F, 5.0D)) { check(input, input.toString) } + for (precision <- Seq(9, 18, 38)) { + val input = BigDecimal("9" * precision) + check(Literal.create(input, DecimalType(precision, 0)), input.toString) + } + check("", "\"\"") + check("x" * 128, "\"" + ("x" * 128) + "\"") + check(Array[Byte](1, 2, 3), "\"AQID\"") + check(Literal(0, DateType), "\"1970-01-01\"") + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "UTC") { + check(Literal(0L, TimestampType), "\"1970-01-01 00:00:00+00:00\"") + check(Literal(0L, TimestampNTZType), "\"1970-01-01 00:00:00\"") + } + withSQLConf(SQLConf.SESSION_LOCAL_TIMEZONE.key -> "America/Los_Angeles") { + check(Literal(0L, TimestampType), "\"1969-12-31 16:00:00-08:00\"") + check(Literal(0L, TimestampNTZType), "\"1970-01-01 00:00:00\"") + } + check(Array(null, "a", "b", "c"), """[null,"a","b","c"]""") check(Map("z" -> 1, "y" -> 2, "x" -> 3), """{"x":3,"y":2,"z":1}""") check(Array(parseJson("""{"a": 1,"b": [1, 2, 3]}"""), @@ -823,4 +861,50 @@ class VariantExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { StructType.fromDDL("c ARRAY,b MAP,a STRUCT")) check(struct, """{"a":{"i":0},"b":{"a":"123","b":"true","c":"f"},"c":["123","true","f"]}""") } + + test("schema_of_variant - schema merge") { + val nul = Literal(null, StringType) + val boolean = Literal.default(BooleanType) + val long = Literal.default(LongType) + val string = Literal.default(StringType) + val double = Literal.default(DoubleType) + val date = Literal.default(DateType) + val timestamp = Literal.default(TimestampType) + val timestampNtz = Literal.default(TimestampNTZType) + val float = Literal.default(FloatType) + val binary = Literal.default(BinaryType) + val decimal = Literal(Decimal("123.456"), DecimalType(6, 3)) + val array1 = Literal(Array(0L)) + val array2 = Literal(Array(0.0)) + val struct1 = Literal.default(StructType.fromDDL("a string")) + val struct2 = Literal.default(StructType.fromDDL("a boolean, b bigint")) + val inputs = Seq(nul, boolean, long, string, double, date, timestamp, timestampNtz, float, + binary, decimal, array1, array2, struct1, struct2) + + val results = 
mutable.HashMap.empty[(Literal, Literal), String] + for (i <- inputs) { + val inputType = if (i.value == null) "VOID" else i.dataType.sql + results.put((nul, i), inputType) + results.put((i, i), inputType) + } + results.put((long, double), "DOUBLE") + results.put((long, float), "FLOAT") + results.put((long, decimal), "DECIMAL(23,3)") + results.put((double, float), "DOUBLE") + results.put((double, decimal), "DOUBLE") + results.put((date, timestamp), "TIMESTAMP") + results.put((date, timestampNtz), "TIMESTAMP_NTZ") + results.put((timestamp, timestampNtz), "TIMESTAMP") + results.put((float, decimal), "DOUBLE") + results.put((array1, array2), "ARRAY") + results.put((struct1, struct2), "STRUCT") + + for (i1 <- inputs) { + for (i2 <- inputs) { + val expected = results.getOrElse((i1, i2), results.getOrElse((i2, i1), "VARIANT")) + val array = CreateArray(Seq(Cast(i1, VariantType), Cast(i2, VariantType))) + checkEvaluation(SchemaOfVariant(Cast(array, VariantType)).replacement, s"ARRAY<$expected>") + } + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/xml/UDFXPathUtilSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/xml/UDFXPathUtilSuite.scala index a8dc2b20f56d8..8351e94c0c360 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/xml/UDFXPathUtilSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/xml/UDFXPathUtilSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions.xml +import java.nio.charset.StandardCharsets import javax.xml.xpath.XPathConstants.STRING import org.w3c.dom.Node @@ -85,7 +86,7 @@ class UDFXPathUtilSuite extends SparkFunSuite { tempFile.deleteOnExit() val fname = tempFile.getAbsolutePath - FileUtils.writeStringToFile(tempFile, secretValue) + FileUtils.writeStringToFile(tempFile, secretValue, StandardCharsets.UTF_8) val xml = s""" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala index d2368908d0386..2bbdb4e689193 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala @@ -36,6 +36,8 @@ class BinaryComparisonSimplificationSuite extends PlanTest { EliminateSubqueryAliases) :: Batch("Infer Filters", Once, InferFiltersFromConstraints) :: + Batch("Compute current time", Once, + ComputeCurrentTime) :: Batch("Constant Folding", FixedPoint(50), NullPropagation, ConstantFolding, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala index c5f506d4d6832..e83f231c188e7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala @@ -20,11 +20,12 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, Rand, UpdateFields} +import 
org.apache.spark.sql.catalyst.expressions.{Alias, CreateArray, Expression, GetArrayItem, PythonUDF, Rand, UpdateFields} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.types.MetadataBuilder +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{ArrayType, IntegerType, MetadataBuilder} class CollapseProjectSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { @@ -266,4 +267,35 @@ class CollapseProjectSuite extends PlanTest { val expected = relation.select(($"a" + $"b").as("C")).analyze comparePlans(optimized, expected) } + + test("ES-1102888: collapse project duplicating aggregate expressions in UDF") { + withSQLConf(SQLConf.AVOID_COLLAPSE_UDF_WITH_EXPENSIVE_EXPR.key -> "true") { + val pythonUdf = (e: Expression) => { + PythonUDF("udf", null, ArrayType(IntegerType), Seq(e), 0, udfDeterministic = true) + } + + val query = testRelation + .groupBy($"a")(collectList($"b").as("l1")) + .select(pythonUdf($"l1").as("l2")) + .select(CreateArray(Seq( + GetArrayItem($"l2", 0), + GetArrayItem($"l2", 1), + GetArrayItem($"l2", 2), + GetArrayItem($"l2", 3) + ))) + .analyze + + val optimized = Optimize.execute(query) + val expected = testRelation + .groupBy($"a")(pythonUdf(collectList($"b")).as("l2")) + .select(CreateArray(Seq( + GetArrayItem($"l2", 0), + GetArrayItem($"l2", 1), + GetArrayItem($"l2", 2), + GetArrayItem($"l2", 3) + ))) + .analyze + comparePlans(optimized, expected) + } + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index 03e65412d166b..5027222be6b80 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -219,6 +219,17 @@ class FilterPushdownSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + test("Can't push down nondeterministic filter through aggregate") { + val originalQuery = testRelation + .groupBy($"a")($"a", count($"b") as "c") + .where(Rand(10) > $"a") + .analyze + + val optimized = Optimize.execute(originalQuery) + + comparePlans(optimized, originalQuery) + } + test("filters: combines filters") { val originalQuery = testRelation .select($"a") @@ -1483,14 +1494,16 @@ class FilterPushdownSuite extends PlanTest { test("SPARK-46707: push down predicate with sequence (without step) through aggregates") { val x = testRelation.subquery("x") - // do not push down when sequence has step param + // Always push down sequence as it's deterministic val queryWithStep = x.groupBy($"x.a", $"x.b")($"x.a", $"x.b") .where(IsNotNull(Sequence($"x.a", $"x.b", Some(Literal(1))))) .analyze val optimizedQueryWithStep = Optimize.execute(queryWithStep) - comparePlans(optimizedQueryWithStep, queryWithStep) + val correctAnswerWithStep = x.where(IsNotNull(Sequence($"x.a", $"x.b", Some(Literal(1))))) + .groupBy($"x.a", $"x.b")($"x.a", $"x.b") + .analyze + comparePlans(optimizedQueryWithStep, correctAnswerWithStep) - // push down when sequence does not have step param val queryWithoutStep = x.groupBy($"x.a", $"x.b")($"x.a", $"x.b") .where(IsNotNull(Sequence($"x.a", $"x.b", None))) .analyze diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala index 767ef38ea7f7d..5866f29e4e864 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FoldablePropagationSuite.scala @@ -214,4 +214,15 @@ class FoldablePropagationSuite extends PlanTest { val expected = testRelation.select(foldableAttr, $"a").rebalance(foldableAttr, $"a").analyze comparePlans(optimized, expected) } + + test("SPARK-48419: Foldable propagation replace foldable column should use origin column name") { + val query = testRelation + .select($"a".as("x"), "str".as("Y"), $"b".as("z")) + .select($"x", $"y", $"z") + val optimized = Optimize.execute(query.analyze) + val correctAnswer = testRelation + .select($"a".as("x"), "str".as("Y"), $"b".as("z")) + .select($"x", "str".as("y"), $"z").analyze + comparePlans(optimized, correctAnswer) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTESuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTESuite.scala new file mode 100644 index 0000000000000..9d775a5335c67 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTESuite.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.analysis.TestRelation +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, CTERelationDef, CTERelationRef, LogicalPlan, OneRowRelation, WithCTE} +import org.apache.spark.sql.catalyst.rules.RuleExecutor + +class InlineCTESuite extends PlanTest { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("inline CTE", FixedPoint(100), InlineCTE()) :: Nil + } + + test("SPARK-48307: not-inlined CTE relation in command") { + val cteDef = CTERelationDef(OneRowRelation().select(rand(0).as("a"))) + val cteRef = CTERelationRef(cteDef.id, cteDef.resolved, cteDef.output, cteDef.isStreaming) + val plan = AppendData.byName( + TestRelation(Seq($"a".double)), + WithCTE(cteRef.except(cteRef, isAll = true), Seq(cteDef)) + ).analyze + comparePlans(Optimize.execute(plan), plan) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala index 6acce44922f69..61fb68cfba863 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/JoinSelectionHelperSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.AttributeMap import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} -import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, HintInfo, JoinHint, NO_BROADCAST_HASH, SHUFFLE_HASH} +import org.apache.spark.sql.catalyst.plans.logical.{BROADCAST, HintInfo, Join, JoinHint, NO_BROADCAST_HASH, SHUFFLE_HASH} import org.apache.spark.sql.catalyst.statsEstimation.StatsTestPlan import org.apache.spark.sql.internal.SQLConf @@ -38,16 +38,15 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { size = Some(1000), attributeStats = AttributeMap(Seq())) + private val join = Join(left, right, Inner, None, JoinHint(None, None)) + private val hintBroadcast = Some(HintInfo(Some(BROADCAST))) private val hintNotToBroadcast = Some(HintInfo(Some(NO_BROADCAST_HASH))) private val hintShuffleHash = Some(HintInfo(Some(SHUFFLE_HASH))) test("getBroadcastBuildSide (hintOnly = true) return BuildLeft with only a left hint") { val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(hintBroadcast, None), + join.copy(hint = JoinHint(hintBroadcast, None)), hintOnly = true, SQLConf.get ) @@ -56,10 +55,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getBroadcastBuildSide (hintOnly = true) return BuildRight with only a right hint") { val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(None, hintBroadcast), + join.copy(hint = JoinHint(None, hintBroadcast)), hintOnly = true, SQLConf.get ) @@ -68,10 +64,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getBroadcastBuildSide (hintOnly = true) return smaller side with both having hints") { val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(hintBroadcast, hintBroadcast), + join.copy(hint = JoinHint(hintBroadcast, hintBroadcast)), 
hintOnly = true, SQLConf.get ) @@ -80,10 +73,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getBroadcastBuildSide (hintOnly = true) return None when no side has a hint") { val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(None, None), + join.copy(hint = JoinHint(None, None)), hintOnly = true, SQLConf.get ) @@ -92,10 +82,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getBroadcastBuildSide (hintOnly = false) return BuildRight when right is broadcastable") { val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(None, None), + join.copy(hint = JoinHint(None, None)), hintOnly = false, SQLConf.get ) @@ -105,10 +92,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getBroadcastBuildSide (hintOnly = false) return None when right has no broadcast hint") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(None, hintNotToBroadcast ), + join.copy(hint = JoinHint(None, hintNotToBroadcast)), hintOnly = false, SQLConf.get ) @@ -118,10 +102,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getShuffleHashJoinBuildSide (hintOnly = true) return BuildLeft with only a left hint") { val broadcastSide = getShuffleHashJoinBuildSide( - left, - right, - Inner, - JoinHint(hintShuffleHash, None), + join.copy(hint = JoinHint(hintShuffleHash, None)), hintOnly = true, SQLConf.get ) @@ -130,10 +111,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getShuffleHashJoinBuildSide (hintOnly = true) return BuildRight with only a right hint") { val broadcastSide = getShuffleHashJoinBuildSide( - left, - right, - Inner, - JoinHint(None, hintShuffleHash), + join.copy(hint = JoinHint(None, hintShuffleHash)), hintOnly = true, SQLConf.get ) @@ -142,10 +120,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getShuffleHashJoinBuildSide (hintOnly = true) return smaller side when both have hints") { val broadcastSide = getShuffleHashJoinBuildSide( - left, - right, - Inner, - JoinHint(hintShuffleHash, hintShuffleHash), + join.copy(hint = JoinHint(hintShuffleHash, hintShuffleHash)), hintOnly = true, SQLConf.get ) @@ -154,10 +129,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getShuffleHashJoinBuildSide (hintOnly = true) return None when no side has a hint") { val broadcastSide = getShuffleHashJoinBuildSide( - left, - right, - Inner, - JoinHint(None, None), + join.copy(hint = JoinHint(None, None)), hintOnly = true, SQLConf.get ) @@ -166,10 +138,7 @@ class JoinSelectionHelperSuite extends PlanTest with JoinSelectionHelper { test("getShuffleHashJoinBuildSide (hintOnly = false) return BuildRight when right is smaller") { val broadcastSide = getBroadcastBuildSide( - left, - right, - Inner, - JoinHint(None, None), + join.copy(hint = JoinHint(None, None)), hintOnly = false, SQLConf.get ) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala index b640344658d40..b3444b0b43077 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala @@ -38,7 +38,7 @@ class MergeScalarSubqueriesSuite extends PlanTest { val testRelation = LocalRelation(Symbol("a").int, Symbol("b").int, Symbol("c").string) val testRelationWithNonBinaryCollation = LocalRelation( Symbol("utf8_binary").string("UTF8_BINARY"), - Symbol("utf8_binary_lcase").string("UTF8_BINARY_LCASE")) + Symbol("utf8_lcase").string("UTF8_LCASE")) private def definitionNode(plan: LogicalPlan, cteIndex: Int) = { CTERelationDef(plan, cteIndex, underSubquery = true) @@ -204,7 +204,7 @@ class MergeScalarSubqueriesSuite extends PlanTest { val subquery1 = ScalarSubquery(testRelationWithNonBinaryCollation.groupBy( Symbol("utf8_binary"))(max(Symbol("utf8_binary")).as("max_utf8_binary"))) val subquery2 = ScalarSubquery(testRelationWithNonBinaryCollation.groupBy( - Symbol("utf8_binary_lcase"))(max(Symbol("utf8_binary_lcase")).as("utf8_binary_lcase"))) + Symbol("utf8_lcase"))(max(Symbol("utf8_lcase")).as("utf8_lcase"))) val originalQuery = testRelationWithNonBinaryCollation.select(subquery1, subquery2) Optimize.execute(originalQuery.analyze).collect { case WithCTE(_, _) => fail("Should not have merged") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala index bd0cc6216f7a2..38cd25cf491a1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala @@ -863,6 +863,27 @@ class NestedColumnAliasingSuite extends SchemaPruningTest { // The plan is expected to be unchanged. comparePlans(plan, RemoveNoopOperators.apply(optimized.get)) } + + test("SPARK-48428: Do not pushdown when attr is used in expression with mutliple references") { + val query = contact + .limit(5) + .select( + GetStructField(GetStructField(CreateStruct(Seq($"id", $"employer")), 1), 0), + $"employer.id") + .analyze + + val optimized = Optimize.execute(query) + + val expected = contact + .select($"id", $"employer") + .limit(5) + .select( + GetStructField(GetStructField(CreateStruct(Seq($"id", $"employer")), 1), 0), + $"employer.id") + .analyze + + comparePlans(optimized, expected) + } } object NestedColumnAliasingSuite { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJoinConditionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJoinConditionSuite.scala new file mode 100644 index 0000000000000..e7f090ec4d0dc --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/OptimizeJoinConditionSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ + +class OptimizeJoinConditionSuite extends PlanTest { + + private object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("Optimize join condition", FixedPoint(1), + OptimizeJoinCondition) :: Nil + } + + val testRelation = LocalRelation($"a".int, $"b".int) + val testRelation1 = LocalRelation($"c".int, $"d".int) + + test("Replace equivalent expression to <=> in join condition") { + val x = testRelation.subquery("x") + val y = testRelation1.subquery("y") + val joinTypes = Seq(Inner, FullOuter, LeftOuter, RightOuter, LeftSemi, LeftSemi, Cross) + joinTypes.foreach(joinType => { + val originalQuery = + x.join(y, joinType, Option($"a" === $"c" || ($"a".isNull && $"c".isNull))) + val correctAnswer = + x.join(y, joinType, Option($"a" <=> $"c")) + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + }) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala index 29bc46eaa3ebe..cbd24bd7bb299 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala @@ -158,7 +158,8 @@ class PullupCorrelatedPredicatesSuite extends PlanTest { cond, Seq(DeleteAction(None)), Seq(InsertAction(None, Seq(Assignment($"a", $"c"), Assignment($"b", $"d")))), - Seq(DeleteAction(None))) + Seq(DeleteAction(None)), + withSchemaEvolution = false) val analyzedMergePlan = mergePlan.analyze assert(analyzedMergePlan.resolved) @@ -166,7 +167,7 @@ class PullupCorrelatedPredicatesSuite extends PlanTest { assert(optimized.resolved) optimized match { - case MergeIntoTable(_, _, s: InSubquery, _, _, _) => + case MergeIntoTable(_, _, s: InSubquery, _, _, _, _) => val outerRefs = SubExprUtils.getOuterReferences(s.query.plan) assert(outerRefs.isEmpty, "should be no outer refs") case other => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index 7d037799fba76..a50842a26b2ce 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -500,7 +500,8 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { mergeCondition = expr, matchedActions, notMatchedActions, - notMatchedBySourceActions) + notMatchedBySourceActions, + withSchemaEvolution = false) } val originalPlan = func(testRelation, anotherTestRelation, originalCond).analyze val optimizedPlan = Optimize.execute(originalPlan) @@ -522,7 +523,8 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { mergeCondition = expr, matchedActions, notMatchedActions, - Seq.empty) + notMatchedBySourceActions = Seq.empty, + 
withSchemaEvolution = false) } val originalPlanWithStar = mergePlanWithStar(originalCond).analyze val optimizedPlanWithStar = Optimize.execute(originalPlanWithStar) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpressionSuite.scala index a386e9bf4efe6..0aeca961aa513 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RewriteWithExpressionSuite.scala @@ -18,90 +18,96 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.SparkException +import org.apache.spark.sql.catalyst.analysis.TempResolvedColumn import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Coalesce, CommonExpressionDef, CommonExpressionRef, With} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.types.IntegerType class RewriteWithExpressionSuite extends PlanTest { object Optimizer extends RuleExecutor[LogicalPlan] { - val batches = Batch("Rewrite With expression", Once, RewriteWithExpression) :: Nil + val batches = Batch("Rewrite With expression", Once, + PullOutGroupingExpressions, + RewriteWithExpression) :: Nil } private val testRelation = LocalRelation($"a".int, $"b".int) private val testRelation2 = LocalRelation($"x".int, $"y".int) + private def normalizeCommonExpressionIds(plan: LogicalPlan): LogicalPlan = { + plan.transformAllExpressions { + case a: Alias if a.name.startsWith("_common_expr") => + a.withName("_common_expr_0") + case a: AttributeReference if a.name.startsWith("_common_expr") => + a.withName("_common_expr_0") + } + } + + override def comparePlans( + plan1: LogicalPlan, plan2: LogicalPlan, checkAnalysis: Boolean = true): Unit = { + super.comparePlans(normalizeCommonExpressionIds(plan1), normalizeCommonExpressionIds(plan2)) + } + test("simple common expression") { val a = testRelation.output.head - val commonExprDef = CommonExpressionDef(a) - val ref = new CommonExpressionRef(commonExprDef) - val plan = testRelation.select(With(ref + ref, Seq(commonExprDef)).as("col")) + val expr = With(a) { case Seq(ref) => + ref + ref + } + val plan = testRelation.select(expr.as("col")) comparePlans(Optimizer.execute(plan), testRelation.select((a + a).as("col"))) } test("non-cheap common expression") { val a = testRelation.output.head - val commonExprDef = CommonExpressionDef(a + a) - val ref = new CommonExpressionRef(commonExprDef) - val plan = testRelation.select(With(ref * ref, Seq(commonExprDef)).as("col")) - val commonExprName = "_common_expr_0" + val expr = With(a + a) { case Seq(ref) => + ref * ref + } + val plan = testRelation.select(expr.as("col")) comparePlans( Optimizer.execute(plan), testRelation - .select((testRelation.output :+ (a + a).as(commonExprName)): _*) - .select(($"$commonExprName" * $"$commonExprName").as("col")) + .select((testRelation.output :+ (a + a).as("_common_expr_0")): _*) + .select(($"_common_expr_0" * $"_common_expr_0").as("col")) .analyze ) } test("nested WITH expression in the definition expression") { - val a = testRelation.output.head - val 
commonExprDef = CommonExpressionDef(a + a) - val ref = new CommonExpressionRef(commonExprDef) - val innerExpr = With(ref + ref, Seq(commonExprDef)) - val innerCommonExprName = "_common_expr_0" - - val b = testRelation.output.last - val outerCommonExprDef = CommonExpressionDef(innerExpr + b) - val outerRef = new CommonExpressionRef(outerCommonExprDef) - val outerExpr = With(outerRef * outerRef, Seq(outerCommonExprDef)) - val outerCommonExprName = "_common_expr_0" + val Seq(a, b) = testRelation.output + val innerExpr = With(a + a) { case Seq(ref) => + ref + ref + } + val outerExpr = With(innerExpr + b) { case Seq(ref) => + ref * ref + } val plan = testRelation.select(outerExpr.as("col")) - val rewrittenOuterExpr = ($"$innerCommonExprName" + $"$innerCommonExprName" + b) - .as(outerCommonExprName) - val outerExprAttr = AttributeReference(outerCommonExprName, IntegerType)( - exprId = rewrittenOuterExpr.exprId) comparePlans( Optimizer.execute(plan), testRelation - .select((testRelation.output :+ (a + a).as(innerCommonExprName)): _*) - .select((testRelation.output :+ $"$innerCommonExprName" :+ rewrittenOuterExpr): _*) - .select((outerExprAttr * outerExprAttr).as("col")) + .select((testRelation.output :+ (a + a).as("_common_expr_0")): _*) + .select((testRelation.output ++ Seq($"_common_expr_0", + ($"_common_expr_0" + $"_common_expr_0" + b).as("_common_expr_1"))): _*) + .select(($"_common_expr_1" * $"_common_expr_1").as("col")) .analyze ) } test("nested WITH expression in the main expression") { - val a = testRelation.output.head - val commonExprDef = CommonExpressionDef(a + a) - val ref = new CommonExpressionRef(commonExprDef) - val innerExpr = With(ref + ref, Seq(commonExprDef)) - val innerCommonExprName = "_common_expr_0" - - val b = testRelation.output.last - val outerCommonExprDef = CommonExpressionDef(b + b) - val outerRef = new CommonExpressionRef(outerCommonExprDef) - val outerExpr = With(outerRef * outerRef + innerExpr, Seq(outerCommonExprDef)) - val outerCommonExprName = "_common_expr_0" + val Seq(a, b) = testRelation.output + val innerExpr = With(a + a) { case Seq(ref) => + ref + ref + } + val outerExpr = With(b + b) { case Seq(ref) => + ref * ref + innerExpr + } val plan = testRelation.select(outerExpr.as("col")) - val rewrittenInnerExpr = (a + a).as(innerCommonExprName) - val rewrittenOuterExpr = (b + b).as(outerCommonExprName) + val rewrittenInnerExpr = (a + a).as("_common_expr_0") + val rewrittenOuterExpr = (b + b).as("_common_expr_1") val finalExpr = rewrittenOuterExpr.toAttribute * rewrittenOuterExpr.toAttribute + (rewrittenInnerExpr.toAttribute + rewrittenInnerExpr.toAttribute) comparePlans( @@ -115,13 +121,12 @@ class RewriteWithExpressionSuite extends PlanTest { } test("correlated nested WITH expression is not supported") { - val b = testRelation.output.last - val outerCommonExprDef = CommonExpressionDef(b + b) + val Seq(a, b) = testRelation.output + val outerCommonExprDef = CommonExpressionDef(b + b, CommonExpressionId(0)) val outerRef = new CommonExpressionRef(outerCommonExprDef) - val a = testRelation.output.head // The inner expression definition references the outer expression - val commonExprDef1 = CommonExpressionDef(a + a + outerRef) + val commonExprDef1 = CommonExpressionDef(a + a + outerRef, CommonExpressionId(1)) val ref1 = new CommonExpressionRef(commonExprDef1) val innerExpr1 = With(ref1 + ref1, Seq(commonExprDef1)) @@ -139,15 +144,15 @@ class RewriteWithExpressionSuite extends PlanTest { test("WITH expression in filter") { val a = testRelation.output.head - val 
commonExprDef = CommonExpressionDef(a + a) - val ref = new CommonExpressionRef(commonExprDef) - val plan = testRelation.where(With(ref < 10 && ref > 0, Seq(commonExprDef))) - val commonExprName = "_common_expr_0" + val condition = With(a + a) { case Seq(ref) => + ref < 10 && ref > 0 + } + val plan = testRelation.where(condition) comparePlans( Optimizer.execute(plan), testRelation - .select((testRelation.output :+ (a + a).as(commonExprName)): _*) - .where($"$commonExprName" < 10 && $"$commonExprName" > 0) + .select((testRelation.output :+ (a + a).as("_common_expr_0")): _*) + .where($"_common_expr_0" < 10 && $"_common_expr_0" > 0) .select(testRelation.output: _*) .analyze ) @@ -155,16 +160,15 @@ class RewriteWithExpressionSuite extends PlanTest { test("WITH expression in join condition: only reference left child") { val a = testRelation.output.head - val commonExprDef = CommonExpressionDef(a + a) - val ref = new CommonExpressionRef(commonExprDef) - val condition = With(ref < 10 && ref > 0, Seq(commonExprDef)) + val condition = With(a + a) { case Seq(ref) => + ref < 10 && ref > 0 + } val plan = testRelation.join(testRelation2, condition = Some(condition)) - val commonExprName = "_common_expr_0" comparePlans( Optimizer.execute(plan), testRelation - .select((testRelation.output :+ (a + a).as(commonExprName)): _*) - .join(testRelation2, condition = Some($"$commonExprName" < 10 && $"$commonExprName" > 0)) + .select((testRelation.output :+ (a + a).as("_common_expr_0")): _*) + .join(testRelation2, condition = Some($"_common_expr_0" < 10 && $"_common_expr_0" > 0)) .select((testRelation.output ++ testRelation2.output): _*) .analyze ) @@ -172,17 +176,16 @@ class RewriteWithExpressionSuite extends PlanTest { test("WITH expression in join condition: only reference right child") { val x = testRelation2.output.head - val commonExprDef = CommonExpressionDef(x + x) - val ref = new CommonExpressionRef(commonExprDef) - val condition = With(ref < 10 && ref > 0, Seq(commonExprDef)) + val condition = With(x + x) { case Seq(ref) => + ref < 10 && ref > 0 + } val plan = testRelation.join(testRelation2, condition = Some(condition)) - val commonExprName = "_common_expr_0" comparePlans( Optimizer.execute(plan), testRelation .join( - testRelation2.select((testRelation2.output :+ (x + x).as(commonExprName)): _*), - condition = Some($"$commonExprName" < 10 && $"$commonExprName" > 0) + testRelation2.select((testRelation2.output :+ (x + x).as("_common_expr_0")): _*), + condition = Some($"_common_expr_0" < 10 && $"_common_expr_0" > 0) ) .select((testRelation.output ++ testRelation2.output): _*) .analyze @@ -192,9 +195,9 @@ class RewriteWithExpressionSuite extends PlanTest { test("WITH expression in join condition: reference both children") { val a = testRelation.output.head val x = testRelation2.output.head - val commonExprDef = CommonExpressionDef(a + x) - val ref = new CommonExpressionRef(commonExprDef) - val condition = With(ref < 10 && ref > 0, Seq(commonExprDef)) + val condition = With(a + x) { case Seq(ref) => + ref < 10 && ref > 0 + } val plan = testRelation.join(testRelation2, condition = Some(condition)) comparePlans( Optimizer.execute(plan), @@ -209,24 +212,244 @@ class RewriteWithExpressionSuite extends PlanTest { test("WITH expression inside conditional expression") { val a = testRelation.output.head - val commonExprDef = CommonExpressionDef(a + a) - val ref = new CommonExpressionRef(commonExprDef) - val expr = Coalesce(Seq(a, With(ref * ref, Seq(commonExprDef)))) + val expr = Coalesce(Seq(a, With(a + a) { case 
Seq(ref) => + ref * ref + })) val inlinedExpr = Coalesce(Seq(a, (a + a) * (a + a))) val plan = testRelation.select(expr.as("col")) // With in the conditional branches is always inlined. comparePlans(Optimizer.execute(plan), testRelation.select(inlinedExpr.as("col"))) - val expr2 = Coalesce(Seq(With(ref * ref, Seq(commonExprDef)), a)) + val expr2 = Coalesce(Seq(With(a + a) { case Seq(ref) => + ref * ref + }, a)) val plan2 = testRelation.select(expr2.as("col")) - val commonExprName = "_common_expr_0" // With in the always-evaluated branches can still be optimized. comparePlans( Optimizer.execute(plan2), testRelation - .select((testRelation.output :+ (a + a).as(commonExprName)): _*) - .select(Coalesce(Seq(($"$commonExprName" * $"$commonExprName"), a)).as("col")) + .select((testRelation.output :+ (a + a).as("_common_expr_0")): _*) + .select(Coalesce(Seq(($"_common_expr_0" * $"_common_expr_0"), a)).as("col")) + .analyze + ) + } + + test("WITH expression in grouping exprs") { + val a = testRelation.output.head + val expr1 = With(a + 1) { case Seq(ref) => + ref * ref + } + val expr2 = With(a + 1) { case Seq(ref) => + ref * ref + } + val expr3 = With(a + 1) { case Seq(ref) => + ref * ref + } + val plan = testRelation.groupBy(expr1)( + (expr2 + 2).as("col1"), + count(expr3 - 3).as("col2") + ) + comparePlans( + Optimizer.execute(plan), + testRelation + .select(testRelation.output :+ (a + 1).as("_common_expr_0"): _*) + .select(testRelation.output :+ + ($"_common_expr_0" * $"_common_expr_0").as("_groupingexpression"): _*) + .select(testRelation.output ++ Seq($"_groupingexpression", + (a + 1).as("_common_expr_1")): _*) + .groupBy($"_groupingexpression")( + $"_groupingexpression", + count($"_common_expr_1" * $"_common_expr_1" - 3).as("_aggregateexpression") + ) + .select(($"_groupingexpression" + 2).as("col1"), $"_aggregateexpression".as("col2")) + .analyze + ) + // Running CollapseProject after the rule cleans up the unnecessary projections. 
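// Aside, not part of the patch: the rewritten tests in this suite build common expressions via
// the With(child) { case Seq(ref) => body } helper instead of wiring CommonExpressionDef and
// CommonExpressionRef by hand; the helper hands one reference per definition to the closure.
// A Spark-free sketch of that shape with hypothetical names:
object WithBuilderSketch {
  final case class Def(id: Int, child: String)
  final case class Ref(id: Int)

  // One definition, one reference passed to the caller-supplied body.
  def withCommon[A](child: String)(body: Seq[Ref] => A): (Seq[Def], A) = {
    val defs = Seq(Def(0, child))
    (defs, body(defs.map(d => Ref(d.id))))
  }

  def main(args: Array[String]): Unit = {
    val (defs, expr) = withCommon("a + a") { case Seq(ref) => s"$ref * $ref" }
    assert(defs.size == 1 && expr == "Ref(0) * Ref(0)")
  }
}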
+ comparePlans( + CollapseProject(Optimizer.execute(plan)), + testRelation + .select(testRelation.output :+ (a + 1).as("_common_expr_0"): _*) + .select(testRelation.output ++ Seq( + ($"_common_expr_0" * $"_common_expr_0").as("_groupingexpression"), + (a + 1).as("_common_expr_1")): _*) + .groupBy($"_groupingexpression")( + ($"_groupingexpression" + 2).as("col1"), + count($"_common_expr_1" * $"_common_expr_1" - 3).as("col2") + ) + .analyze + ) + } + + test("WITH expression in aggregate exprs") { + val Seq(a, b) = testRelation.output + val expr1 = With(a + 1) { case Seq(ref) => + ref * ref + } + val expr2 = With(b + 2) { case Seq(ref) => + ref * ref + } + val plan = testRelation.groupBy(a)( + (a + 3).as("col1"), + expr1.as("col2"), + max(expr2).as("col3") + ) + comparePlans( + Optimizer.execute(plan), + testRelation + .select(testRelation.output :+ (b + 2).as("_common_expr_0"): _*) + .groupBy(a)(a, max($"_common_expr_0" * $"_common_expr_0").as("_aggregateexpression")) + .select(a, $"_aggregateexpression", (a + 1).as("_common_expr_1")) + .select( + (a + 3).as("col1"), + ($"_common_expr_1" * $"_common_expr_1").as("col2"), + $"_aggregateexpression".as("col3") + ) + .analyze + ) + } + + test("WITH common expression is aggregate function") { + val a = testRelation.output.head + val expr = With(count(a - 1)) { case Seq(ref) => + ref * ref + } + val plan = testRelation.groupBy(a)( + (a - 1).as("col1"), + expr.as("col2") + ) + comparePlans( + Optimizer.execute(plan), + testRelation + .groupBy(a)(a, count(a - 1).as("_aggregateexpression")) + .select( + (a - 1).as("col1"), + ($"_aggregateexpression" * $"_aggregateexpression").as("col2") + ) + .analyze + ) + } + + test("aggregate functions in child of WITH expression with ref is not supported") { + val a = testRelation.output.head + intercept[java.lang.AssertionError] { + val expr = With(a - 1) { case Seq(ref) => + sum(ref * ref) + } + val plan = testRelation.groupBy(a)( + (a - 1).as("col1"), + expr.as("col2") + ) + Optimizer.execute(plan) + } + } + + test("WITH expression nested in aggregate function") { + val a = testRelation.output.head + val expr = With(a + 1) { case Seq(ref) => + ref * ref + } + val nestedExpr = With(a - 1) { case Seq(ref) => + ref * max(expr) + ref + } + val plan = testRelation.groupBy(a)(nestedExpr.as("col")).analyze + comparePlans( + Optimizer.execute(plan), + testRelation + .select(testRelation.output :+ (a + 1).as("_common_expr_0"): _*) + .groupBy(a)(a, max($"_common_expr_0" * $"_common_expr_0").as("_aggregateexpression")) + .select($"a", $"_aggregateexpression", (a - 1).as("_common_expr_1")) + .select(($"_common_expr_1" * $"_aggregateexpression" + $"_common_expr_1").as("col")) + .analyze + ) + } + + test("WITH expression in window exprs") { + val Seq(a, b) = testRelation.output + val expr1 = With(a + 1) { case Seq(ref) => + ref * ref + } + val expr2 = With(b + 2) { case Seq(ref) => + ref * ref + } + val frame = SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing) + val plan = testRelation + .window( + Seq(windowExpr(count(a), windowSpec(Seq(expr2), Nil, frame)).as("col2")), + Seq(expr2), + Nil + ) + .window( + Seq(windowExpr(sum(expr1), windowSpec(Seq(a), Nil, frame)).as("col3")), + Seq(a), + Nil + ) + .select((a - 1).as("col1"), $"col2", $"col3") + .analyze + comparePlans( + Optimizer.execute(plan), + testRelation + .select(a, b, (b + 2).as("_common_expr_0")) + .select(a, b, $"_common_expr_0", (b + 2).as("_common_expr_1")) + .window( + Seq(windowExpr(count(a), windowSpec(Seq($"_common_expr_0" * 
$"_common_expr_0"), Nil, + frame)).as("col2")), + Seq($"_common_expr_1" * $"_common_expr_1"), + Nil + ) + .select(a, b, $"col2") + .select(a, b, $"col2", (a + 1).as("_common_expr_2")) + .window( + Seq(windowExpr(sum($"_common_expr_2" * $"_common_expr_2"), + windowSpec(Seq(a), Nil, frame)).as("col3")), + Seq(a), + Nil + ) + .select(a, b, $"col2", $"col3") + .select((a - 1).as("col1"), $"col2", $"col3") .analyze ) } + + test("WITH common expression is window function") { + val a = testRelation.output.head + val frame = SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing) + val winExpr = windowExpr(sum(a), windowSpec(Seq(a), Nil, frame)) + val expr = With(winExpr) { + case Seq(ref) => ref * ref + } + val plan = testRelation.select(expr.as("col")).analyze + comparePlans( + Optimizer.execute(plan), + testRelation + .select(a) + .window(Seq(winExpr.as("_we0")), Seq(a), Nil) + .select(a, $"_we0", ($"_we0" * $"_we0").as("col")) + .select($"col") + .analyze + ) + } + + test("window functions in child of WITH expression with ref is not supported") { + val a = testRelation.output.head + intercept[java.lang.AssertionError] { + val expr = With(a - 1) { case Seq(ref) => + ref + windowExpr(sum(ref), windowSpec(Seq(a), Nil, UnspecifiedFrame)) + } + val plan = testRelation.window(Seq(expr.as("col")), Seq(a), Nil) + Optimizer.execute(plan) + } + } + + test("SPARK-48252: TempResolvedColumn in common expression") { + val a = testRelation.output.head + val tempResolved = TempResolvedColumn(a, Seq("a")) + val expr = With(tempResolved) { case Seq(ref) => + ref === 1 + } + val plan = testRelation.having($"b")(avg("a").as("a"))(expr).analyze + comparePlans( + Optimizer.execute(plan), + testRelation.groupBy($"b")(avg("a").as("a")).where($"a" === 1).analyze + ) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index b306ca3cd18a5..8612a6e9c50ff 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1075,19 +1075,11 @@ class DDLParserSuite extends AnalysisTest { ifExists = true)) } - // ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment); // ALTER TABLE table_name UNSET TBLPROPERTIES [IF EXISTS] ('comment', 'key'); test("alter table: alter table properties") { - val sql1_table = "ALTER TABLE table_name SET TBLPROPERTIES ('test' = 'test', " + - "'comment' = 'new_comment')" val sql2_table = "ALTER TABLE table_name UNSET TBLPROPERTIES ('comment', 'test')" val sql3_table = "ALTER TABLE table_name UNSET TBLPROPERTIES IF EXISTS ('comment', 'test')" - comparePlans( - parsePlan(sql1_table), - SetTableProperties( - UnresolvedTable(Seq("table_name"), "ALTER TABLE ... 
SET TBLPROPERTIES", true), - Map("test" -> "test", "comment" -> "new_comment"))) comparePlans( parsePlan(sql2_table), UnsetTableProperties( @@ -1875,7 +1867,8 @@ class DDLParserSuite extends AnalysisTest { Assignment(UnresolvedAttribute("target.col2"), UnresolvedAttribute("source.col2"))))), Seq(DeleteAction(Some(EqualTo(UnresolvedAttribute("target.col3"), Literal("delete")))), UpdateAction(Some(EqualTo(UnresolvedAttribute("target.col3"), Literal("update"))), - Seq(Assignment(UnresolvedAttribute("target.col3"), Literal("delete"))))))) + Seq(Assignment(UnresolvedAttribute("target.col3"), Literal("delete"))))), + withSchemaEvolution = false)) } test("merge into table: using subquery") { @@ -1906,7 +1899,8 @@ class DDLParserSuite extends AnalysisTest { Assignment(UnresolvedAttribute("target.col2"), UnresolvedAttribute("source.col2"))))), Seq(DeleteAction(Some(EqualTo(UnresolvedAttribute("target.col3"), Literal("delete")))), UpdateAction(Some(EqualTo(UnresolvedAttribute("target.col3"), Literal("update"))), - Seq(Assignment(UnresolvedAttribute("target.col3"), Literal("delete"))))))) + Seq(Assignment(UnresolvedAttribute("target.col3"), Literal("delete"))))), + withSchemaEvolution = false)) } test("merge into table: cte") { @@ -1939,7 +1933,8 @@ class DDLParserSuite extends AnalysisTest { Assignment(UnresolvedAttribute("target.col2"), UnresolvedAttribute("source.col2"))))), Seq(DeleteAction(Some(EqualTo(UnresolvedAttribute("target.col3"), Literal("delete")))), UpdateAction(Some(EqualTo(UnresolvedAttribute("target.col3"), Literal("update"))), - Seq(Assignment(UnresolvedAttribute("target.col3"), Literal("delete"))))))) + Seq(Assignment(UnresolvedAttribute("target.col3"), Literal("delete"))))), + withSchemaEvolution = false)) } test("merge into table: no additional condition") { @@ -1962,7 +1957,8 @@ class DDLParserSuite extends AnalysisTest { Seq(InsertAction(None, Seq(Assignment(UnresolvedAttribute("target.col1"), UnresolvedAttribute("source.col1")), Assignment(UnresolvedAttribute("target.col2"), UnresolvedAttribute("source.col2"))))), - Seq(DeleteAction(None)))) + Seq(DeleteAction(None)), + withSchemaEvolution = false)) } test("merge into table: star") { @@ -1983,7 +1979,8 @@ class DDLParserSuite extends AnalysisTest { Seq(DeleteAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("delete")))), UpdateStarAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("update"))))), Seq(InsertStarAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("insert"))))), - Seq.empty)) + Seq.empty, + withSchemaEvolution = false)) } test("merge into table: invalid star in not matched by source") { @@ -2024,7 +2021,8 @@ class DDLParserSuite extends AnalysisTest { Seq(Assignment(UnresolvedAttribute("target.col1"), Literal(1)), Assignment(UnresolvedAttribute("target.col2"), UnresolvedAttribute("source.col2")))), InsertStarAction(None)), - Seq.empty)) + Seq.empty, + withSchemaEvolution = false)) } test("merge into table: column aliases are not allowed") { @@ -2085,7 +2083,26 @@ class DDLParserSuite extends AnalysisTest { UpdateAction(Some(EqualTo(UnresolvedAttribute("target.col3"), Literal("update1"))), Seq(Assignment(UnresolvedAttribute("target.col3"), Literal(1)))), UpdateAction(Some(EqualTo(UnresolvedAttribute("target.col3"), Literal("update2"))), - Seq(Assignment(UnresolvedAttribute("target.col3"), Literal(2))))))) + Seq(Assignment(UnresolvedAttribute("target.col3"), Literal(2))))), + withSchemaEvolution = false)) + } + + test("merge into table: schema evolution") { + parseCompare( + 
""" + |MERGE WITH SCHEMA EVOLUTION INTO testcat1.ns1.ns2.tbl AS target + |USING testcat2.ns1.ns2.tbl AS source + |ON target.col1 = source.col1 + |WHEN NOT MATCHED BY SOURCE THEN DELETE + """.stripMargin, + MergeIntoTable( + SubqueryAlias("target", UnresolvedRelation(Seq("testcat1", "ns1", "ns2", "tbl"))), + SubqueryAlias("source", UnresolvedRelation(Seq("testcat2", "ns1", "ns2", "tbl"))), + EqualTo(UnresolvedAttribute("target.col1"), UnresolvedAttribute("source.col1")), + matchedActions = Seq.empty, + notMatchedActions = Seq.empty, + notMatchedBySourceActions = Seq(DeleteAction(None)), + withSchemaEvolution = true)) } test("merge into table: only the last matched clause can omit the condition") { @@ -2824,7 +2841,8 @@ class DDLParserSuite extends AnalysisTest { Seq(DeleteAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("delete")))), UpdateAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("update"))), Seq(Assignment(UnresolvedAttribute("target.col2"), - UnresolvedAttribute("DEFAULT"))))))) + UnresolvedAttribute("DEFAULT"))))), + withSchemaEvolution = false)) } test("SPARK-40944: Relax ordering constraint for CREATE TABLE column options") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala index 6fb37ae33fa8d..0130ae72a03c4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala @@ -323,4 +323,37 @@ class ErrorParserSuite extends AnalysisTest { parameters = Map("type" -> "\"CHARACTER\""), context = ExpectedContext(fragment = "Character", start = 19, stop = 27)) } + + test("'!' where only NOT should be allowed") { + checkError( + exception = parseException("SELECT 1 ! IN (2)"), + errorClass = "SYNTAX_DISCONTINUED.BANG_EQUALS_NOT", + parameters = Map("clause" -> "!"), + context = ExpectedContext(fragment = "!", start = 9, stop = 9)) + checkError( + exception = parseException("SELECT 'a' ! LIKE 'b'"), + errorClass = "SYNTAX_DISCONTINUED.BANG_EQUALS_NOT", + parameters = Map("clause" -> "!"), + context = ExpectedContext(fragment = "!", start = 11, stop = 11)) + checkError( + exception = parseException("SELECT 1 ! BETWEEN 1 AND 2"), + errorClass = "SYNTAX_DISCONTINUED.BANG_EQUALS_NOT", + parameters = Map("clause" -> "!"), + context = ExpectedContext(fragment = "!", start = 9, stop = 9)) + checkError( + exception = parseException("SELECT 1 IS ! NULL"), + errorClass = "SYNTAX_DISCONTINUED.BANG_EQUALS_NOT", + parameters = Map("clause" -> "!"), + context = ExpectedContext(fragment = "!", start = 12, stop = 12)) + checkError( + exception = parseException("CREATE TABLE IF ! EXISTS t(c1 INT)"), + errorClass = "SYNTAX_DISCONTINUED.BANG_EQUALS_NOT", + parameters = Map("clause" -> "!"), + context = ExpectedContext(fragment = "!", start = 16, stop = 16)) + checkError( + exception = parseException("CREATE TABLE t(c1 INT ! 
NULL)"), + errorClass = "SYNTAX_DISCONTINUED.BANG_EQUALS_NOT", + parameters = Map("clause" -> "!"), + context = ExpectedContext(fragment = "!", start = 22, stop = 22)) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala index d9f3067d30e51..218304db3d591 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala @@ -131,6 +131,18 @@ class ParserUtilsSuite extends SparkFunSuite { |cd\ef"""".stripMargin) == """ab |cdef""".stripMargin) + + // String with an invalid '\' as the last character. + assert(unescapeSQLString(""""abc\"""") == "abc\\") + + // Strings containing invalid Unicode escapes with non-hex characters. + assert(unescapeSQLString("\"abc\\uXXXXa\"") == "abcuXXXXa") + assert(unescapeSQLString("\"abc\\uxxxxa\"") == "abcuxxxxa") + assert(unescapeSQLString("\"abc\\UXXXXXXXXa\"") == "abcUXXXXXXXXa") + assert(unescapeSQLString("\"abc\\Uxxxxxxxxa\"") == "abcUxxxxxxxxa") + // Guard against off-by-one errors in the "all chars are hex" routine: + assert(unescapeSQLString("\"abc\\uAAAXa\"") == "abcuAAAXa") + // scalastyle:on nonascii } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 17dd7349e7bea..8d01040563361 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -1617,14 +1617,23 @@ class PlanParserSuite extends AnalysisTest { parameters = Map( "error" -> "'order'", "hint" -> "")) - val sql8 = s"select * from my_tvf(arg1 => table(select col1, col2, col3 from v2) " + - s"$partition by col1, col2 order by col2 asc, col3 desc)" + val sql8tableArg = "table(select col1, col2, col3 from v2)" + val sql8partition = s"$partition by col1, col2 order by col2 asc, col3 desc" + val sql8 = s"select * from my_tvf(arg1 => $sql8tableArg $sql8partition)" checkError( exception = parseException(sql8), - errorClass = "PARSE_SYNTAX_ERROR", + errorClass = "_LEGACY_ERROR_TEMP_0064", parameters = Map( - "error" -> "'order'", - "hint" -> ": extra input 'order'")) + "msg" -> + ("The table function call includes a table argument with an invalid " + + "partitioning/ordering specification: the PARTITION BY clause included multiple " + + "expressions without parentheses surrounding them; please add parentheses around " + + "these expressions and then retry the query again")), + context = ExpectedContext( + fragment = s"$sql8tableArg $sql8partition", + start = 29, + stop = 110 + partition.length) + ) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala new file mode 100644 index 0000000000000..657e4b2232ee9 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/SqlScriptingParserSuite.scala @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.parser + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.plans.SQLHelper + +class SqlScriptingParserSuite extends SparkFunSuite with SQLHelper { + import CatalystSqlParser._ + + test("single select") { + val sqlScriptText = "SELECT 1;" + val tree = parseScript(sqlScriptText) + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[SingleStatement]) + val sparkStatement = tree.collection.head.asInstanceOf[SingleStatement] + assert(sparkStatement.getText(sqlScriptText) == "SELECT 1;") + } + + test("single select without ;") { + val sqlScriptText = "SELECT 1" + val tree = parseScript(sqlScriptText) + assert(tree.collection.length == 1) + assert(tree.collection.head.isInstanceOf[SingleStatement]) + val sparkStatement = tree.collection.head.asInstanceOf[SingleStatement] + assert(sparkStatement.getText(sqlScriptText) == "SELECT 1") + } + + test("multi select without ; - should fail") { + val sqlScriptText = "SELECT 1 SELECT 1" + val e = intercept[ParseException] { + parseScript(sqlScriptText) + } + assert(e.getErrorClass === "PARSE_SYNTAX_ERROR") + assert(e.getMessage.contains("Syntax error")) + assert(e.getMessage.contains("SELECT 1 SELECT 1")) + } + + test("multi select") { + val sqlScriptText = "BEGIN SELECT 1;SELECT 2; END" + val tree = parseScript(sqlScriptText) + assert(tree.collection.length == 2) + assert(tree.collection.forall(_.isInstanceOf[SingleStatement])) + + sqlScriptText.split(";") + .map(cleanupStatementString) + .zip(tree.collection) + .foreach { case (expected, statement) => + val sparkStatement = statement.asInstanceOf[SingleStatement] + val statementText = sparkStatement.getText(sqlScriptText) + assert(statementText == expected) + } + } + + test("empty BEGIN END block") { + val sqlScriptText = + """ + |BEGIN + |END""".stripMargin + val tree = parseScript(sqlScriptText) + assert(tree.collection.isEmpty) + } + + test("multiple ; in row - should fail") { + val sqlScriptText = + """ + |BEGIN + | SELECT 1;; + | SELECT 2; + |END""".stripMargin + val e = intercept[ParseException] { + parseScript(sqlScriptText) + } + assert(e.getErrorClass === "PARSE_SYNTAX_ERROR") + assert(e.getMessage.contains("Syntax error")) + assert(e.getMessage.contains("at or near ';'")) + } + + test("without ; in last statement - should fail") { + val sqlScriptText = + """ + |BEGIN + | SELECT 1; + | SELECT 2 + |END""".stripMargin + val e = intercept[ParseException] { + parseScript(sqlScriptText) + } + assert(e.getErrorClass === "PARSE_SYNTAX_ERROR") + assert(e.getMessage.contains("Syntax error")) + assert(e.getMessage.contains("at or near end of input")) + } + + test("multi statement") { + val sqlScriptText = + """ + |BEGIN + | SELECT 1; + | SELECT 2; + | INSERT INTO A VALUES (a, b, 3); + | SELECT a, b, c FROM T; + | SELECT * FROM T; + |END""".stripMargin + val tree = parseScript(sqlScriptText) + assert(tree.collection.length == 5) + 
assert(tree.collection.forall(_.isInstanceOf[SingleStatement])) + sqlScriptText.split(";") + .map(cleanupStatementString) + .zip(tree.collection) + .foreach { case (expected, statement) => + val sparkStatement = statement.asInstanceOf[SingleStatement] + val statementText = sparkStatement.getText(sqlScriptText) + assert(statementText == expected) + } + } + + test("nested begin end") { + val sqlScriptText = + """ + |BEGIN + | BEGIN + | SELECT 1; + | END; + | BEGIN + | BEGIN + | SELECT 2; + | SELECT 3; + | END; + | END; + |END""".stripMargin + val tree = parseScript(sqlScriptText) + assert(tree.collection.length == 2) + assert(tree.collection.head.isInstanceOf[CompoundBody]) + val body1 = tree.collection.head.asInstanceOf[CompoundBody] + assert(body1.collection.length == 1) + assert(body1.collection.head.asInstanceOf[SingleStatement].getText(sqlScriptText) + == "SELECT 1") + + val body2 = tree.collection(1).asInstanceOf[CompoundBody] + assert(body2.collection.length == 1) + assert(body2.collection.head.isInstanceOf[CompoundBody]) + val nestedBody = body2.collection.head.asInstanceOf[CompoundBody] + assert(nestedBody.collection.head.asInstanceOf[SingleStatement].getText(sqlScriptText) + == "SELECT 2") + assert(nestedBody.collection(1).asInstanceOf[SingleStatement].getText(sqlScriptText) + == "SELECT 3") + } + + // Helper methods + def cleanupStatementString(statementStr: String): String = { + statementStr + .replace("\n", "") + .replace("BEGIN", "") + .replace("END", "") + .trim + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala index 43221bf60ca34..886b043ad79e6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/JoinTypesTest.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException class JoinTypesTest extends SparkFunSuite { @@ -61,4 +62,18 @@ class JoinTypesTest extends SparkFunSuite { assert(JoinType("cross") === Cross) } + test("unsupported join type") { + val joinType = "unknown" + checkError( + exception = intercept[AnalysisException]( + JoinType(joinType) + ), + errorClass = "UNSUPPORTED_JOIN_TYPE", + sqlState = "0A000", + parameters = Map( + "typ" -> joinType, + "supported" -> JoinType.supported.mkString("'", "', '", "'") + ) + ) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala index 31f7e07143c50..f783083d0a44f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/LogicalPlanSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.plans +import scala.annotation.nowarn + import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -83,6 +85,26 @@ class LogicalPlanSuite extends SparkFunSuite { } test("transformExpressions works with a Stream") { + val id1 = NamedExpression.newExprId + val id2 = NamedExpression.newExprId + @nowarn("cat=deprecation") + val plan = Project(Stream( + Alias(Literal(1), "a")(exprId = id1), + Alias(Literal(2), "b")(exprId = id2)), + OneRowRelation()) 
+ val result = plan.transformExpressions { + case Literal(v: Int, IntegerType) if v != 1 => + Literal(v + 1, IntegerType) + } + @nowarn("cat=deprecation") + val expected = Project(Stream( + Alias(Literal(1), "a")(exprId = id1), + Alias(Literal(3), "b")(exprId = id2)), + OneRowRelation()) + assert(result.sameResult(expected)) + } + + test("SPARK-45685: transformExpressions works with a LazyList") { val id1 = NamedExpression.newExprId val id2 = NamedExpression.newExprId val plan = Project(LazyList( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 4dbadef93a071..21542d43eac98 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.trees import java.math.BigInteger import java.util.UUID +import scala.annotation.nowarn import scala.collection.mutable.ArrayBuffer import org.json4s.JsonAST._ @@ -693,6 +694,22 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { } test("transform works on stream of children") { + @nowarn("cat=deprecation") + val before = Coalesce(Stream(Literal(1), Literal(2))) + // Note it is a bit tricky to exhibit the broken behavior. Basically we want to create the + // situation in which the TreeNode.mapChildren function's change detection is not triggered. A + // stream's first element is typically materialized, so in order to not trip the TreeNode change + // detection logic, we should not change the first element in the sequence. + val result = before.transform { + case Literal(v: Int, IntegerType) if v != 1 => + Literal(v + 1, IntegerType) + } + @nowarn("cat=deprecation") + val expected = Coalesce(Stream(Literal(1), Literal(3))) + assert(result === expected) + } + + test("SPARK-45685: transform works on LazyList of children") { val before = Coalesce(LazyList(Literal(1), Literal(2))) // Note it is a bit tricky to exhibit the broken behavior. Basically we want to create the // situation in which the TreeNode.mapChildren function's change detection is not triggered. 
A @@ -707,6 +724,16 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { } test("withNewChildren on stream of children") { + @nowarn("cat=deprecation") + val before = Coalesce(Stream(Literal(1), Literal(2))) + @nowarn("cat=deprecation") + val result = before.withNewChildren(Stream(Literal(1), Literal(3))) + @nowarn("cat=deprecation") + val expected = Coalesce(Stream(Literal(1), Literal(3))) + assert(result === expected) + } + + test("SPARK-45685: withNewChildren on LazyList of children") { val before = Coalesce(LazyList(Literal(1), Literal(2))) val result = before.withNewChildren(LazyList(Literal(1), Literal(3))) val expected = Coalesce(LazyList(Literal(1), Literal(3))) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index f68d485ac95fd..8d8669aece894 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -26,7 +26,7 @@ import java.util.concurrent.TimeUnit import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ -import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException} +import org.apache.spark.{SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ @@ -1040,11 +1040,14 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { } checkError( - exception = intercept[SparkException] { + exception = intercept[SparkIllegalArgumentException] { timestampAdd("SECS", 1, date(1969, 1, 1, 0, 0, 0, 1, getZoneId("UTC")), getZoneId("UTC")) }, - errorClass = "INTERNAL_ERROR", - parameters = Map("message" -> "Got the unexpected unit 'SECS'.")) + errorClass = "INVALID_PARAMETER_VALUE.DATETIME_UNIT", + parameters = Map( + "functionName" -> "`TIMESTAMPADD`", + "parameter" -> "`unit`", + "invalidValue" -> "'SECS'")) } test("SPARK-38284: difference between two timestamps in units") { @@ -1092,14 +1095,17 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { } checkError( - exception = intercept[SparkException] { + exception = intercept[SparkIllegalArgumentException] { timestampDiff( "SECS", date(1969, 1, 1, 0, 0, 0, 1, getZoneId("UTC")), date(2022, 1, 1, 0, 0, 0, 1, getZoneId("UTC")), getZoneId("UTC")) }, - errorClass = "INTERNAL_ERROR", - parameters = Map("message" -> "Got the unexpected unit 'SECS'.")) + errorClass = "INVALID_PARAMETER_VALUE.DATETIME_UNIT", + parameters = + Map("functionName" -> "`TIMESTAMPDIFF`", + "parameter" -> "`unit`", + "invalidValue" -> "'SECS'")) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapperBenchmark.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapperBenchmark.scala new file mode 100644 index 0000000000000..cc28e85525162 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/InternalRowComparableWrapperBenchmark.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.plans.physical.KeyGroupedPartitioning +import org.apache.spark.sql.connector.catalog.PartitionInternalRow +import org.apache.spark.sql.types.IntegerType + +/** + * Benchmark for [[InternalRowComparableWrapper]]. + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class <this class> --jars <spark core test jar> + * 2. build/sbt "catalyst/Test/runMain <this class>" + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/Test/runMain <this class>" + * Results will be written to "benchmarks/InternalRowComparableWrapperBenchmark-results.txt". + * }}} + */ +object InternalRowComparableWrapperBenchmark extends BenchmarkBase { + + private def constructAndRunBenchmark(): Unit = { + val partitionNum = 200_000 + val bucketNum = 4096 + val day = 20240401 + val partitions = (0 until partitionNum).map { i => + val bucketId = i % bucketNum + PartitionInternalRow.apply(Array(day, bucketId)); + } + val benchmark = new Benchmark("internal row comparable wrapper", partitionNum, output = output) + + benchmark.addCase("toSet") { _ => + val distinct = partitions + .map(new InternalRowComparableWrapper(_, Seq(IntegerType, IntegerType))) + .toSet + assert(distinct.size == bucketNum) + } + + benchmark.addCase("mergePartitions") { _ => + // just to mock the data types + val expressions = (Seq(Literal(day, IntegerType), Literal(0, IntegerType))) + + val leftPartitioning = KeyGroupedPartitioning(expressions, bucketNum, partitions) + val rightPartitioning = KeyGroupedPartitioning(expressions, bucketNum, partitions) + val merged = InternalRowComparableWrapper.mergePartitions( + leftPartitioning, rightPartitioning, expressions) + assert(merged.size == bucketNum) + } + + benchmark.run() + } + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + constructAndRunBenchmark() + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtilsSuite.scala index b6e87c456de0c..0b3f1f1bdb79d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtilsSuite.scala @@ -93,7 +93,7 @@ class UnsafeRowUtilsSuite extends SparkFunSuite { } test("isBinaryStable on complex types containing collated strings") { - val nonBinaryStringType = StringType(CollationFactory.collationNameToId("UTF8_BINARY_LCASE")) + val nonBinaryStringType = StringType(CollationFactory.collationNameToId("UTF8_LCASE")) // simple checks assert(UnsafeRowUtils.isBinaryStable(IntegerType)) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 3293957282e22..8fd9b7c43a659 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -23,11 +23,13 @@ import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentExce import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.types.DataTypeUtils -import org.apache.spark.sql.catalyst.util.StringConcat +import org.apache.spark.sql.catalyst.util.{CollationFactory, StringConcat} import org.apache.spark.sql.types.DataTypeTestUtils.{dayTimeIntervalTypes, yearMonthIntervalTypes} class DataTypeSuite extends SparkFunSuite { + private val UNICODE_COLLATION_ID = CollationFactory.collationNameToId("UNICODE") + test("construct an ArrayType") { val array = ArrayType(StringType) @@ -310,8 +312,8 @@ class DataTypeSuite extends SparkFunSuite { exception = intercept[SparkIllegalArgumentException] { DataType.fromJson("""{"fields": [{"a":123}], "type": "struct"}""") }, - errorClass = "_LEGACY_ERROR_TEMP_3250", - parameters = Map("other" -> """{"a":123}""")) + errorClass = "INVALID_JSON_DATA_TYPE", + parameters = Map("invalidType" -> """{"a":123}""")) // Malformed JSON string val message = intercept[JsonParseException] { @@ -687,6 +689,115 @@ class DataTypeSuite extends SparkFunSuite { false, caseSensitive = true) + def checkEqualsIgnoreCompatibleCollation( + from: DataType, + to: DataType, + expected: Boolean): Unit = { + val testName = s"equalsIgnoreCompatibleCollation: (from: $from, to: $to)" + + test(testName) { + assert(DataType.equalsIgnoreCompatibleCollation(from, to) === expected) + } + } + + // Simple types. + checkEqualsIgnoreCompatibleCollation(IntegerType, IntegerType, expected = true) + checkEqualsIgnoreCompatibleCollation(BooleanType, BooleanType, expected = true) + checkEqualsIgnoreCompatibleCollation(StringType, StringType, expected = true) + checkEqualsIgnoreCompatibleCollation(IntegerType, BooleanType, expected = false) + checkEqualsIgnoreCompatibleCollation(BooleanType, IntegerType, expected = false) + checkEqualsIgnoreCompatibleCollation(StringType, BooleanType, expected = false) + checkEqualsIgnoreCompatibleCollation(BooleanType, StringType, expected = false) + checkEqualsIgnoreCompatibleCollation(StringType, IntegerType, expected = false) + checkEqualsIgnoreCompatibleCollation(IntegerType, StringType, expected = false) + // Collated `StringType`. + checkEqualsIgnoreCompatibleCollation(StringType, StringType("UTF8_LCASE"), + expected = true) + checkEqualsIgnoreCompatibleCollation( + StringType("UTF8_BINARY"), StringType("UTF8_LCASE"), expected = true) + // Complex types. 
+ checkEqualsIgnoreCompatibleCollation( + ArrayType(StringType), + ArrayType(StringType("UTF8_LCASE")), + expected = true + ) + checkEqualsIgnoreCompatibleCollation( + ArrayType(StringType), + ArrayType(ArrayType(StringType("UTF8_LCASE"))), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + ArrayType(ArrayType(StringType)), + ArrayType(ArrayType(StringType("UTF8_LCASE"))), + expected = true + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType, StringType), + MapType(StringType, StringType("UTF8_LCASE")), + expected = true + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType("UTF8_LCASE"), StringType), + MapType(StringType, StringType), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType("UTF8_LCASE"), ArrayType(StringType)), + MapType(StringType("UTF8_LCASE"), ArrayType(StringType("UTF8_LCASE"))), + expected = true + ) + checkEqualsIgnoreCompatibleCollation( + MapType(ArrayType(StringType), IntegerType), + MapType(ArrayType(StringType("UTF8_LCASE")), IntegerType), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(ArrayType(StringType("UTF8_LCASE")), IntegerType), + MapType(ArrayType(StringType("UTF8_LCASE")), IntegerType), + expected = true + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType) :: Nil), + StructType(StructField("a", StringType("UTF8_LCASE")) :: Nil), + expected = true + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", ArrayType(StringType)) :: Nil), + StructType(StructField("a", ArrayType(StringType("UTF8_LCASE"))) :: Nil), + expected = true + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", MapType(StringType, IntegerType)) :: Nil), + StructType(StructField("a", MapType(StringType("UTF8_LCASE"), IntegerType)) :: Nil), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType) :: Nil), + StructType(StructField("b", StringType("UTF8_LCASE")) :: Nil), + expected = false + ) + // Null compatibility checks. 
+ checkEqualsIgnoreCompatibleCollation( + ArrayType(StringType, containsNull = true), + ArrayType(StringType, containsNull = false), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + ArrayType(StringType, containsNull = true), + ArrayType(StringType("UTF8_LCASE"), containsNull = false), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + MapType(StringType, StringType, valueContainsNull = true), + MapType(StringType, StringType, valueContainsNull = false), + expected = false + ) + checkEqualsIgnoreCompatibleCollation( + StructType(StructField("a", StringType) :: Nil), + StructType(StructField("a", StringType, nullable = false) :: Nil), + expected = false + ) + test("SPARK-25031: MapType should produce current formatted string for complex types") { val keyType: DataType = StructType(Seq( StructField("a", DataTypes.IntegerType), @@ -712,4 +823,190 @@ class DataTypeSuite extends SparkFunSuite { assert(result === expected) } + + test("schema with collation should not change during ser/de") { + val simpleStruct = StructType( + StructField("c1", StringType(UNICODE_COLLATION_ID)) :: Nil) + + val nestedStruct = StructType( + StructField("nested", simpleStruct) :: Nil) + + val caseInsensitiveNames = StructType( + StructField("c1", StringType(UNICODE_COLLATION_ID)) :: + StructField("C1", StringType(UNICODE_COLLATION_ID)) :: Nil) + + val specialCharsInName = StructType( + StructField("c1.*23?", StringType(UNICODE_COLLATION_ID)) :: Nil) + + val arrayInSchema = StructType( + StructField("arrayField", ArrayType(StringType(UNICODE_COLLATION_ID))) :: Nil) + + val mapInSchema = StructType( + StructField("mapField", + MapType(StringType(UNICODE_COLLATION_ID), StringType(UNICODE_COLLATION_ID))) :: Nil) + + val mapWithKeyInNameInSchema = StructType( + StructField("name.key", StringType) :: + StructField("name", + MapType(StringType(UNICODE_COLLATION_ID), StringType(UNICODE_COLLATION_ID))) :: Nil) + + val arrayInMapInNestedSchema = StructType( + StructField("arrInMap", + MapType(StringType(UNICODE_COLLATION_ID), + ArrayType(StringType(UNICODE_COLLATION_ID)))) :: Nil) + + val nestedArrayInMap = StructType( + StructField("nestedArrayInMap", + ArrayType(MapType(StringType(UNICODE_COLLATION_ID), + ArrayType(ArrayType(StringType(UNICODE_COLLATION_ID)))))) :: Nil) + + val schemaWithMultipleFields = StructType( + simpleStruct.fields ++ nestedStruct.fields ++ arrayInSchema.fields ++ mapInSchema.fields ++ + mapWithKeyInNameInSchema ++ arrayInMapInNestedSchema.fields ++ nestedArrayInMap.fields) + + Seq( + simpleStruct, caseInsensitiveNames, specialCharsInName, nestedStruct, arrayInSchema, + mapInSchema, mapWithKeyInNameInSchema, nestedArrayInMap, arrayInMapInNestedSchema, + schemaWithMultipleFields) + .foreach { schema => + val json = schema.json + val parsed = DataType.fromJson(json) + assert(parsed === schema) + } + } + + test("non string field has collation metadata") { + val json = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": "c1", + | "type": "integer", + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "c1": "icu.UNICODE" + | } + | } + | } + | ] + |} + |""".stripMargin + + checkError( + exception = intercept[SparkIllegalArgumentException] { + DataType.fromJson(json) + }, + errorClass = "INVALID_JSON_DATA_TYPE_FOR_COLLATIONS", + parameters = Map("jsonType" -> "integer") + ) + } + + test("non string field in map key has collation metadata") { + val json = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": 
"mapField", + | "type": { + | "type": "map", + | "keyType": "string", + | "valueType": "integer", + | "valueContainsNull": true + | }, + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "mapField.value": "icu.UNICODE" + | } + | } + | } + | ] + |} + |""".stripMargin + + checkError( + exception = intercept[SparkIllegalArgumentException] { + DataType.fromJson(json) + }, + errorClass = "INVALID_JSON_DATA_TYPE_FOR_COLLATIONS", + parameters = Map("jsonType" -> "integer") + ) + } + + test("map field has collation metadata") { + val json = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": "mapField", + | "type": { + | "type": "map", + | "keyType": "string", + | "valueType": "integer", + | "valueContainsNull": true + | }, + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "mapField": "icu.UNICODE" + | } + | } + | } + | ] + |} + |""".stripMargin + + checkError( + exception = intercept[SparkIllegalArgumentException] { + DataType.fromJson(json) + }, + errorClass = "INVALID_JSON_DATA_TYPE_FOR_COLLATIONS", + parameters = Map("jsonType" -> "map") + ) + } + + test("non existing collation provider") { + val json = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": "c1", + | "type": "string", + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "c1": "badProvider.UNICODE" + | } + | } + | } + | ] + |} + |""".stripMargin + + checkError( + exception = intercept[SparkException] { + DataType.fromJson(json) + }, + errorClass = "COLLATION_INVALID_PROVIDER", + parameters = Map("provider" -> "badProvider", "supportedProviders" -> "spark, icu") + ) + } + + test("SPARK-48680: Add CharType and VarcharType to DataTypes JAVA API") { + assert(DataTypes.createCharType(1) === CharType(1)) + assert(DataTypes.createVarcharType(100) === VarcharType(100)) + val exception = intercept[IllegalArgumentException] { + DataTypes.createVarcharType(-1) + } + assert(exception.getMessage.contains("The length of varchar type cannot be negative.")) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index c165ab1bf61bd..562febe381130 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.types +import com.fasterxml.jackson.databind.ObjectMapper + import org.apache.spark.{SparkException, SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{caseInsensitiveResolution, caseSensitiveResolution} @@ -36,6 +38,10 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { private val s = StructType.fromDDL("a INT, b STRING") + private val UNICODE_COLLATION = "UNICODE" + private val UTF8_LCASE_COLLATION = "UTF8_LCASE" + private val mapper = new ObjectMapper() + test("lookup a single missing field should output existing fields") { checkError( exception = intercept[SparkIllegalArgumentException](s("c")), @@ -606,4 +612,181 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { "b STRING NOT NULL,c STRING COMMENT 'nullable comment'") assert(fromDDL(struct.toDDL) === struct) } + + test("simple struct with collations to json") { + val simpleStruct = StructType( + StructField("c1", StringType(UNICODE_COLLATION)) :: Nil) + + val 
expectedJson = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": "c1", + | "type": "string", + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "c1": "icu.$UNICODE_COLLATION" + | } + | } + | } + | ] + |} + |""".stripMargin + + assert(mapper.readTree(simpleStruct.json) == mapper.readTree(expectedJson)) + } + + test("nested struct with collations to json") { + val nestedStruct = StructType( + StructField("nested", StructType( + StructField("c1", StringType(UTF8_LCASE_COLLATION)) :: Nil)) :: Nil) + + val expectedJson = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": "nested", + | "type": { + | "type": "struct", + | "fields": [ + | { + | "name": "c1", + | "type": "string", + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "c1": "spark.$UTF8_LCASE_COLLATION" + | } + | } + | } + | ] + | }, + | "nullable": true, + | "metadata": {} + | } + | ] + |} + |""".stripMargin + + assert(mapper.readTree(nestedStruct.json) == mapper.readTree(expectedJson)) + } + + test("array with collations in schema to json") { + val arrayInSchema = StructType( + StructField("arrayField", ArrayType(StringType(UNICODE_COLLATION))) :: Nil) + + val expectedJson = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": "arrayField", + | "type": { + | "type": "array", + | "elementType": "string", + | "containsNull": true + | }, + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "arrayField.element": "icu.$UNICODE_COLLATION" + | } + | } + | } + | ] + |} + |""".stripMargin + + assert(mapper.readTree(arrayInSchema.json) == mapper.readTree(expectedJson)) + } + + test("map with collations in schema to json") { + val arrayInSchema = StructType( + StructField("mapField", + MapType(StringType(UNICODE_COLLATION), StringType(UNICODE_COLLATION))) :: Nil) + + val expectedJson = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": "mapField", + | "type": { + | "type": "map", + | "keyType": "string", + | "valueType": "string", + | "valueContainsNull": true + | }, + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "mapField.key": "icu.$UNICODE_COLLATION", + | "mapField.value": "icu.$UNICODE_COLLATION" + | } + | } + | } + | ] + |} + |""".stripMargin + + assert(mapper.readTree(arrayInSchema.json) == mapper.readTree(expectedJson)) + } + + test("nested array with collations in map to json" ) { + val mapWithNestedArray = StructType( + StructField("column", ArrayType(MapType( + StringType(UNICODE_COLLATION), + ArrayType(ArrayType(ArrayType(StringType(UNICODE_COLLATION))))))) :: Nil) + + val expectedJson = + s""" + |{ + | "type": "struct", + | "fields": [ + | { + | "name": "column", + | "type": { + | "type": "array", + | "elementType": { + | "type": "map", + | "keyType": "string", + | "valueType": { + | "type": "array", + | "elementType": { + | "type": "array", + | "elementType": { + | "type": "array", + | "elementType": "string", + | "containsNull": true + | }, + | "containsNull": true + | }, + | "containsNull": true + | }, + | "valueContainsNull": true + | }, + | "containsNull": true + | }, + | "nullable": true, + | "metadata": { + | "${DataType.COLLATIONS_METADATA_KEY}": { + | "column.element.key": "icu.$UNICODE_COLLATION", + | "column.element.value.element.element.element": "icu.$UNICODE_COLLATION" + | } + | } + | } + | ] + |} + |""".stripMargin + + assert( + mapper.readTree(mapWithNestedArray.json) == 
mapper.readTree(expectedJson)) + } } diff --git a/sql/core/benchmarks/AggregateBenchmark-jdk21-results.txt b/sql/core/benchmarks/AggregateBenchmark-jdk21-results.txt index c68ab02cc98dc..15fe089202fb8 100644 --- a/sql/core/benchmarks/AggregateBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/AggregateBenchmark-jdk21-results.txt @@ -2,147 +2,147 @@ aggregate without grouping ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor agg w/o group: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -agg w/o group wholestage off 34004 34304 425 61.7 16.2 1.0X -agg w/o group wholestage on 717 728 10 2925.5 0.3 47.4X +agg w/o group wholestage off 35342 35962 876 59.3 16.9 1.0X +agg w/o group wholestage on 2831 2851 16 740.7 1.4 12.5X ================================================================================================ stat functions ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor stddev: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -stddev wholestage off 4163 4196 47 25.2 39.7 1.0X -stddev wholestage on 979 984 3 107.1 9.3 4.3X +stddev wholestage off 4117 4150 47 25.5 39.3 1.0X +stddev wholestage on 976 980 4 107.4 9.3 4.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor kurtosis: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -kurtosis wholestage off 20832 20961 182 5.0 198.7 1.0X -kurtosis wholestage on 983 992 6 106.7 9.4 21.2X +kurtosis wholestage off 19477 19555 110 5.4 185.7 1.0X +kurtosis wholestage on 986 994 6 106.3 9.4 19.8X ================================================================================================ aggregate with linear keys ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 6917 6934 24 12.1 82.5 1.0X -codegen = T, hashmap = F 4302 4328 43 19.5 51.3 1.6X -codegen = T, row-based hashmap = T 1092 1102 10 76.8 13.0 6.3X -codegen = T, vectorized hashmap = T 758 766 6 110.6 9.0 9.1X +codegen = F 6889 6905 23 12.2 82.1 1.0X +codegen = T, hashmap = F 3899 3935 32 21.5 46.5 1.8X +codegen = T, row-based hashmap = T 1248 1254 6 67.2 14.9 5.5X +codegen = T, vectorized hashmap = T 825 837 12 101.7 9.8 8.4X ================================================================================================ 
aggregate with randomized keys ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 7543 7559 23 11.1 89.9 1.0X -codegen = T, hashmap = F 4895 4908 20 17.1 58.4 1.5X -codegen = T, row-based hashmap = T 1671 1676 3 50.2 19.9 4.5X -codegen = T, vectorized hashmap = T 983 995 12 85.3 11.7 7.7X +codegen = F 7875 7877 2 10.7 93.9 1.0X +codegen = T, hashmap = F 4903 4941 43 17.1 58.4 1.6X +codegen = T, row-based hashmap = T 1807 1818 9 46.4 21.5 4.4X +codegen = T, vectorized hashmap = T 1300 1344 49 64.5 15.5 6.1X ================================================================================================ aggregate with string key ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w string key: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 2312 2332 28 9.1 110.3 1.0X -codegen = T, hashmap = F 1605 1630 23 13.1 76.5 1.4X -codegen = T, row-based hashmap = T 1198 1208 12 17.5 57.1 1.9X -codegen = T, vectorized hashmap = T 920 936 24 22.8 43.9 2.5X +codegen = F 2552 2573 30 8.2 121.7 1.0X +codegen = T, hashmap = F 1537 1545 9 13.6 73.3 1.7X +codegen = T, row-based hashmap = T 887 921 27 23.6 42.3 2.9X +codegen = T, vectorized hashmap = T 744 752 8 28.2 35.5 3.4X ================================================================================================ aggregate with decimal key ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w decimal key: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 2162 2193 43 9.7 103.1 1.0X -codegen = T, hashmap = F 1356 1361 8 15.5 64.6 1.6X -codegen = T, row-based hashmap = T 435 453 11 48.2 20.8 5.0X -codegen = T, vectorized hashmap = T 285 291 4 73.6 13.6 7.6X +codegen = F 2460 2464 6 8.5 117.3 1.0X +codegen = T, hashmap = F 1601 1611 14 13.1 76.4 1.5X +codegen = T, row-based hashmap = T 475 497 29 44.1 22.7 5.2X +codegen = T, vectorized hashmap = T 336 343 5 62.5 16.0 7.3X ================================================================================================ aggregate with multiple key types ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w multiple keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -codegen = F 4053 4053 0 5.2 193.3 1.0X -codegen = T, hashmap = F 2346 2352 9 8.9 111.9 1.7X -codegen = T, row-based hashmap = T 1855 1860 7 11.3 88.4 2.2X -codegen = T, vectorized hashmap = T 1918 1947 41 10.9 91.5 2.1X +codegen = F 4272 4302 43 4.9 203.7 1.0X +codegen = T, hashmap = F 2260 2262 2 9.3 107.8 1.9X +codegen = T, row-based hashmap = T 1652 1665 19 12.7 78.8 2.6X +codegen = T, vectorized hashmap = T 1519 1527 11 13.8 72.4 2.8X ================================================================================================ max function bytecode size of wholestagecodegen ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor max function bytecode size: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 362 380 29 1.8 552.9 1.0X -codegen = T, hugeMethodLimit = 10000 128 144 14 5.1 195.7 2.8X -codegen = T, hugeMethodLimit = 1500 122 138 21 5.4 186.8 3.0X +codegen = F 375 407 29 1.7 572.3 1.0X +codegen = T, hugeMethodLimit = 10000 137 160 19 4.8 209.5 2.7X +codegen = T, hugeMethodLimit = 1500 132 143 13 5.0 201.8 2.8X ================================================================================================ cube ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor cube: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cube wholestage off 2080 2088 11 2.5 396.7 1.0X -cube wholestage on 1154 1167 12 4.5 220.1 1.8X +cube wholestage off 1986 2005 26 2.6 378.9 1.0X +cube wholestage on 1079 1106 46 4.9 205.8 1.8X ================================================================================================ hash and BytesToBytesMap ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor BytesToBytesMap: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ UnsafeRowhash 146 146 1 143.7 7.0 1.0X -murmur3 hash 53 53 0 392.6 2.5 2.7X -fast hash 24 24 1 887.8 1.1 6.2X -arrayEqual 130 130 0 161.6 6.2 1.1X -Java HashMap (Long) 65 69 4 322.2 3.1 2.2X -Java HashMap (two ints) 87 90 2 240.9 4.2 1.7X -Java HashMap (UnsafeRow) 499 501 2 42.1 23.8 0.3X -LongToUnsafeRowMap (opt=false) 349 350 1 60.1 16.6 0.4X -LongToUnsafeRowMap (opt=true) 76 78 6 275.1 3.6 1.9X -BytesToBytesMap (off Heap) 581 584 2 36.1 27.7 0.3X -BytesToBytesMap (on Heap) 580 588 6 36.2 27.6 0.3X -Aggregate HashMap 30 31 2 698.9 1.4 4.9X +murmur3 hash 53 54 0 392.5 2.5 2.7X +fast hash 24 24 0 887.4 1.1 6.2X +arrayEqual 136 136 0 153.9 6.5 1.1X +Java HashMap (Long) 62 67 6 337.2 3.0 
2.3X +Java HashMap (two ints) 87 91 8 242.3 4.1 1.7X +Java HashMap (UnsafeRow) 505 509 4 41.5 24.1 0.3X +LongToUnsafeRowMap (opt=false) 351 352 2 59.8 16.7 0.4X +LongToUnsafeRowMap (opt=true) 76 77 1 274.8 3.6 1.9X +BytesToBytesMap (off Heap) 450 460 9 46.6 21.5 0.3X +BytesToBytesMap (on Heap) 462 473 10 45.4 22.0 0.3X +Aggregate HashMap 30 30 1 699.0 1.4 4.9X diff --git a/sql/core/benchmarks/AggregateBenchmark-results.txt b/sql/core/benchmarks/AggregateBenchmark-results.txt index a546e2fb0bc61..bdfa6bd673586 100644 --- a/sql/core/benchmarks/AggregateBenchmark-results.txt +++ b/sql/core/benchmarks/AggregateBenchmark-results.txt @@ -2,147 +2,147 @@ aggregate without grouping ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor agg w/o group: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -agg w/o group wholestage off 30915 32941 2865 67.8 14.7 1.0X -agg w/o group wholestage on 717 720 2 2924.3 0.3 43.1X +agg w/o group wholestage off 38161 38820 933 55.0 18.2 1.0X +agg w/o group wholestage on 2472 2488 10 848.5 1.2 15.4X ================================================================================================ stat functions ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor stddev: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -stddev wholestage off 4304 4311 11 24.4 41.0 1.0X -stddev wholestage on 980 982 2 107.0 9.3 4.4X +stddev wholestage off 4488 4498 14 23.4 42.8 1.0X +stddev wholestage on 961 975 8 109.1 9.2 4.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor kurtosis: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -kurtosis wholestage off 20793 20816 32 5.0 198.3 1.0X -kurtosis wholestage on 988 993 4 106.1 9.4 21.0X +kurtosis wholestage off 20771 20817 65 5.0 198.1 1.0X +kurtosis wholestage on 1004 1009 4 104.5 9.6 20.7X ================================================================================================ aggregate with linear keys ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 6582 6606 34 12.7 78.5 1.0X -codegen = T, hashmap = F 3769 3785 18 22.3 44.9 1.7X -codegen = T, row-based hashmap = T 1124 1132 9 74.6 13.4 5.9X -codegen = T, vectorized hashmap = T 766 775 6 109.5 9.1 8.6X +codegen = F 
6648 6749 142 12.6 79.3 1.0X +codegen = T, hashmap = F 3893 3974 83 21.6 46.4 1.7X +codegen = T, row-based hashmap = T 1198 1218 17 70.0 14.3 5.5X +codegen = T, vectorized hashmap = T 815 826 7 102.9 9.7 8.2X ================================================================================================ aggregate with randomized keys ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 7355 7372 24 11.4 87.7 1.0X -codegen = T, hashmap = F 4568 4627 61 18.4 54.5 1.6X -codegen = T, row-based hashmap = T 1635 1660 21 51.3 19.5 4.5X -codegen = T, vectorized hashmap = T 1084 1180 131 77.4 12.9 6.8X +codegen = F 7395 7411 24 11.3 88.1 1.0X +codegen = T, hashmap = F 4675 4833 165 17.9 55.7 1.6X +codegen = T, row-based hashmap = T 1658 1709 82 50.6 19.8 4.5X +codegen = T, vectorized hashmap = T 1066 1080 23 78.7 12.7 6.9X ================================================================================================ aggregate with string key ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w string key: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 2409 2432 33 8.7 114.9 1.0X -codegen = T, hashmap = F 1476 1503 33 14.2 70.4 1.6X -codegen = T, row-based hashmap = T 947 950 3 22.2 45.1 2.5X -codegen = T, vectorized hashmap = T 717 722 4 29.3 34.2 3.4X +codegen = F 2400 2406 8 8.7 114.5 1.0X +codegen = T, hashmap = F 1499 1512 19 14.0 71.5 1.6X +codegen = T, row-based hashmap = T 942 972 28 22.3 44.9 2.5X +codegen = T, vectorized hashmap = T 756 764 5 27.7 36.1 3.2X ================================================================================================ aggregate with decimal key ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w decimal key: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 2024 2029 7 10.4 96.5 1.0X -codegen = T, hashmap = F 1333 1333 0 15.7 63.6 1.5X -codegen = T, row-based hashmap = T 491 506 14 42.7 23.4 4.1X -codegen = T, vectorized hashmap = T 284 288 3 74.0 13.5 7.1X +codegen = F 2103 2115 17 10.0 100.3 1.0X +codegen = T, hashmap = F 1324 1330 9 15.8 63.1 1.6X +codegen = T, row-based hashmap = T 454 473 13 46.1 21.7 4.6X +codegen = T, vectorized hashmap = T 313 324 6 66.9 14.9 6.7X ================================================================================================ aggregate with multiple key types ================================================================================================ 
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Aggregate w multiple keys: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 4035 4061 37 5.2 192.4 1.0X -codegen = T, hashmap = F 2336 2343 9 9.0 111.4 1.7X -codegen = T, row-based hashmap = T 1599 1630 43 13.1 76.2 2.5X -codegen = T, vectorized hashmap = T 1481 1508 38 14.2 70.6 2.7X +codegen = F 4133 4161 39 5.1 197.1 1.0X +codegen = T, hashmap = F 2402 2405 4 8.7 114.5 1.7X +codegen = T, row-based hashmap = T 1618 1626 12 13.0 77.1 2.6X +codegen = T, vectorized hashmap = T 1516 1525 12 13.8 72.3 2.7X ================================================================================================ max function bytecode size of wholestagecodegen ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor max function bytecode size: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -codegen = F 358 377 18 1.8 546.8 1.0X -codegen = T, hugeMethodLimit = 10000 127 144 12 5.2 193.6 2.8X -codegen = T, hugeMethodLimit = 1500 123 136 13 5.3 188.0 2.9X +codegen = F 402 410 4 1.6 614.1 1.0X +codegen = T, hugeMethodLimit = 10000 164 194 14 4.0 249.7 2.5X +codegen = T, hugeMethodLimit = 1500 132 153 15 5.0 201.8 3.0X ================================================================================================ cube ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor cube: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cube wholestage off 2001 2011 14 2.6 381.7 1.0X -cube wholestage on 1063 1079 29 4.9 202.7 1.9X +cube wholestage off 2101 2141 56 2.5 400.8 1.0X +cube wholestage on 1072 1084 13 4.9 204.5 2.0X ================================================================================================ hash and BytesToBytesMap ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor BytesToBytesMap: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeRowhash 210 211 1 100.0 10.0 1.0X -murmur3 hash 68 69 1 309.1 3.2 3.1X -fast hash 67 72 1 311.9 3.2 3.1X -arrayEqual 143 145 1 146.2 6.8 1.5X -Java HashMap (Long) 62 64 3 340.9 2.9 3.4X -Java HashMap (two ints) 81 83 2 260.4 3.8 2.6X -Java HashMap (UnsafeRow) 522 527 5 40.1 24.9 0.4X -LongToUnsafeRowMap (opt=false) 345 346 1 60.9 16.4 0.6X -LongToUnsafeRowMap (opt=true) 77 78 1 273.6 3.7 2.7X -BytesToBytesMap (off Heap) 
486 492 5 43.2 23.2 0.4X -BytesToBytesMap (on Heap) 502 507 6 41.8 23.9 0.4X -Aggregate HashMap 30 31 3 689.7 1.4 6.9X +UnsafeRowhash 198 203 14 106.1 9.4 1.0X +murmur3 hash 66 68 1 320.1 3.1 3.0X +fast hash 69 71 1 305.8 3.3 2.9X +arrayEqual 142 145 2 148.0 6.8 1.4X +Java HashMap (Long) 64 68 4 327.5 3.1 3.1X +Java HashMap (two ints) 82 84 2 257.0 3.9 2.4X +Java HashMap (UnsafeRow) 537 542 5 39.1 25.6 0.4X +LongToUnsafeRowMap (opt=false) 335 338 2 62.5 16.0 0.6X +LongToUnsafeRowMap (opt=true) 74 75 2 281.7 3.6 2.7X +BytesToBytesMap (off Heap) 489 494 7 42.9 23.3 0.4X +BytesToBytesMap (on Heap) 496 499 3 42.3 23.7 0.4X +Aggregate HashMap 30 31 2 705.1 1.4 6.6X diff --git a/sql/core/benchmarks/AnsiIntervalSortBenchmark-jdk21-results.txt b/sql/core/benchmarks/AnsiIntervalSortBenchmark-jdk21-results.txt index ceb70f9f4ded3..c9f022901b947 100644 --- a/sql/core/benchmarks/AnsiIntervalSortBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/AnsiIntervalSortBenchmark-jdk21-results.txt @@ -1,28 +1,28 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor year month interval one column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -year month interval one column enable radix 22445 22598 237 4.5 224.4 1.0X -year month interval one column disable radix 32401 32416 20 3.1 324.0 0.7X +year month interval one column enable radix 22681 22902 342 4.4 226.8 1.0X +year month interval one column disable radix 31984 32121 199 3.1 319.8 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor year month interval two columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -year month interval two columns enable radix 33624 33664 36 3.0 336.2 1.0X -year month interval two columns disable radix 33562 33647 79 3.0 335.6 1.0X +year month interval two columns enable radix 33382 33682 365 3.0 333.8 1.0X +year month interval two columns disable radix 33180 33612 586 3.0 331.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor day time interval one columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -day time interval one columns enable radix 22444 22552 166 4.5 224.4 1.0X -day time interval one columns disable radix 34000 34058 90 2.9 340.0 0.7X +day time interval one columns enable radix 20327 20446 140 4.9 203.3 1.0X +day time interval one columns disable radix 31683 32138 457 3.2 316.8 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor day time interval two columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -day time interval two columns enable radix 35780 35816 34 2.8 357.8 1.0X -day 
time interval two columns disable radix 36041 36575 472 2.8 360.4 1.0X +day time interval two columns enable radix 32522 32691 224 3.1 325.2 1.0X +day time interval two columns disable radix 32478 32743 285 3.1 324.8 1.0X diff --git a/sql/core/benchmarks/AnsiIntervalSortBenchmark-results.txt b/sql/core/benchmarks/AnsiIntervalSortBenchmark-results.txt index 9fb2e3b6b8c3f..def37b0d628ab 100644 --- a/sql/core/benchmarks/AnsiIntervalSortBenchmark-results.txt +++ b/sql/core/benchmarks/AnsiIntervalSortBenchmark-results.txt @@ -1,28 +1,28 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor year month interval one column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -year month interval one column enable radix 23013 23045 49 4.3 230.1 1.0X -year month interval one column disable radix 33043 33140 166 3.0 330.4 0.7X +year month interval one column enable radix 22540 22607 70 4.4 225.4 1.0X +year month interval one column disable radix 32453 32592 126 3.1 324.5 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor year month interval two columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -year month interval two columns enable radix 33865 33990 109 3.0 338.6 1.0X -year month interval two columns disable radix 35043 35124 81 2.9 350.4 1.0X +year month interval two columns enable radix 33313 33384 114 3.0 333.1 1.0X +year month interval two columns disable radix 33284 33357 83 3.0 332.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor day time interval one columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -day time interval one columns enable radix 20614 20666 45 4.9 206.1 1.0X -day time interval one columns disable radix 33399 33655 242 3.0 334.0 0.6X +day time interval one columns enable radix 21112 21150 63 4.7 211.1 1.0X +day time interval one columns disable radix 32667 32837 159 3.1 326.7 0.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor day time interval two columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -day time interval two columns enable radix 35242 35495 284 2.8 352.4 1.0X -day time interval two columns disable radix 35315 35481 181 2.8 353.1 1.0X +day time interval two columns enable radix 36203 36323 110 2.8 362.0 1.0X +day time interval two columns disable radix 34964 35031 70 2.9 349.6 1.0X diff --git a/sql/core/benchmarks/Base64Benchmark-jdk21-results.txt b/sql/core/benchmarks/Base64Benchmark-jdk21-results.txt index 9f156cfca71b0..f9ddb8465f4f0 100644 --- a/sql/core/benchmarks/Base64Benchmark-jdk21-results.txt +++ 
b/sql/core/benchmarks/Base64Benchmark-jdk21-results.txt @@ -1,56 +1,56 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor encode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 2210 2233 38 9.1 110.5 1.0X -apache 11572 11644 66 1.7 578.6 0.2X +java 2038 2103 63 9.8 101.9 1.0X +apache 11269 11369 86 1.8 563.4 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor encode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 2586 2604 18 7.7 129.3 1.0X -apache 12940 12986 41 1.5 647.0 0.2X +java 2462 2507 76 8.1 123.1 1.0X +apache 12414 12475 54 1.6 620.7 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor encode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3053 3065 16 6.6 152.7 1.0X -apache 14277 14321 60 1.4 713.9 0.2X +java 3084 3093 8 6.5 154.2 1.0X +apache 13548 13629 86 1.5 677.4 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor encode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3106 3113 10 6.4 155.3 1.0X -apache 15346 15382 37 1.3 767.3 0.2X +java 3182 3189 9 6.3 159.1 1.0X +apache 14637 14678 40 1.4 731.8 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor decode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3494 3500 5 5.7 174.7 1.0X -apache 12827 12950 107 1.6 641.4 0.3X +java 3117 3254 175 6.4 155.8 1.0X +apache 12666 12718 47 1.6 633.3 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor decode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 4203 4205 3 4.8 210.1 1.0X -apache 15071 15122 61 1.3 753.5 0.3X +java 3759 3765 6 5.3 187.9 1.0X +apache 13854 13870 15 1.4 692.7 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor decode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 5025 5042 16 4.0 251.3 1.0X -apache 17056 17140 88 
1.2 852.8 0.3X +java 4773 4781 12 4.2 238.6 1.0X +apache 15439 15482 42 1.3 771.9 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor decode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 5414 5497 139 3.7 270.7 1.0X -apache 18678 18825 130 1.1 933.9 0.3X +java 5200 5228 25 3.8 260.0 1.0X +apache 16847 16904 69 1.2 842.4 0.3X diff --git a/sql/core/benchmarks/Base64Benchmark-results.txt b/sql/core/benchmarks/Base64Benchmark-results.txt index 4e574da76c9a1..975e6b9bcad23 100644 --- a/sql/core/benchmarks/Base64Benchmark-results.txt +++ b/sql/core/benchmarks/Base64Benchmark-results.txt @@ -1,56 +1,56 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor encode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 2334 2374 61 8.6 116.7 1.0X -apache 10888 10892 8 1.8 544.4 0.2X +java 2292 2321 30 8.7 114.6 1.0X +apache 11003 11085 79 1.8 550.2 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor encode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3172 3209 48 6.3 158.6 1.0X -apache 12136 12153 15 1.6 606.8 0.3X +java 2945 2956 18 6.8 147.3 1.0X +apache 12199 12295 135 1.6 609.9 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor encode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3525 3530 4 5.7 176.3 1.0X -apache 13421 13449 30 1.5 671.1 0.3X +java 3477 3489 12 5.8 173.9 1.0X +apache 13666 13776 96 1.5 683.3 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor encode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3917 3924 9 5.1 195.9 1.0X -apache 14449 14474 22 1.4 722.5 0.3X +java 3958 3973 22 5.1 197.9 1.0X +apache 14953 14993 45 1.3 747.7 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor decode for 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 3412 3656 212 5.9 170.6 1.0X -apache 12318 12326 14 1.6 615.9 0.3X +java 3223 3313 79 6.2 161.1 1.0X +apache 12096 12321 196 1.7 604.8 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 
6.5.0-1018-azure AMD EPYC 7763 64-Core Processor decode for 3: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 4950 4964 12 4.0 247.5 1.0X -apache 15030 15050 34 1.3 751.5 0.3X +java 4637 4655 18 4.3 231.9 1.0X +apache 14167 14307 124 1.4 708.4 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor decode for 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 6552 6562 10 3.1 327.6 1.0X -apache 16803 16855 46 1.2 840.2 0.4X +java 5811 5821 9 3.4 290.6 1.0X +apache 15871 15886 15 1.3 793.6 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor decode for 7: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -java 7340 7346 8 2.7 367.0 1.0X -apache 18708 18789 76 1.1 935.4 0.4X +java 6410 6436 23 3.1 320.5 1.0X +apache 17301 17395 109 1.2 865.1 0.4X diff --git a/sql/core/benchmarks/BloomFilterBenchmark-jdk21-results.txt b/sql/core/benchmarks/BloomFilterBenchmark-jdk21-results.txt index 2be64a77ae5d7..fd0fe949392b3 100644 --- a/sql/core/benchmarks/BloomFilterBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/BloomFilterBenchmark-jdk21-results.txt @@ -2,191 +2,191 @@ ORC Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 7752 7991 337 12.9 77.5 1.0X -With bloom filter 10081 10242 228 9.9 100.8 0.8X +Without bloom filter 8033 8137 147 12.4 80.3 1.0X +With bloom filter 10113 10202 125 9.9 101.1 0.8X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 838 890 45 119.3 8.4 1.0X -With bloom filter, blocksize: 2097152 579 596 17 172.8 5.8 1.4X +Without bloom filter, blocksize: 2097152 895 909 17 111.7 8.9 1.0X +With bloom filter, blocksize: 2097152 592 603 9 169.1 5.9 1.5X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit 
Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 824 848 22 121.3 8.2 1.0X -With bloom filter, blocksize: 4194304 554 574 22 180.4 5.5 1.5X +Without bloom filter, blocksize: 4194304 852 871 23 117.4 8.5 1.0X +With bloom filter, blocksize: 4194304 542 573 37 184.6 5.4 1.6X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 805 822 20 124.3 8.0 1.0X -With bloom filter, blocksize: 6291456 527 582 72 189.9 5.3 1.5X +Without bloom filter, blocksize: 6291456 834 857 32 120.0 8.3 1.0X +With bloom filter, blocksize: 6291456 547 567 26 182.9 5.5 1.5X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 796 819 23 125.6 8.0 1.0X -With bloom filter, blocksize: 8388608 742 757 15 134.7 7.4 1.1X +Without bloom filter, blocksize: 8388608 819 844 21 122.0 8.2 1.0X +With bloom filter, blocksize: 8388608 542 572 25 184.4 5.4 1.5X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 1266 1267 1 79.0 12.7 1.0X -With bloom filter, blocksize: 12582912 766 778 16 130.6 7.7 1.7X +Without bloom filter, blocksize: 12582912 866 882 16 115.5 8.7 1.0X +With bloom filter, blocksize: 12582912 537 560 21 186.1 5.4 1.6X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) 
Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 1209 1215 8 82.7 12.1 1.0X -With bloom filter, blocksize: 16777216 760 766 6 131.5 7.6 1.6X +Without bloom filter, blocksize: 16777216 805 829 21 124.3 8.0 1.0X +With bloom filter, blocksize: 16777216 537 567 30 186.2 5.4 1.5X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 1220 1221 2 82.0 12.2 1.0X -With bloom filter, blocksize: 33554432 745 763 23 134.3 7.4 1.6X +Without bloom filter, blocksize: 33554432 807 826 17 123.9 8.1 1.0X +With bloom filter, blocksize: 33554432 535 552 12 186.9 5.3 1.5X ================================================================================================ Parquet Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 9950 9963 19 10.1 99.5 1.0X -With bloom filter 12809 12908 140 7.8 128.1 0.8X +Without bloom filter 10510 10559 69 9.5 105.1 1.0X +With bloom filter 13372 13429 81 7.5 133.7 0.8X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 403 419 12 248.3 4.0 1.0X -With bloom filter, blocksize: 2097152 149 160 10 672.9 1.5 2.7X +Without bloom filter, blocksize: 2097152 400 430 24 250.0 4.0 1.0X +With bloom filter, blocksize: 2097152 148 160 12 677.1 1.5 2.7X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 387 397 11 258.4 3.9 1.0X 
-With bloom filter, blocksize: 4194304 104 109 5 966.1 1.0 3.7X +Without bloom filter, blocksize: 4194304 380 385 4 263.0 3.8 1.0X +With bloom filter, blocksize: 4194304 103 115 8 972.0 1.0 3.7X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 388 394 6 257.6 3.9 1.0X -With bloom filter, blocksize: 6291456 114 124 5 878.6 1.1 3.4X +Without bloom filter, blocksize: 6291456 380 384 5 263.0 3.8 1.0X +With bloom filter, blocksize: 6291456 123 136 7 811.1 1.2 3.1X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 386 402 19 259.0 3.9 1.0X -With bloom filter, blocksize: 8388608 159 174 11 628.9 1.6 2.4X +Without bloom filter, blocksize: 8388608 382 391 13 261.8 3.8 1.0X +With bloom filter, blocksize: 8388608 175 188 7 571.1 1.8 2.2X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 404 432 24 247.4 4.0 1.0X -With bloom filter, blocksize: 12582912 267 281 14 375.1 2.7 1.5X +Without bloom filter, blocksize: 12582912 386 393 7 259.1 3.9 1.0X +With bloom filter, blocksize: 12582912 316 322 5 316.2 3.2 1.2X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 391 398 6 255.9 3.9 1.0X -With bloom filter, blocksize: 16777216 351 358 9 285.2 3.5 1.1X +Without bloom filter, blocksize: 16777216 390 395 6 256.6 3.9 1.0X +With bloom 
filter, blocksize: 16777216 299 305 4 334.1 3.0 1.3X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 398 402 3 251.4 4.0 1.0X -With bloom filter, blocksize: 33554432 401 409 7 249.3 4.0 1.0X +Without bloom filter, blocksize: 33554432 397 409 10 252.0 4.0 1.0X +With bloom filter, blocksize: 33554432 583 640 39 171.7 5.8 0.7X diff --git a/sql/core/benchmarks/BloomFilterBenchmark-results.txt b/sql/core/benchmarks/BloomFilterBenchmark-results.txt index cb1be863cb651..7dfcdecded143 100644 --- a/sql/core/benchmarks/BloomFilterBenchmark-results.txt +++ b/sql/core/benchmarks/BloomFilterBenchmark-results.txt @@ -2,191 +2,191 @@ ORC Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 7507 7658 213 13.3 75.1 1.0X -With bloom filter 9532 9564 44 10.5 95.3 0.8X +Without bloom filter 7751 7823 102 12.9 77.5 1.0X +With bloom filter 9924 9966 59 10.1 99.2 0.8X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 834 888 78 119.8 8.3 1.0X -With bloom filter, blocksize: 2097152 626 647 17 159.9 6.3 1.3X +Without bloom filter, blocksize: 2097152 882 896 20 113.3 8.8 1.0X +With bloom filter, blocksize: 2097152 589 597 8 169.7 5.9 1.5X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 811 817 6 123.4 8.1 1.0X -With bloom filter, blocksize: 4194304 535 541 6 187.1 5.3 1.5X +Without bloom filter, blocksize: 4194304 817 823 5 122.4 8.2 1.0X +With bloom filter, blocksize: 4194304 524 
534 10 191.0 5.2 1.6X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 788 809 18 126.9 7.9 1.0X -With bloom filter, blocksize: 6291456 513 531 20 195.0 5.1 1.5X +Without bloom filter, blocksize: 6291456 821 828 9 121.7 8.2 1.0X +With bloom filter, blocksize: 6291456 516 531 10 193.7 5.2 1.6X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 774 777 4 129.3 7.7 1.0X -With bloom filter, blocksize: 8388608 519 526 5 192.5 5.2 1.5X +Without bloom filter, blocksize: 8388608 791 811 24 126.5 7.9 1.0X +With bloom filter, blocksize: 8388608 531 566 27 188.5 5.3 1.5X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 1273 1282 13 78.6 12.7 1.0X -With bloom filter, blocksize: 12582912 761 771 9 131.4 7.6 1.7X +Without bloom filter, blocksize: 12582912 851 861 15 117.5 8.5 1.0X +With bloom filter, blocksize: 12582912 500 513 9 199.9 5.0 1.7X ================================================================================================ ORC Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 1232 1234 2 81.1 12.3 1.0X -With bloom filter, blocksize: 16777216 756 764 8 132.3 7.6 1.6X +Without bloom filter, blocksize: 16777216 811 819 8 123.2 8.1 1.0X +With bloom filter, blocksize: 16777216 502 516 9 199.0 5.0 1.6X ================================================================================================ ORC Read 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 1239 1249 15 80.7 12.4 1.0X -With bloom filter, blocksize: 33554432 760 785 32 131.6 7.6 1.6X +Without bloom filter, blocksize: 33554432 819 843 21 122.1 8.2 1.0X +With bloom filter, blocksize: 33554432 512 517 6 195.3 5.1 1.6X ================================================================================================ Parquet Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter 10496 10555 84 9.5 105.0 1.0X -With bloom filter 14059 14124 92 7.1 140.6 0.7X +Without bloom filter 11467 11609 202 8.7 114.7 1.0X +With bloom filter 14502 14626 176 6.9 145.0 0.8X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 2097152 439 474 38 227.9 4.4 1.0X -With bloom filter, blocksize: 2097152 148 162 11 677.5 1.5 3.0X +Without bloom filter, blocksize: 2097152 430 450 14 232.6 4.3 1.0X +With bloom filter, blocksize: 2097152 146 158 9 684.2 1.5 2.9X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 4194304 414 421 8 241.6 4.1 1.0X -With bloom filter, blocksize: 4194304 112 120 9 895.8 1.1 3.7X +Without bloom filter, blocksize: 4194304 410 414 3 243.8 4.1 1.0X +With bloom filter, blocksize: 4194304 103 109 4 968.2 1.0 4.0X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 
6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 6291456 406 410 3 246.0 4.1 1.0X -With bloom filter, blocksize: 6291456 138 147 6 722.6 1.4 2.9X +Without bloom filter, blocksize: 6291456 409 414 5 244.5 4.1 1.0X +With bloom filter, blocksize: 6291456 130 139 7 772.2 1.3 3.2X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without bloom filter, blocksize: 8388608 404 421 12 247.3 4.0 1.0X -With bloom filter, blocksize: 8388608 222 232 6 451.4 2.2 1.8X +Without bloom filter, blocksize: 8388608 413 417 4 242.3 4.1 1.0X +With bloom filter, blocksize: 8388608 179 191 13 559.3 1.8 2.3X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 12582912 407 410 3 245.4 4.1 1.0X -With bloom filter, blocksize: 12582912 266 277 10 376.2 2.7 1.5X +Without bloom filter, blocksize: 12582912 412 418 5 242.7 4.1 1.0X +With bloom filter, blocksize: 12582912 346 351 3 288.8 3.5 1.2X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 16777216 408 417 19 245.1 4.1 1.0X -With bloom filter, blocksize: 16777216 434 458 20 230.5 4.3 0.9X +Without bloom filter, blocksize: 16777216 416 424 12 240.3 4.2 1.0X +With bloom filter, blocksize: 16777216 327 336 7 306.2 3.3 1.3X ================================================================================================ Parquet Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read a row from 100M rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------- -Without bloom filter, blocksize: 33554432 494 506 10 202.6 4.9 1.0X -With bloom filter, blocksize: 33554432 421 456 55 237.6 4.2 1.2X +Without bloom filter, blocksize: 33554432 423 427 3 236.7 4.2 1.0X +With bloom filter, blocksize: 33554432 683 695 11 146.5 6.8 0.6X diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt index 32cc37491423e..a260bc0396455 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-jdk21-results.txt @@ -2,69 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1744 1797 75 9.0 110.9 1.0X -Output Single Double Column 1753 1775 31 9.0 111.4 1.0X -Output Int and String Column 4384 4389 8 3.6 278.7 0.4X -Output Partitions 2948 3021 103 5.3 187.4 0.6X -Output Buckets 4110 4219 154 3.8 261.3 0.4X +Output Single Int Column 1732 1745 19 9.1 110.1 1.0X +Output Single Double Column 1754 1758 7 9.0 111.5 1.0X +Output Int and String Column 4309 4363 76 3.7 273.9 0.4X +Output Partitions 3252 3350 139 4.8 206.8 0.5X +Output Buckets 4487 4575 124 3.5 285.3 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1605 1640 50 9.8 102.0 1.0X -Output Single Double Column 1709 1726 25 9.2 108.6 0.9X -Output Int and String Column 4570 4587 24 3.4 290.5 0.4X -Output Partitions 2943 2960 23 5.3 187.1 0.5X -Output Buckets 3816 3851 50 4.1 242.6 0.4X +Output Single Int Column 1938 1978 55 8.1 123.2 1.0X +Output Single Double Column 1762 1769 10 8.9 112.0 1.1X +Output Int and String Column 4920 4932 17 3.2 312.8 0.4X +Output Partitions 3385 3389 7 4.6 215.2 0.6X +Output Buckets 4528 4538 14 3.5 287.9 0.4X ================================================================================================ ORC writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 932 946 21 16.9 59.3 1.0X -Output Single Double Column 1571 1577 8 10.0 99.9 0.6X -Output Int and String Column 3621 3693 103 4.3 230.2 0.3X -Output Partitions 2301 2303 2 6.8 146.3 0.4X -Output Buckets 3033 3037 5 5.2 192.9 
0.3X +Output Single Int Column 1137 1142 7 13.8 72.3 1.0X +Output Single Double Column 1700 1705 6 9.3 108.1 0.7X +Output Int and String Column 4028 4096 97 3.9 256.1 0.3X +Output Partitions 2562 2582 28 6.1 162.9 0.4X +Output Buckets 3524 3530 9 4.5 224.1 0.3X ================================================================================================ JSON writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1606 1607 2 9.8 102.1 1.0X -Output Single Double Column 2363 2367 6 6.7 150.2 0.7X -Output Int and String Column 4054 4062 12 3.9 257.7 0.4X -Output Partitions 2924 2976 74 5.4 185.9 0.5X -Output Buckets 3826 3830 7 4.1 243.2 0.4X +Output Single Int Column 1618 1645 37 9.7 102.9 1.0X +Output Single Double Column 2398 2399 1 6.6 152.5 0.7X +Output Int and String Column 3766 3778 17 4.2 239.5 0.4X +Output Partitions 3162 3164 3 5.0 201.0 0.5X +Output Buckets 4015 4028 18 3.9 255.3 0.4X ================================================================================================ CSV writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 3863 3909 65 4.1 245.6 1.0X -Output Single Double Column 4411 4471 85 3.6 280.4 0.9X -Output Int and String Column 6697 6702 7 2.3 425.8 0.6X -Output Partitions 5281 5298 24 3.0 335.8 0.7X -Output Buckets 6902 6903 2 2.3 438.8 0.6X +Output Single Int Column 3985 3993 11 3.9 253.4 1.0X +Output Single Double Column 4148 4210 88 3.8 263.7 1.0X +Output Int and String Column 6728 6741 18 2.3 427.8 0.6X +Output Partitions 5431 5447 23 2.9 345.3 0.7X +Output Buckets 6927 6942 22 2.3 440.4 0.6X diff --git a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt index a63258d40ee57..e43b3b53dfb25 100644 --- a/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt +++ b/sql/core/benchmarks/BuiltInDataSourceWriteBenchmark-results.txt @@ -2,69 +2,69 @@ Parquet writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_1_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1657 1701 63 9.5 105.3 1.0X -Output Single Double Column 1766 1786 28 8.9 112.3 0.9X -Output Int and String Column 4365 4375 15 3.6 277.5 0.4X -Output Partitions 3042 3082 57 5.2 193.4 0.5X 
-Output Buckets 4252 4277 35 3.7 270.4 0.4X +Output Single Int Column 1813 1881 96 8.7 115.3 1.0X +Output Single Double Column 1976 1977 1 8.0 125.6 0.9X +Output Int and String Column 4403 4438 50 3.6 279.9 0.4X +Output Partitions 3388 3421 46 4.6 215.4 0.5X +Output Buckets 4670 4680 15 3.4 296.9 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet(PARQUET_2_0) writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1666 1667 2 9.4 105.9 1.0X -Output Single Double Column 1686 1689 5 9.3 107.2 1.0X -Output Int and String Column 4778 4845 94 3.3 303.8 0.3X -Output Partitions 3067 3068 2 5.1 195.0 0.5X -Output Buckets 4045 4082 51 3.9 257.2 0.4X +Output Single Int Column 1903 1926 33 8.3 121.0 1.0X +Output Single Double Column 1998 1998 0 7.9 127.0 1.0X +Output Int and String Column 4916 4936 29 3.2 312.6 0.4X +Output Partitions 3366 3375 13 4.7 214.0 0.6X +Output Buckets 4560 4583 33 3.4 289.9 0.4X ================================================================================================ ORC writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor ORC writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 980 983 4 16.1 62.3 1.0X -Output Single Double Column 1564 1573 12 10.1 99.5 0.6X -Output Int and String Column 3639 3673 47 4.3 231.4 0.3X -Output Partitions 2345 2349 6 6.7 149.1 0.4X -Output Buckets 3579 3579 0 4.4 227.5 0.3X +Output Single Int Column 1034 1039 7 15.2 65.8 1.0X +Output Single Double Column 1687 1691 7 9.3 107.2 0.6X +Output Int and String Column 3941 3955 20 4.0 250.6 0.3X +Output Partitions 2553 2674 172 6.2 162.3 0.4X +Output Buckets 3544 3548 6 4.4 225.3 0.3X ================================================================================================ JSON writer benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor JSON writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 1600 1600 0 9.8 101.7 1.0X -Output Single Double Column 2347 2353 9 6.7 149.2 0.7X -Output Int and String Column 3992 4010 25 3.9 253.8 0.4X -Output Partitions 3035 3048 18 5.2 193.0 0.5X -Output Buckets 3985 3996 15 3.9 253.4 0.4X +Output Single Int Column 1669 1686 24 9.4 106.1 1.0X +Output Single Double Column 2342 2369 37 6.7 148.9 0.7X +Output Int and String Column 3776 3805 42 4.2 240.0 0.4X +Output Partitions 3060 3064 7 5.1 194.5 0.5X +Output Buckets 4009 4052 60 3.9 254.9 0.4X ================================================================================================ CSV writer benchmark 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor CSV writer benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Output Single Int Column 3633 3640 11 4.3 231.0 1.0X -Output Single Double Column 4154 4164 15 3.8 264.1 0.9X -Output Int and String Column 6469 6477 10 2.4 411.3 0.6X -Output Partitions 5158 5164 8 3.0 328.0 0.7X -Output Buckets 6467 6477 15 2.4 411.1 0.6X +Output Single Int Column 3877 3889 18 4.1 246.5 1.0X +Output Single Double Column 4079 4086 10 3.9 259.3 1.0X +Output Int and String Column 6266 6269 4 2.5 398.4 0.6X +Output Partitions 5432 5438 8 2.9 345.4 0.7X +Output Buckets 6528 6530 4 2.4 415.0 0.6X diff --git a/sql/core/benchmarks/ByteArrayBenchmark-jdk21-results.txt b/sql/core/benchmarks/ByteArrayBenchmark-jdk21-results.txt index 4f1571f639984..7fe68e003db73 100644 --- a/sql/core/benchmarks/ByteArrayBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ByteArrayBenchmark-jdk21-results.txt @@ -2,26 +2,26 @@ byte array comparisons ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Byte Array compareTo: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -2-7 byte 254 258 2 257.9 3.9 1.0X -8-16 byte 408 443 46 160.8 6.2 0.6X -16-32 byte 407 408 1 161.2 6.2 0.6X -512-1024 byte 545 548 1 120.2 8.3 0.5X -512 byte slow 1524 1554 22 43.0 23.3 0.2X -2-7 byte 313 313 1 209.5 4.8 0.8X +2-7 byte 254 258 2 257.7 3.9 1.0X +8-16 byte 386 408 28 170.0 5.9 0.7X +16-32 byte 384 386 1 170.5 5.9 0.7X +512-1024 byte 518 521 3 126.5 7.9 0.5X +512 byte slow 1530 1555 22 42.8 23.4 0.2X +2-7 byte 313 314 1 209.2 4.8 0.8X ================================================================================================ byte array equals ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Byte Array equals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Byte Array equals 516 518 1 310.0 3.2 1.0X +Byte Array equals 517 518 1 309.7 3.2 1.0X diff --git a/sql/core/benchmarks/ByteArrayBenchmark-results.txt b/sql/core/benchmarks/ByteArrayBenchmark-results.txt index ae09ecd0f50e7..028b1ea55b5d6 100644 --- a/sql/core/benchmarks/ByteArrayBenchmark-results.txt +++ b/sql/core/benchmarks/ByteArrayBenchmark-results.txt @@ -2,26 +2,26 @@ byte array comparisons ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Byte Array compareTo: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -2-7 byte 258 259 1 254.0 3.9 1.0X -8-16 byte 432 463 22 151.7 6.6 0.6X -16-32 byte 477 479 1 137.4 7.3 0.5X -512-1024 byte 606 610 2 108.2 9.2 0.4X -512 byte slow 1493 1502 9 43.9 22.8 0.2X -2-7 byte 276 276 1 237.6 4.2 0.9X +2-7 byte 259 260 1 253.4 3.9 1.0X +8-16 byte 411 445 24 159.4 6.3 0.6X +16-32 byte 458 461 5 143.0 7.0 0.6X +512-1024 byte 587 591 3 111.7 9.0 0.4X +512 byte slow 1496 1507 9 43.8 22.8 0.2X +2-7 byte 276 277 1 237.4 4.2 0.9X ================================================================================================ byte array equals ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Byte Array equals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Byte Array equals 518 521 5 309.1 3.2 1.0X +Byte Array equals 523 524 4 306.2 3.3 1.0X diff --git a/sql/core/benchmarks/CSVBenchmark-jdk21-results.txt b/sql/core/benchmarks/CSVBenchmark-jdk21-results.txt index 7c0f30840b7b0..b37d2fa3060e7 100644 --- a/sql/core/benchmarks/CSVBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CSVBenchmark-jdk21-results.txt @@ -2,69 +2,69 @@ Benchmark to measure CSV read/write performance ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Parsing quoted values: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -One quoted string 23926 24027 88 0.0 478511.1 1.0X +One quoted string 23353 23432 75 0.0 467067.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Wide rows with 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 1000 columns 57625 57971 589 0.0 57624.8 1.0X -Select 100 columns 23109 23250 123 0.0 23109.2 2.5X -Select one column 19951 20034 74 0.1 19951.3 2.9X -count() 3690 3911 361 0.3 3689.7 15.6X -Select 100 columns, one bad input field 33099 33184 113 0.0 33099.2 1.7X -Select 100 columns, corrupt record field 36824 36966 141 0.0 36824.4 1.6X +Select 1000 columns 56825 57244 679 0.0 56825.1 1.0X +Select 100 columns 20482 20568 86 0.0 20481.7 2.8X +Select one column 16968 17000 36 0.1 16967.7 3.3X +count() 3366 3378 11 0.3 3366.4 16.9X +Select 100 columns, one bad input field 28347 28379 30 0.0 28346.6 2.0X +Select 100 columns, corrupt record field 32401 32450 42 0.0 32401.2 1.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Count a dataset with 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Select 10 columns + count() 11170 11203 49 0.9 1117.0 1.0X -Select 1 column + count() 7659 7674 13 1.3 765.9 1.5X -count() 1712 1718 5 5.8 171.2 6.5X +Select 10 columns + count() 11174 11195 18 0.9 1117.4 1.0X +Select 1 column + count() 7666 7694 24 1.3 766.6 1.5X +count() 2042 2048 5 4.9 204.2 5.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 855 860 5 11.7 85.5 1.0X -to_csv(timestamp) 5694 5705 15 1.8 569.4 0.2X -write timestamps to files 6203 6210 7 1.6 620.3 0.1X -Create a dataset of dates 945 959 12 10.6 94.5 0.9X -to_csv(date) 3955 3958 3 2.5 395.5 0.2X -write dates to files 4158 4175 26 2.4 415.8 0.2X +Create a dataset of timestamps 854 882 27 11.7 85.4 1.0X +to_csv(timestamp) 6166 6174 13 1.6 616.6 0.1X +write timestamps to files 6480 6575 158 1.5 648.0 0.1X +Create a dataset of dates 948 949 1 10.6 94.8 0.9X +to_csv(date) 4471 4474 3 2.2 447.1 0.2X +write dates to files 4599 4616 15 2.2 459.9 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 1143 1232 150 8.7 114.3 1.0X -read timestamps from files 11824 11858 33 0.8 1182.4 0.1X -infer timestamps from files 23198 23221 20 0.4 2319.8 0.0X -read date text from files 1046 1051 5 9.6 104.6 1.1X -read date from files 10592 10603 10 0.9 1059.2 0.1X -infer date from files 21896 21934 52 0.5 2189.6 0.1X -timestamp strings 1422 1423 2 7.0 142.2 0.8X -parse timestamps from Dataset[String] 13380 13423 38 0.7 1338.0 0.1X -infer timestamps from Dataset[String] 24873 24906 33 0.4 2487.3 0.0X -date strings 1796 1801 4 5.6 179.6 0.6X -parse dates from Dataset[String] 12513 12563 45 0.8 1251.3 0.1X -from_csv(timestamp) 12067 12103 40 0.8 1206.7 0.1X -from_csv(date) 11803 11816 12 0.8 1180.3 0.1X -infer error timestamps from Dataset[String] with default format 15522 15627 105 0.6 1552.2 0.1X -infer error timestamps from Dataset[String] with user-provided format 15493 15561 61 0.6 1549.3 0.1X -infer error timestamps from Dataset[String] with legacy format 15507 15571 73 0.6 1550.7 0.1X +read timestamp text from files 1200 1213 12 8.3 120.0 1.0X +read timestamps from files 11576 11601 22 0.9 1157.6 0.1X +infer timestamps from files 23234 23253 16 0.4 2323.4 0.1X +read date text from files 1115 1162 44 9.0 111.5 1.1X +read date from files 10978 11006 43 0.9 1097.8 0.1X +infer date from files 22588 22604 13 0.4 2258.8 0.1X +timestamp strings 1224 1236 21 8.2 122.4 1.0X +parse timestamps from Dataset[String] 13566 13595 41 0.7 1356.6 0.1X +infer timestamps from Dataset[String] 25057 25094 36 0.4 2505.7 0.0X +date strings 1618 1626 7 6.2 161.8 0.7X +parse dates from Dataset[String] 12784 12816 34 0.8 1278.4 0.1X +from_csv(timestamp) 12008 12088 69 0.8 1200.8 0.1X +from_csv(date) 11930 
11938 12 0.8 1193.0 0.1X +infer error timestamps from Dataset[String] with default format 14366 14394 35 0.7 1436.6 0.1X +infer error timestamps from Dataset[String] with user-provided format 14380 14412 52 0.7 1438.0 0.1X +infer error timestamps from Dataset[String] with legacy format 14439 14453 21 0.7 1443.9 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 4097 4114 16 0.0 40974.1 1.0X -pushdown disabled 4043 4059 15 0.0 40426.6 1.0X -w/ filters 742 747 5 0.1 7417.0 5.5X +w/o filters 4302 4383 137 0.0 43020.6 1.0X +pushdown disabled 4206 4220 13 0.0 42058.8 1.0X +w/ filters 776 784 10 0.1 7756.3 5.5X diff --git a/sql/core/benchmarks/CSVBenchmark-results.txt b/sql/core/benchmarks/CSVBenchmark-results.txt index 9d941fa7b4cc8..522e164f80c8c 100644 --- a/sql/core/benchmarks/CSVBenchmark-results.txt +++ b/sql/core/benchmarks/CSVBenchmark-results.txt @@ -2,69 +2,69 @@ Benchmark to measure CSV read/write performance ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Parsing quoted values: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -One quoted string 25479 25496 15 0.0 509588.9 1.0X +One quoted string 24681 24724 74 0.0 493616.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Wide rows with 1000 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 1000 columns 53810 54394 906 0.0 53810.4 1.0X -Select 100 columns 22994 23062 77 0.0 22994.3 2.3X -Select one column 19875 20001 126 0.1 19875.1 2.7X -count() 4243 4309 90 0.2 4243.2 12.7X -Select 100 columns, one bad input field 35226 35392 146 0.0 35226.2 1.5X -Select 100 columns, corrupt record field 39131 39211 71 0.0 39130.6 1.4X +Select 1000 columns 55362 55719 576 0.0 55361.6 1.0X +Select 100 columns 22947 22975 36 0.0 22946.7 2.4X +Select one column 19695 19714 18 0.1 19694.7 2.8X +count() 3474 3514 54 0.3 3473.8 15.9X +Select 100 columns, one bad input field 32366 32417 47 0.0 32365.6 1.7X +Select 100 columns, corrupt record field 35921 35986 77 0.0 35921.3 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Count a dataset with 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns + count() 9294 9305 15 1.1 929.4 1.0X -Select 1 column + count() 6665 6689 29 1.5 666.5 1.4X -count() 1882 1892 12 5.3 188.2 4.9X +Select 10 columns + count() 9523 9537 15 1.1 952.3 1.0X +Select 1 column + count() 6868 6883 13 1.5 686.8 
1.4X +count() 1820 1836 20 5.5 182.0 5.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 916 926 13 10.9 91.6 1.0X -to_csv(timestamp) 7008 7013 4 1.4 700.8 0.1X -write timestamps to files 7146 7159 14 1.4 714.6 0.1X -Create a dataset of dates 1163 1168 5 8.6 116.3 0.8X -to_csv(date) 4703 4715 12 2.1 470.3 0.2X -write dates to files 4524 4532 8 2.2 452.4 0.2X +Create a dataset of timestamps 899 912 12 11.1 89.9 1.0X +to_csv(timestamp) 7355 7371 14 1.4 735.5 0.1X +write timestamps to files 7751 7761 12 1.3 775.1 0.1X +Create a dataset of dates 1171 1174 6 8.5 117.1 0.8X +to_csv(date) 5040 5044 4 2.0 504.0 0.2X +write dates to files 5277 5292 24 1.9 527.7 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 1269 1270 1 7.9 126.9 1.0X -read timestamps from files 10019 10032 16 1.0 1001.9 0.1X -infer timestamps from files 19853 19855 2 0.5 1985.3 0.1X -read date text from files 1142 1143 1 8.8 114.2 1.1X -read date from files 10157 10160 3 1.0 1015.7 0.1X -infer date from files 21005 21040 31 0.5 2100.5 0.1X -timestamp strings 1306 1312 9 7.7 130.6 1.0X -parse timestamps from Dataset[String] 11618 11712 85 0.9 1161.8 0.1X -infer timestamps from Dataset[String] 21545 21613 105 0.5 2154.5 0.1X -date strings 1741 1742 1 5.7 174.1 0.7X -parse dates from Dataset[String] 12115 12187 63 0.8 1211.5 0.1X -from_csv(timestamp) 10162 10220 71 1.0 1016.2 0.1X -from_csv(date) 11185 11296 177 0.9 1118.5 0.1X -infer error timestamps from Dataset[String] with default format 12461 12582 107 0.8 1246.1 0.1X -infer error timestamps from Dataset[String] with user-provided format 12575 12592 16 0.8 1257.5 0.1X -infer error timestamps from Dataset[String] with legacy format 12598 12609 13 0.8 1259.8 0.1X +read timestamp text from files 1245 1251 7 8.0 124.5 1.0X +read timestamps from files 10059 10074 23 1.0 1005.9 0.1X +infer timestamps from files 20189 20223 36 0.5 2018.9 0.1X +read date text from files 1151 1167 24 8.7 115.1 1.1X +read date from files 10547 10568 25 0.9 1054.7 0.1X +infer date from files 21527 21540 11 0.5 2152.7 0.1X +timestamp strings 1355 1364 15 7.4 135.5 0.9X +parse timestamps from Dataset[String] 11522 11553 28 0.9 1152.2 0.1X +infer timestamps from Dataset[String] 21195 21203 10 0.5 2119.5 0.1X +date strings 1785 1788 5 5.6 178.5 0.7X +parse dates from Dataset[String] 12245 12288 44 0.8 1224.5 0.1X +from_csv(timestamp) 10102 10144 51 1.0 1010.2 0.1X +from_csv(date) 11329 11353 29 0.9 1132.9 0.1X +infer error timestamps from Dataset[String] with default format 12067 12091 36 0.8 1206.7 0.1X +infer error timestamps from Dataset[String] with user-provided format 12077 12093 24 0.8 1207.7 0.1X +infer error timestamps from Dataset[String] with legacy format 12047 12076 26 0.8 1204.7 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 4243 4254 13 0.0 42431.9 1.0X -pushdown disabled 4271 4316 51 0.0 42714.3 1.0X -w/ filters 735 743 8 0.1 7354.2 5.8X +w/o filters 4119 4139 17 0.0 41191.2 1.0X +pushdown disabled 4092 4110 16 0.0 40922.3 1.0X +w/ filters 691 702 13 0.1 6911.5 6.0X diff --git a/sql/core/benchmarks/CharVarcharBenchmark-jdk21-results.txt b/sql/core/benchmarks/CharVarcharBenchmark-jdk21-results.txt index 347255e96cb78..25b685baf20d1 100644 --- a/sql/core/benchmarks/CharVarcharBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CharVarcharBenchmark-jdk21-results.txt @@ -2,121 +2,121 @@ Char Varchar Write Side Perf w/o Tailing Spaces ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 5 5848 6125 383 6.8 146.2 1.0X -write char with length 5 9155 9177 21 4.4 228.9 0.6X -write varchar with length 5 6536 6634 100 6.1 163.4 0.9X +write string with length 5 6814 6942 128 5.9 170.4 1.0X +write char with length 5 9886 9963 67 4.0 247.1 0.7X +write varchar with length 5 7603 7633 27 5.3 190.1 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 10 3012 3029 26 6.6 150.6 1.0X -write char with length 10 5692 5714 19 3.5 284.6 0.5X -write varchar with length 10 3263 3277 23 6.1 163.1 0.9X +write string with length 10 3542 3558 21 5.6 177.1 1.0X +write char with length 10 6351 6465 100 3.1 317.5 0.6X +write varchar with length 10 3772 3776 4 5.3 188.6 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 20 1540 1559 18 6.5 154.0 1.0X -write char with length 20 4242 4252 12 2.4 424.2 0.4X -write varchar with length 20 1697 1709 13 5.9 169.7 0.9X +write string with length 20 1810 1841 42 5.5 181.0 1.0X +write char with length 20 4390 4411 18 2.3 439.0 0.4X +write varchar with length 20 2008 2023 19 5.0 200.8 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 40: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -write string with length 40 857 865 10 5.8 171.3 1.0X -write char with length 40 3436 3449 16 1.5 687.3 0.2X -write varchar with length 40 907 912 8 5.5 181.4 0.9X +write string with length 40 1061 1068 6 4.7 212.3 1.0X +write char with length 40 3566 3568 2 1.4 713.3 0.3X +write varchar with length 40 1042 1052 9 4.8 208.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 60: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 60 585 589 6 5.7 175.4 1.0X -write char with length 60 3126 3133 13 1.1 937.7 0.2X -write varchar with length 60 627 637 9 5.3 188.1 0.9X +write string with length 60 683 689 5 4.9 204.9 1.0X +write char with length 60 3179 3188 7 1.0 953.7 0.2X +write varchar with length 60 741 753 11 4.5 222.4 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 80: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 80 450 455 5 5.6 179.8 1.0X -write char with length 80 2967 2973 7 0.8 1186.9 0.2X -write varchar with length 80 496 498 3 5.0 198.5 0.9X +write string with length 80 577 590 20 4.3 230.9 1.0X +write char with length 80 3064 3071 11 0.8 1225.5 0.2X +write varchar with length 80 554 560 7 4.5 221.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 100: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 100 374 380 6 5.4 186.8 1.0X -write char with length 100 2934 2941 6 0.7 1466.9 0.1X -write varchar with length 100 407 423 14 4.9 203.7 0.9X +write string with length 100 472 491 16 4.2 235.9 1.0X +write char with length 100 2972 2975 5 0.7 1485.8 0.2X +write varchar with length 100 479 485 5 4.2 239.6 1.0X ================================================================================================ Char Varchar Write Side Perf w/ Tailing Spaces ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 5 8686 8701 16 4.6 217.2 1.0X -write char with length 5 10316 10357 63 3.9 257.9 0.8X -write varchar with length 5 10300 10324 36 3.9 257.5 0.8X +write string with length 5 10481 10507 33 3.8 262.0 1.0X +write char with length 5 11773 11799 33 3.4 294.3 0.9X +write varchar with length 5 11851 11879 28 3.4 296.3 0.9X -OpenJDK 64-Bit Server VM 
21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 10 4575 4589 21 4.4 228.7 1.0X -write char with length 10 6856 6868 14 2.9 342.8 0.7X -write varchar with length 10 6752 6759 9 3.0 337.6 0.7X +write string with length 10 5211 5215 4 3.8 260.6 1.0X +write char with length 10 7437 7455 16 2.7 371.8 0.7X +write varchar with length 10 7284 7301 22 2.7 364.2 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 20 2895 2900 5 3.5 289.5 1.0X -write char with length 20 5061 5073 11 2.0 506.1 0.6X -write varchar with length 20 4975 4980 4 2.0 497.5 0.6X +write string with length 20 3315 3339 23 3.0 331.5 1.0X +write char with length 20 5353 5358 8 1.9 535.3 0.6X +write varchar with length 20 5318 5322 4 1.9 531.8 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 40: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 40 2010 2018 7 2.5 402.0 1.0X -write char with length 40 4130 4136 8 1.2 826.1 0.5X -write varchar with length 40 4068 4075 6 1.2 813.7 0.5X +write string with length 40 2229 2231 2 2.2 445.8 1.0X +write char with length 40 4283 4287 3 1.2 856.6 0.5X +write varchar with length 40 4269 4270 1 1.2 853.8 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 60: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 60 1758 1768 9 1.9 527.3 1.0X -write char with length 60 3836 3839 3 0.9 1150.9 0.5X -write varchar with length 60 3780 3790 12 0.9 1134.1 0.5X +write string with length 60 1839 1845 5 1.8 551.8 1.0X +write char with length 60 3958 3961 4 0.8 1187.3 0.5X +write varchar with length 60 3895 3900 4 0.9 1168.5 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 80: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 80 1588 1590 2 1.6 635.3 1.0X -write char with length 80 3698 3722 24 0.7 1479.3 0.4X -write varchar with length 80 3644 3649 6 0.7 1457.5 0.4X +write string with length 80 1646 1650 4 1.5 658.4 1.0X +write char with length 80 3789 3790 2 0.7 1515.4 0.4X +write varchar with length 80 3704 3705 1 0.7 1481.5 0.4X -OpenJDK 64-Bit Server 
VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 100: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 100 1475 1485 12 1.4 737.4 1.0X -write char with length 100 3669 3677 12 0.5 1834.4 0.4X -write varchar with length 100 3542 3546 4 0.6 1770.9 0.4X +write string with length 100 1543 1547 4 1.3 771.6 1.0X +write char with length 100 3663 3676 21 0.5 1831.6 0.4X +write varchar with length 100 3611 3612 2 0.6 1805.3 0.4X diff --git a/sql/core/benchmarks/CharVarcharBenchmark-results.txt b/sql/core/benchmarks/CharVarcharBenchmark-results.txt index 7a41f9deacae2..e3d4e34db1489 100644 --- a/sql/core/benchmarks/CharVarcharBenchmark-results.txt +++ b/sql/core/benchmarks/CharVarcharBenchmark-results.txt @@ -2,121 +2,121 @@ Char Varchar Write Side Perf w/o Tailing Spaces ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 5 6314 6378 96 6.3 157.9 1.0X -write char with length 5 9041 9108 96 4.4 226.0 0.7X -write varchar with length 5 6868 6878 14 5.8 171.7 0.9X +write string with length 5 6296 6549 352 6.4 157.4 1.0X +write char with length 5 9227 9375 209 4.3 230.7 0.7X +write varchar with length 5 6706 6722 15 6.0 167.7 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 10 3209 3214 6 6.2 160.5 1.0X -write char with length 10 5946 5992 76 3.4 297.3 0.5X -write varchar with length 10 3496 3514 18 5.7 174.8 0.9X +write string with length 10 3218 3245 23 6.2 160.9 1.0X +write char with length 10 6113 6165 55 3.3 305.6 0.5X +write varchar with length 10 3604 3621 19 5.5 180.2 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 20 1701 1704 4 5.9 170.1 1.0X -write char with length 20 4598 4600 2 2.2 459.8 0.4X -write varchar with length 20 1851 1854 4 5.4 185.1 0.9X +write string with length 20 1700 1717 25 5.9 170.0 1.0X +write char with length 20 4275 4283 9 2.3 427.5 0.4X +write varchar with length 20 1737 1743 7 5.8 173.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 40: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -write string with length 40 864 868 5 5.8 172.7 1.0X -write char with length 40 3482 3494 15 1.4 696.4 0.2X -write varchar with length 40 983 1002 17 5.1 196.7 0.9X +write string with length 40 915 916 1 5.5 183.0 1.0X +write char with length 40 3430 3456 22 1.5 686.0 0.3X +write varchar with length 40 958 969 17 5.2 191.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 60: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 60 625 629 7 5.3 187.4 1.0X -write char with length 60 3217 3227 15 1.0 965.1 0.2X -write varchar with length 60 693 699 8 4.8 208.0 0.9X +write string with length 60 614 629 15 5.4 184.1 1.0X +write char with length 60 3176 3189 12 1.0 952.8 0.2X +write varchar with length 60 643 648 6 5.2 192.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 80: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 80 483 486 2 5.2 193.3 1.0X -write char with length 80 3032 3036 4 0.8 1212.8 0.2X -write varchar with length 80 505 512 5 4.9 202.1 1.0X +write string with length 80 500 503 2 5.0 200.2 1.0X +write char with length 80 3003 3004 2 0.8 1201.1 0.2X +write varchar with length 80 507 517 11 4.9 202.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 100: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 100 409 430 30 4.9 204.3 1.0X -write char with length 100 2915 2945 41 0.7 1457.4 0.1X -write varchar with length 100 423 430 11 4.7 211.7 1.0X +write string with length 100 388 404 14 5.2 193.9 1.0X +write char with length 100 2927 2932 6 0.7 1463.6 0.1X +write varchar with length 100 422 431 10 4.7 211.1 0.9X ================================================================================================ Char Varchar Write Side Perf w/ Tailing Spaces ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 5 8099 8131 28 4.9 202.5 1.0X -write char with length 5 10486 10511 23 3.8 262.1 0.8X -write varchar with length 5 10656 10711 68 3.8 266.4 0.8X +write string with length 5 8732 8757 29 4.6 218.3 1.0X +write char with length 5 10464 10517 46 3.8 261.6 0.8X +write varchar with length 5 10783 10834 50 3.7 269.6 0.8X -OpenJDK 64-Bit Server 
VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 10 4834 4844 14 4.1 241.7 1.0X -write char with length 10 6757 6795 44 3.0 337.8 0.7X -write varchar with length 10 6773 6816 38 3.0 338.6 0.7X +write string with length 10 4713 4733 21 4.2 235.6 1.0X +write char with length 10 6723 6746 37 3.0 336.2 0.7X +write varchar with length 10 6682 6694 11 3.0 334.1 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 20 3028 3041 13 3.3 302.8 1.0X -write char with length 20 4762 4766 4 2.1 476.2 0.6X -write varchar with length 20 4813 4817 6 2.1 481.3 0.6X +write string with length 20 3067 3081 12 3.3 306.7 1.0X +write char with length 20 4638 4654 17 2.2 463.8 0.7X +write varchar with length 20 4698 4705 7 2.1 469.8 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 40: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 40 1958 1966 11 2.6 391.6 1.0X -write char with length 40 3628 3635 8 1.4 725.6 0.5X -write varchar with length 40 3658 3667 14 1.4 731.6 0.5X +write string with length 40 1967 1971 4 2.5 393.4 1.0X +write char with length 40 3615 3628 20 1.4 723.0 0.5X +write varchar with length 40 3603 3609 5 1.4 720.7 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 60: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 60 1692 1697 5 2.0 507.7 1.0X -write char with length 60 3372 3389 16 1.0 1011.6 0.5X -write varchar with length 60 3361 3367 7 1.0 1008.3 0.5X +write string with length 60 1651 1658 8 2.0 495.3 1.0X +write char with length 60 3360 3370 10 1.0 1007.9 0.5X +write varchar with length 60 3305 3307 2 1.0 991.6 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 80: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 80 1533 1549 24 1.6 613.1 1.0X -write char with length 80 3168 3176 10 0.8 1267.0 0.5X -write varchar with length 80 3240 3245 9 0.8 1296.0 0.5X +write string with length 80 1470 1475 5 1.7 587.9 1.0X +write char with length 80 3158 3168 10 0.8 1263.1 0.5X +write varchar with length 80 3091 3101 15 0.8 1236.4 0.5X 
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write with length 100: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -write string with length 100 1436 1438 4 1.4 717.9 1.0X -write char with length 100 3200 3206 5 0.6 1599.9 0.4X -write varchar with length 100 3154 3178 28 0.6 1577.2 0.5X +write string with length 100 1348 1358 10 1.5 673.8 1.0X +write char with length 100 3034 3040 7 0.7 1517.0 0.4X +write varchar with length 100 3029 3033 5 0.7 1514.3 0.4X diff --git a/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt b/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt index 32cbbc74e9112..56b1523344a72 100644 --- a/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CollationBenchmark-jdk21-results.txt @@ -1,27 +1,54 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY_LCASE 6910 6912 3 0.0 69099.7 1.0X -UNICODE 4367 4368 1 0.0 43669.6 1.6X -UTF8_BINARY 4361 4364 4 0.0 43606.5 1.6X -UNICODE_CI 46480 46526 66 0.0 464795.7 0.1X +UTF8_BINARY 1352 1352 1 0.1 13516.4 1.0X +UTF8_LCASE 4678 4693 21 0.0 46778.6 0.3X +UNICODE 17213 17223 13 0.0 172131.7 0.1X +UNICODE_CI 17101 17133 46 0.0 171009.6 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY_LCASE 6522 6526 4 0.0 65223.9 1.0X -UNICODE 45792 45797 7 0.0 457922.3 0.1X -UTF8_BINARY 7092 7112 29 0.0 70921.7 0.9X -UNICODE_CI 47548 47564 22 0.0 475476.7 0.1X +UTF8_BINARY 1775 1775 0 0.1 17749.8 1.0X +UTF8_LCASE 5342 5367 35 0.0 53423.0 0.3X +UNICODE 17011 17020 12 0.0 170110.1 0.1X +UNICODE_CI 16734 16760 37 0.0 167338.2 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY_LCASE 11716 11716 1 0.0 117157.9 1.0X -UNICODE 180133 180137 5 0.0 1801332.1 0.1X -UTF8_BINARY 10476 10477 1 0.0 104757.4 1.1X -UNICODE_CI 148171 148190 28 0.0 1481705.6 0.1X +UTF8_BINARY 7253 7256 4 0.0 72529.6 1.0X +UTF8_LCASE 16634 16676 59 0.0 166342.5 0.4X +UNICODE 66146 66163 23 0.0 661461.1 0.1X +UNICODE_CI 54563 54606 62 0.0 545625.5 0.1X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 6570 6577 10 0.0 65696.6 1.0X +UTF8_LCASE 120073 120137 91 0.0 1200726.4 0.1X +UNICODE 364027 364291 374 0.0 3640267.9 0.0X +UNICODE_CI 421444 422138 981 0.0 4214438.7 0.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 6573 6596 32 0.0 65733.4 1.0X +UTF8_LCASE 60284 60293 12 0.0 602844.4 0.1X +UNICODE 363685 364220 757 0.0 3636848.4 0.0X +UNICODE_CI 422761 423000 337 0.0 4227611.0 0.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 6507 6508 1 0.0 65068.3 1.0X +UTF8_LCASE 59098 59118 28 0.0 590983.6 0.1X +UNICODE 378437 378790 499 0.0 3784367.4 0.0X +UNICODE_CI 433987 434294 435 0.0 4339869.2 0.0X diff --git a/sql/core/benchmarks/CollationBenchmark-results.txt b/sql/core/benchmarks/CollationBenchmark-results.txt index 4028b0f005a37..09847bbcaa260 100644 --- a/sql/core/benchmarks/CollationBenchmark-results.txt +++ b/sql/core/benchmarks/CollationBenchmark-results.txt @@ -1,27 +1,54 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY_LCASE 7692 7731 55 0.0 76919.2 1.0X -UNICODE 4378 4379 0 0.0 43784.6 1.8X -UTF8_BINARY 4382 4396 19 0.0 43821.6 1.8X -UNICODE_CI 48344 48360 23 0.0 483436.5 0.2X +UTF8_BINARY 1372 1374 3 0.1 13718.1 1.0X +UTF8_LCASE 6311 6311 0 0.0 63106.7 0.2X +UNICODE 19273 19300 37 0.0 192731.3 0.1X +UNICODE_CI 18991 18998 10 0.0 189906.3 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY_LCASE 9819 9820 0 0.0 98194.9 1.0X -UNICODE 49507 49518 17 0.0 495066.2 0.2X -UTF8_BINARY 7354 7365 17 0.0 73536.3 1.3X -UNICODE_CI 52149 52163 20 0.0 521489.4 0.2X +UTF8_BINARY 1725 1726 2 0.1 17249.0 1.0X +UTF8_LCASE 5806 5828 31 0.0 58061.5 0.3X +UNICODE 19105 19111 8 0.0 191051.5 0.1X +UNICODE_CI 18991 18996 7 0.0 189913.3 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY_LCASE 18110 18127 24 0.0 
181103.9 1.0X -UNICODE 171375 171435 85 0.0 1713752.3 0.1X -UTF8_BINARY 14012 14030 26 0.0 140116.7 1.3X -UNICODE_CI 153847 153901 76 0.0 1538471.1 0.1X +UTF8_BINARY 3019 3021 3 0.0 30194.7 1.0X +UTF8_LCASE 19437 19439 3 0.0 194372.6 0.2X +UNICODE 63550 63568 25 0.0 635504.3 0.0X +UNICODE_CI 57839 57866 39 0.0 578385.0 0.1X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 7022 7023 2 0.0 70216.8 1.0X +UTF8_LCASE 118836 118887 72 0.0 1188364.9 0.1X +UNICODE 376381 376546 234 0.0 3763807.3 0.0X +UNICODE_CI 427858 427981 174 0.0 4278584.6 0.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 6720 6723 5 0.0 67197.9 1.0X +UTF8_LCASE 67132 67177 63 0.0 671324.6 0.1X +UNICODE 368690 369292 852 0.0 3686899.6 0.0X +UNICODE_CI 431481 431583 144 0.0 4314814.9 0.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 7097 7099 3 0.0 70970.8 1.0X +UTF8_LCASE 57327 57351 35 0.0 573265.4 0.1X +UNICODE 375819 376473 924 0.0 3758191.8 0.0X +UNICODE_CI 445167 445212 64 0.0 4451666.7 0.0X diff --git a/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt b/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt index dc68b747203fa..a7d2afb3fffc9 100644 --- a/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CollationNonASCIIBenchmark-jdk21-results.txt @@ -1,27 +1,54 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY_LCASE 18244 18258 20 0.0 456096.4 1.0X -UNICODE 498 498 0 0.1 12440.3 36.7X -UTF8_BINARY 499 500 1 0.1 12467.7 36.6X -UNICODE_CI 13429 13443 19 0.0 335725.4 1.4X +UTF8_BINARY 171 171 1 0.2 4268.9 1.0X +UTF8_LCASE 6540 6549 12 0.0 163512.0 0.0X +UNICODE 5195 5196 2 0.0 129870.7 0.0X +UNICODE_CI 5129 5134 7 0.0 128222.9 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY_LCASE 18377 18399 31 0.0 459430.5 1.0X -UNICODE 14238 14240 3 0.0 355957.4 1.3X -UTF8_BINARY 975 976 1 0.0 24371.3 18.9X -UNICODE_CI 13819 13826 10 0.0 345482.6 1.3X +UTF8_BINARY 316 316 1 0.1 
7899.6 1.0X +UTF8_LCASE 6525 6528 3 0.0 163136.8 0.0X +UNICODE 5186 5201 21 0.0 129654.8 0.1X +UNICODE_CI 5119 5120 0 0.0 127985.4 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY_LCASE 9183 9230 67 0.0 229564.0 1.0X -UNICODE 38937 38952 22 0.0 973421.3 0.2X -UTF8_BINARY 1376 1376 0 0.0 34397.5 6.7X -UNICODE_CI 32881 32882 1 0.0 822027.4 0.3X +UTF8_BINARY 384 386 2 0.1 9604.9 1.0X +UTF8_LCASE 3250 3255 7 0.0 81245.9 0.1X +UNICODE 14666 14668 3 0.0 366645.0 0.0X +UNICODE_CI 11055 11073 25 0.0 276376.4 0.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 1439 1440 1 0.0 35973.9 1.0X +UTF8_LCASE 33643 33664 30 0.0 841072.8 0.0X +UNICODE 69901 69945 62 0.0 1747527.1 0.0X +UNICODE_CI 78298 78390 129 0.0 1957458.9 0.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 1019 1020 1 0.0 25470.5 1.0X +UTF8_LCASE 18811 18833 32 0.0 470272.7 0.1X +UNICODE 67687 67758 101 0.0 1692181.1 0.0X +UNICODE_CI 77039 77148 154 0.0 1925975.7 0.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 1020 1031 16 0.0 25493.0 1.0X +UTF8_LCASE 18574 18583 12 0.0 464350.7 0.1X +UNICODE 73937 74335 563 0.0 1848436.0 0.0X +UNICODE_CI 82022 82303 397 0.0 2050548.1 0.0X diff --git a/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt b/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt index bb58968764c7a..06d2e883cf788 100644 --- a/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt +++ b/sql/core/benchmarks/CollationNonASCIIBenchmark-results.txt @@ -1,27 +1,54 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - equalsFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY_LCASE 17881 17885 6 0.0 447017.7 1.0X -UNICODE 493 495 2 0.1 12328.9 36.3X -UTF8_BINARY 493 494 1 0.1 12331.4 36.3X -UNICODE_CI 13731 13737 8 0.0 343284.6 1.3X +UTF8_BINARY 133 133 0 0.3 3318.0 1.0X +UTF8_LCASE 9483 9494 16 0.0 237079.3 0.0X +UNICODE 5963 5965 3 0.0 149081.4 0.0X +UNICODE_CI 5661 5663 3 0.0 141518.7 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 
17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - compareFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -UTF8_BINARY_LCASE 18041 18047 8 0.0 451030.2 1.0X -UNICODE 14023 14047 34 0.0 350573.9 1.3X -UTF8_BINARY 1387 1397 14 0.0 34680.4 13.0X -UNICODE_CI 14232 14242 14 0.0 355808.4 1.3X +UTF8_BINARY 560 561 1 0.1 14008.3 1.0X +UTF8_LCASE 7535 7550 21 0.0 188384.6 0.1X +UNICODE 5868 5873 8 0.0 146691.2 0.1X +UNICODE_CI 5838 5839 1 0.0 145945.7 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1017-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor collation unit benchmarks - hashFunction: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UTF8_BINARY_LCASE 10494 10499 6 0.0 262360.0 1.0X -UNICODE 40410 40422 17 0.0 1010261.8 0.3X -UTF8_BINARY 2035 2035 1 0.0 50877.8 5.2X -UNICODE_CI 31470 31493 32 0.0 786752.4 0.3X +UTF8_BINARY 420 420 0 0.1 10489.3 1.0X +UTF8_LCASE 3524 3529 7 0.0 88101.6 0.1X +UNICODE 15630 15659 40 0.0 390755.8 0.0X +UNICODE_CI 12822 12838 22 0.0 320560.2 0.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - contains: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 1269 1270 2 0.0 31731.7 1.0X +UTF8_LCASE 34422 34448 37 0.0 860554.5 0.0X +UNICODE 66641 66780 196 0.0 1666024.0 0.0X +UNICODE_CI 76047 76084 52 0.0 1901185.9 0.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - startsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 1114 1115 2 0.0 27839.1 1.0X +UTF8_LCASE 19656 19659 4 0.0 491401.9 0.1X +UNICODE 65990 66056 93 0.0 1649760.3 0.0X +UNICODE_CI 75764 75877 161 0.0 1894091.8 0.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +collation unit benchmarks - endsWith: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF8_BINARY 1137 1137 0 0.0 28427.5 1.0X +UTF8_LCASE 18734 18765 44 0.0 468347.9 0.1X +UNICODE 74629 74646 24 0.0 1865724.7 0.0X +UNICODE_CI 83838 83888 70 0.0 2095948.2 0.0X diff --git a/sql/core/benchmarks/ColumnarBatchBenchmark-jdk21-results.txt b/sql/core/benchmarks/ColumnarBatchBenchmark-jdk21-results.txt index e45790a0d92d7..daa0fff3b464b 100644 --- a/sql/core/benchmarks/ColumnarBatchBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ColumnarBatchBenchmark-jdk21-results.txt @@ -2,58 +2,58 @@ Int Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Int Read/Write: Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Java Array 122 123 0 2679.5 0.4 1.0X -ByteBuffer Unsafe 198 205 5 1658.3 0.6 0.6X -ByteBuffer API 426 426 0 768.6 1.3 0.3X -DirectByteBuffer 476 477 1 687.9 1.5 0.3X -Unsafe Buffer 153 154 1 2135.2 0.5 0.8X -Column(on heap) 123 124 2 2668.8 0.4 1.0X -Column(off heap) 154 154 0 2133.7 0.5 0.8X -Column(off heap direct) 154 154 1 2126.9 0.5 0.8X -UnsafeRow (on heap) 460 460 0 712.8 1.4 0.3X -UnsafeRow (off heap) 293 294 1 1119.3 0.9 0.4X -Column On Heap Append 336 337 1 974.7 1.0 0.4X +Java Array 118 119 1 2768.7 0.4 1.0X +ByteBuffer Unsafe 198 208 4 1655.2 0.6 0.6X +ByteBuffer API 388 394 7 845.5 1.2 0.3X +DirectByteBuffer 494 496 5 663.8 1.5 0.2X +Unsafe Buffer 154 154 0 2129.1 0.5 0.8X +Column(on heap) 123 123 0 2663.7 0.4 1.0X +Column(off heap) 154 154 0 2129.4 0.5 0.8X +Column(off heap direct) 154 154 0 2124.4 0.5 0.8X +UnsafeRow (on heap) 460 461 1 712.8 1.4 0.3X +UnsafeRow (off heap) 293 294 1 1116.6 0.9 0.4X +Column On Heap Append 336 337 2 975.8 1.0 0.4X ================================================================================================ Boolean Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Boolean Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Bitset 408 409 1 821.5 1.2 1.0X -Byte Array 249 249 1 1350.1 0.7 1.6X +Bitset 420 421 2 799.8 1.3 1.0X +Byte Array 236 236 0 1421.4 0.7 1.8X ================================================================================================ String Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -On Heap 122 128 5 134.2 7.5 1.0X -Off Heap 348 365 14 47.1 21.2 0.4X +On Heap 126 127 1 130.0 7.7 1.0X +Off Heap 427 463 23 38.4 26.0 0.3X ================================================================================================ Array Vector Read ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Array Vector Read: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -On Heap Read Size Only 87 88 0 1874.5 0.5 1.0X -Off Heap Read Size Only 411 411 0 398.9 2.5 0.2X -On Heap Read Elements 2465 2465 0 66.5 15.0 0.0X -Off Heap Read Elements 2529 2529 0 64.8 15.4 0.0X +On Heap Read Size Only 87 88 1 1873.1 0.5 1.0X +Off Heap Read Size Only 306 306 1 535.8 1.9 0.3X +On Heap Read Elements 2464 2464 1 66.5 15.0 0.0X +Off Heap Read 
Elements 2518 2519 2 65.1 15.4 0.0X diff --git a/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt b/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt index 5d1109fb52915..cd00e0bbd71e9 100644 --- a/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt +++ b/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt @@ -2,58 +2,58 @@ Int Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Int Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Java Array 174 174 1 1883.6 0.5 1.0X -ByteBuffer Unsafe 283 284 1 1156.3 0.9 0.6X -ByteBuffer API 496 496 1 660.8 1.5 0.4X -DirectByteBuffer 485 486 1 675.4 1.5 0.4X -Unsafe Buffer 161 163 0 2032.4 0.5 1.1X -Column(on heap) 177 177 0 1855.5 0.5 1.0X -Column(off heap) 162 162 0 2022.7 0.5 1.1X -Column(off heap direct) 156 156 1 2105.3 0.5 1.1X -UnsafeRow (on heap) 447 448 1 732.6 1.4 0.4X -UnsafeRow (off heap) 313 316 2 1045.3 1.0 0.6X -Column On Heap Append 361 362 0 906.5 1.1 0.5X +Java Array 174 174 1 1884.2 0.5 1.0X +ByteBuffer Unsafe 283 284 0 1157.5 0.9 0.6X +ByteBuffer API 499 500 1 656.1 1.5 0.3X +DirectByteBuffer 404 405 1 812.1 1.2 0.4X +Unsafe Buffer 161 163 1 2039.7 0.5 1.1X +Column(on heap) 177 177 0 1855.7 0.5 1.0X +Column(off heap) 162 162 0 2025.9 0.5 1.1X +Column(off heap direct) 155 156 1 2108.0 0.5 1.1X +UnsafeRow (on heap) 447 448 0 732.3 1.4 0.4X +UnsafeRow (off heap) 312 316 2 1049.5 1.0 0.6X +Column On Heap Append 361 388 65 907.1 1.1 0.5X ================================================================================================ Boolean Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Boolean Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Bitset 512 513 2 655.1 1.5 1.0X -Byte Array 320 321 1 1049.6 1.0 1.6X +Bitset 516 518 2 650.3 1.5 1.0X +Byte Array 314 315 2 1067.8 0.9 1.6X ================================================================================================ String Read/Write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String Read/Write: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -On Heap 137 137 0 119.6 8.4 1.0X -Off Heap 365 398 21 44.9 22.3 0.4X +On Heap 193 242 21 84.7 11.8 1.0X +Off Heap 340 375 21 48.2 20.7 0.6X ================================================================================================ Array Vector Read ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server 
VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Array Vector Read: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -On Heap Read Size Only 89 90 1 1835.7 0.5 1.0X -Off Heap Read Size Only 84 85 0 1941.2 0.5 1.1X -On Heap Read Elements 2295 2297 2 71.4 14.0 0.0X -Off Heap Read Elements 2681 2683 2 61.1 16.4 0.0X +On Heap Read Size Only 89 90 1 1831.4 0.5 1.0X +Off Heap Read Size Only 85 85 1 1937.9 0.5 1.1X +On Heap Read Elements 2298 2302 6 71.3 14.0 0.0X +Off Heap Read Elements 2615 2617 3 62.7 16.0 0.0X diff --git a/sql/core/benchmarks/CompressionSchemeBenchmark-jdk21-results.txt b/sql/core/benchmarks/CompressionSchemeBenchmark-jdk21-results.txt index b33de7d152e80..df10c315b6871 100644 --- a/sql/core/benchmarks/CompressionSchemeBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/CompressionSchemeBenchmark-jdk21-results.txt @@ -2,136 +2,136 @@ Compression Scheme Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 1 1 0 45933.0 0.0 1.0X -RunLengthEncoding(2.498) 931 933 2 72.1 13.9 0.0X -BooleanBitSet(0.125) 295 296 0 227.3 4.4 0.0X +PassThrough(1.000) 1 1 0 49573.5 0.0 1.0X +RunLengthEncoding(2.501) 931 957 41 72.1 13.9 0.0X +BooleanBitSet(0.125) 295 296 1 227.6 4.4 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor BOOLEAN Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 210 211 1 319.9 3.1 1.0X -RunLengthEncoding 590 591 1 113.8 8.8 0.4X -BooleanBitSet 683 684 1 98.3 10.2 0.3X +PassThrough 210 212 2 319.6 3.1 1.0X +RunLengthEncoding 593 594 0 113.1 8.8 0.4X +BooleanBitSet 681 684 3 98.5 10.1 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 23353.5 0.0 1.0X -RunLengthEncoding(1.495) 1058 1060 3 63.4 15.8 0.0X +PassThrough(1.000) 3 3 0 24654.7 0.0 1.0X +RunLengthEncoding(1.502) 1208 1209 1 55.6 18.0 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SHORT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 607 609 2 110.5 9.0 1.0X -RunLengthEncoding 999 1000 2 67.2 14.9 0.6X +PassThrough 894 895 2 75.1 13.3 1.0X +RunLengthEncoding 1053 1055 2 63.7 15.7 0.8X -OpenJDK 64-Bit 
Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 23461.0 0.0 1.0X -RunLengthEncoding(1.996) 1126 1132 8 59.6 16.8 0.0X +PassThrough(1.000) 3 3 0 23243.9 0.0 1.0X +RunLengthEncoding(1.994) 1224 1226 2 54.8 18.2 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SHORT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 566 596 20 118.6 8.4 1.0X -RunLengthEncoding 947 949 2 70.9 14.1 0.6X +PassThrough 894 896 4 75.1 13.3 1.0X +RunLengthEncoding 1000 1001 1 67.1 14.9 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 6 6 0 11431.5 0.1 1.0X -RunLengthEncoding(1.001) 1029 1030 1 65.2 15.3 0.0X -DictionaryEncoding(0.500) 339 340 1 197.8 5.1 0.0X -IntDelta(0.250) 109 110 0 613.5 1.6 0.1X +PassThrough(1.000) 6 6 0 11188.7 0.1 1.0X +RunLengthEncoding(1.003) 1245 1246 2 53.9 18.6 0.0X +DictionaryEncoding(0.500) 1376 1379 4 48.8 20.5 0.0X +IntDelta(0.250) 110 113 9 612.8 1.6 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor INT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 652 654 2 102.9 9.7 1.0X -RunLengthEncoding 1115 1117 3 60.2 16.6 0.6X -DictionaryEncoding 541 544 5 124.0 8.1 1.2X -IntDelta 498 498 0 134.8 7.4 1.3X +PassThrough 644 645 2 104.2 9.6 1.0X +RunLengthEncoding 1155 1156 0 58.1 17.2 0.6X +DictionaryEncoding 523 525 3 128.4 7.8 1.2X +IntDelta 499 501 3 134.5 7.4 1.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 6 6 0 11179.4 0.1 1.0X -RunLengthEncoding(1.344) 1060 1061 0 63.3 15.8 0.0X -DictionaryEncoding(0.501) 342 343 1 196.0 5.1 0.0X -IntDelta(0.250) 111 111 1 607.1 1.6 0.1X +PassThrough(1.000) 6 6 0 11231.0 0.1 1.0X +RunLengthEncoding(1.337) 1087 1104 24 61.7 16.2 0.0X +DictionaryEncoding(0.501) 565 567 3 118.9 8.4 0.0X +IntDelta(0.250) 109 110 1 613.0 1.6 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor INT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 712 714 2 94.3 10.6 1.0X -RunLengthEncoding 1214 1214 1 55.3 18.1 0.6X -DictionaryEncoding 667 670 3 100.6 9.9 1.1X -IntDelta 519 547 53 129.3 7.7 1.4X +PassThrough 706 709 4 95.0 10.5 1.0X +RunLengthEncoding 1132 1136 5 59.3 16.9 0.6X +DictionaryEncoding 659 663 4 101.8 9.8 1.1X +IntDelta 624 626 4 107.5 9.3 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 18 18 0 3788.5 0.3 1.0X -RunLengthEncoding(0.753) 1059 1059 0 63.4 15.8 0.0X -DictionaryEncoding(0.250) 397 398 2 169.0 5.9 0.0X -LongDelta(0.125) 110 110 0 609.4 1.6 0.2X +PassThrough(1.000) 13 13 0 5066.3 0.2 1.0X +RunLengthEncoding(0.744) 1050 1056 8 63.9 15.6 0.0X +DictionaryEncoding(0.250) 593 595 2 113.1 8.8 0.0X +LongDelta(0.125) 110 111 1 608.6 1.6 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LONG Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 770 771 1 87.2 11.5 1.0X -RunLengthEncoding 1220 1220 0 55.0 18.2 0.6X -DictionaryEncoding 710 712 2 94.6 10.6 1.1X -LongDelta 540 541 1 124.3 8.0 1.4X +PassThrough 646 647 2 103.9 9.6 1.0X +RunLengthEncoding 1203 1205 3 55.8 17.9 0.5X +DictionaryEncoding 724 725 2 92.7 10.8 0.9X +LongDelta 524 525 1 128.1 7.8 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 18 18 0 3782.6 0.3 1.0X -RunLengthEncoding(1.006) 1099 1100 1 61.1 16.4 0.0X -DictionaryEncoding(0.251) 397 398 1 169.1 5.9 0.0X -LongDelta(0.125) 111 111 1 603.8 1.7 0.2X +PassThrough(1.000) 13 13 0 5065.9 0.2 1.0X +RunLengthEncoding(1.002) 1099 1107 10 61.0 16.4 0.0X +DictionaryEncoding(0.251) 603 605 2 111.2 9.0 0.0X +LongDelta(0.125) 110 111 1 608.5 1.6 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LONG Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 770 771 2 87.2 11.5 1.0X -RunLengthEncoding 1215 1215 1 55.3 18.1 0.6X -DictionaryEncoding 710 712 2 94.5 10.6 1.1X -LongDelta 667 668 2 100.6 9.9 1.2X +PassThrough 750 754 4 89.5 11.2 1.0X +RunLengthEncoding 1213 1216 3 55.3 18.1 0.6X +DictionaryEncoding 722 725 3 92.9 10.8 1.0X +LongDelta 653 653 0 102.8 9.7 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure 
AMD EPYC 7763 64-Core Processor STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 24 27 1 2851.8 0.4 1.0X -RunLengthEncoding(0.892) 1611 1620 13 41.7 24.0 0.0X -DictionaryEncoding(0.167) 1953 1955 3 34.4 29.1 0.0X +PassThrough(1.000) 29 29 0 2332.2 0.4 1.0X +RunLengthEncoding(0.889) 1744 1745 1 38.5 26.0 0.0X +DictionaryEncoding(0.167) 1441 1443 2 46.6 21.5 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor STRING Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1421 1435 19 47.2 21.2 1.0X -RunLengthEncoding 2070 2071 2 32.4 30.9 0.7X -DictionaryEncoding 1782 1786 6 37.7 26.6 0.8X +PassThrough 1613 1614 1 41.6 24.0 1.0X +RunLengthEncoding 2107 2108 1 31.8 31.4 0.8X +DictionaryEncoding 1830 1832 3 36.7 27.3 0.9X diff --git a/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt b/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt index f58c63afc691d..ea810d3a26e36 100644 --- a/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt +++ b/sql/core/benchmarks/CompressionSchemeBenchmark-results.txt @@ -2,136 +2,136 @@ Compression Scheme Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor BOOLEAN Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 1 1 0 49430.1 0.0 1.0X -RunLengthEncoding(2.491) 895 899 5 75.0 13.3 0.0X -BooleanBitSet(0.125) 289 289 0 232.2 4.3 0.0X +PassThrough(1.000) 1 1 0 50902.6 0.0 1.0X +RunLengthEncoding(2.510) 897 898 2 74.8 13.4 0.0X +BooleanBitSet(0.125) 229 229 0 293.3 3.4 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor BOOLEAN Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 167 167 1 402.2 2.5 1.0X -RunLengthEncoding 560 561 1 119.9 8.3 0.3X -BooleanBitSet 660 660 1 101.7 9.8 0.3X +PassThrough 166 167 1 403.2 2.5 1.0X +RunLengthEncoding 534 535 2 125.7 8.0 0.3X +BooleanBitSet 659 663 2 101.8 9.8 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SHORT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 23350.8 0.0 1.0X -RunLengthEncoding(1.500) 1041 1042 2 64.5 15.5 0.0X +PassThrough(1.000) 3 3 0 23313.0 0.0 1.0X +RunLengthEncoding(1.503) 1142 1144 4 58.8 17.0 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 
17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SHORT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 644 646 2 104.2 9.6 1.0X -RunLengthEncoding 1047 1048 1 64.1 15.6 0.6X +PassThrough 812 868 62 82.7 12.1 1.0X +RunLengthEncoding 1094 1094 1 61.3 16.3 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SHORT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 3 3 0 23448.1 0.0 1.0X -RunLengthEncoding(2.007) 1074 1075 2 62.5 16.0 0.0X +PassThrough(1.000) 3 3 0 23874.9 0.0 1.0X +RunLengthEncoding(2.021) 1131 1133 3 59.4 16.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SHORT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 572 573 2 117.4 8.5 1.0X -RunLengthEncoding 946 947 1 70.9 14.1 0.6X +PassThrough 851 877 39 78.8 12.7 1.0X +RunLengthEncoding 1063 1068 6 63.1 15.8 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor INT Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 6 6 0 11484.4 0.1 1.0X -RunLengthEncoding(0.995) 994 996 2 67.5 14.8 0.0X -DictionaryEncoding(0.500) 384 386 1 174.6 5.7 0.0X -IntDelta(0.250) 110 110 1 612.4 1.6 0.1X +PassThrough(1.000) 6 6 0 11690.1 0.1 1.0X +RunLengthEncoding(1.001) 948 959 12 70.8 14.1 0.0X +DictionaryEncoding(0.500) 628 631 3 106.9 9.4 0.0X +IntDelta(0.250) 112 115 1 600.4 1.7 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor INT Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 644 647 4 104.2 9.6 1.0X -RunLengthEncoding 1197 1199 2 56.0 17.8 0.5X -DictionaryEncoding 522 527 5 128.5 7.8 1.2X -IntDelta 457 458 1 146.8 6.8 1.4X +PassThrough 632 639 7 106.2 9.4 1.0X +RunLengthEncoding 1053 1053 1 63.7 15.7 0.6X +DictionaryEncoding 502 506 4 133.7 7.5 1.3X +IntDelta 449 456 4 149.3 6.7 1.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor INT Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 6 6 0 11058.8 0.1 1.0X -RunLengthEncoding(1.335) 1028 1028 0 65.3 15.3 0.0X -DictionaryEncoding(0.501) 386 387 1 
173.9 5.7 0.0X -IntDelta(0.250) 110 110 1 612.7 1.6 0.1X +PassThrough(1.000) 6 6 0 11598.2 0.1 1.0X +RunLengthEncoding(1.336) 979 984 7 68.5 14.6 0.0X +DictionaryEncoding(0.501) 640 646 5 104.9 9.5 0.0X +IntDelta(0.250) 114 115 1 589.0 1.7 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor INT Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 728 729 2 92.2 10.8 1.0X -RunLengthEncoding 1144 1146 3 58.7 17.0 0.6X -DictionaryEncoding 542 545 5 123.7 8.1 1.3X -IntDelta 662 663 2 101.4 9.9 1.1X +PassThrough 764 766 2 87.8 11.4 1.0X +RunLengthEncoding 1142 1144 3 58.8 17.0 0.7X +DictionaryEncoding 671 679 7 100.0 10.0 1.1X +IntDelta 466 470 2 143.9 6.9 1.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LONG Encode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 13 13 0 5058.4 0.2 1.0X -RunLengthEncoding(0.751) 1037 1038 2 64.7 15.4 0.0X -DictionaryEncoding(0.250) 426 428 3 157.4 6.4 0.0X -LongDelta(0.125) 110 111 1 607.8 1.6 0.1X +PassThrough(1.000) 13 13 0 5217.0 0.2 1.0X +RunLengthEncoding(0.751) 990 990 1 67.8 14.7 0.0X +DictionaryEncoding(0.250) 615 616 2 109.2 9.2 0.0X +LongDelta(0.125) 108 110 1 622.0 1.6 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LONG Decode (Lower Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 753 760 6 89.1 11.2 1.0X -RunLengthEncoding 1208 1208 0 55.5 18.0 0.6X -DictionaryEncoding 772 774 3 86.9 11.5 1.0X -LongDelta 520 521 2 129.1 7.7 1.4X +PassThrough 740 759 16 90.7 11.0 1.0X +RunLengthEncoding 1169 1178 12 57.4 17.4 0.6X +DictionaryEncoding 757 763 7 88.7 11.3 1.0X +LongDelta 499 502 2 134.5 7.4 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LONG Encode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 13 13 0 5066.8 0.2 1.0X -RunLengthEncoding(1.002) 1060 1062 2 63.3 15.8 0.0X -DictionaryEncoding(0.251) 427 429 2 157.0 6.4 0.0X -LongDelta(0.125) 110 110 1 609.5 1.6 0.1X +PassThrough(1.000) 13 19 2 5062.3 0.2 1.0X +RunLengthEncoding(1.001) 1005 1008 4 66.8 15.0 0.0X +DictionaryEncoding(0.251) 612 613 1 109.7 9.1 0.0X +LongDelta(0.125) 106 110 1 634.4 1.6 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LONG Decode (Higher Skew): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -PassThrough 752 753 1 89.3 11.2 1.0X -RunLengthEncoding 1206 1207 0 55.6 18.0 0.6X -DictionaryEncoding 773 773 0 86.8 11.5 1.0X -LongDelta 662 687 41 101.4 9.9 1.1X +PassThrough 865 875 9 77.6 12.9 1.0X +RunLengthEncoding 1185 1188 4 56.6 17.7 0.7X +DictionaryEncoding 754 761 7 89.0 11.2 1.1X +LongDelta 660 667 7 101.8 9.8 1.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor STRING Encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough(1.000) 20 20 0 3386.0 0.3 1.0X -RunLengthEncoding(0.890) 1563 1570 11 42.9 23.3 0.0X -DictionaryEncoding(0.167) 1956 1958 2 34.3 29.1 0.0X +PassThrough(1.000) 27 27 0 2497.2 0.4 1.0X +RunLengthEncoding(0.888) 1584 1586 3 42.4 23.6 0.0X +DictionaryEncoding(0.167) 1597 1600 4 42.0 23.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor STRING Decode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -PassThrough 1131 1141 14 59.3 16.9 1.0X -RunLengthEncoding 1884 1887 4 35.6 28.1 0.6X -DictionaryEncoding 1705 1706 2 39.4 25.4 0.7X +PassThrough 1465 1466 1 45.8 21.8 1.0X +RunLengthEncoding 1894 1894 1 35.4 28.2 0.8X +DictionaryEncoding 1775 1776 2 37.8 26.4 0.8X diff --git a/sql/core/benchmarks/ConstantColumnVectorBenchmark-jdk21-results.txt b/sql/core/benchmarks/ConstantColumnVectorBenchmark-jdk21-results.txt index 4a39ed8843fbe..8c09e95988536 100644 --- a/sql/core/benchmarks/ConstantColumnVectorBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ConstantColumnVectorBenchmark-jdk21-results.txt @@ -1,280 +1,280 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 365173.7 0.0 1.0X -OnHeapColumnVector 2655 2664 12 154.3 6.5 0.0X -OffHeapColumnVector 4597 4611 19 89.1 11.2 0.0X +ConstantColumnVector 1 1 0 386171.0 0.0 1.0X +OnHeapColumnVector 2709 2711 3 151.2 6.6 0.0X +OffHeapColumnVector 5028 5031 4 81.5 12.3 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 432863.4 0.0 1.0X -OnHeapColumnVector 3271 3289 26 125.2 8.0 0.0X -OffHeapColumnVector 4300 4320 28 95.3 10.5 0.0X +ConstantColumnVector 1 1 0 382579.9 0.0 1.0X +OnHeapColumnVector 3353 3353 1 122.2 8.2 0.0X +OffHeapColumnVector 5136 5142 7 79.7 12.5 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure 
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 446014.9 0.0 1.0X -OnHeapColumnVector 3576 3581 8 114.5 8.7 0.0X -OffHeapColumnVector 4502 4512 13 91.0 11.0 0.0X +ConstantColumnVector 1 1 0 382916.1 0.0 1.0X +OnHeapColumnVector 3715 3715 1 110.3 9.1 0.0X +OffHeapColumnVector 5746 5747 2 71.3 14.0 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 445490.0 0.0 1.0X -OnHeapColumnVector 3539 3549 15 115.8 8.6 0.0X -OffHeapColumnVector 4221 4223 3 97.0 10.3 0.0X +ConstantColumnVector 1 1 0 382658.2 0.0 1.0X +OnHeapColumnVector 3514 3522 11 116.5 8.6 0.0X +OffHeapColumnVector 5138 5142 6 79.7 12.5 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 446005.7 0.0 1.0X -OnHeapColumnVector 3684 3693 12 111.2 9.0 0.0X -OffHeapColumnVector 4422 4422 1 92.6 10.8 0.0X +ConstantColumnVector 1 1 0 382564.9 0.0 1.0X +OnHeapColumnVector 3808 3810 3 107.6 9.3 0.0X +OffHeapColumnVector 5476 5481 7 74.8 13.4 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 411269.8 0.0 1.0X -OnHeapColumnVector 3895 3900 7 105.2 9.5 0.0X -OffHeapColumnVector 4402 4403 1 93.0 10.7 0.0X +ConstantColumnVector 1 1 0 382528.8 0.0 1.0X +OnHeapColumnVector 3875 3877 3 105.7 9.5 0.0X +OffHeapColumnVector 5487 5495 11 74.6 13.4 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 720547.0 0.0 1.0X -OnHeapColumnVector 15 16 0 27430.8 0.0 0.0X -OffHeapColumnVector 62 64 1 6642.4 0.2 0.0X +ConstantColumnVector 1 1 0 632701.2 0.0 1.0X +OnHeapColumnVector 16 16 0 25265.4 0.0 0.0X +OffHeapColumnVector 65 67 7 6265.1 0.2 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) 
Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 686246.6 0.0 1.0X -OnHeapColumnVector 31 32 1 13203.3 0.1 0.0X -OffHeapColumnVector 64 67 2 6377.9 0.2 0.0X +ConstantColumnVector 1 1 0 603950.7 0.0 1.0X +OnHeapColumnVector 33 33 0 12426.1 0.1 0.0X +OffHeapColumnVector 68 68 1 6051.2 0.2 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 496467.4 0.0 1.0X -OnHeapColumnVector 16 17 1 26196.4 0.0 0.1X -OffHeapColumnVector 122 125 2 3347.4 0.3 0.0X +ConstantColumnVector 1 1 0 491622.3 0.0 1.0X +OnHeapColumnVector 17 17 0 24658.2 0.0 0.1X +OffHeapColumnVector 127 128 0 3214.9 0.3 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 575876.2 0.0 1.0X -OnHeapColumnVector 34 36 3 12183.4 0.1 0.0X -OffHeapColumnVector 123 127 2 3342.3 0.3 0.0X +ConstantColumnVector 1 1 0 491621.7 0.0 1.0X +OnHeapColumnVector 34 34 3 12179.0 0.1 0.0X +OffHeapColumnVector 129 129 0 3181.8 0.3 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 94 97 2 4342.9 0.2 0.0X -OffHeapColumnVector 2065 2072 11 198.4 5.0 0.0X +ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X +OnHeapColumnVector 99 99 1 4150.4 0.2 0.0X +OffHeapColumnVector 1988 1997 12 206.0 4.9 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 94 96 2 4372.5 0.2 0.0X -OffHeapColumnVector 2033 2048 22 201.5 5.0 0.0X +ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X +OnHeapColumnVector 99 99 0 4149.0 0.2 0.0X +OffHeapColumnVector 2065 2065 1 198.4 5.0 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X 
-OnHeapColumnVector 93 95 1 4393.5 0.2 0.0X -OffHeapColumnVector 2031 2033 2 201.6 5.0 0.0X +ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X +OnHeapColumnVector 99 99 0 4150.1 0.2 0.0X +OffHeapColumnVector 1980 1983 5 206.9 4.8 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 94 98 1 4353.0 0.2 0.0X -OffHeapColumnVector 2083 2110 38 196.6 5.1 0.0X +ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X +OnHeapColumnVector 99 102 13 4152.9 0.2 0.0X +OffHeapColumnVector 1980 1981 2 206.8 4.8 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 96 98 1 4279.0 0.2 0.0X -OffHeapColumnVector 2086 2089 4 196.4 5.1 0.0X +ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X +OnHeapColumnVector 99 99 1 4152.0 0.2 0.0X +OffHeapColumnVector 1977 1978 2 207.2 4.8 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 99 99 0 4157.8 0.2 0.0X -OffHeapColumnVector 2048 2052 6 200.0 5.0 0.0X +ConstantColumnVector 0 0 0 409190809.2 0.0 1.0X +OnHeapColumnVector 99 99 1 4148.5 0.2 0.0X +OffHeapColumnVector 1973 1975 2 207.6 4.8 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 14395670.1 0.0 1.0X -OnHeapColumnVector 0 0 0 1601132.1 0.0 0.1X -OffHeapColumnVector 883 887 3 464.0 2.2 0.0X +ConstantColumnVector 0 0 0 53029518.4 0.0 1.0X +OnHeapColumnVector 0 0 0 1203831.3 0.0 0.0X +OffHeapColumnVector 889 889 1 461.0 2.2 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1904 1910 8 215.1 4.6 1.0X -OnHeapColumnVector 2039 2045 9 200.9 5.0 0.9X -OffHeapColumnVector 2580 2581 1 158.8 6.3 0.7X +ConstantColumnVector 1935 1935 0 211.7 4.7 1.0X +OnHeapColumnVector 2089 2097 11 196.0 5.1 0.9X 
+OffHeapColumnVector 2593 2594 1 157.9 6.3 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1822 1823 1 224.8 4.4 1.0X -OnHeapColumnVector 2071 2078 10 197.8 5.1 0.9X -OffHeapColumnVector 2588 2590 2 158.3 6.3 0.7X +ConstantColumnVector 2011 2011 1 203.7 4.9 1.0X +OnHeapColumnVector 2196 2199 4 186.5 5.4 0.9X +OffHeapColumnVector 2606 2623 25 157.2 6.4 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1960 1962 3 209.0 4.8 1.0X -OnHeapColumnVector 2079 2087 12 197.1 5.1 0.9X -OffHeapColumnVector 2724 2730 7 150.3 6.7 0.7X +ConstantColumnVector 2112 2112 1 194.0 5.2 1.0X +OnHeapColumnVector 2255 2257 2 181.6 5.5 0.9X +OffHeapColumnVector 2759 2792 45 148.4 6.7 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1920 1926 8 213.3 4.7 1.0X -OnHeapColumnVector 4208 4210 3 97.3 10.3 0.5X -OffHeapColumnVector 4045 4053 11 101.3 9.9 0.5X +ConstantColumnVector 1936 1941 8 211.6 4.7 1.0X +OnHeapColumnVector 4457 4459 2 91.9 10.9 0.4X +OffHeapColumnVector 3980 3982 4 102.9 9.7 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1908 1910 4 214.7 4.7 1.0X -OnHeapColumnVector 4214 4214 1 97.2 10.3 0.5X -OffHeapColumnVector 4065 4068 4 100.8 9.9 0.5X +ConstantColumnVector 1936 1936 1 211.6 4.7 1.0X +OnHeapColumnVector 4453 4459 9 92.0 10.9 0.4X +OffHeapColumnVector 3974 3974 0 103.1 9.7 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1919 1922 3 213.4 4.7 1.0X -OnHeapColumnVector 4209 4210 1 97.3 10.3 0.5X -OffHeapColumnVector 4036 4045 13 101.5 9.9 0.5X +ConstantColumnVector 1928 1930 3 212.4 4.7 1.0X +OnHeapColumnVector 4460 4462 3 91.8 10.9 0.4X +OffHeapColumnVector 3961 3966 8 103.4 9.7 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1933 1934 0 211.8 4.7 1.0X -OnHeapColumnVector 4185 4194 12 97.9 10.2 0.5X -OffHeapColumnVector 4077 4097 28 100.5 10.0 0.5X +ConstantColumnVector 1934 1938 5 211.8 4.7 1.0X +OnHeapColumnVector 4458 4462 6 91.9 10.9 0.4X +OffHeapColumnVector 3978 3980 3 103.0 9.7 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1922 1933 15 213.1 4.7 1.0X -OnHeapColumnVector 4195 4200 7 97.6 10.2 0.5X -OffHeapColumnVector 4079 4097 26 100.4 10.0 0.5X +ConstantColumnVector 1937 1938 2 211.5 4.7 1.0X +OnHeapColumnVector 4465 4467 3 91.7 10.9 0.4X +OffHeapColumnVector 3979 3983 5 102.9 9.7 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1934 1936 3 211.8 4.7 1.0X -OnHeapColumnVector 4212 4214 3 97.2 10.3 0.5X -OffHeapColumnVector 4064 4083 28 100.8 9.9 0.5X +ConstantColumnVector 1941 1944 4 211.0 4.7 1.0X +OnHeapColumnVector 4453 4455 2 92.0 10.9 0.4X +OffHeapColumnVector 3981 3982 1 102.9 9.7 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 751 755 3 545.5 1.8 1.0X -OnHeapColumnVector 888 888 0 461.2 2.2 0.8X -OffHeapColumnVector 885 886 1 463.0 2.2 0.8X +ConstantColumnVector 888 888 0 461.4 2.2 1.0X +OnHeapColumnVector 1020 1020 1 401.7 2.5 0.9X +OffHeapColumnVector 888 889 1 461.5 2.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 2816 2818 3 145.5 6.9 1.0X -OnHeapColumnVector 2959 2960 2 138.4 7.2 1.0X -OffHeapColumnVector 2950 2951 1 138.8 7.2 1.0X +ConstantColumnVector 2849 2849 0 143.8 7.0 1.0X +OnHeapColumnVector 2971 2974 5 137.9 7.3 1.0X +OffHeapColumnVector 2978 2979 1 137.5 7.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD 
EPYC 7763 64-Core Processor Test write and read with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 2837 2841 5 144.4 6.9 1.0X -OnHeapColumnVector 2960 2961 2 138.4 7.2 1.0X -OffHeapColumnVector 2954 2956 3 138.7 7.2 1.0X +ConstantColumnVector 2859 2865 8 143.3 7.0 1.0X +OnHeapColumnVector 3111 3112 1 131.6 7.6 0.9X +OffHeapColumnVector 2981 2982 2 137.4 7.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 2968 2979 15 138.0 7.2 1.0X -OnHeapColumnVector 3071 3083 16 133.4 7.5 1.0X -OffHeapColumnVector 3099 3101 3 132.2 7.6 1.0X +ConstantColumnVector 2363 2365 3 173.3 5.8 1.0X +OnHeapColumnVector 3130 3133 4 130.9 7.6 0.8X +OffHeapColumnVector 3127 3127 0 131.0 7.6 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with StringType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3602335.9 0.0 0.0X -OffHeapColumnVector 0 0 0 439957035.4 0.0 1.0X +ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X +OnHeapColumnVector 0 0 0 3321143.9 0.0 0.0X +OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3602335.9 0.0 0.0X -OffHeapColumnVector 0 0 0 439957035.4 0.0 1.0X +ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X +OnHeapColumnVector 0 0 0 3321143.9 0.0 0.0X +OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3602367.6 0.0 0.0X -OffHeapColumnVector 0 0 0 439957035.4 0.0 1.0X +ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X +OnHeapColumnVector 0 0 0 3321143.9 0.0 0.0X +OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3602335.9 0.0 0.0X -OffHeapColumnVector 0 0 0 439957035.4 0.0 1.0X +ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X +OnHeapColumnVector 0 0 0 3321143.9 0.0 0.0X +OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 439957035.4 0.0 1.0X -OnHeapColumnVector 0 0 0 3602367.6 0.0 0.0X -OffHeapColumnVector 0 0 0 439957035.4 0.0 1.0X +ConstantColumnVector 0 0 0 405143422.4 0.0 1.0X +OnHeapColumnVector 0 0 0 3321386.3 0.0 0.0X +OffHeapColumnVector 0 0 0 405143422.4 0.0 1.0X diff --git a/sql/core/benchmarks/ConstantColumnVectorBenchmark-results.txt b/sql/core/benchmarks/ConstantColumnVectorBenchmark-results.txt index 745e822b1b1db..71245ab989f80 100644 --- a/sql/core/benchmarks/ConstantColumnVectorBenchmark-results.txt +++ b/sql/core/benchmarks/ConstantColumnVectorBenchmark-results.txt @@ -1,280 +1,280 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 328565.8 0.0 1.0X -OnHeapColumnVector 2907 2907 0 140.9 7.1 0.0X -OffHeapColumnVector 3321 3326 7 123.4 8.1 0.0X +ConstantColumnVector 1 1 0 375313.9 0.0 1.0X +OnHeapColumnVector 2845 2855 14 144.0 6.9 0.0X +OffHeapColumnVector 3241 3246 7 126.4 7.9 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387374.0 0.0 1.0X -OnHeapColumnVector 3630 3632 3 112.8 8.9 0.0X -OffHeapColumnVector 4341 4341 0 94.3 10.6 0.0X +ConstantColumnVector 1 1 0 385212.6 0.0 1.0X +OnHeapColumnVector 3602 3608 9 113.7 8.8 0.0X +OffHeapColumnVector 4441 4442 1 92.2 10.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387407.0 0.0 1.0X -OnHeapColumnVector 3607 3608 1 113.5 8.8 0.0X -OffHeapColumnVector 4164 4166 3 98.4 10.2 0.0X +ConstantColumnVector 1 1 0 385394.2 0.0 1.0X +OnHeapColumnVector 3931 3932 3 104.2 9.6 0.0X +OffHeapColumnVector 4526 4527 1 90.5 11.1 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 
17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 337614.8 0.0 1.0X -OnHeapColumnVector 3563 3563 1 115.0 8.7 0.0X -OffHeapColumnVector 5135 5138 4 79.8 12.5 0.0X +ConstantColumnVector 1 1 0 385444.6 0.0 1.0X +OnHeapColumnVector 3625 3639 20 113.0 8.9 0.0X +OffHeapColumnVector 4792 4792 0 85.5 11.7 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387278.8 0.0 1.0X -OnHeapColumnVector 3562 3568 8 115.0 8.7 0.0X -OffHeapColumnVector 5469 5475 8 74.9 13.4 0.0X +ConstantColumnVector 1 1 0 385238.3 0.0 1.0X +OnHeapColumnVector 3706 3711 7 110.5 9.0 0.0X +OffHeapColumnVector 5015 5015 1 81.7 12.2 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 1 1 0 387345.1 0.0 1.0X -OnHeapColumnVector 3790 3794 6 108.1 9.3 0.0X -OffHeapColumnVector 5339 5340 1 76.7 13.0 0.0X +ConstantColumnVector 1 1 0 385509.9 0.0 1.0X +OnHeapColumnVector 4026 4051 35 101.7 9.8 0.0X +OffHeapColumnVector 5144 5166 30 79.6 12.6 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 699317.4 0.0 1.0X -OnHeapColumnVector 16 16 0 25569.9 0.0 0.0X -OffHeapColumnVector 65 65 0 6297.2 0.2 0.0X +ConstantColumnVector 1 1 0 699304.3 0.0 1.0X +OnHeapColumnVector 16 16 0 25324.1 0.0 0.0X +OffHeapColumnVector 66 66 0 6241.0 0.2 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 664350.5 0.0 1.0X -OnHeapColumnVector 34 34 0 12217.2 0.1 0.0X -OffHeapColumnVector 67 67 0 6090.0 0.2 0.0X +ConstantColumnVector 1 1 0 664338.7 0.0 1.0X +OnHeapColumnVector 34 35 0 11906.0 0.1 0.0X +OffHeapColumnVector 66 66 0 6201.7 0.2 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 577132.3 0.0 1.0X -OnHeapColumnVector 16 16 0 24993.9 0.0 0.0X -OffHeapColumnVector 127 127 0 3215.5 0.3 0.0X +ConstantColumnVector 1 1 0 553053.1 0.0 1.0X +OnHeapColumnVector 16 16 0 25324.9 0.0 0.0X +OffHeapColumnVector 127 127 0 3216.6 0.3 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1 1 0 553055.4 0.0 1.0X -OnHeapColumnVector 34 34 0 12093.7 0.1 0.0X -OffHeapColumnVector 128 129 0 3189.9 0.3 0.0X +ConstantColumnVector 1 1 0 553051.6 0.0 1.0X +OnHeapColumnVector 36 36 0 11516.6 0.1 0.0X +OffHeapColumnVector 128 129 0 3190.1 0.3 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 918 918 1 446.4 2.2 1.0X -OnHeapColumnVector 1925 1925 0 212.8 4.7 0.5X -OffHeapColumnVector 2933 2934 2 139.7 7.2 0.3X +ConstantColumnVector 693 698 9 591.4 1.7 1.0X +OnHeapColumnVector 1672 1673 0 244.9 4.1 0.4X +OffHeapColumnVector 3044 3046 2 134.5 7.4 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 784 785 1 522.6 1.9 1.0X -OnHeapColumnVector 3273 3273 0 125.2 8.0 0.2X -OffHeapColumnVector 2991 2992 1 136.9 7.3 0.3X +ConstantColumnVector 795 797 2 515.0 1.9 1.0X +OnHeapColumnVector 3428 3429 1 119.5 8.4 0.2X +OffHeapColumnVector 3089 3101 18 132.6 7.5 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 785 791 6 521.5 1.9 1.0X -OnHeapColumnVector 3407 3411 5 120.2 8.3 0.2X -OffHeapColumnVector 2992 2993 0 136.9 7.3 0.3X +ConstantColumnVector 793 795 3 516.4 1.9 1.0X +OnHeapColumnVector 3442 3443 2 119.0 8.4 0.2X +OffHeapColumnVector 3083 3085 4 132.9 7.5 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 785 789 6 522.1 1.9 1.0X 
-OnHeapColumnVector 3424 3426 3 119.6 8.4 0.2X -OffHeapColumnVector 3003 3004 2 136.4 7.3 0.3X +ConstantColumnVector 796 799 4 514.9 1.9 1.0X +OnHeapColumnVector 3460 3462 3 118.4 8.4 0.2X +OffHeapColumnVector 3073 3073 1 133.3 7.5 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 784 784 0 522.6 1.9 1.0X -OnHeapColumnVector 3397 3399 2 120.6 8.3 0.2X -OffHeapColumnVector 2999 2999 0 136.6 7.3 0.3X +ConstantColumnVector 795 796 1 515.1 1.9 1.0X +OnHeapColumnVector 3447 3447 0 118.8 8.4 0.2X +OffHeapColumnVector 3076 3084 12 133.2 7.5 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 785 787 1 521.5 1.9 1.0X -OnHeapColumnVector 3402 3406 5 120.4 8.3 0.2X -OffHeapColumnVector 2996 2999 4 136.7 7.3 0.3X +ConstantColumnVector 795 795 1 515.5 1.9 1.0X +OnHeapColumnVector 3453 3453 1 118.6 8.4 0.2X +OffHeapColumnVector 3084 3084 0 132.8 7.5 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 1811 1815 5 226.2 4.4 1.0X -OnHeapColumnVector 2077 2078 2 197.2 5.1 0.9X -OffHeapColumnVector 2575 2576 1 159.0 6.3 0.7X +ConstantColumnVector 0 0 0 6641264.7 0.0 1.0X +OnHeapColumnVector 0 0 0 1476254.1 0.0 0.2X +OffHeapColumnVector 762 786 39 537.5 1.9 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 2657186.6 0.0 1.0X -OnHeapColumnVector 0 0 0 949055.5 0.0 0.4X -OffHeapColumnVector 763 764 1 537.1 1.9 0.0X +ConstantColumnVector 0 0 0 3321170.8 0.0 1.0X +OnHeapColumnVector 0 0 0 1328632.1 0.0 0.4X +OffHeapColumnVector 762 762 0 537.2 1.9 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 2657186.6 0.0 1.0X -OnHeapColumnVector 0 1 0 949055.5 0.0 0.4X -OffHeapColumnVector 762 763 2 537.3 1.9 0.0X +ConstantColumnVector 0 0 0 2952306.1 0.0 1.0X +OnHeapColumnVector 0 1 0 1207849.8 0.0 0.4X +OffHeapColumnVector 762 
765 2 537.5 1.9 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test read with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1207529.4 0.0 1.0X -OnHeapColumnVector 1 1 0 738059.2 0.0 0.6X -OffHeapColumnVector 763 765 2 537.0 1.9 0.0X +ConstantColumnVector 0 0 0 2214341.3 0.0 1.0X +OnHeapColumnVector 0 0 0 1207885.5 0.0 0.5X +OffHeapColumnVector 890 891 2 460.5 2.2 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 1: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 1107203.6 0.0 1.0X -OnHeapColumnVector 2359 2359 1 173.6 5.8 0.0X -OffHeapColumnVector 1793 1793 1 228.5 4.4 0.0X +ConstantColumnVector 0 0 0 1897930.6 0.0 1.0X +OnHeapColumnVector 2249 2251 3 182.1 5.5 0.0X +OffHeapColumnVector 1965 1966 1 208.4 4.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 5: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -ConstantColumnVector 0 0 0 1107203.6 0.0 1.0X -OnHeapColumnVector 2375 2384 12 172.5 5.8 0.0X -OffHeapColumnVector 1803 1803 1 227.2 4.4 0.0X +ConstantColumnVector 0 0 0 1897930.6 0.0 1.0X +OnHeapColumnVector 2249 2252 4 182.1 5.5 0.0X +OffHeapColumnVector 1976 1980 5 207.2 4.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 10: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1107203.6 0.0 1.0X -OnHeapColumnVector 2366 2372 9 173.2 5.8 0.0X -OffHeapColumnVector 1798 1809 15 227.8 4.4 0.0X +ConstantColumnVector 0 0 0 1897948.2 0.0 1.0X +OnHeapColumnVector 2272 2272 1 180.3 5.5 0.0X +OffHeapColumnVector 1980 1989 12 206.9 4.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 15: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1107203.6 0.0 1.0X -OnHeapColumnVector 2369 2369 1 172.9 5.8 0.0X -OffHeapColumnVector 1799 1800 0 227.6 4.4 0.0X +ConstantColumnVector 0 0 0 1897939.4 0.0 1.0X +OnHeapColumnVector 2267 2273 9 180.7 5.5 0.0X +OffHeapColumnVector 1973 1974 3 207.7 4.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure 
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 20: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1107203.6 0.0 1.0X -OnHeapColumnVector 2364 2365 1 173.3 5.8 0.0X -OffHeapColumnVector 1792 1793 1 228.5 4.4 0.0X +ConstantColumnVector 0 0 0 1897939.4 0.0 1.0X +OnHeapColumnVector 2263 2265 4 181.0 5.5 0.0X +OffHeapColumnVector 1979 1981 4 207.0 4.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with StringType, row length = 30: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 1107203.6 0.0 1.0X -OnHeapColumnVector 2367 2368 1 173.0 5.8 0.0X -OffHeapColumnVector 1795 1796 1 228.2 4.4 0.0X +ConstantColumnVector 0 0 0 1897939.4 0.0 1.0X +OnHeapColumnVector 2253 2253 0 181.8 5.5 0.0X +OffHeapColumnVector 1966 1969 4 208.4 4.8 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 3429 3430 1 119.4 8.4 1.0X -OnHeapColumnVector 2572 2572 0 159.3 6.3 1.3X -OffHeapColumnVector 2581 2589 11 158.7 6.3 1.3X +ConstantColumnVector 761 762 0 538.1 1.9 1.0X +OnHeapColumnVector 888 891 4 461.3 2.2 0.9X +OffHeapColumnVector 888 889 1 461.3 2.2 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 761 761 1 538.3 1.9 1.0X -OnHeapColumnVector 763 764 1 537.0 1.9 1.0X -OffHeapColumnVector 762 763 1 537.2 1.9 1.0X +ConstantColumnVector 761 762 1 538.3 1.9 1.0X +OnHeapColumnVector 763 764 0 536.6 1.9 1.0X +OffHeapColumnVector 764 764 1 536.5 1.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 760 761 0 538.7 1.9 1.0X -OnHeapColumnVector 763 763 1 537.1 1.9 1.0X -OffHeapColumnVector 762 763 1 537.2 1.9 1.0X +ConstantColumnVector 761 764 4 538.0 1.9 1.0X +OnHeapColumnVector 766 767 1 534.9 1.9 1.0X +OffHeapColumnVector 762 763 1 537.6 1.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test write and read with DoubleType: Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 888 888 1 461.5 2.2 1.0X -OnHeapColumnVector 763 774 18 536.8 1.9 1.2X -OffHeapColumnVector 762 763 1 537.4 1.9 1.2X +ConstantColumnVector 761 762 1 538.3 1.9 1.0X +OnHeapColumnVector 889 889 0 460.7 2.2 0.9X +OffHeapColumnVector 890 894 5 460.1 2.2 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with StringType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 209728622.6 0.0 1.0X -OnHeapColumnVector 0 0 0 2211961.7 0.0 0.0X -OffHeapColumnVector 0 0 0 209728622.6 0.0 1.0X +ConstantColumnVector 0 0 0 105648697.4 0.0 1.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X +OffHeapColumnVector 0 0 0 105648697.4 0.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with IntegerType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 209728622.6 0.0 1.0X -OnHeapColumnVector 0 0 0 2211961.7 0.0 0.0X -OffHeapColumnVector 0 0 0 209728622.6 0.0 1.0X +ConstantColumnVector 0 0 0 105648697.4 0.0 1.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X +OffHeapColumnVector 0 0 0 105648697.4 0.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with LongType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 209728622.6 0.0 1.0X -OnHeapColumnVector 0 0 0 2211961.7 0.0 0.0X -OffHeapColumnVector 0 0 0 209728622.6 0.0 1.0X +ConstantColumnVector 0 0 0 105648697.4 0.0 1.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X +OffHeapColumnVector 0 0 0 105648697.4 0.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with FloatType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 209728622.6 0.0 1.0X +ConstantColumnVector 0 0 0 105648697.4 0.0 1.0X OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X -OffHeapColumnVector 0 0 0 209728622.6 0.0 1.0X +OffHeapColumnVector 0 0 0 105648697.4 0.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Test isNull with DoubleType: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ConstantColumnVector 0 0 0 209728622.6 0.0 1.0X -OnHeapColumnVector 0 0 0 2211961.7 0.0 0.0X 
-OffHeapColumnVector 0 0 0 209728622.6 0.0 1.0X +ConstantColumnVector 0 0 0 105648697.4 0.0 1.0X +OnHeapColumnVector 0 0 0 2211949.7 0.0 0.0X +OffHeapColumnVector 0 0 0 105648697.4 0.0 1.0X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt index 8217e1561c174..43d7eb15b0ea5 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk21-results.txt @@ -2,430 +2,430 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7930 7984 77 2.0 504.2 1.0X -SQL Json 8135 8250 163 1.9 517.2 1.0X -SQL Parquet Vectorized: DataPageV1 76 87 9 205.7 4.9 103.7X -SQL Parquet Vectorized: DataPageV2 55 65 8 285.3 3.5 143.8X -SQL Parquet MR: DataPageV1 1785 1787 3 8.8 113.5 4.4X -SQL Parquet MR: DataPageV2 1643 1680 52 9.6 104.5 4.8X -SQL ORC Vectorized 114 124 10 138.2 7.2 69.7X -SQL ORC MR 1494 1496 3 10.5 95.0 5.3X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 9893 9962 97 1.6 629.0 1.0X +SQL Json 7942 8051 155 2.0 504.9 1.2X +SQL Parquet Vectorized: DataPageV1 84 96 8 187.9 5.3 118.2X +SQL Parquet Vectorized: DataPageV2 95 107 9 166.3 6.0 104.6X +SQL Parquet MR: DataPageV1 1727 1730 3 9.1 109.8 5.7X +SQL Parquet MR: DataPageV2 1615 1615 1 9.7 102.6 6.1X +SQL ORC Vectorized 135 146 8 116.4 8.6 73.2X +SQL ORC MR 1495 1511 22 10.5 95.0 6.6X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 35 36 1 449.0 2.2 1.0X -ParquetReader Vectorized: DataPageV2 25 26 1 638.4 1.6 1.4X -ParquetReader Vectorized -> Row: DataPageV1 29 30 1 548.0 1.8 1.2X -ParquetReader Vectorized -> Row: DataPageV2 18 20 2 851.6 1.2 1.9X +ParquetReader Vectorized: DataPageV1 92 93 1 170.7 5.9 1.0X +ParquetReader Vectorized: DataPageV2 112 113 1 140.8 7.1 0.8X +ParquetReader Vectorized -> Row: DataPageV1 72 73 1 218.6 4.6 1.3X +ParquetReader Vectorized -> Row: DataPageV2 94 96 2 167.5 6.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9218 9237 26 1.7 586.1 1.0X -SQL Json 8885 8900 21 1.8 564.9 1.0X -SQL Parquet Vectorized: DataPageV1 74 86 9 212.6 4.7 124.6X -SQL Parquet Vectorized: DataPageV2 74 88 12 211.4 4.7 123.9X -SQL Parquet MR: DataPageV1 1832 1837 8 8.6 116.5 5.0X -SQL Parquet MR: DataPageV2 1761 1763 3 8.9 112.0 5.2X -SQL ORC Vectorized 104 114 11 150.9 6.6 88.5X -SQL ORC MR 1523 1560 52 10.3 96.8 6.1X - -OpenJDK 64-Bit Server 
VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 9431 9439 11 1.7 599.6 1.0X +SQL Json 8552 8570 26 1.8 543.7 1.1X +SQL Parquet Vectorized: DataPageV1 96 105 9 164.4 6.1 98.6X +SQL Parquet Vectorized: DataPageV2 93 104 9 168.4 5.9 101.0X +SQL Parquet MR: DataPageV1 1816 1821 6 8.7 115.5 5.2X +SQL Parquet MR: DataPageV2 1742 1746 5 9.0 110.8 5.4X +SQL ORC Vectorized 107 113 6 146.6 6.8 87.9X +SQL ORC MR 1582 1598 22 9.9 100.6 6.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 125 138 14 125.8 7.9 1.0X -ParquetReader Vectorized: DataPageV2 125 137 11 126.2 7.9 1.0X -ParquetReader Vectorized -> Row: DataPageV1 44 47 5 355.9 2.8 2.8X -ParquetReader Vectorized -> Row: DataPageV2 44 47 5 357.8 2.8 2.8X +ParquetReader Vectorized: DataPageV1 66 68 2 238.1 4.2 1.0X +ParquetReader Vectorized: DataPageV2 66 67 1 239.4 4.2 1.0X +ParquetReader Vectorized -> Row: DataPageV1 44 46 3 357.8 2.8 1.5X +ParquetReader Vectorized -> Row: DataPageV2 44 45 1 357.9 2.8 1.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9794 9896 144 1.6 622.7 1.0X -SQL Json 9146 9163 24 1.7 581.5 1.1X -SQL Parquet Vectorized: DataPageV1 109 117 7 144.1 6.9 89.7X -SQL Parquet Vectorized: DataPageV2 126 136 5 124.8 8.0 77.7X -SQL Parquet MR: DataPageV1 2090 2102 16 7.5 132.9 4.7X -SQL Parquet MR: DataPageV2 1898 1907 14 8.3 120.6 5.2X -SQL ORC Vectorized 138 149 14 114.1 8.8 71.0X -SQL ORC MR 1574 1605 43 10.0 100.1 6.2X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 9996 10013 25 1.6 635.5 1.0X +SQL Json 8898 8902 5 1.8 565.7 1.1X +SQL Parquet Vectorized: DataPageV1 121 137 14 129.7 7.7 82.4X +SQL Parquet Vectorized: DataPageV2 139 153 14 113.1 8.8 71.9X +SQL Parquet MR: DataPageV1 2015 2035 28 7.8 128.1 5.0X +SQL Parquet MR: DataPageV2 2000 2012 17 7.9 127.2 5.0X +SQL ORC Vectorized 143 174 27 109.8 9.1 69.8X +SQL ORC MR 1959 1990 44 8.0 124.6 5.1X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 140 161 67 112.2 8.9 1.0X -ParquetReader Vectorized: DataPageV2 163 166 3 96.4 10.4 0.9X -ParquetReader Vectorized -> Row: DataPageV1 139 140 2 113.1 8.8 1.0X -ParquetReader Vectorized -> Row: DataPageV2 166 182 10 94.8 10.6 0.8X +ParquetReader Vectorized: DataPageV1 151 160 8 104.3 9.6 1.0X +ParquetReader Vectorized: DataPageV2 168 180 14 93.5 10.7 0.9X +ParquetReader Vectorized -> Row: DataPageV1 160 166 6 98.3 10.2 0.9X +ParquetReader Vectorized -> Row: DataPageV2 164 175 12 96.1 10.4 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD 
EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11232 11256 33 1.4 714.1 1.0X -SQL Json 9725 9740 22 1.6 618.3 1.2X -SQL Parquet Vectorized: DataPageV1 84 97 15 187.8 5.3 134.1X -SQL Parquet Vectorized: DataPageV2 162 181 13 96.8 10.3 69.1X -SQL Parquet MR: DataPageV1 1882 1900 26 8.4 119.6 6.0X -SQL Parquet MR: DataPageV2 1898 1899 2 8.3 120.7 5.9X -SQL ORC Vectorized 148 157 13 106.1 9.4 75.7X -SQL ORC MR 1667 1674 10 9.4 106.0 6.7X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 11250 11336 121 1.4 715.3 1.0X +SQL Json 9272 9279 10 1.7 589.5 1.2X +SQL Parquet Vectorized: DataPageV1 109 126 14 144.4 6.9 103.3X +SQL Parquet Vectorized: DataPageV2 190 195 5 82.8 12.1 59.2X +SQL Parquet MR: DataPageV1 2338 2342 6 6.7 148.6 4.8X +SQL Parquet MR: DataPageV2 2332 2343 17 6.7 148.2 4.8X +SQL ORC Vectorized 179 193 12 87.9 11.4 62.9X +SQL ORC MR 2094 2095 1 7.5 133.2 5.4X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 130 140 11 121.1 8.3 1.0X -ParquetReader Vectorized: DataPageV2 213 230 10 74.0 13.5 0.6X -ParquetReader Vectorized -> Row: DataPageV1 128 132 6 122.9 8.1 1.0X -ParquetReader Vectorized -> Row: DataPageV2 222 226 5 70.7 14.1 0.6X +ParquetReader Vectorized: DataPageV1 134 138 2 117.7 8.5 1.0X +ParquetReader Vectorized: DataPageV2 210 215 7 74.8 13.4 0.6X +ParquetReader Vectorized -> Row: DataPageV1 128 133 8 123.3 8.1 1.0X +ParquetReader Vectorized -> Row: DataPageV2 225 232 6 70.0 14.3 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14617 14690 103 1.1 929.3 1.0X -SQL Json 10772 10780 11 1.5 684.9 1.4X -SQL Parquet Vectorized: DataPageV1 118 132 13 133.4 7.5 124.0X -SQL Parquet Vectorized: DataPageV2 268 300 20 58.7 17.0 54.5X -SQL Parquet MR: DataPageV1 2289 2314 36 6.9 145.5 6.4X -SQL Parquet MR: DataPageV2 1993 1995 3 7.9 126.7 7.3X -SQL ORC Vectorized 215 224 12 73.1 13.7 68.0X -SQL ORC MR 1840 1851 17 8.6 117.0 7.9X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 11683 11683 1 1.3 742.8 1.0X +SQL Json 9457 9460 4 1.7 601.3 1.2X +SQL Parquet Vectorized: DataPageV1 277 312 21 56.9 17.6 42.2X +SQL Parquet Vectorized: DataPageV2 281 291 10 56.0 17.9 41.6X +SQL Parquet MR: DataPageV1 2506 2517 15 6.3 159.4 4.7X +SQL Parquet MR: DataPageV2 2053 2058 7 7.7 130.5 5.7X +SQL ORC Vectorized 166 172 12 95.0 10.5 70.5X +SQL ORC MR 1709 1738 40 9.2 108.7 6.8X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 167 179 12 94.0 10.6 1.0X -ParquetReader Vectorized: DataPageV2 324 331 4 48.5 20.6 0.5X -ParquetReader Vectorized -> Row: DataPageV1 181 185 5 87.1 11.5 0.9X -ParquetReader Vectorized -> Row: DataPageV2 322 331 6 48.8 20.5 0.5X +ParquetReader Vectorized: DataPageV1 311 331 16 50.6 19.8 1.0X +ParquetReader Vectorized: DataPageV2 265 280 21 59.4 16.8 1.2X +ParquetReader Vectorized -> Row: DataPageV1 317 321 3 49.6 20.2 1.0X +ParquetReader Vectorized -> Row: DataPageV2 254 262 13 62.0 16.1 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11070 11076 9 1.4 703.8 1.0X -SQL Json 11574 11602 39 1.4 735.9 1.0X -SQL Parquet Vectorized: DataPageV1 86 97 15 182.7 5.5 128.6X -SQL Parquet Vectorized: DataPageV2 94 103 5 166.9 6.0 117.4X -SQL Parquet MR: DataPageV1 2065 2130 93 7.6 131.3 5.4X -SQL Parquet MR: DataPageV2 2157 2169 17 7.3 137.1 5.1X -SQL ORC Vectorized 266 288 20 59.0 16.9 41.5X -SQL ORC MR 1740 1780 57 9.0 110.6 6.4X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 11446 11452 8 1.4 727.7 1.0X +SQL Json 10952 10955 4 1.4 696.3 1.0X +SQL Parquet Vectorized: DataPageV1 83 97 16 189.5 5.3 137.9X +SQL Parquet Vectorized: DataPageV2 82 94 12 192.7 5.2 140.2X +SQL Parquet MR: DataPageV1 2107 2120 18 7.5 134.0 5.4X +SQL Parquet MR: DataPageV2 1975 2003 40 8.0 125.5 5.8X +SQL ORC Vectorized 235 245 14 66.9 14.9 48.7X +SQL ORC MR 1779 1801 30 8.8 113.1 6.4X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 144 144 1 109.5 9.1 1.0X -ParquetReader Vectorized: DataPageV2 140 142 1 112.1 8.9 1.0X -ParquetReader Vectorized -> Row: DataPageV1 149 156 6 105.6 9.5 1.0X -ParquetReader Vectorized -> Row: DataPageV2 148 153 5 106.2 9.4 1.0X +ParquetReader Vectorized: DataPageV1 134 141 8 117.1 8.5 1.0X +ParquetReader Vectorized: DataPageV2 147 151 4 107.3 9.3 0.9X +ParquetReader Vectorized -> Row: DataPageV1 144 151 7 109.2 9.2 0.9X +ParquetReader Vectorized -> Row: DataPageV2 128 139 7 123.3 8.1 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14612 14718 150 1.1 929.0 1.0X -SQL Json 14802 14812 14 1.1 941.1 1.0X -SQL Parquet Vectorized: DataPageV1 126 144 15 124.3 8.0 115.5X -SQL Parquet Vectorized: DataPageV2 161 167 5 97.4 10.3 90.5X -SQL Parquet MR: DataPageV1 2239 2249 14 7.0 142.4 6.5X -SQL Parquet MR: DataPageV2 2125 2169 63 7.4 135.1 6.9X -SQL ORC Vectorized 352 366 11 44.6 22.4 41.5X -SQL ORC MR 1823 1824 1 8.6 
115.9 8.0X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 11723 11745 31 1.3 745.3 1.0X +SQL Json 11373 11395 31 1.4 723.1 1.0X +SQL Parquet Vectorized: DataPageV1 304 316 11 51.7 19.3 38.6X +SQL Parquet Vectorized: DataPageV2 276 301 16 56.9 17.6 42.4X +SQL Parquet MR: DataPageV1 2427 2438 16 6.5 154.3 4.8X +SQL Parquet MR: DataPageV2 2365 2381 22 6.7 150.4 5.0X +SQL ORC Vectorized 577 580 2 27.3 36.7 20.3X +SQL ORC MR 2149 2174 35 7.3 136.6 5.5X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 202 205 2 77.7 12.9 1.0X -ParquetReader Vectorized: DataPageV2 200 205 5 78.5 12.7 1.0X -ParquetReader Vectorized -> Row: DataPageV1 182 187 5 86.2 11.6 1.1X -ParquetReader Vectorized -> Row: DataPageV2 182 186 4 86.3 11.6 1.1X +ParquetReader Vectorized: DataPageV1 325 333 5 48.4 20.6 1.0X +ParquetReader Vectorized: DataPageV2 324 333 8 48.5 20.6 1.0X +ParquetReader Vectorized -> Row: DataPageV1 312 326 14 50.4 19.8 1.0X +ParquetReader Vectorized -> Row: DataPageV2 323 329 6 48.6 20.6 1.0X ================================================================================================ SQL Single Numeric Column Scan in Struct ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 1989 2016 38 7.9 126.4 1.0X -SQL ORC Vectorized (Nested Column Disabled) 1965 1966 2 8.0 124.9 1.0X -SQL ORC Vectorized (Nested Column Enabled) 195 207 15 80.6 12.4 10.2X -SQL Parquet MR: DataPageV1 2261 2267 9 7.0 143.7 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2698 2708 14 5.8 171.5 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 100 105 4 157.5 6.3 19.9X -SQL Parquet MR: DataPageV2 2108 2109 1 7.5 134.0 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2617 2636 27 6.0 166.4 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 90 98 9 175.2 5.7 22.2X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2116 2119 4 7.4 134.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2127 2157 42 7.4 135.3 1.0X +SQL ORC Vectorized (Nested Column Enabled) 146 153 9 107.5 9.3 14.5X +SQL Parquet MR: DataPageV1 2589 2609 28 6.1 164.6 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2883 2886 4 5.5 183.3 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 104 121 18 151.6 6.6 20.4X +SQL Parquet MR: DataPageV2 2472 2505 46 6.4 157.2 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2834 2851 25 5.6 180.2 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 106 121 13 148.8 6.7 20.0X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2099 2122 32 7.5 133.5 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2154 2157 4 7.3 137.0 1.0X -SQL ORC Vectorized (Nested Column Enabled) 275 287 14 57.3 17.5 7.6X -SQL Parquet MR: DataPageV1 2310 2320 15 6.8 146.9 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2891 2907 23 5.4 183.8 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 99 122 23 158.5 6.3 21.2X -SQL Parquet MR: DataPageV2 2250 2254 7 7.0 143.0 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2848 2874 37 5.5 181.0 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 124 137 10 127.2 7.9 17.0X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2332 2378 65 6.7 148.3 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2331 2360 41 6.7 148.2 1.0X +SQL ORC Vectorized (Nested Column Enabled) 257 270 10 61.2 16.3 9.1X +SQL Parquet MR: DataPageV1 2383 2385 2 6.6 151.5 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2944 2945 1 5.3 187.2 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 120 134 16 130.9 7.6 19.4X +SQL Parquet MR: DataPageV2 2323 2334 17 6.8 147.7 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2983 2992 12 5.3 189.7 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 231 263 16 68.0 14.7 10.1X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2147 2182 49 7.3 136.5 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2138 2160 30 7.4 136.0 1.0X -SQL ORC Vectorized (Nested Column Enabled) 307 315 10 51.2 19.5 7.0X -SQL Parquet MR: DataPageV1 2349 2351 3 6.7 149.3 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2783 2823 56 5.7 177.0 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 92 111 18 170.3 5.9 23.3X -SQL Parquet MR: DataPageV2 2394 2416 31 6.6 152.2 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2774 2776 3 5.7 176.4 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 206 227 18 76.3 13.1 10.4X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2196 2201 7 7.2 139.6 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2243 2312 97 7.0 142.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 278 292 18 56.6 17.7 7.9X +SQL Parquet MR: DataPageV1 2539 2540 1 6.2 161.4 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3499 3514 20 4.5 222.5 0.6X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 112 117 4 139.9 7.2 19.5X +SQL Parquet MR: DataPageV2 2555 2563 12 6.2 162.4 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3424 3441 25 4.6 217.7 0.6X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 242 250 5 64.9 15.4 9.1X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- 
-SQL ORC MR 2253 2258 7 7.0 143.3 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2311 2324 18 6.8 146.9 1.0X -SQL ORC Vectorized (Nested Column Enabled) 356 377 29 44.2 22.6 6.3X -SQL Parquet MR: DataPageV1 2600 2609 13 6.0 165.3 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3090 3097 9 5.1 196.5 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 128 144 16 122.6 8.2 17.6X -SQL Parquet MR: DataPageV2 2303 2325 31 6.8 146.4 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2816 2821 7 5.6 179.0 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 332 339 7 47.3 21.1 6.8X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2219 2229 15 7.1 141.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2234 2248 21 7.0 142.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 290 309 18 54.2 18.5 7.6X +SQL Parquet MR: DataPageV1 2806 2812 8 5.6 178.4 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3281 3296 20 4.8 208.6 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 326 335 10 48.3 20.7 6.8X +SQL Parquet MR: DataPageV2 2430 2454 34 6.5 154.5 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2898 2912 20 5.4 184.3 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 251 288 23 62.6 16.0 8.8X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2274 2315 58 6.9 144.6 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2291 2319 40 6.9 145.6 1.0X -SQL ORC Vectorized (Nested Column Enabled) 364 381 25 43.3 23.1 6.3X -SQL Parquet MR: DataPageV1 2379 2384 7 6.6 151.3 1.0X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2946 2955 13 5.3 187.3 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 98 105 5 160.0 6.2 23.1X -SQL Parquet MR: DataPageV2 2303 2311 11 6.8 146.4 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2866 2878 18 5.5 182.2 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 90 101 8 175.1 5.7 25.3X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2313 2372 83 6.8 147.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2405 2419 19 6.5 152.9 1.0X +SQL ORC Vectorized (Nested Column Enabled) 337 355 19 46.6 21.5 6.9X +SQL Parquet MR: DataPageV1 2604 2617 17 6.0 165.6 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3103 3112 12 5.1 197.3 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 95 100 4 165.2 6.1 24.3X +SQL Parquet MR: DataPageV2 2674 2698 34 5.9 170.0 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3215 3237 32 4.9 204.4 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 87 101 9 180.4 5.5 26.5X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2265 2272 10 6.9 144.0 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2301 2323 31 6.8 146.3 1.0X -SQL ORC Vectorized (Nested Column 
Enabled) 442 457 19 35.6 28.1 5.1X -SQL Parquet MR: DataPageV1 2573 2587 20 6.1 163.6 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3230 3231 1 4.9 205.4 0.7X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 130 148 15 120.9 8.3 17.4X -SQL Parquet MR: DataPageV2 2539 2555 23 6.2 161.4 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3169 3176 9 5.0 201.5 0.7X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 128 145 13 122.4 8.2 17.6X +SQL ORC MR 2676 2684 12 5.9 170.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2595 2596 2 6.1 165.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 697 708 16 22.6 44.3 3.8X +SQL Parquet MR: DataPageV1 2836 2854 25 5.5 180.3 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3428 3435 10 4.6 218.0 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 307 319 11 51.2 19.5 8.7X +SQL Parquet MR: DataPageV2 2903 2904 2 5.4 184.6 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3511 3518 9 4.5 223.2 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 317 322 4 49.7 20.1 8.5X ================================================================================================ SQL Nested Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 12831 12987 144 0.1 12236.2 1.0X -SQL ORC Vectorized (Nested Column Disabled) 12819 12984 139 0.1 12224.7 1.0X -SQL ORC Vectorized (Nested Column Enabled) 7154 7188 21 0.1 6822.6 1.8X -SQL Parquet MR: DataPageV1 8782 8811 21 0.1 8375.1 1.5X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9453 9503 28 0.1 9015.2 1.4X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5994 6037 29 0.2 5716.6 2.1X -SQL Parquet MR: DataPageV2 9566 9608 23 0.1 9123.0 1.3X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 9873 9912 24 0.1 9415.5 1.3X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5739 5775 25 0.2 5473.3 2.2X +SQL ORC MR 12857 12956 97 0.1 12261.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 12868 12963 93 0.1 12272.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 7063 7109 31 0.1 6735.6 1.8X +SQL Parquet MR: DataPageV1 9067 9173 81 0.1 8646.8 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9287 9373 59 0.1 8856.4 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5899 5931 25 0.2 5625.7 2.2X +SQL Parquet MR: DataPageV2 9529 9579 54 0.1 9087.2 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 9864 10035 165 0.1 9406.6 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5650 5702 49 0.2 5388.4 2.3X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10391 10454 88 1.0 991.0 1.0X -SQL Json 10116 10160 62 1.0 964.7 1.0X -SQL Parquet Vectorized: DataPageV1 1684 1687 4 6.2 160.6 6.2X -SQL Parquet Vectorized: DataPageV2 1919 1938 26 5.5 183.1 5.4X -SQL Parquet MR: DataPageV1 3735 3735 1 2.8 356.2 2.8X -SQL Parquet MR: DataPageV2 3767 3770 5 2.8 359.2 2.8X -SQL ORC Vectorized 1725 1744 27 6.1 164.5 6.0X -SQL ORC MR 3469 3469 1 3.0 330.8 3.0X +SQL CSV 10098 10209 156 1.0 963.0 1.0X +SQL Json 9940 9993 75 1.1 947.9 1.0X +SQL Parquet Vectorized: DataPageV1 1682 1707 36 6.2 160.4 6.0X +SQL Parquet Vectorized: DataPageV2 1912 1930 25 5.5 182.4 5.3X +SQL Parquet MR: DataPageV1 3861 3870 13 2.7 368.2 2.6X +SQL Parquet MR: DataPageV2 3961 3969 10 2.6 377.8 2.5X +SQL ORC Vectorized 1768 1780 18 5.9 168.6 5.7X +SQL ORC MR 3478 3493 21 3.0 331.7 2.9X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5749 5778 41 1.8 548.3 1.0X -SQL Json 6428 6433 7 1.6 613.0 0.9X -SQL Parquet Vectorized: DataPageV1 428 437 9 24.5 40.9 13.4X -SQL Parquet Vectorized: DataPageV2 440 464 17 23.8 42.0 13.1X -SQL Parquet MR: DataPageV1 1640 1669 40 6.4 156.4 3.5X -SQL Parquet MR: DataPageV2 1652 1670 25 6.3 157.6 3.5X -SQL ORC Vectorized 365 369 4 28.7 34.9 15.7X -SQL ORC MR 1628 1628 1 6.4 155.2 3.5X +SQL CSV 5870 5882 17 1.8 559.8 1.0X +SQL Json 6337 6345 10 1.7 604.4 0.9X +SQL Parquet Vectorized: DataPageV1 457 473 22 23.0 43.5 12.9X +SQL Parquet Vectorized: DataPageV2 491 501 8 21.3 46.9 11.9X +SQL Parquet MR: DataPageV1 1631 1648 24 6.4 155.6 3.6X +SQL Parquet MR: DataPageV2 1580 1606 36 6.6 150.7 3.7X +SQL ORC Vectorized 372 378 8 28.2 35.5 15.8X +SQL ORC MR 1732 1735 5 6.1 165.1 3.4X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 14556 14605 69 1.1 925.5 1.0X -Data column - Json 10309 10312 4 1.5 655.4 1.4X -Data column - Parquet Vectorized: DataPageV1 139 147 11 113.4 8.8 105.0X -Data column - Parquet Vectorized: DataPageV2 265 283 15 59.4 16.8 54.9X -Data column - Parquet MR: DataPageV1 2451 2456 8 6.4 155.8 5.9X -Data column - Parquet MR: DataPageV2 2284 2303 27 6.9 145.2 6.4X -Data column - ORC Vectorized 184 193 12 85.5 11.7 79.1X -Data column - ORC MR 1952 1971 27 8.1 124.1 7.5X -Partition column - CSV 4241 4257 23 3.7 269.6 3.4X -Partition column - Json 9027 9034 10 1.7 573.9 1.6X -Partition column - Parquet Vectorized: DataPageV1 21 27 7 
732.6 1.4 678.0X -Partition column - Parquet Vectorized: DataPageV2 21 27 7 746.0 1.3 690.4X -Partition column - Parquet MR: DataPageV1 1219 1220 2 12.9 77.5 11.9X -Partition column - Parquet MR: DataPageV2 1190 1197 10 13.2 75.6 12.2X -Partition column - ORC Vectorized 22 28 6 699.8 1.4 647.6X -Partition column - ORC MR 1259 1266 9 12.5 80.0 11.6X -Both columns - CSV 14815 14838 32 1.1 941.9 1.0X -Both columns - Json 11180 11208 40 1.4 710.8 1.3X -Both columns - Parquet Vectorized: DataPageV1 158 173 15 99.8 10.0 92.3X -Both columns - Parquet Vectorized: DataPageV2 300 310 11 52.5 19.1 48.6X -Both columns - Parquet MR: DataPageV1 2543 2548 6 6.2 161.7 5.7X -Both columns - Parquet MR: DataPageV2 2264 2275 15 6.9 143.9 6.4X -Both columns - ORC Vectorized 225 243 27 70.1 14.3 64.8X -Both columns - ORC MR 2090 2096 7 7.5 132.9 7.0X +Data column - CSV 10956 10967 15 1.4 696.5 1.0X +Data column - Json 9169 9189 29 1.7 583.0 1.2X +Data column - Parquet Vectorized: DataPageV1 108 126 16 145.8 6.9 101.6X +Data column - Parquet Vectorized: DataPageV2 217 233 20 72.5 13.8 50.5X +Data column - Parquet MR: DataPageV1 2229 2346 166 7.1 141.7 4.9X +Data column - Parquet MR: DataPageV2 2224 2240 23 7.1 141.4 4.9X +Data column - ORC Vectorized 178 184 4 88.3 11.3 61.5X +Data column - ORC MR 2040 2069 41 7.7 129.7 5.4X +Partition column - CSV 3493 3514 30 4.5 222.1 3.1X +Partition column - Json 8200 8367 236 1.9 521.3 1.3X +Partition column - Parquet Vectorized: DataPageV1 29 36 7 543.6 1.8 378.6X +Partition column - Parquet Vectorized: DataPageV2 28 35 7 560.2 1.8 390.2X +Partition column - Parquet MR: DataPageV1 1233 1255 31 12.8 78.4 8.9X +Partition column - Parquet MR: DataPageV2 1239 1248 13 12.7 78.8 8.8X +Partition column - ORC Vectorized 29 34 6 547.3 1.8 381.2X +Partition column - ORC MR 1300 1304 5 12.1 82.6 8.4X +Both columns - CSV 10899 10923 34 1.4 693.0 1.0X +Both columns - Json 9755 9777 31 1.6 620.2 1.1X +Both columns - Parquet Vectorized: DataPageV1 187 215 18 83.9 11.9 58.5X +Both columns - Parquet Vectorized: DataPageV2 266 290 24 59.0 16.9 41.1X +Both columns - Parquet MR: DataPageV1 2368 2379 15 6.6 150.6 4.6X +Both columns - Parquet MR: DataPageV2 2315 2323 11 6.8 147.2 4.7X +Both columns - ORC Vectorized 181 210 27 86.8 11.5 60.4X +Both columns - ORC MR 2214 2274 86 7.1 140.7 4.9X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7259 7326 95 1.4 692.3 1.0X -SQL Json 8815 8826 16 1.2 840.7 0.8X -SQL Parquet Vectorized: DataPageV1 995 1005 13 10.5 94.9 7.3X -SQL Parquet Vectorized: DataPageV2 1355 1355 0 7.7 129.2 5.4X -SQL Parquet MR: DataPageV1 3218 3225 10 3.3 306.9 2.3X -SQL Parquet MR: DataPageV2 3445 3492 67 3.0 328.5 2.1X -ParquetReader Vectorized: DataPageV1 692 700 9 15.1 66.0 10.5X -ParquetReader Vectorized: DataPageV2 997 999 2 10.5 95.1 7.3X -SQL ORC Vectorized 782 803 30 13.4 74.6 9.3X -SQL ORC MR 2808 2824 23 3.7 267.8 2.6X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 6979 7004 35 1.5 665.6 
1.0X +SQL Json 8795 8811 23 1.2 838.8 0.8X +SQL Parquet Vectorized: DataPageV1 1153 1174 30 9.1 110.0 6.1X +SQL Parquet Vectorized: DataPageV2 1419 1454 51 7.4 135.3 4.9X +SQL Parquet MR: DataPageV1 3349 3358 14 3.1 319.3 2.1X +SQL Parquet MR: DataPageV2 3710 3720 13 2.8 353.8 1.9X +ParquetReader Vectorized: DataPageV1 788 795 10 13.3 75.2 8.9X +ParquetReader Vectorized: DataPageV2 1033 1057 35 10.2 98.5 6.8X +SQL ORC Vectorized 815 820 4 12.9 77.7 8.6X +SQL ORC MR 2914 2955 58 3.6 277.9 2.4X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5382 5390 10 1.9 513.3 1.0X -SQL Json 7375 7386 15 1.4 703.4 0.7X -SQL Parquet Vectorized: DataPageV1 748 777 35 14.0 71.4 7.2X -SQL Parquet Vectorized: DataPageV2 975 990 14 10.8 93.0 5.5X -SQL Parquet MR: DataPageV1 2691 2695 6 3.9 256.6 2.0X -SQL Parquet MR: DataPageV2 2885 2885 0 3.6 275.1 1.9X -ParquetReader Vectorized: DataPageV1 650 654 4 16.1 62.0 8.3X -ParquetReader Vectorized: DataPageV2 861 864 2 12.2 82.1 6.2X -SQL ORC Vectorized 934 949 24 11.2 89.1 5.8X -SQL ORC MR 2598 2634 51 4.0 247.8 2.1X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 5379 5379 1 1.9 513.0 1.0X +SQL Json 7512 7522 14 1.4 716.4 0.7X +SQL Parquet Vectorized: DataPageV1 766 773 10 13.7 73.1 7.0X +SQL Parquet Vectorized: DataPageV2 953 973 29 11.0 90.9 5.6X +SQL Parquet MR: DataPageV1 2627 2634 11 4.0 250.5 2.0X +SQL Parquet MR: DataPageV2 2857 2863 8 3.7 272.4 1.9X +ParquetReader Vectorized: DataPageV1 686 701 22 15.3 65.4 7.8X +ParquetReader Vectorized: DataPageV2 868 882 16 12.1 82.8 6.2X +SQL ORC Vectorized 952 980 34 11.0 90.8 5.6X +SQL ORC MR 2794 2796 3 3.8 266.4 1.9X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4165 4169 6 2.5 397.2 1.0X -SQL Json 5382 5391 14 1.9 513.2 0.8X -SQL Parquet Vectorized: DataPageV1 167 174 11 62.9 15.9 25.0X -SQL Parquet Vectorized: DataPageV2 196 202 5 53.5 18.7 21.3X -SQL Parquet MR: DataPageV1 1736 1737 1 6.0 165.6 2.4X -SQL Parquet MR: DataPageV2 1628 1642 20 6.4 155.2 2.6X -ParquetReader Vectorized: DataPageV1 141 142 1 74.4 13.4 29.5X -ParquetReader Vectorized: DataPageV2 168 170 1 62.5 16.0 24.8X -SQL ORC Vectorized 307 320 10 34.1 29.3 13.6X -SQL ORC MR 1506 1510 5 7.0 143.6 2.8X +SQL CSV 4196 4197 2 2.5 400.2 1.0X +SQL Json 5466 5479 19 1.9 521.3 0.8X +SQL Parquet Vectorized: DataPageV1 156 159 4 67.2 14.9 26.9X +SQL Parquet Vectorized: DataPageV2 184 190 6 57.0 17.5 22.8X +SQL Parquet MR: DataPageV1 1656 1659 4 6.3 157.9 2.5X +SQL Parquet MR: DataPageV2 1604 1604 0 6.5 153.0 2.6X +ParquetReader Vectorized: DataPageV1 163 164 1 64.5 15.5 25.8X +ParquetReader Vectorized: DataPageV2 190 193 2 55.3 18.1 22.1X +SQL ORC Vectorized 315 322 6 33.3 30.0 13.3X +SQL ORC MR 1610 1615 6 6.5 153.6 2.6X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 
21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 1559 1559 0 0.7 1487.0 1.0X -SQL Json 1935 1953 25 0.5 1845.4 0.8X -SQL Parquet Vectorized: DataPageV1 25 29 5 42.5 23.5 63.2X -SQL Parquet Vectorized: DataPageV2 35 40 5 29.8 33.6 44.3X -SQL Parquet MR: DataPageV1 171 182 12 6.1 162.8 9.1X -SQL Parquet MR: DataPageV2 150 159 9 7.0 142.9 10.4X -SQL ORC Vectorized 32 39 8 33.0 30.3 49.0X -SQL ORC MR 131 138 7 8.0 125.2 11.9X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 1157 1159 3 0.9 1103.2 1.0X +SQL Json 1698 1702 5 0.6 1619.7 0.7X +SQL Parquet Vectorized: DataPageV1 24 29 6 43.3 23.1 47.8X +SQL Parquet Vectorized: DataPageV2 32 38 6 32.5 30.8 35.8X +SQL Parquet MR: DataPageV1 163 170 8 6.4 155.3 7.1X +SQL Parquet MR: DataPageV2 159 167 6 6.6 151.8 7.3X +SQL ORC Vectorized 28 34 7 37.5 26.7 41.4X +SQL ORC MR 130 136 6 8.1 123.8 8.9X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 3490 3503 19 0.3 3328.3 1.0X -SQL Json 6330 6341 15 0.2 6037.0 0.6X -SQL Parquet Vectorized: DataPageV1 33 41 7 32.0 31.3 106.3X -SQL Parquet Vectorized: DataPageV2 42 47 8 24.8 40.3 82.7X -SQL Parquet MR: DataPageV1 190 199 8 5.5 181.5 18.3X -SQL Parquet MR: DataPageV2 161 168 6 6.5 153.9 21.6X -SQL ORC Vectorized 40 45 5 26.0 38.5 86.5X -SQL ORC MR 147 155 10 7.1 140.5 23.7X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +SQL CSV 2485 2523 55 0.4 2369.7 1.0X +SQL Json 5915 5940 35 0.2 5641.2 0.4X +SQL Parquet Vectorized: DataPageV1 29 36 7 36.2 27.6 85.8X +SQL Parquet Vectorized: DataPageV2 35 39 6 30.2 33.1 71.6X +SQL Parquet MR: DataPageV1 168 173 4 6.2 160.0 14.8X +SQL Parquet MR: DataPageV2 164 175 8 6.4 156.2 15.2X +SQL ORC Vectorized 32 35 5 33.1 30.2 78.5X +SQL ORC MR 142 148 5 7.4 135.1 17.5X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5835 5860 36 0.2 5564.9 1.0X -SQL Json 11235 11374 197 0.1 10714.3 0.5X -SQL Parquet Vectorized: DataPageV1 48 60 11 22.0 45.5 122.2X -SQL Parquet Vectorized: DataPageV2 52 60 6 20.2 49.6 112.2X -SQL Parquet MR: DataPageV1 204 212 7 5.1 194.6 28.6X -SQL Parquet MR: DataPageV2 178 183 4 5.9 169.6 32.8X -SQL ORC Vectorized 52 61 9 20.3 49.4 112.7X -SQL ORC MR 162 175 16 6.5 154.4 36.0X +SQL CSV 4100 4175 105 0.3 3910.5 1.0X +SQL Json 9817 9951 190 0.1 9362.4 0.4X +SQL Parquet Vectorized: DataPageV1 34 45 10 31.0 32.2 121.4X +SQL Parquet Vectorized: DataPageV2 41 47 7 25.5 39.2 99.7X +SQL Parquet MR: DataPageV1 179 187 8 5.9 170.8 22.9X +SQL Parquet MR: DataPageV2 169 183 14 6.2 161.0 24.3X +SQL ORC Vectorized 38 45 9 27.4 36.5 107.1X +SQL ORC MR 143 146 3 7.3 136.1 28.7X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt 
b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt index 36a1bc00be182..76bbbfa26ae96 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt @@ -1,431 +1,431 @@ -================================================================================================ +DataSourceReadBenchmark-jdk21-results.txt================================================================================================ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9411 9431 29 1.7 598.3 1.0X -SQL Json 8995 9008 19 1.7 571.9 1.0X -SQL Parquet Vectorized: DataPageV1 68 82 11 232.7 4.3 139.2X -SQL Parquet Vectorized: DataPageV2 49 57 6 318.5 3.1 190.5X -SQL Parquet MR: DataPageV1 1798 1846 69 8.7 114.3 5.2X -SQL Parquet MR: DataPageV2 1712 1712 1 9.2 108.8 5.5X -SQL ORC Vectorized 115 125 9 137.3 7.3 82.1X -SQL ORC MR 1533 1540 9 10.3 97.5 6.1X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 10363 10364 2 1.5 658.9 1.0X +SQL Json 8667 8699 46 1.8 551.0 1.2X +SQL Parquet Vectorized: DataPageV1 103 114 8 153.3 6.5 101.0X +SQL Parquet Vectorized: DataPageV2 101 111 6 155.4 6.4 102.4X +SQL Parquet MR: DataPageV1 1809 1813 6 8.7 115.0 5.7X +SQL Parquet MR: DataPageV2 1715 1720 8 9.2 109.0 6.0X +SQL ORC Vectorized 139 146 5 113.1 8.8 74.5X +SQL ORC MR 1508 1511 5 10.4 95.8 6.9X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 37 38 2 430.0 2.3 1.0X -ParquetReader Vectorized: DataPageV2 27 28 1 586.2 1.7 1.4X -ParquetReader Vectorized -> Row: DataPageV1 29 30 1 536.5 1.9 1.2X -ParquetReader Vectorized -> Row: DataPageV2 20 21 3 790.6 1.3 1.8X +ParquetReader Vectorized: DataPageV1 88 90 2 178.9 5.6 1.0X +ParquetReader Vectorized: DataPageV2 95 96 1 166.2 6.0 0.9X +ParquetReader Vectorized -> Row: DataPageV1 73 74 1 215.3 4.6 1.2X +ParquetReader Vectorized -> Row: DataPageV2 81 83 1 193.1 5.2 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9874 9959 120 1.6 627.8 1.0X -SQL Json 9359 9367 11 1.7 595.0 1.1X -SQL Parquet Vectorized: DataPageV1 75 83 7 211.0 4.7 132.5X -SQL Parquet Vectorized: DataPageV2 74 82 7 213.5 4.7 134.0X -SQL Parquet MR: DataPageV1 1856 1868 17 8.5 118.0 5.3X -SQL Parquet MR: DataPageV2 1759 1776 25 8.9 111.8 5.6X -SQL ORC Vectorized 120 124 4 131.3 7.6 82.4X -SQL ORC MR 1545 1549 6 10.2 98.2 6.4X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure 
+SQL CSV 11538 11589 73 1.4 733.5 1.0X +SQL Json 9586 9596 14 1.6 609.5 1.2X +SQL Parquet Vectorized: DataPageV1 109 116 6 144.8 6.9 106.2X +SQL Parquet Vectorized: DataPageV2 110 118 8 142.6 7.0 104.6X +SQL Parquet MR: DataPageV1 1901 1953 74 8.3 120.9 6.1X +SQL Parquet MR: DataPageV2 1817 1832 22 8.7 115.5 6.4X +SQL ORC Vectorized 118 126 7 133.6 7.5 98.0X +SQL ORC MR 1505 1535 43 10.5 95.7 7.7X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 69 70 2 228.9 4.4 1.0X -ParquetReader Vectorized: DataPageV2 69 70 2 228.4 4.4 1.0X -ParquetReader Vectorized -> Row: DataPageV1 48 50 3 328.0 3.0 1.4X -ParquetReader Vectorized -> Row: DataPageV2 48 49 2 327.4 3.1 1.4X +ParquetReader Vectorized: DataPageV1 93 94 1 169.9 5.9 1.0X +ParquetReader Vectorized: DataPageV2 93 94 1 169.1 5.9 1.0X +ParquetReader Vectorized -> Row: DataPageV1 61 62 1 258.0 3.9 1.5X +ParquetReader Vectorized -> Row: DataPageV2 61 62 1 258.4 3.9 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10495 10514 26 1.5 667.3 1.0X -SQL Json 9793 9794 2 1.6 622.6 1.1X -SQL Parquet Vectorized: DataPageV1 87 95 9 181.8 5.5 121.3X -SQL Parquet Vectorized: DataPageV2 115 120 4 136.8 7.3 91.3X -SQL Parquet MR: DataPageV1 1892 1903 16 8.3 120.3 5.5X -SQL Parquet MR: DataPageV2 1852 1859 10 8.5 117.7 5.7X -SQL ORC Vectorized 141 144 3 111.6 9.0 74.5X -SQL ORC MR 1612 1614 2 9.8 102.5 6.5X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 12200 12203 5 1.3 775.7 1.0X +SQL Json 9813 9854 57 1.6 623.9 1.2X +SQL Parquet Vectorized: DataPageV1 101 107 6 156.1 6.4 121.0X +SQL Parquet Vectorized: DataPageV2 129 135 6 122.3 8.2 94.9X +SQL Parquet MR: DataPageV1 1968 1989 29 8.0 125.1 6.2X +SQL Parquet MR: DataPageV2 1913 1916 3 8.2 121.6 6.4X +SQL ORC Vectorized 130 135 6 120.8 8.3 93.7X +SQL ORC MR 1593 1600 10 9.9 101.3 7.7X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 139 140 1 113.3 8.8 1.0X -ParquetReader Vectorized: DataPageV2 168 170 1 93.6 10.7 0.8X -ParquetReader Vectorized -> Row: DataPageV1 137 138 2 115.1 8.7 1.0X -ParquetReader Vectorized -> Row: DataPageV2 164 166 2 95.7 10.4 0.8X +ParquetReader Vectorized: DataPageV1 138 140 2 113.9 8.8 1.0X +ParquetReader Vectorized: DataPageV2 166 168 3 94.8 10.6 0.8X +ParquetReader Vectorized -> Row: DataPageV1 136 138 6 115.6 8.6 1.0X +ParquetReader Vectorized -> Row: DataPageV2 164 166 2 96.1 10.4 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column 
Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11467 11490 31 1.4 729.1 1.0X -SQL Json 10337 10343 9 1.5 657.2 1.1X -SQL Parquet Vectorized: DataPageV1 86 91 4 181.9 5.5 132.6X -SQL Parquet Vectorized: DataPageV2 163 168 6 96.5 10.4 70.4X -SQL Parquet MR: DataPageV1 1979 1992 19 7.9 125.8 5.8X -SQL Parquet MR: DataPageV2 1929 1941 17 8.2 122.6 5.9X -SQL ORC Vectorized 163 168 4 96.8 10.3 70.5X -SQL ORC MR 1692 1697 7 9.3 107.6 6.8X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 13361 13368 9 1.2 849.5 1.0X +SQL Json 10099 10118 27 1.6 642.1 1.3X +SQL Parquet Vectorized: DataPageV1 108 131 29 145.0 6.9 123.2X +SQL Parquet Vectorized: DataPageV2 177 185 7 88.9 11.3 75.5X +SQL Parquet MR: DataPageV1 2031 2083 74 7.7 129.1 6.6X +SQL Parquet MR: DataPageV2 2022 2026 5 7.8 128.6 6.6X +SQL ORC Vectorized 146 151 4 107.7 9.3 91.5X +SQL ORC MR 1642 1642 0 9.6 104.4 8.1X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 141 145 7 111.9 8.9 1.0X -ParquetReader Vectorized: DataPageV2 215 217 3 73.2 13.7 0.7X -ParquetReader Vectorized -> Row: DataPageV1 138 140 2 113.6 8.8 1.0X -ParquetReader Vectorized -> Row: DataPageV2 212 213 2 74.3 13.5 0.7X +ParquetReader Vectorized: DataPageV1 141 143 2 111.9 8.9 1.0X +ParquetReader Vectorized: DataPageV2 209 210 1 75.3 13.3 0.7X +ParquetReader Vectorized -> Row: DataPageV1 138 140 2 113.9 8.8 1.0X +ParquetReader Vectorized -> Row: DataPageV2 207 210 7 76.1 13.1 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15201 15213 17 1.0 966.4 1.0X -SQL Json 11480 11486 7 1.4 729.9 1.3X -SQL Parquet Vectorized: DataPageV1 123 128 5 127.6 7.8 123.3X -SQL Parquet Vectorized: DataPageV2 215 220 7 73.1 13.7 70.7X -SQL Parquet MR: DataPageV1 2300 2335 49 6.8 146.2 6.6X -SQL Parquet MR: DataPageV2 2082 2100 25 7.6 132.4 7.3X -SQL ORC Vectorized 218 221 3 72.0 13.9 69.6X -SQL ORC MR 1722 1727 7 9.1 109.5 8.8X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 13316 13326 13 1.2 846.6 1.0X +SQL Json 9808 9885 109 1.6 623.6 1.4X +SQL Parquet Vectorized: DataPageV1 290 293 3 54.3 18.4 46.0X +SQL Parquet Vectorized: DataPageV2 235 238 3 66.9 14.9 56.6X +SQL Parquet MR: DataPageV1 2404 2409 7 6.5 152.9 5.5X +SQL Parquet MR: DataPageV2 2007 2030 33 7.8 127.6 6.6X +SQL ORC Vectorized 150 153 3 104.8 9.5 88.7X +SQL ORC MR 1625 1634 13 9.7 103.3 8.2X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 177 181 
4 88.7 11.3 1.0X -ParquetReader Vectorized: DataPageV2 266 269 2 59.1 16.9 0.7X -ParquetReader Vectorized -> Row: DataPageV1 175 177 1 89.7 11.2 1.0X -ParquetReader Vectorized -> Row: DataPageV2 266 267 2 59.1 16.9 0.7X +ParquetReader Vectorized: DataPageV1 334 335 2 47.1 21.2 1.0X +ParquetReader Vectorized: DataPageV2 277 279 2 56.9 17.6 1.2X +ParquetReader Vectorized -> Row: DataPageV1 351 355 3 44.8 22.3 1.0X +ParquetReader Vectorized -> Row: DataPageV2 297 303 7 52.9 18.9 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12187 12207 29 1.3 774.8 1.0X -SQL Json 12291 12296 7 1.3 781.4 1.0X -SQL Parquet Vectorized: DataPageV1 83 87 7 190.1 5.3 147.3X -SQL Parquet Vectorized: DataPageV2 82 86 3 191.1 5.2 148.1X -SQL Parquet MR: DataPageV1 2020 2023 4 7.8 128.4 6.0X -SQL Parquet MR: DataPageV2 1938 1944 9 8.1 123.2 6.3X -SQL ORC Vectorized 270 272 2 58.3 17.1 45.2X -SQL ORC MR 1810 1837 38 8.7 115.1 6.7X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 13826 13835 13 1.1 879.0 1.0X +SQL Json 11577 11606 40 1.4 736.1 1.2X +SQL Parquet Vectorized: DataPageV1 87 103 11 181.0 5.5 159.1X +SQL Parquet Vectorized: DataPageV2 88 101 7 178.8 5.6 157.2X +SQL Parquet MR: DataPageV1 2072 2075 4 7.6 131.7 6.7X +SQL Parquet MR: DataPageV2 2075 2087 17 7.6 131.9 6.7X +SQL ORC Vectorized 261 273 10 60.2 16.6 52.9X +SQL ORC MR 1720 1726 8 9.1 109.4 8.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 137 138 2 115.2 8.7 1.0X -ParquetReader Vectorized: DataPageV2 134 135 1 117.3 8.5 1.0X -ParquetReader Vectorized -> Row: DataPageV1 133 135 3 118.1 8.5 1.0X -ParquetReader Vectorized -> Row: DataPageV2 133 134 1 118.1 8.5 1.0X +ParquetReader Vectorized: DataPageV1 135 138 5 116.9 8.6 1.0X +ParquetReader Vectorized: DataPageV2 134 135 2 117.7 8.5 1.0X +ParquetReader Vectorized -> Row: DataPageV1 149 155 5 105.3 9.5 0.9X +ParquetReader Vectorized -> Row: DataPageV2 133 140 11 118.4 8.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15788 15813 35 1.0 1003.8 1.0X -SQL Json 15330 15351 29 1.0 974.7 1.0X -SQL Parquet Vectorized: DataPageV1 122 128 7 128.7 7.8 129.2X -SQL Parquet Vectorized: DataPageV2 120 124 4 130.6 7.7 131.1X -SQL Parquet MR: DataPageV1 2321 2322 2 6.8 147.5 6.8X -SQL Parquet MR: DataPageV2 2193 2236 60 7.2 139.5 7.2X -SQL ORC Vectorized 356 357 2 44.2 22.6 44.3X -SQL ORC MR 1816 1825 11 8.7 115.5 8.7X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 14086 14095 13 1.1 895.6 1.0X +SQL Json 11716 11726 14 1.3 744.9 1.2X +SQL Parquet 
Vectorized: DataPageV1 280 291 8 56.2 17.8 50.3X +SQL Parquet Vectorized: DataPageV2 282 287 4 55.8 17.9 50.0X +SQL Parquet MR: DataPageV1 2479 2498 27 6.3 157.6 5.7X +SQL Parquet MR: DataPageV2 2492 2509 23 6.3 158.4 5.7X +SQL ORC Vectorized 622 628 7 25.3 39.5 22.6X +SQL ORC MR 2084 2093 14 7.5 132.5 6.8X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 175 176 2 90.1 11.1 1.0X -ParquetReader Vectorized: DataPageV2 174 176 5 90.5 11.0 1.0X -ParquetReader Vectorized -> Row: DataPageV1 172 175 2 91.3 11.0 1.0X -ParquetReader Vectorized -> Row: DataPageV2 172 175 3 91.4 10.9 1.0X +ParquetReader Vectorized: DataPageV1 346 348 2 45.4 22.0 1.0X +ParquetReader Vectorized: DataPageV2 347 349 4 45.4 22.0 1.0X +ParquetReader Vectorized -> Row: DataPageV1 355 358 4 44.3 22.6 1.0X +ParquetReader Vectorized -> Row: DataPageV2 354 357 5 44.4 22.5 1.0X ================================================================================================ SQL Single Numeric Column Scan in Struct ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2124 2313 267 7.4 135.1 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2112 2131 27 7.4 134.3 1.0X -SQL ORC Vectorized (Nested Column Enabled) 183 189 3 86.0 11.6 11.6X -SQL Parquet MR: DataPageV1 2359 2374 22 6.7 150.0 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2774 2777 5 5.7 176.3 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 93 100 5 169.8 5.9 22.9X -SQL Parquet MR: DataPageV2 2225 2231 9 7.1 141.5 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2579 2588 13 6.1 164.0 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 92 97 3 170.5 5.9 23.0X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2210 2239 41 7.1 140.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2196 2226 43 7.2 139.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 106 138 35 148.1 6.8 20.8X +SQL Parquet MR: DataPageV1 2436 2446 14 6.5 154.9 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2790 2819 40 5.6 177.4 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 107 113 7 146.4 6.8 20.6X +SQL Parquet MR: DataPageV2 2308 2310 4 6.8 146.7 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2855 2862 9 5.5 181.5 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 125 137 11 125.9 7.9 17.7X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2155 2186 45 
7.3 137.0 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2221 2222 2 7.1 141.2 1.0X -SQL ORC Vectorized (Nested Column Enabled) 272 279 6 57.8 17.3 7.9X -SQL Parquet MR: DataPageV1 2470 2485 21 6.4 157.0 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2805 2816 17 5.6 178.3 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 99 105 5 159.1 6.3 21.8X -SQL Parquet MR: DataPageV2 2333 2338 6 6.7 148.4 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2649 2655 8 5.9 168.4 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 138 143 5 114.2 8.8 15.6X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2174 2175 2 7.2 138.2 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2170 2183 19 7.2 137.9 1.0X +SQL ORC Vectorized (Nested Column Enabled) 272 279 7 57.7 17.3 8.0X +SQL Parquet MR: DataPageV1 2539 2547 11 6.2 161.4 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2723 2741 25 5.8 173.1 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 131 140 8 119.7 8.4 16.5X +SQL Parquet MR: DataPageV2 2430 2430 0 6.5 154.5 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2748 2749 2 5.7 174.7 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 244 254 8 64.4 15.5 8.9X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2159 2227 95 7.3 137.3 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2200 2206 9 7.2 139.9 1.0X -SQL ORC Vectorized (Nested Column Enabled) 295 306 17 53.3 18.8 7.3X -SQL Parquet MR: DataPageV1 2378 2387 13 6.6 151.2 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2754 2755 2 5.7 175.1 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 102 109 6 154.9 6.5 21.3X -SQL Parquet MR: DataPageV2 2345 2352 10 6.7 149.1 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2696 2706 14 5.8 171.4 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 262 272 10 60.0 16.7 8.2X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2156 2188 46 7.3 137.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2176 2228 73 7.2 138.4 1.0X +SQL ORC Vectorized (Nested Column Enabled) 272 295 19 57.8 17.3 7.9X +SQL Parquet MR: DataPageV1 2542 2544 3 6.2 161.6 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2963 2973 14 5.3 188.4 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 135 144 9 116.8 8.6 16.0X +SQL Parquet MR: DataPageV2 2393 2412 28 6.6 152.1 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2939 2942 4 5.4 186.9 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 267 275 7 58.9 17.0 8.1X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2348 2371 32 6.7 149.3 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2447 2500 74 6.4 155.6 1.0X -SQL ORC Vectorized (Nested Column Enabled) 418 428 7 37.7 26.5 5.6X 
-SQL Parquet MR: DataPageV1 2602 2634 45 6.0 165.4 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3019 3030 17 5.2 191.9 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 135 150 14 116.9 8.6 17.4X -SQL Parquet MR: DataPageV2 2419 2420 2 6.5 153.8 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2814 2822 12 5.6 178.9 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 244 257 16 64.5 15.5 9.6X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2236 2261 35 7.0 142.2 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2212 2256 63 7.1 140.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 279 294 17 56.3 17.8 8.0X +SQL Parquet MR: DataPageV1 2785 2796 15 5.6 177.1 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3213 3327 162 4.9 204.3 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 308 321 10 51.1 19.6 7.3X +SQL Parquet MR: DataPageV2 2454 2496 59 6.4 156.0 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2719 2744 36 5.8 172.9 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 278 285 3 56.6 17.7 8.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2297 2315 25 6.8 146.0 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2319 2356 51 6.8 147.5 1.0X -SQL ORC Vectorized (Nested Column Enabled) 393 406 12 40.0 25.0 5.8X -SQL Parquet MR: DataPageV1 2318 2344 37 6.8 147.4 1.0X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2881 2927 66 5.5 183.1 0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 97 117 12 163.0 6.1 23.8X -SQL Parquet MR: DataPageV2 2290 2307 25 6.9 145.6 1.0X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2808 2812 6 5.6 178.5 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 96 115 14 164.6 6.1 24.0X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL ORC MR 2286 2327 57 6.9 145.4 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2290 2299 13 6.9 145.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 356 385 18 44.2 22.6 6.4X +SQL Parquet MR: DataPageV1 2374 2410 51 6.6 150.9 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3159 3169 14 5.0 200.8 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 103 122 14 153.3 6.5 22.3X +SQL Parquet MR: DataPageV2 2446 2456 14 6.4 155.5 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3008 3010 3 5.2 191.3 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 93 107 10 169.1 5.9 24.6X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 2441 2492 73 6.4 155.2 1.0X -SQL ORC Vectorized (Nested Column Disabled) 2452 2464 16 6.4 155.9 1.0X -SQL ORC Vectorized (Nested Column Enabled) 482 502 13 32.6 30.6 5.1X -SQL Parquet MR: DataPageV1 2649 2650 2 5.9 168.4 0.9X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3132 3143 15 5.0 199.2 
0.8X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 149 164 11 105.7 9.5 16.4X -SQL Parquet MR: DataPageV2 2656 2661 8 5.9 168.8 0.9X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3102 3157 79 5.1 197.2 0.8X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 129 145 13 121.9 8.2 18.9X +SQL ORC MR 2626 2658 45 6.0 167.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2738 2746 11 5.7 174.1 1.0X +SQL ORC Vectorized (Nested Column Enabled) 778 779 1 20.2 49.5 3.4X +SQL Parquet MR: DataPageV1 2911 2911 1 5.4 185.0 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3340 3354 19 4.7 212.4 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 298 310 9 52.7 19.0 8.8X +SQL Parquet MR: DataPageV2 2959 2966 11 5.3 188.1 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3281 3289 10 4.8 208.6 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 297 305 8 52.9 18.9 8.8X ================================================================================================ SQL Nested Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------- -SQL ORC MR 13140 13312 105 0.1 12530.8 1.0X -SQL ORC Vectorized (Nested Column Disabled) 13227 13351 118 0.1 12614.2 1.0X -SQL ORC Vectorized (Nested Column Enabled) 7145 7209 34 0.1 6813.8 1.8X -SQL Parquet MR: DataPageV1 8789 8807 17 0.1 8382.1 1.5X -SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9112 9130 12 0.1 8690.1 1.4X -SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5897 6081 111 0.2 5623.7 2.2X -SQL Parquet MR: DataPageV2 9841 9939 78 0.1 9385.0 1.3X -SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 9850 9884 16 0.1 9393.3 1.3X -SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5678 5705 28 0.2 5414.9 2.3X +SQL ORC MR 13102 13223 110 0.1 12495.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 12894 13024 101 0.1 12296.2 1.0X +SQL ORC Vectorized (Nested Column Enabled) 7180 7220 36 0.1 6847.0 1.8X +SQL Parquet MR: DataPageV1 8625 8658 23 0.1 8225.2 1.5X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 9197 9324 94 0.1 8771.2 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 5862 6041 81 0.2 5590.5 2.2X +SQL Parquet MR: DataPageV2 9564 9731 184 0.1 9120.6 1.4X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 9814 9865 50 0.1 9359.5 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 5651 5735 38 0.2 5389.3 2.3X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10869 10983 161 1.0 1036.6 1.0X -SQL Json 
10536 10542 8 1.0 1004.8 1.0X -SQL Parquet Vectorized: DataPageV1 1780 1786 9 5.9 169.7 6.1X -SQL Parquet Vectorized: DataPageV2 1891 1893 4 5.5 180.3 5.7X -SQL Parquet MR: DataPageV1 4057 4065 11 2.6 386.9 2.7X -SQL Parquet MR: DataPageV2 3947 3957 14 2.7 376.4 2.8X -SQL ORC Vectorized 1797 1806 12 5.8 171.4 6.0X -SQL ORC MR 3511 3513 3 3.0 334.8 3.1X +SQL CSV 12381 12387 8 0.8 1180.8 1.0X +SQL Json 10369 10422 75 1.0 988.8 1.2X +SQL Parquet Vectorized: DataPageV1 1801 1809 12 5.8 171.8 6.9X +SQL Parquet Vectorized: DataPageV2 2010 2024 21 5.2 191.7 6.2X +SQL Parquet MR: DataPageV1 3932 3944 16 2.7 375.0 3.1X +SQL Parquet MR: DataPageV2 4029 4043 20 2.6 384.2 3.1X +SQL ORC Vectorized 1838 1839 2 5.7 175.3 6.7X +SQL ORC MR 3529 3549 28 3.0 336.5 3.5X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 6037 6105 96 1.7 575.7 1.0X -SQL Json 6878 6911 47 1.5 655.9 0.9X -SQL Parquet Vectorized: DataPageV1 388 392 3 27.0 37.0 15.6X -SQL Parquet Vectorized: DataPageV2 390 391 1 26.9 37.2 15.5X -SQL Parquet MR: DataPageV1 1712 1731 26 6.1 163.3 3.5X -SQL Parquet MR: DataPageV2 1620 1638 26 6.5 154.5 3.7X -SQL ORC Vectorized 392 396 4 26.8 37.4 15.4X -SQL ORC MR 1791 1794 4 5.9 170.8 3.4X +SQL CSV 7396 7452 80 1.4 705.4 1.0X +SQL Json 6836 6847 14 1.5 652.0 1.1X +SQL Parquet Vectorized: DataPageV1 468 474 5 22.4 44.6 15.8X +SQL Parquet Vectorized: DataPageV2 458 475 12 22.9 43.7 16.1X +SQL Parquet MR: DataPageV1 1621 1625 4 6.5 154.6 4.6X +SQL Parquet MR: DataPageV2 1645 1654 13 6.4 156.8 4.5X +SQL ORC Vectorized 390 395 3 26.9 37.2 19.0X +SQL ORC MR 1787 1791 5 5.9 170.4 4.1X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 15303 15419 164 1.0 972.9 1.0X -Data column - Json 11147 11172 35 1.4 708.7 1.4X -Data column - Parquet Vectorized: DataPageV1 122 140 14 129.3 7.7 125.8X -Data column - Parquet Vectorized: DataPageV2 231 242 7 68.0 14.7 66.1X -Data column - Parquet MR: DataPageV1 2670 2681 16 5.9 169.7 5.7X -Data column - Parquet MR: DataPageV2 2364 2373 12 6.7 150.3 6.5X -Data column - ORC Vectorized 227 257 24 69.4 14.4 67.5X -Data column - ORC MR 2301 2323 31 6.8 146.3 6.7X -Partition column - CSV 4486 4490 6 3.5 285.2 3.4X -Partition column - Json 9443 9449 8 1.7 600.4 1.6X -Partition column - Parquet Vectorized: DataPageV1 24 26 4 669.2 1.5 651.1X -Partition column - Parquet Vectorized: DataPageV2 23 27 4 681.3 1.5 662.9X -Partition column - Parquet MR: DataPageV1 1328 1341 18 11.8 84.4 11.5X 
-Partition column - Parquet MR: DataPageV2 1333 1335 3 11.8 84.8 11.5X -Partition column - ORC Vectorized 24 28 4 642.2 1.6 624.8X -Partition column - ORC MR 1431 1435 5 11.0 91.0 10.7X -Both columns - CSV 15209 15218 11 1.0 967.0 1.0X -Both columns - Json 11530 11541 16 1.4 733.0 1.3X -Both columns - Parquet Vectorized: DataPageV1 191 201 10 82.5 12.1 80.3X -Both columns - Parquet Vectorized: DataPageV2 294 311 10 53.5 18.7 52.0X -Both columns - Parquet MR: DataPageV1 2794 2797 4 5.6 177.7 5.5X -Both columns - Parquet MR: DataPageV2 2405 2435 44 6.5 152.9 6.4X -Both columns - ORC Vectorized 227 237 10 69.3 14.4 67.4X -Both columns - ORC MR 2330 2338 12 6.7 148.2 6.6X +Data column - CSV 13711 13750 55 1.1 871.7 1.0X +Data column - Json 9919 9951 44 1.6 630.7 1.4X +Data column - Parquet Vectorized: DataPageV1 111 130 16 142.2 7.0 124.0X +Data column - Parquet Vectorized: DataPageV2 259 274 9 60.7 16.5 52.9X +Data column - Parquet MR: DataPageV1 2372 2381 13 6.6 150.8 5.8X +Data column - Parquet MR: DataPageV2 2337 2339 4 6.7 148.6 5.9X +Data column - ORC Vectorized 139 162 16 113.0 8.9 98.5X +Data column - ORC MR 2068 2078 15 7.6 131.4 6.6X +Partition column - CSV 3797 3846 69 4.1 241.4 3.6X +Partition column - Json 8388 8396 10 1.9 533.3 1.6X +Partition column - Parquet Vectorized: DataPageV1 32 35 4 498.4 2.0 434.5X +Partition column - Parquet Vectorized: DataPageV2 31 35 4 500.3 2.0 436.1X +Partition column - Parquet MR: DataPageV1 1241 1242 1 12.7 78.9 11.1X +Partition column - Parquet MR: DataPageV2 1222 1224 3 12.9 77.7 11.2X +Partition column - ORC Vectorized 30 33 3 531.0 1.9 462.9X +Partition column - ORC MR 1232 1241 13 12.8 78.3 11.1X +Both columns - CSV 13510 13516 9 1.2 858.9 1.0X +Both columns - Json 10324 10374 71 1.5 656.4 1.3X +Both columns - Parquet Vectorized: DataPageV1 121 144 18 130.3 7.7 113.6X +Both columns - Parquet Vectorized: DataPageV2 259 274 16 60.8 16.4 53.0X +Both columns - Parquet MR: DataPageV1 2338 2356 25 6.7 148.7 5.9X +Both columns - Parquet MR: DataPageV2 2320 2322 2 6.8 147.5 5.9X +Both columns - ORC Vectorized 177 193 17 89.1 11.2 77.7X +Both columns - ORC MR 2109 2135 36 7.5 134.1 6.5X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7592 7616 34 1.4 724.1 1.0X -SQL Json 8827 8828 2 1.2 841.8 0.9X -SQL Parquet Vectorized: DataPageV1 1194 1203 13 8.8 113.9 6.4X -SQL Parquet Vectorized: DataPageV2 1232 1248 23 8.5 117.4 6.2X -SQL Parquet MR: DataPageV1 3481 3531 71 3.0 332.0 2.2X -SQL Parquet MR: DataPageV2 3585 3585 1 2.9 341.8 2.1X -ParquetReader Vectorized: DataPageV1 786 787 1 13.3 75.0 9.7X -ParquetReader Vectorized: DataPageV2 858 861 3 12.2 81.8 8.8X -SQL ORC Vectorized 891 908 22 11.8 85.0 8.5X -SQL ORC MR 2943 2972 41 3.6 280.7 2.6X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 8866 8885 26 1.2 845.5 1.0X +SQL Json 9201 9207 8 1.1 877.5 1.0X +SQL Parquet Vectorized: DataPageV1 1286 1291 6 8.2 122.7 6.9X +SQL Parquet Vectorized: DataPageV2 1554 1566 17 6.7 148.2 
5.7X +SQL Parquet MR: DataPageV1 3482 3506 34 3.0 332.1 2.5X +SQL Parquet MR: DataPageV2 3607 3635 40 2.9 344.0 2.5X +ParquetReader Vectorized: DataPageV1 792 794 2 13.2 75.5 11.2X +ParquetReader Vectorized: DataPageV2 1116 1123 10 9.4 106.5 7.9X +SQL ORC Vectorized 912 934 20 11.5 87.0 9.7X +SQL ORC MR 2987 3000 18 3.5 284.9 3.0X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5836 5847 15 1.8 556.6 1.0X -SQL Json 7771 7775 6 1.3 741.1 0.8X -SQL Parquet Vectorized: DataPageV1 791 795 4 13.3 75.4 7.4X -SQL Parquet Vectorized: DataPageV2 854 858 4 12.3 81.5 6.8X -SQL Parquet MR: DataPageV1 2893 2906 19 3.6 275.9 2.0X -SQL Parquet MR: DataPageV2 3072 3078 8 3.4 293.0 1.9X -ParquetReader Vectorized: DataPageV1 726 730 7 14.5 69.2 8.0X -ParquetReader Vectorized: DataPageV2 794 800 7 13.2 75.7 7.3X -SQL ORC Vectorized 991 997 6 10.6 94.5 5.9X -SQL ORC MR 2815 2823 11 3.7 268.4 2.1X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 6247 6258 16 1.7 595.8 1.0X +SQL Json 7887 7902 22 1.3 752.1 0.8X +SQL Parquet Vectorized: DataPageV1 824 836 19 12.7 78.5 7.6X +SQL Parquet Vectorized: DataPageV2 1027 1033 10 10.2 97.9 6.1X +SQL Parquet MR: DataPageV1 2799 2799 0 3.7 266.9 2.2X +SQL Parquet MR: DataPageV2 2883 2893 15 3.6 274.9 2.2X +ParquetReader Vectorized: DataPageV1 740 741 1 14.2 70.6 8.4X +ParquetReader Vectorized: DataPageV2 905 906 1 11.6 86.3 6.9X +SQL ORC Vectorized 983 986 3 10.7 93.8 6.4X +SQL ORC MR 2738 2741 4 3.8 261.1 2.3X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4310 4329 27 2.4 411.0 1.0X -SQL Json 5772 5797 35 1.8 550.5 0.7X -SQL Parquet Vectorized: DataPageV1 162 166 3 64.8 15.4 26.6X -SQL Parquet Vectorized: DataPageV2 178 181 2 58.9 17.0 24.2X -SQL Parquet MR: DataPageV1 1905 1906 1 5.5 181.7 2.3X -SQL Parquet MR: DataPageV2 1843 1845 2 5.7 175.8 2.3X -ParquetReader Vectorized: DataPageV1 166 167 2 63.3 15.8 26.0X -ParquetReader Vectorized: DataPageV2 183 185 4 57.4 17.4 23.6X -SQL ORC Vectorized 324 329 7 32.4 30.9 13.3X -SQL ORC MR 1760 1823 89 6.0 167.9 2.4X +SQL CSV 4395 4398 4 2.4 419.2 1.0X +SQL Json 5649 5663 20 1.9 538.7 0.8X +SQL Parquet Vectorized: DataPageV1 164 170 7 64.1 15.6 26.9X +SQL Parquet Vectorized: DataPageV2 186 190 4 56.4 17.7 23.6X +SQL Parquet MR: DataPageV1 1769 1771 2 5.9 168.7 2.5X +SQL Parquet MR: DataPageV2 1721 1730 13 6.1 164.2 2.6X +ParquetReader Vectorized: DataPageV1 169 170 2 62.1 16.1 26.0X +ParquetReader Vectorized: DataPageV2 193 195 2 54.3 18.4 22.8X +SQL ORC Vectorized 313 316 3 33.5 29.9 14.0X +SQL ORC MR 1580 1592 18 6.6 150.6 2.8X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 10 
columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 1745 1776 43 0.6 1664.2 1.0X -SQL Json 2042 2046 5 0.5 1947.8 0.9X -SQL Parquet Vectorized: DataPageV1 27 31 5 39.1 25.6 65.1X -SQL Parquet Vectorized: DataPageV2 32 36 4 32.3 31.0 53.7X -SQL Parquet MR: DataPageV1 183 188 3 5.7 174.9 9.5X -SQL Parquet MR: DataPageV2 160 164 3 6.6 152.5 10.9X -SQL ORC Vectorized 32 35 4 32.3 31.0 53.7X -SQL ORC MR 133 136 3 7.9 127.2 13.1X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 1197 1198 1 0.9 1141.7 1.0X +SQL Json 1855 1857 3 0.6 1769.2 0.6X +SQL Parquet Vectorized: DataPageV1 25 29 4 41.4 24.2 47.3X +SQL Parquet Vectorized: DataPageV2 34 37 5 30.9 32.4 35.2X +SQL Parquet MR: DataPageV1 160 167 6 6.6 152.7 7.5X +SQL Parquet MR: DataPageV2 154 158 4 6.8 146.7 7.8X +SQL ORC Vectorized 29 32 3 36.6 27.3 41.8X +SQL ORC MR 135 148 37 7.8 128.3 8.9X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 3718 3734 23 0.3 3545.9 1.0X -SQL Json 7458 7541 116 0.1 7112.8 0.5X -SQL Parquet Vectorized: DataPageV1 35 38 4 30.2 33.1 107.2X -SQL Parquet Vectorized: DataPageV2 40 44 5 26.1 38.3 92.5X -SQL Parquet MR: DataPageV1 194 198 5 5.4 184.9 19.2X -SQL Parquet MR: DataPageV2 172 177 3 6.1 164.0 21.6X -SQL ORC Vectorized 42 46 7 25.2 39.7 89.3X -SQL ORC MR 143 147 3 7.3 136.8 25.9X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +SQL CSV 2630 2651 29 0.4 2508.3 1.0X +SQL Json 6628 6696 96 0.2 6321.0 0.4X +SQL Parquet Vectorized: DataPageV1 29 33 4 36.2 27.6 90.8X +SQL Parquet Vectorized: DataPageV2 38 41 4 27.7 36.1 69.4X +SQL Parquet MR: DataPageV1 164 167 2 6.4 156.9 16.0X +SQL Parquet MR: DataPageV2 160 165 4 6.5 152.9 16.4X +SQL ORC Vectorized 33 36 4 31.6 31.6 79.3X +SQL ORC MR 141 145 6 7.5 134.2 18.7X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 6223 6233 14 0.2 5935.2 1.0X -SQL Json 13981 14087 151 0.1 13332.9 0.4X -SQL Parquet Vectorized: DataPageV1 50 59 8 20.8 48.1 123.5X -SQL Parquet Vectorized: DataPageV2 54 56 4 19.4 51.5 115.4X -SQL Parquet MR: DataPageV1 214 218 3 4.9 204.4 29.0X -SQL Parquet MR: DataPageV2 192 199 4 5.5 183.4 32.4X -SQL ORC Vectorized 52 56 5 20.1 49.7 119.4X -SQL ORC MR 155 158 2 6.8 147.8 40.1X +SQL CSV 4436 4536 141 0.2 4230.6 1.0X +SQL Json 12445 12624 253 0.1 11868.7 0.4X +SQL Parquet Vectorized: DataPageV1 36 39 4 29.2 34.3 123.5X +SQL Parquet Vectorized: DataPageV2 46 49 3 23.0 43.5 97.3X +SQL Parquet MR: DataPageV1 176 182 4 6.0 167.8 25.2X +SQL Parquet MR: DataPageV2 172 180 7 6.1 164.4 25.7X +SQL ORC Vectorized 39 43 4 26.8 37.3 113.6X +SQL ORC MR 148 154 11 7.1 141.5 29.9X diff --git a/sql/core/benchmarks/DatasetBenchmark-jdk21-results.txt b/sql/core/benchmarks/DatasetBenchmark-jdk21-results.txt index a0aa9ec3b08dd..321b8c5014cb8 100644 --- a/sql/core/benchmarks/DatasetBenchmark-jdk21-results.txt +++ 
b/sql/core/benchmarks/DatasetBenchmark-jdk21-results.txt @@ -2,45 +2,45 @@ Dataset Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor back-to-back map long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 6427 6526 141 15.6 64.3 1.0X -DataFrame 1136 1225 126 88.0 11.4 5.7X -Dataset 1920 1994 105 52.1 19.2 3.3X +RDD 6408 6469 86 15.6 64.1 1.0X +DataFrame 1151 1152 2 86.9 11.5 5.6X +Dataset 1725 1850 177 58.0 17.2 3.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor back-to-back map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 7373 7383 14 13.6 73.7 1.0X -DataFrame 2811 2824 18 35.6 28.1 2.6X -Dataset 6370 6564 274 15.7 63.7 1.2X +RDD 7374 7378 6 13.6 73.7 1.0X +DataFrame 3111 3119 11 32.1 31.1 2.4X +Dataset 6397 6516 168 15.6 64.0 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor back-to-back filter Long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 3972 4002 42 25.2 39.7 1.0X -DataFrame 775 789 15 129.1 7.7 5.1X -Dataset 1540 1547 10 64.9 15.4 2.6X +RDD 4022 4162 197 24.9 40.2 1.0X +DataFrame 701 722 19 142.6 7.0 5.7X +Dataset 1528 1545 24 65.4 15.3 2.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor back-to-back filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 2069 2082 19 48.3 20.7 1.0X -DataFrame 102 117 13 978.9 1.0 20.3X -Dataset 2242 2258 22 44.6 22.4 0.9X +RDD 2065 2095 43 48.4 20.6 1.0X +DataFrame 106 120 12 944.9 1.1 19.5X +Dataset 2302 2326 34 43.4 23.0 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor aggregate: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD sum 1493 1518 36 67.0 14.9 1.0X -DataFrame sum 34 46 8 2962.3 0.3 44.2X -Dataset sum using Aggregator 1937 1976 55 51.6 19.4 0.8X -Dataset complex Aggregator 4915 5032 166 20.3 49.1 0.3X +RDD sum 1424 1452 40 70.2 14.2 1.0X +DataFrame sum 61 73 9 1636.9 0.6 23.3X +Dataset sum using Aggregator 1953 2020 94 51.2 19.5 0.7X +Dataset complex Aggregator 5019 5030 16 19.9 50.2 0.3X diff --git a/sql/core/benchmarks/DatasetBenchmark-results.txt b/sql/core/benchmarks/DatasetBenchmark-results.txt index bb1fe1af99330..79a6ed1e9ce33 100644 --- a/sql/core/benchmarks/DatasetBenchmark-results.txt +++ 
b/sql/core/benchmarks/DatasetBenchmark-results.txt @@ -2,45 +2,45 @@ Dataset Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor back-to-back map long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 6839 7062 315 14.6 68.4 1.0X -DataFrame 1279 1369 127 78.2 12.8 5.3X -Dataset 1689 1755 93 59.2 16.9 4.0X +RDD 6764 6789 36 14.8 67.6 1.0X +DataFrame 1296 1297 0 77.1 13.0 5.2X +Dataset 1448 1472 35 69.1 14.5 4.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor back-to-back map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 8003 8003 0 12.5 80.0 1.0X -DataFrame 2833 2849 23 35.3 28.3 2.8X -Dataset 7895 7909 20 12.7 78.9 1.0X +RDD 8070 8080 14 12.4 80.7 1.0X +DataFrame 3215 3220 7 31.1 32.2 2.5X +Dataset 7842 7868 36 12.8 78.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor back-to-back filter Long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 4231 4342 157 23.6 42.3 1.0X -DataFrame 754 775 28 132.6 7.5 5.6X -Dataset 1634 1656 30 61.2 16.3 2.6X +RDD 4145 4221 107 24.1 41.5 1.0X +DataFrame 758 782 32 131.9 7.6 5.5X +Dataset 1601 1622 29 62.5 16.0 2.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor back-to-back filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD 2244 2351 151 44.6 22.4 1.0X -DataFrame 106 123 16 939.3 1.1 21.1X -Dataset 2484 2541 81 40.3 24.8 0.9X +RDD 2267 2394 180 44.1 22.7 1.0X +DataFrame 110 121 10 907.1 1.1 20.6X +Dataset 2384 2413 42 42.0 23.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor aggregate: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -RDD sum 1405 1406 1 71.2 14.1 1.0X -DataFrame sum 35 46 6 2824.3 0.4 39.7X -Dataset sum using Aggregator 2130 2174 62 46.9 21.3 0.7X -Dataset complex Aggregator 5208 5275 95 19.2 52.1 0.3X +RDD sum 1406 1433 38 71.1 14.1 1.0X +DataFrame sum 69 81 11 1459.0 0.7 20.5X +Dataset sum using Aggregator 2216 2225 13 45.1 22.2 0.6X +Dataset complex Aggregator 4974 5165 269 20.1 49.7 0.3X diff --git a/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt b/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt index 143f433a31604..dc4af0de55307 100644 --- a/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt +++ 
b/sql/core/benchmarks/DateTimeBenchmark-jdk21-results.txt @@ -2,460 +2,460 @@ datetime +/- interval ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor datetime +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date + interval(m) 850 887 33 11.8 85.0 1.0X -date + interval(m, d) 863 864 2 11.6 86.3 1.0X -date + interval(m, d, ms) 3507 3511 5 2.9 350.7 0.2X -date - interval(m) 841 851 9 11.9 84.1 1.0X -date - interval(m, d) 864 870 5 11.6 86.4 1.0X -date - interval(m, d, ms) 3518 3519 2 2.8 351.8 0.2X -timestamp + interval(m) 1756 1759 5 5.7 175.6 0.5X -timestamp + interval(m, d) 1802 1805 4 5.5 180.2 0.5X -timestamp + interval(m, d, ms) 1958 1961 4 5.1 195.8 0.4X -timestamp - interval(m) 1744 1745 2 5.7 174.4 0.5X -timestamp - interval(m, d) 1796 1799 4 5.6 179.6 0.5X -timestamp - interval(m, d, ms) 1944 1947 5 5.1 194.4 0.4X +date + interval(m) 845 871 24 11.8 84.5 1.0X +date + interval(m, d) 871 882 10 11.5 87.1 1.0X +date + interval(m, d, ms) 3744 3747 4 2.7 374.4 0.2X +date - interval(m) 833 836 5 12.0 83.3 1.0X +date - interval(m, d) 879 886 9 11.4 87.9 1.0X +date - interval(m, d, ms) 3703 3710 9 2.7 370.3 0.2X +timestamp + interval(m) 1768 1771 5 5.7 176.8 0.5X +timestamp + interval(m, d) 1809 1811 2 5.5 180.9 0.5X +timestamp + interval(m, d, ms) 1739 1746 10 5.8 173.9 0.5X +timestamp - interval(m) 1519 1530 16 6.6 151.9 0.6X +timestamp - interval(m, d) 1565 1584 27 6.4 156.5 0.5X +timestamp - interval(m, d, ms) 1734 1736 3 5.8 173.4 0.5X ================================================================================================ Extract components ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp wholestage off 209 209 0 47.9 20.9 1.0X -cast to timestamp wholestage on 209 225 15 47.8 20.9 1.0X +cast to timestamp wholestage off 197 199 3 50.8 19.7 1.0X +cast to timestamp wholestage on 207 217 7 48.3 20.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor year of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -year of timestamp wholestage off 639 640 0 15.6 63.9 1.0X -year of timestamp wholestage on 631 635 6 15.9 63.1 1.0X +year of timestamp wholestage off 628 636 11 15.9 62.8 1.0X +year of timestamp wholestage on 626 632 11 16.0 62.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor quarter of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -quarter of timestamp wholestage off 685 694 12 14.6 68.5 1.0X -quarter of timestamp wholestage on 676 681 5 14.8 67.6 1.0X +quarter of timestamp wholestage off 664 665 2 15.1 66.4 1.0X +quarter of timestamp wholestage on 666 668 3 15.0 66.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor month of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -month of timestamp wholestage off 651 653 3 15.4 65.1 1.0X -month of timestamp wholestage on 644 649 4 15.5 64.4 1.0X +month of timestamp wholestage off 642 643 2 15.6 64.2 1.0X +month of timestamp wholestage on 631 636 3 15.8 63.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor weekofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekofyear of timestamp wholestage off 920 920 0 10.9 92.0 1.0X -weekofyear of timestamp wholestage on 1054 1062 5 9.5 105.4 0.9X +weekofyear of timestamp wholestage off 1004 1005 2 10.0 100.4 1.0X +weekofyear of timestamp wholestage on 1059 1068 6 9.4 105.9 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor day of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -day of timestamp wholestage off 667 670 4 15.0 66.7 1.0X -day of timestamp wholestage on 657 662 6 15.2 65.7 1.0X +day of timestamp wholestage off 655 666 15 15.3 65.5 1.0X +day of timestamp wholestage on 643 648 4 15.5 64.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dayofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofyear of timestamp wholestage off 714 714 1 14.0 71.4 1.0X -dayofyear of timestamp wholestage on 689 695 4 14.5 68.9 1.0X +dayofyear of timestamp wholestage off 685 686 1 14.6 68.5 1.0X +dayofyear of timestamp wholestage on 680 685 5 14.7 68.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dayofmonth of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofmonth of timestamp wholestage off 685 686 2 14.6 68.5 1.0X -dayofmonth of timestamp wholestage on 653 657 4 15.3 65.3 1.1X +dayofmonth of timestamp wholestage off 671 677 8 14.9 67.1 1.0X +dayofmonth of timestamp wholestage on 639 643 3 15.6 63.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure 
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dayofweek of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofweek of timestamp wholestage off 822 830 11 12.2 82.2 1.0X -dayofweek of timestamp wholestage on 807 812 3 12.4 80.7 1.0X +dayofweek of timestamp wholestage off 813 818 8 12.3 81.3 1.0X +dayofweek of timestamp wholestage on 804 810 6 12.4 80.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor weekday of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekday of timestamp wholestage off 753 765 16 13.3 75.3 1.0X -weekday of timestamp wholestage on 751 753 2 13.3 75.1 1.0X +weekday of timestamp wholestage off 745 748 5 13.4 74.5 1.0X +weekday of timestamp wholestage on 746 752 7 13.4 74.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor hour of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hour of timestamp wholestage off 554 554 1 18.1 55.4 1.0X -hour of timestamp wholestage on 559 567 9 17.9 55.9 1.0X +hour of timestamp wholestage off 537 541 6 18.6 53.7 1.0X +hour of timestamp wholestage on 544 547 3 18.4 54.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor minute of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -minute of timestamp wholestage off 552 558 8 18.1 55.2 1.0X -minute of timestamp wholestage on 557 561 5 18.0 55.7 1.0X +minute of timestamp wholestage off 547 548 1 18.3 54.7 1.0X +minute of timestamp wholestage on 554 557 3 18.1 55.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor second of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -second of timestamp wholestage off 555 563 11 18.0 55.5 1.0X -second of timestamp wholestage on 558 561 4 17.9 55.8 1.0X +second of timestamp wholestage off 540 544 6 18.5 54.0 1.0X +second of timestamp wholestage on 546 551 5 18.3 54.6 1.0X ================================================================================================ Current date and time ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor current_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -current_date wholestage off 184 188 6 54.4 18.4 1.0X -current_date wholestage on 215 219 5 46.6 21.5 0.9X +current_date wholestage off 172 174 3 58.2 17.2 1.0X +current_date wholestage on 207 212 6 48.3 20.7 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor current_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_timestamp wholestage off 186 199 18 53.7 18.6 1.0X -current_timestamp wholestage on 221 245 29 45.2 22.1 0.8X +current_timestamp wholestage off 172 174 2 58.0 17.2 1.0X +current_timestamp wholestage on 224 239 24 44.7 22.4 0.8X ================================================================================================ Date arithmetic ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor cast to date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date wholestage off 652 658 8 15.3 65.2 1.0X -cast to date wholestage on 670 689 37 14.9 67.0 1.0X +cast to date wholestage off 599 602 4 16.7 59.9 1.0X +cast to date wholestage on 603 606 4 16.6 60.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor last_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -last_day wholestage off 735 737 3 13.6 73.5 1.0X -last_day wholestage on 728 729 2 13.7 72.8 1.0X +last_day wholestage off 714 718 6 14.0 71.4 1.0X +last_day wholestage on 700 708 6 14.3 70.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor next_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -next_day wholestage off 697 702 7 14.3 69.7 1.0X -next_day wholestage on 696 703 7 14.4 69.6 1.0X +next_day wholestage off 637 639 4 15.7 63.7 1.0X +next_day wholestage on 635 644 13 15.7 63.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_add: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_add wholestage off 626 633 10 16.0 62.6 1.0X -date_add wholestage on 631 638 11 15.9 63.1 1.0X +date_add wholestage off 585 600 22 17.1 58.5 1.0X +date_add wholestage on 596 598 1 16.8 59.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD 
EPYC 7763 64-Core Processor date_sub: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_sub wholestage off 630 630 1 15.9 63.0 1.0X -date_sub wholestage on 625 632 5 16.0 62.5 1.0X +date_sub wholestage off 585 588 4 17.1 58.5 1.0X +date_sub wholestage on 597 600 2 16.7 59.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor add_months: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -add_months wholestage off 858 859 1 11.7 85.8 1.0X -add_months wholestage on 848 851 4 11.8 84.8 1.0X +add_months wholestage off 816 817 1 12.3 81.6 1.0X +add_months wholestage on 830 835 5 12.1 83.0 1.0X ================================================================================================ Formatting dates ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor format date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -format date wholestage off 3217 3224 9 3.1 321.7 1.0X -format date wholestage on 3156 3172 14 3.2 315.6 1.0X +format date wholestage off 3079 3082 5 3.2 307.9 1.0X +format date wholestage on 3310 3363 31 3.0 331.0 0.9X ================================================================================================ Formatting timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor from_unixtime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_unixtime wholestage off 2765 2768 5 3.6 276.5 1.0X -from_unixtime wholestage on 2774 2784 8 3.6 277.4 1.0X +from_unixtime wholestage off 2774 2777 3 3.6 277.4 1.0X +from_unixtime wholestage on 2749 2794 67 3.6 274.9 1.0X ================================================================================================ Convert timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor from_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_utc_timestamp wholestage off 664 665 2 15.1 66.4 1.0X -from_utc_timestamp wholestage on 786 792 4 12.7 78.6 0.8X +from_utc_timestamp wholestage off 616 621 6 16.2 61.6 1.0X +from_utc_timestamp wholestage on 736 740 3 13.6 73.6 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure 
AMD EPYC 7763 64-Core Processor to_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_utc_timestamp wholestage off 775 814 56 12.9 77.5 1.0X -to_utc_timestamp wholestage on 816 822 4 12.3 81.6 0.9X +to_utc_timestamp wholestage off 764 764 1 13.1 76.4 1.0X +to_utc_timestamp wholestage on 825 827 2 12.1 82.5 0.9X ================================================================================================ Intervals ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor cast interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast interval wholestage off 221 229 12 45.3 22.1 1.0X -cast interval wholestage on 242 245 5 41.4 24.2 0.9X +cast interval wholestage off 228 232 6 43.8 22.8 1.0X +cast interval wholestage on 207 220 17 48.2 20.7 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor datediff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -datediff wholestage off 1061 1065 6 9.4 106.1 1.0X -datediff wholestage on 1081 1085 3 9.3 108.1 1.0X +datediff wholestage off 999 999 1 10.0 99.9 1.0X +datediff wholestage on 997 999 2 10.0 99.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor months_between: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -months_between wholestage off 3517 3525 11 2.8 351.7 1.0X -months_between wholestage on 3514 3521 7 2.8 351.4 1.0X +months_between wholestage off 3358 3360 3 3.0 335.8 1.0X +months_between wholestage on 3328 3347 22 3.0 332.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor window: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -window wholestage off 310 324 19 3.2 310.3 1.0X -window wholestage on 623 638 16 1.6 622.8 0.5X +window wholestage off 375 382 11 2.7 374.7 1.0X +window wholestage on 676 687 13 1.5 675.8 0.6X ================================================================================================ Truncation ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc YEAR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-date_trunc YEAR wholestage off 1870 1881 16 5.3 187.0 1.0X -date_trunc YEAR wholestage on 1839 1850 16 5.4 183.9 1.0X +date_trunc YEAR wholestage off 1715 1719 5 5.8 171.5 1.0X +date_trunc YEAR wholestage on 1742 1743 2 5.7 174.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc YYYY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YYYY wholestage off 1873 1874 2 5.3 187.3 1.0X -date_trunc YYYY wholestage on 1849 1851 2 5.4 184.9 1.0X +date_trunc YYYY wholestage off 1722 1732 14 5.8 172.2 1.0X +date_trunc YYYY wholestage on 1734 1738 2 5.8 173.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc YY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YY wholestage off 1869 1870 1 5.3 186.9 1.0X -date_trunc YY wholestage on 1849 1851 2 5.4 184.9 1.0X +date_trunc YY wholestage off 1726 1730 5 5.8 172.6 1.0X +date_trunc YY wholestage on 1739 1745 7 5.8 173.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc MON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MON wholestage off 1902 1906 5 5.3 190.2 1.0X -date_trunc MON wholestage on 2046 2048 2 4.9 204.6 0.9X +date_trunc MON wholestage off 1703 1705 3 5.9 170.3 1.0X +date_trunc MON wholestage on 1745 1748 3 5.7 174.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc MONTH: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MONTH wholestage off 1900 1907 9 5.3 190.0 1.0X -date_trunc MONTH wholestage on 2048 2049 2 4.9 204.8 0.9X +date_trunc MONTH wholestage off 1703 1703 1 5.9 170.3 1.0X +date_trunc MONTH wholestage on 1744 1748 3 5.7 174.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc MM: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MM wholestage off 1899 1900 1 5.3 189.9 1.0X -date_trunc MM wholestage on 2045 2049 4 4.9 204.5 0.9X +date_trunc MM wholestage off 1701 1703 3 5.9 170.1 1.0X +date_trunc MM wholestage on 1744 1762 20 5.7 174.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc DAY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -date_trunc DAY wholestage off 1251 1255 7 8.0 125.1 1.0X -date_trunc DAY wholestage on 1198 1203 4 8.3 119.8 1.0X +date_trunc DAY wholestage off 1228 1233 8 8.1 122.8 1.0X +date_trunc DAY wholestage on 1199 1207 7 8.3 119.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc DD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DD wholestage off 1257 1260 4 8.0 125.7 1.0X -date_trunc DD wholestage on 1200 1205 5 8.3 120.0 1.0X +date_trunc DD wholestage off 1228 1229 3 8.1 122.8 1.0X +date_trunc DD wholestage on 1204 1206 2 8.3 120.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc HOUR wholestage off 1259 1262 4 7.9 125.9 1.0X -date_trunc HOUR wholestage on 1241 1246 5 8.1 124.1 1.0X +date_trunc HOUR wholestage off 1238 1244 8 8.1 123.8 1.0X +date_trunc HOUR wholestage on 1228 1238 15 8.1 122.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MINUTE wholestage off 1219 1220 1 8.2 121.9 1.0X -date_trunc MINUTE wholestage on 1201 1208 11 8.3 120.1 1.0X +date_trunc MINUTE wholestage off 1223 1228 8 8.2 122.3 1.0X +date_trunc MINUTE wholestage on 1226 1239 15 8.2 122.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc SECOND wholestage off 309 314 8 32.4 30.9 1.0X -date_trunc SECOND wholestage on 279 285 4 35.8 27.9 1.1X +date_trunc SECOND wholestage off 326 336 13 30.6 32.6 1.0X +date_trunc SECOND wholestage on 295 300 6 33.9 29.5 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc WEEK: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc WEEK wholestage off 1788 1789 0 5.6 178.8 1.0X -date_trunc WEEK wholestage on 1760 1764 5 5.7 176.0 1.0X +date_trunc WEEK wholestage off 1648 1649 0 6.1 164.8 1.0X +date_trunc WEEK wholestage on 1633 1638 6 6.1 163.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc QUARTER: 
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc QUARTER wholestage off 2416 2417 1 4.1 241.6 1.0X -date_trunc QUARTER wholestage on 2419 2421 3 4.1 241.9 1.0X +date_trunc QUARTER wholestage off 2063 2064 2 4.8 206.3 1.0X +date_trunc QUARTER wholestage on 2064 2069 4 4.8 206.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc year: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc year wholestage off 296 300 7 33.8 29.6 1.0X -trunc year wholestage on 275 278 2 36.3 27.5 1.1X +trunc year wholestage off 821 822 1 12.2 82.1 1.0X +trunc year wholestage on 793 796 3 12.6 79.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc yyyy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yyyy wholestage off 301 305 5 33.2 30.1 1.0X -trunc yyyy wholestage on 271 275 3 36.9 27.1 1.1X +trunc yyyy wholestage off 822 823 2 12.2 82.2 1.0X +trunc yyyy wholestage on 790 799 11 12.7 79.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc yy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yy wholestage off 295 295 0 33.9 29.5 1.0X -trunc yy wholestage on 270 277 4 37.0 27.0 1.1X +trunc yy wholestage off 818 820 2 12.2 81.8 1.0X +trunc yy wholestage on 792 795 2 12.6 79.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc mon: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mon wholestage off 296 297 1 33.8 29.6 1.0X -trunc mon wholestage on 275 278 2 36.4 27.5 1.1X +trunc mon wholestage off 767 767 0 13.0 76.7 1.0X +trunc mon wholestage on 741 745 5 13.5 74.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc month: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc month wholestage off 304 308 6 32.9 30.4 1.0X -trunc month wholestage on 276 280 4 36.2 27.6 1.1X +trunc month wholestage off 765 766 2 13.1 76.5 1.0X +trunc month wholestage on 742 746 2 13.5 74.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc mm: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -trunc mm wholestage off 297 298 1 33.6 29.7 1.0X -trunc mm wholestage on 274 276 2 36.5 27.4 1.1X +trunc mm wholestage off 766 767 2 13.1 76.6 1.0X +trunc mm wholestage on 742 744 3 13.5 74.2 1.0X ================================================================================================ Parsing ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to timestamp str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to timestamp str wholestage off 94 95 1 10.6 94.1 1.0X -to timestamp str wholestage on 93 96 2 10.7 93.2 1.0X +to timestamp str wholestage off 99 100 2 10.1 98.7 1.0X +to timestamp str wholestage on 93 99 6 10.8 93.0 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_timestamp wholestage off 659 659 1 1.5 658.7 1.0X -to_timestamp wholestage on 654 659 9 1.5 654.2 1.0X +to_timestamp wholestage off 656 664 10 1.5 656.4 1.0X +to_timestamp wholestage on 664 668 4 1.5 664.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to_unix_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_unix_timestamp wholestage off 649 652 5 1.5 648.7 1.0X -to_unix_timestamp wholestage on 656 660 4 1.5 656.0 1.0X +to_unix_timestamp wholestage off 679 681 3 1.5 679.3 1.0X +to_unix_timestamp wholestage on 658 660 1 1.5 658.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to date str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to date str wholestage off 119 121 2 8.4 119.3 1.0X -to date str wholestage on 117 121 4 8.5 117.3 1.0X +to date str wholestage off 129 133 6 7.8 128.6 1.0X +to date str wholestage on 123 129 6 8.1 122.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_date wholestage off 1071 1073 2 0.9 1071.3 1.0X -to_date wholestage on 1055 1057 2 0.9 1055.1 1.0X +to_date wholestage off 662 664 3 1.5 661.6 1.0X +to_date wholestage on 659 665 4 1.5 659.4 1.0X ================================================================================================ Conversion from/to 
external types ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor To/from Java's date-time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -From java.sql.Date 293 299 5 17.0 58.7 1.0X -From java.time.LocalDate 241 246 6 20.7 48.3 1.2X -Collect java.sql.Date 1243 1366 169 4.0 248.5 0.2X -Collect java.time.LocalDate 1042 1080 40 4.8 208.3 0.3X -From java.sql.Timestamp 237 251 14 21.1 47.4 1.2X -From java.time.Instant 195 210 14 25.7 38.9 1.5X -Collect longs 979 1100 106 5.1 195.8 0.3X -Collect java.sql.Timestamp 1115 1165 60 4.5 223.0 0.3X -Collect java.time.Instant 1021 1095 98 4.9 204.3 0.3X -java.sql.Date to Hive string 4109 4252 124 1.2 821.8 0.1X -java.time.LocalDate to Hive string 3162 3288 125 1.6 632.3 0.1X -java.sql.Timestamp to Hive string 3714 3835 106 1.3 742.8 0.1X -java.time.Instant to Hive string 4319 4344 23 1.2 863.8 0.1X +From java.sql.Date 335 337 1 14.9 67.0 1.0X +From java.time.LocalDate 232 238 6 21.6 46.3 1.4X +Collect java.sql.Date 1204 1314 95 4.2 240.9 0.3X +Collect java.time.LocalDate 946 1072 112 5.3 189.3 0.4X +From java.sql.Timestamp 239 256 15 20.9 47.8 1.4X +From java.time.Instant 201 219 26 24.9 40.2 1.7X +Collect longs 948 1007 61 5.3 189.7 0.4X +Collect java.sql.Timestamp 1067 1183 100 4.7 213.5 0.3X +Collect java.time.Instant 961 1044 75 5.2 192.2 0.3X +java.sql.Date to Hive string 4059 4129 64 1.2 811.7 0.1X +java.time.LocalDate to Hive string 3007 3166 141 1.7 601.3 0.1X +java.sql.Timestamp to Hive string 3795 3876 88 1.3 759.1 0.1X +java.time.Instant to Hive string 4301 4398 84 1.2 860.1 0.1X diff --git a/sql/core/benchmarks/DateTimeBenchmark-results.txt b/sql/core/benchmarks/DateTimeBenchmark-results.txt index e972533e0b10b..051b9107109d3 100644 --- a/sql/core/benchmarks/DateTimeBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeBenchmark-results.txt @@ -2,460 +2,460 @@ datetime +/- interval ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor datetime +/- interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date + interval(m) 1248 1273 35 8.0 124.8 1.0X -date + interval(m, d) 1247 1263 24 8.0 124.7 1.0X -date + interval(m, d, ms) 4155 4156 2 2.4 415.5 0.3X -date - interval(m) 1294 1326 46 7.7 129.4 1.0X -date - interval(m, d) 1342 1342 1 7.5 134.2 0.9X -date - interval(m, d, ms) 4168 4176 11 2.4 416.8 0.3X -timestamp + interval(m) 1903 1913 15 5.3 190.3 0.7X -timestamp + interval(m, d) 1986 1986 0 5.0 198.6 0.6X -timestamp + interval(m, d, ms) 2216 2225 13 4.5 221.6 0.6X -timestamp - interval(m) 1936 1942 8 5.2 193.6 0.6X -timestamp - interval(m, d) 2033 2037 5 4.9 203.3 0.6X -timestamp - interval(m, d, ms) 2219 2239 28 4.5 221.9 0.6X +date + interval(m) 1014 1017 5 9.9 101.4 1.0X +date + interval(m, d) 1015 1015 0 9.9 101.5 1.0X +date + interval(m, d, ms) 3966 3979 19 2.5 396.6 0.3X +date - interval(m) 971 985 13 10.3 97.1 1.0X +date - interval(m, d) 
1008 1011 5 9.9 100.8 1.0X +date - interval(m, d, ms) 4016 4024 11 2.5 401.6 0.3X +timestamp + interval(m) 1917 1939 31 5.2 191.7 0.5X +timestamp + interval(m, d) 1948 1959 17 5.1 194.8 0.5X +timestamp + interval(m, d, ms) 2056 2057 0 4.9 205.6 0.5X +timestamp - interval(m) 1814 1822 11 5.5 181.4 0.6X +timestamp - interval(m, d) 1871 1877 9 5.3 187.1 0.5X +timestamp - interval(m, d, ms) 2012 2017 7 5.0 201.2 0.5X ================================================================================================ Extract components ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor cast to timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp wholestage off 196 219 33 51.0 19.6 1.0X -cast to timestamp wholestage on 203 220 15 49.2 20.3 1.0X +cast to timestamp wholestage off 205 218 19 48.9 20.5 1.0X +cast to timestamp wholestage on 225 228 3 44.5 22.5 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor year of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -year of timestamp wholestage off 838 847 12 11.9 83.8 1.0X -year of timestamp wholestage on 846 850 5 11.8 84.6 1.0X +year of timestamp wholestage off 777 788 16 12.9 77.7 1.0X +year of timestamp wholestage on 777 781 4 12.9 77.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor quarter of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -quarter of timestamp wholestage off 861 863 3 11.6 86.1 1.0X -quarter of timestamp wholestage on 877 882 4 11.4 87.7 1.0X +quarter of timestamp wholestage off 793 804 16 12.6 79.3 1.0X +quarter of timestamp wholestage on 791 801 11 12.6 79.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor month of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -month of timestamp wholestage off 850 854 6 11.8 85.0 1.0X -month of timestamp wholestage on 836 843 8 12.0 83.6 1.0X +month of timestamp wholestage off 766 769 5 13.1 76.6 1.0X +month of timestamp wholestage on 772 775 2 13.0 77.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor weekofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekofyear of timestamp wholestage off 1240 1250 13 8.1 124.0 1.0X -weekofyear of timestamp 
wholestage on 1266 1269 4 7.9 126.6 1.0X +weekofyear of timestamp wholestage off 1114 1124 15 9.0 111.4 1.0X +weekofyear of timestamp wholestage on 1190 1196 4 8.4 119.0 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor day of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -day of timestamp wholestage off 848 849 2 11.8 84.8 1.0X -day of timestamp wholestage on 842 850 7 11.9 84.2 1.0X +day of timestamp wholestage off 766 773 10 13.1 76.6 1.0X +day of timestamp wholestage on 770 775 5 13.0 77.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dayofyear of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofyear of timestamp wholestage off 880 881 1 11.4 88.0 1.0X -dayofyear of timestamp wholestage on 912 914 2 11.0 91.2 1.0X +dayofyear of timestamp wholestage off 809 812 5 12.4 80.9 1.0X +dayofyear of timestamp wholestage on 811 815 4 12.3 81.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dayofmonth of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofmonth of timestamp wholestage off 870 871 2 11.5 87.0 1.0X -dayofmonth of timestamp wholestage on 845 848 3 11.8 84.5 1.0X +dayofmonth of timestamp wholestage off 779 783 5 12.8 77.9 1.0X +dayofmonth of timestamp wholestage on 769 773 4 13.0 76.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor dayofweek of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -dayofweek of timestamp wholestage off 982 989 10 10.2 98.2 1.0X -dayofweek of timestamp wholestage on 1030 1037 8 9.7 103.0 1.0X +dayofweek of timestamp wholestage off 918 918 0 10.9 91.8 1.0X +dayofweek of timestamp wholestage on 915 920 5 10.9 91.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor weekday of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -weekday of timestamp wholestage off 950 956 9 10.5 95.0 1.0X -weekday of timestamp wholestage on 976 980 5 10.3 97.6 1.0X +weekday of timestamp wholestage off 868 868 0 11.5 86.8 1.0X +weekday of timestamp wholestage on 874 880 5 11.4 87.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor hour of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -hour of timestamp wholestage off 600 601 2 16.7 60.0 1.0X -hour of timestamp wholestage on 606 610 3 16.5 60.6 1.0X +hour of timestamp wholestage off 604 608 5 16.6 60.4 1.0X +hour of timestamp wholestage on 619 625 5 16.2 61.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor minute of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -minute of timestamp wholestage off 595 597 4 16.8 59.5 1.0X -minute of timestamp wholestage on 608 613 4 16.4 60.8 1.0X +minute of timestamp wholestage off 608 616 12 16.4 60.8 1.0X +minute of timestamp wholestage on 614 621 4 16.3 61.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor second of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -second of timestamp wholestage off 630 633 4 15.9 63.0 1.0X -second of timestamp wholestage on 609 610 1 16.4 60.9 1.0X +second of timestamp wholestage off 606 612 9 16.5 60.6 1.0X +second of timestamp wholestage on 616 620 4 16.2 61.6 1.0X ================================================================================================ Current date and time ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor current_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_date wholestage off 181 183 2 55.1 18.1 1.0X -current_date wholestage on 215 218 4 46.5 21.5 0.8X +current_date wholestage off 192 194 3 52.1 19.2 1.0X +current_date wholestage on 214 228 16 46.8 21.4 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor current_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -current_timestamp wholestage off 189 193 6 52.9 18.9 1.0X -current_timestamp wholestage on 214 259 48 46.8 21.4 0.9X +current_timestamp wholestage off 187 189 3 53.4 18.7 1.0X +current_timestamp wholestage on 227 238 13 44.0 22.7 0.8X ================================================================================================ Date arithmetic ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor cast to date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -cast to date wholestage off 792 792 1 12.6 79.2 1.0X -cast to date wholestage on 795 805 7 12.6 79.5 1.0X +cast to date wholestage off 705 708 5 14.2 70.5 1.0X +cast to date wholestage on 677 681 2 14.8 67.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor last_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -last_day wholestage off 924 928 5 10.8 92.4 1.0X -last_day wholestage on 927 931 4 10.8 92.7 1.0X +last_day wholestage off 791 791 1 12.6 79.1 1.0X +last_day wholestage on 775 782 6 12.9 77.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor next_day: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -next_day wholestage off 820 820 0 12.2 82.0 1.0X -next_day wholestage on 862 866 5 11.6 86.2 1.0X +next_day wholestage off 696 697 2 14.4 69.6 1.0X +next_day wholestage on 704 706 2 14.2 70.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_add: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_add wholestage off 795 798 3 12.6 79.5 1.0X -date_add wholestage on 800 806 11 12.5 80.0 1.0X +date_add wholestage off 646 648 3 15.5 64.6 1.0X +date_add wholestage on 648 651 2 15.4 64.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_sub: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_sub wholestage off 772 772 0 13.0 77.2 1.0X -date_sub wholestage on 787 791 5 12.7 78.7 1.0X +date_sub wholestage off 646 647 1 15.5 64.6 1.0X +date_sub wholestage on 653 659 9 15.3 65.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor add_months: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -add_months wholestage off 1168 1169 1 8.6 116.8 1.0X -add_months wholestage on 1209 1214 6 8.3 120.9 1.0X +add_months wholestage off 922 930 11 10.8 92.2 1.0X +add_months wholestage on 908 911 2 11.0 90.8 1.0X ================================================================================================ Formatting dates ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor format date: Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -format date wholestage off 3809 3809 0 2.6 380.9 1.0X -format date wholestage on 3724 3733 9 2.7 372.4 1.0X +format date wholestage off 3228 3232 6 3.1 322.8 1.0X +format date wholestage on 3205 3215 10 3.1 320.5 1.0X ================================================================================================ Formatting timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor from_unixtime: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_unixtime wholestage off 4096 4097 2 2.4 409.6 1.0X -from_unixtime wholestage on 3998 4007 12 2.5 399.8 1.0X +from_unixtime wholestage off 3681 3684 5 2.7 368.1 1.0X +from_unixtime wholestage on 3557 3603 27 2.8 355.7 1.0X ================================================================================================ Convert timestamps ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor from_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -from_utc_timestamp wholestage off 738 739 2 13.6 73.8 1.0X -from_utc_timestamp wholestage on 821 825 3 12.2 82.1 0.9X +from_utc_timestamp wholestage off 690 692 4 14.5 69.0 1.0X +from_utc_timestamp wholestage on 782 785 2 12.8 78.2 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to_utc_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_utc_timestamp wholestage off 1059 1059 0 9.4 105.9 1.0X -to_utc_timestamp wholestage on 1037 1045 10 9.6 103.7 1.0X +to_utc_timestamp wholestage off 1040 1040 0 9.6 104.0 1.0X +to_utc_timestamp wholestage on 1026 1032 7 9.7 102.6 1.0X ================================================================================================ Intervals ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor cast interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast interval wholestage off 209 213 5 47.9 20.9 1.0X -cast interval wholestage on 198 208 8 50.4 19.8 1.1X +cast interval wholestage off 225 226 2 44.5 22.5 1.0X +cast interval wholestage on 216 225 7 46.4 21.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 
6.5.0-1018-azure AMD EPYC 7763 64-Core Processor datediff: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -datediff wholestage off 1374 1380 8 7.3 137.4 1.0X -datediff wholestage on 1384 1390 5 7.2 138.4 1.0X +datediff wholestage off 1139 1141 4 8.8 113.9 1.0X +datediff wholestage on 1157 1162 5 8.6 115.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor months_between: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -months_between wholestage off 3564 3569 8 2.8 356.4 1.0X -months_between wholestage on 3601 3603 2 2.8 360.1 1.0X +months_between wholestage off 3217 3219 4 3.1 321.7 1.0X +months_between wholestage on 3254 3266 9 3.1 325.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor window: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -window wholestage off 390 415 35 2.6 390.0 1.0X -window wholestage on 616 636 14 1.6 616.3 0.6X +window wholestage off 406 410 6 2.5 406.2 1.0X +window wholestage on 667 677 10 1.5 667.2 0.6X ================================================================================================ Truncation ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc YEAR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YEAR wholestage off 1871 1878 10 5.3 187.1 1.0X -date_trunc YEAR wholestage on 1823 1829 8 5.5 182.3 1.0X +date_trunc YEAR wholestage off 1720 1726 9 5.8 172.0 1.0X +date_trunc YEAR wholestage on 1742 1752 7 5.7 174.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc YYYY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YYYY wholestage off 1878 1888 15 5.3 187.8 1.0X -date_trunc YYYY wholestage on 1838 1843 4 5.4 183.8 1.0X +date_trunc YYYY wholestage off 1722 1724 2 5.8 172.2 1.0X +date_trunc YYYY wholestage on 1734 1748 11 5.8 173.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc YY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc YY wholestage off 1877 1878 1 5.3 187.7 1.0X -date_trunc YY wholestage on 1839 1844 4 5.4 183.9 1.0X +date_trunc YY wholestage off 1721 1722 1 5.8 
172.1 1.0X +date_trunc YY wholestage on 1738 1749 8 5.8 173.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc MON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MON wholestage off 1978 1978 0 5.1 197.8 1.0X -date_trunc MON wholestage on 1937 1939 1 5.2 193.7 1.0X +date_trunc MON wholestage off 1806 1808 4 5.5 180.6 1.0X +date_trunc MON wholestage on 1863 1870 5 5.4 186.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc MONTH: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MONTH wholestage off 1986 1991 7 5.0 198.6 1.0X -date_trunc MONTH wholestage on 1939 1950 12 5.2 193.9 1.0X +date_trunc MONTH wholestage off 1808 1811 5 5.5 180.8 1.0X +date_trunc MONTH wholestage on 1865 1871 8 5.4 186.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc MM: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MM wholestage off 1987 1989 3 5.0 198.7 1.0X -date_trunc MM wholestage on 1944 1958 12 5.1 194.4 1.0X +date_trunc MM wholestage off 1809 1820 15 5.5 180.9 1.0X +date_trunc MM wholestage on 1862 1865 2 5.4 186.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc DAY: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DAY wholestage off 1392 1394 3 7.2 139.2 1.0X -date_trunc DAY wholestage on 1320 1323 5 7.6 132.0 1.1X +date_trunc DAY wholestage off 1364 1365 2 7.3 136.4 1.0X +date_trunc DAY wholestage on 1339 1344 3 7.5 133.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc DD: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc DD wholestage off 1386 1388 2 7.2 138.6 1.0X -date_trunc DD wholestage on 1314 1320 4 7.6 131.4 1.1X +date_trunc DD wholestage off 1371 1374 3 7.3 137.1 1.0X +date_trunc DD wholestage on 1336 1338 3 7.5 133.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc HOUR: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc HOUR wholestage off 1380 1382 3 7.2 138.0 1.0X -date_trunc HOUR wholestage on 1340 1355 22 7.5 134.0 1.0X 
+date_trunc HOUR wholestage off 1408 1409 1 7.1 140.8 1.0X +date_trunc HOUR wholestage on 1316 1322 9 7.6 131.6 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc MINUTE: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc MINUTE wholestage off 1396 1400 6 7.2 139.6 1.0X -date_trunc MINUTE wholestage on 1346 1355 11 7.4 134.6 1.0X +date_trunc MINUTE wholestage off 1412 1413 1 7.1 141.2 1.0X +date_trunc MINUTE wholestage on 1317 1321 4 7.6 131.7 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc SECOND: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc SECOND wholestage off 366 373 10 27.3 36.6 1.0X -date_trunc SECOND wholestage on 309 315 4 32.4 30.9 1.2X +date_trunc SECOND wholestage off 357 361 6 28.0 35.7 1.0X +date_trunc SECOND wholestage on 306 308 3 32.7 30.6 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc WEEK: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc WEEK wholestage off 1847 1849 2 5.4 184.7 1.0X -date_trunc WEEK wholestage on 1819 1827 6 5.5 181.9 1.0X +date_trunc WEEK wholestage off 1646 1664 25 6.1 164.6 1.0X +date_trunc WEEK wholestage on 1667 1671 7 6.0 166.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor date_trunc QUARTER: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -date_trunc QUARTER wholestage off 2679 2694 21 3.7 267.9 1.0X -date_trunc QUARTER wholestage on 2691 2704 12 3.7 269.1 1.0X +date_trunc QUARTER wholestage off 2239 2241 3 4.5 223.9 1.0X +date_trunc QUARTER wholestage on 2199 2202 4 4.5 219.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc year: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc year wholestage off 335 337 3 29.9 33.5 1.0X -trunc year wholestage on 302 303 1 33.1 30.2 1.1X +trunc year wholestage off 1006 1010 5 9.9 100.6 1.0X +trunc year wholestage on 925 931 8 10.8 92.5 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc yyyy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yyyy wholestage off 332 333 2 30.1 33.2 1.0X 
-trunc yyyy wholestage on 303 304 1 33.0 30.3 1.1X +trunc yyyy wholestage off 1008 1009 2 9.9 100.8 1.0X +trunc yyyy wholestage on 925 927 2 10.8 92.5 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc yy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc yy wholestage off 332 333 2 30.1 33.2 1.0X -trunc yy wholestage on 301 303 1 33.2 30.1 1.1X +trunc yy wholestage off 1004 1010 9 10.0 100.4 1.0X +trunc yy wholestage on 926 928 1 10.8 92.6 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc mon: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mon wholestage off 332 340 10 30.1 33.2 1.0X -trunc mon wholestage on 302 303 2 33.2 30.2 1.1X +trunc mon wholestage off 947 947 0 10.6 94.7 1.0X +trunc mon wholestage on 896 902 6 11.2 89.6 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc month: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc month wholestage off 333 340 9 30.0 33.3 1.0X -trunc month wholestage on 301 308 5 33.2 30.1 1.1X +trunc month wholestage off 948 950 2 10.5 94.8 1.0X +trunc month wholestage on 895 899 6 11.2 89.5 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor trunc mm: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -trunc mm wholestage off 332 338 9 30.1 33.2 1.0X -trunc mm wholestage on 301 304 4 33.2 30.1 1.1X +trunc mm wholestage off 950 950 1 10.5 95.0 1.0X +trunc mm wholestage on 895 897 2 11.2 89.5 1.1X ================================================================================================ Parsing ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to timestamp str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to timestamp str wholestage off 111 111 0 9.0 111.4 1.0X -to timestamp str wholestage on 97 102 3 10.3 96.7 1.2X +to timestamp str wholestage off 102 107 6 9.8 102.2 1.0X +to timestamp str wholestage on 94 99 4 10.6 94.4 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -to_timestamp wholestage off 729 729 1 1.4 729.0 1.0X -to_timestamp wholestage on 745 748 2 1.3 744.8 1.0X +to_timestamp wholestage off 717 719 3 1.4 716.5 1.0X +to_timestamp wholestage on 704 706 2 1.4 703.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to_unix_timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_unix_timestamp wholestage off 743 750 10 1.3 742.7 1.0X -to_unix_timestamp wholestage on 739 742 2 1.4 739.4 1.0X +to_unix_timestamp wholestage off 706 707 0 1.4 706.2 1.0X +to_unix_timestamp wholestage on 698 701 2 1.4 697.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to date str: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to date str wholestage off 118 120 3 8.5 118.1 1.0X -to date str wholestage on 126 131 7 7.9 126.5 0.9X +to date str wholestage off 140 142 3 7.1 139.9 1.0X +to date str wholestage on 131 137 3 7.6 131.0 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor to_date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -to_date wholestage off 1175 1175 0 0.9 1175.3 1.0X -to_date wholestage on 1173 1176 2 0.9 1173.3 1.0X +to_date wholestage off 596 597 2 1.7 596.2 1.0X +to_date wholestage on 604 606 2 1.7 603.6 1.0X ================================================================================================ Conversion from/to external types ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor To/from Java's date-time: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -From java.sql.Date 307 310 2 16.3 61.4 1.0X -From java.time.LocalDate 322 322 1 15.6 64.3 1.0X -Collect java.sql.Date 1093 1298 190 4.6 218.6 0.3X -Collect java.time.LocalDate 1122 1227 163 4.5 224.5 0.3X -From java.sql.Timestamp 261 275 13 19.2 52.1 1.2X -From java.time.Instant 228 237 8 21.9 45.7 1.3X -Collect longs 952 1083 132 5.3 190.5 0.3X -Collect java.sql.Timestamp 1152 1197 40 4.3 230.3 0.3X -Collect java.time.Instant 919 1033 115 5.4 183.8 0.3X -java.sql.Date to Hive string 3984 4102 145 1.3 796.9 0.1X -java.time.LocalDate to Hive string 3606 3709 90 1.4 721.2 0.1X -java.sql.Timestamp to Hive string 3774 3865 102 1.3 754.8 0.1X -java.time.Instant to Hive string 5409 5480 84 0.9 1081.7 0.1X +From java.sql.Date 304 317 13 16.4 60.8 1.0X +From java.time.LocalDate 269 271 1 18.6 53.9 1.1X +Collect java.sql.Date 1269 1296 38 3.9 253.8 0.2X +Collect 
java.time.LocalDate 987 1054 59 5.1 197.5 0.3X +From java.sql.Timestamp 251 264 22 19.9 50.3 1.2X +From java.time.Instant 194 201 6 25.7 38.9 1.6X +Collect longs 876 977 87 5.7 175.1 0.3X +Collect java.sql.Timestamp 1028 1081 53 4.9 205.5 0.3X +Collect java.time.Instant 1017 1114 85 4.9 203.4 0.3X +java.sql.Date to Hive string 3697 3897 178 1.4 739.3 0.1X +java.time.LocalDate to Hive string 3366 3505 131 1.5 673.1 0.1X +java.sql.Timestamp to Hive string 3490 3648 151 1.4 698.1 0.1X +java.time.Instant to Hive string 5279 5339 97 0.9 1055.7 0.1X diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk21-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk21-results.txt index ac6fff4fe5b30..8119824cb769f 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-jdk21-results.txt @@ -2,153 +2,153 @@ Rebasing dates/timestamps in Parquet datasource ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save DATE to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 10761 10761 0 9.3 107.6 1.0X -before 1582, noop 6737 6737 0 14.8 67.4 1.6X -after 1582, rebase EXCEPTION 18652 18652 0 5.4 186.5 0.6X -after 1582, rebase LEGACY 18558 18558 0 5.4 185.6 0.6X -after 1582, rebase CORRECTED 18805 18805 0 5.3 188.0 0.6X -before 1582, rebase LEGACY 14983 14983 0 6.7 149.8 0.7X -before 1582, rebase CORRECTED 14583 14583 0 6.9 145.8 0.7X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +after 1582, noop 10287 10287 0 9.7 102.9 1.0X +before 1582, noop 6015 6015 0 16.6 60.2 1.7X +after 1582, rebase EXCEPTION 19200 19200 0 5.2 192.0 0.5X +after 1582, rebase LEGACY 19267 19267 0 5.2 192.7 0.5X +after 1582, rebase CORRECTED 19175 19175 0 5.2 191.7 0.5X +before 1582, rebase LEGACY 15262 15262 0 6.6 152.6 0.7X +before 1582, rebase CORRECTED 15273 15273 0 6.5 152.7 0.7X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load DATE from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase EXCEPTION 10738 10862 144 9.3 107.4 1.0X -after 1582, vec off, rebase LEGACY 10910 10937 25 9.2 109.1 1.0X -after 1582, vec off, rebase CORRECTED 10842 10901 63 9.2 108.4 1.0X -after 1582, vec on, rebase EXCEPTION 2454 2484 35 40.8 24.5 4.4X -after 1582, vec on, rebase LEGACY 2464 2484 20 40.6 24.6 4.4X -after 1582, vec on, rebase CORRECTED 2457 2493 32 40.7 24.6 4.4X -before 1582, vec off, rebase LEGACY 11112 11125 21 9.0 111.1 1.0X -before 1582, vec off, rebase CORRECTED 10859 10899 35 9.2 108.6 1.0X -before 1582, vec on, rebase LEGACY 2836 2848 13 35.3 28.4 3.8X -before 1582, vec on, rebase CORRECTED 2416 2452 34 41.4 24.2 4.4X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +after 1582, vec off, rebase EXCEPTION 11299 11345 61 8.9 113.0 1.0X +after 1582, vec off, rebase LEGACY 12489 12538 43 8.0 124.9 0.9X +after 1582, vec off, rebase CORRECTED 12542 12552 16 8.0 125.4 0.9X +after 1582, vec on, rebase EXCEPTION 2429 2471 42 41.2 24.3 
4.7X +after 1582, vec on, rebase LEGACY 2473 2500 33 40.4 24.7 4.6X +after 1582, vec on, rebase CORRECTED 2462 2483 29 40.6 24.6 4.6X +before 1582, vec off, rebase LEGACY 12815 12872 59 7.8 128.2 0.9X +before 1582, vec off, rebase CORRECTED 12553 12596 40 8.0 125.5 0.9X +before 1582, vec on, rebase LEGACY 2771 2802 29 36.1 27.7 4.1X +before 1582, vec on, rebase CORRECTED 2403 2447 39 41.6 24.0 4.7X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_INT96 to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2382 2382 0 42.0 23.8 1.0X -before 1900, noop 2331 2331 0 42.9 23.3 1.0X -after 1900, rebase EXCEPTION 12497 12497 0 8.0 125.0 0.2X -after 1900, rebase LEGACY 12285 12285 0 8.1 122.9 0.2X -after 1900, rebase CORRECTED 11954 11954 0 8.4 119.5 0.2X -before 1900, rebase LEGACY 13867 13867 0 7.2 138.7 0.2X -before 1900, rebase CORRECTED 12243 12243 0 8.2 122.4 0.2X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +after 1900, noop 2413 2413 0 41.4 24.1 1.0X +before 1900, noop 2439 2439 0 41.0 24.4 1.0X +after 1900, rebase EXCEPTION 12548 12548 0 8.0 125.5 0.2X +after 1900, rebase LEGACY 12525 12525 0 8.0 125.2 0.2X +after 1900, rebase CORRECTED 12548 12548 0 8.0 125.5 0.2X +before 1900, rebase LEGACY 14343 14343 0 7.0 143.4 0.2X +before 1900, rebase CORRECTED 12758 12758 0 7.8 127.6 0.2X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_INT96 from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 15537 15544 6 6.4 155.4 1.0X -after 1900, vec off, rebase LEGACY 15699 15753 56 6.4 157.0 1.0X -after 1900, vec off, rebase CORRECTED 15611 15621 10 6.4 156.1 1.0X -after 1900, vec on, rebase EXCEPTION 4058 4074 14 24.6 40.6 3.8X -after 1900, vec on, rebase LEGACY 4015 4043 26 24.9 40.1 3.9X -after 1900, vec on, rebase CORRECTED 4069 4121 48 24.6 40.7 3.8X -before 1900, vec off, rebase LEGACY 18703 18729 31 5.3 187.0 0.8X -before 1900, vec off, rebase CORRECTED 15777 15814 46 6.3 157.8 1.0X -before 1900, vec on, rebase LEGACY 6290 6313 22 15.9 62.9 2.5X -before 1900, vec on, rebase CORRECTED 4056 4080 22 24.7 40.6 3.8X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +after 1900, vec off, rebase EXCEPTION 15626 15663 37 6.4 156.3 1.0X +after 1900, vec off, rebase LEGACY 16695 16750 47 6.0 167.0 0.9X +after 1900, vec off, rebase CORRECTED 15958 16047 77 6.3 159.6 1.0X +after 1900, vec on, rebase EXCEPTION 4039 4056 20 24.8 40.4 3.9X +after 1900, vec on, rebase LEGACY 4113 4132 27 24.3 41.1 3.8X +after 1900, vec on, rebase CORRECTED 4062 4071 9 24.6 40.6 3.8X +before 1900, vec off, rebase LEGACY 18025 18067 48 5.5 180.3 0.9X +before 1900, vec off, rebase CORRECTED 16044 16064 23 6.2 160.4 1.0X +before 1900, vec on, rebase LEGACY 6302 6317 22 15.9 63.0 2.5X +before 1900, vec on, rebase CORRECTED 4041 4061 18 24.7 40.4 3.9X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_MICROS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2360 2360 0 42.4 23.6 1.0X -before 1900, noop 2318 2318 0 43.1 23.2 1.0X -after 1900, rebase EXCEPTION 11233 11233 0 8.9 112.3 0.2X -after 1900, rebase LEGACY 11001 11001 0 9.1 110.0 0.2X -after 1900, rebase CORRECTED 10952 10952 0 9.1 109.5 0.2X -before 1900, rebase LEGACY 13110 13110 0 7.6 131.1 0.2X -before 1900, rebase CORRECTED 11511 11511 0 8.7 115.1 0.2X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +after 1900, noop 2449 2449 0 40.8 24.5 1.0X +before 1900, noop 2448 2448 0 40.8 24.5 1.0X +after 1900, rebase EXCEPTION 11787 11787 0 8.5 117.9 0.2X +after 1900, rebase LEGACY 11894 11894 0 8.4 118.9 0.2X +after 1900, rebase CORRECTED 11807 11807 0 8.5 118.1 0.2X +before 1900, rebase LEGACY 13934 13934 0 7.2 139.3 0.2X +before 1900, rebase CORRECTED 11771 11771 0 8.5 117.7 0.2X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_MICROS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 14455 14506 67 6.9 144.5 1.0X -after 1900, vec off, rebase LEGACY 14512 14595 77 6.9 145.1 1.0X -after 1900, vec off, rebase CORRECTED 14534 14551 16 6.9 145.3 1.0X -after 1900, vec on, rebase EXCEPTION 3691 3705 13 27.1 36.9 3.9X -after 1900, vec on, rebase LEGACY 3715 3776 56 26.9 37.2 3.9X -after 1900, vec on, rebase CORRECTED 3696 3712 14 27.1 37.0 3.9X -before 1900, vec off, rebase LEGACY 16982 17027 60 5.9 169.8 0.9X -before 1900, vec off, rebase CORRECTED 14446 14480 47 6.9 144.5 1.0X -before 1900, vec on, rebase LEGACY 5700 5722 20 17.5 57.0 2.5X -before 1900, vec on, rebase CORRECTED 3766 3783 26 26.6 37.7 3.8X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +after 1900, vec off, rebase EXCEPTION 14723 14750 35 6.8 147.2 1.0X +after 1900, vec off, rebase LEGACY 14871 14964 115 6.7 148.7 1.0X +after 1900, vec off, rebase CORRECTED 14771 14797 27 6.8 147.7 1.0X +after 1900, vec on, rebase EXCEPTION 3748 3753 8 26.7 37.5 3.9X +after 1900, vec on, rebase LEGACY 3754 3767 11 26.6 37.5 3.9X +after 1900, vec on, rebase CORRECTED 3737 3756 24 26.8 37.4 3.9X +before 1900, vec off, rebase LEGACY 17307 17328 31 5.8 173.1 0.9X +before 1900, vec off, rebase CORRECTED 14712 14757 76 6.8 147.1 1.0X +before 1900, vec on, rebase LEGACY 5700 5718 16 17.5 57.0 2.6X +before 1900, vec on, rebase CORRECTED 3734 3773 34 26.8 37.3 3.9X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_MILLIS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2361 2361 0 42.4 23.6 1.0X -before 1900, noop 2303 2303 0 43.4 23.0 1.0X -after 1900, rebase EXCEPTION 10793 10793 0 9.3 107.9 0.2X -after 1900, rebase LEGACY 10804 10804 0 9.3 108.0 0.2X -after 1900, rebase CORRECTED 10894 10894 0 9.2 108.9 0.2X -before 1900, rebase LEGACY 12759 12759 0 7.8 127.6 0.2X -before 1900, rebase CORRECTED 10916 10916 0 9.2 109.2 0.2X - -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +after 1900, noop 2380 2380 0 42.0 23.8 1.0X +before 1900, noop 2378 2378 0 42.1 23.8 1.0X +after 1900, 
rebase EXCEPTION 11216 11216 0 8.9 112.2 0.2X +after 1900, rebase LEGACY 11924 11924 0 8.4 119.2 0.2X +after 1900, rebase CORRECTED 12410 12410 0 8.1 124.1 0.2X +before 1900, rebase LEGACY 13779 13779 0 7.3 137.8 0.2X +before 1900, rebase CORRECTED 11367 11367 0 8.8 113.7 0.2X + +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_MILLIS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 14148 14183 45 7.1 141.5 1.0X -after 1900, vec off, rebase LEGACY 14408 14444 62 6.9 144.1 1.0X -after 1900, vec off, rebase CORRECTED 14462 14498 44 6.9 144.6 1.0X -after 1900, vec on, rebase EXCEPTION 4887 4908 21 20.5 48.9 2.9X -after 1900, vec on, rebase LEGACY 4465 4483 16 22.4 44.7 3.2X -after 1900, vec on, rebase CORRECTED 4880 4922 54 20.5 48.8 2.9X -before 1900, vec off, rebase LEGACY 16541 16553 17 6.0 165.4 0.9X -before 1900, vec off, rebase CORRECTED 14451 14519 63 6.9 144.5 1.0X -before 1900, vec on, rebase LEGACY 6116 6127 11 16.3 61.2 2.3X -before 1900, vec on, rebase CORRECTED 4898 4918 20 20.4 49.0 2.9X +after 1900, vec off, rebase EXCEPTION 14340 14389 75 7.0 143.4 1.0X +after 1900, vec off, rebase LEGACY 14608 14627 23 6.8 146.1 1.0X +after 1900, vec off, rebase CORRECTED 14466 14528 60 6.9 144.7 1.0X +after 1900, vec on, rebase EXCEPTION 4894 4932 38 20.4 48.9 2.9X +after 1900, vec on, rebase LEGACY 4569 4593 28 21.9 45.7 3.1X +after 1900, vec on, rebase CORRECTED 4918 4943 23 20.3 49.2 2.9X +before 1900, vec off, rebase LEGACY 16912 16926 15 5.9 169.1 0.8X +before 1900, vec off, rebase CORRECTED 14495 14499 4 6.9 144.9 1.0X +before 1900, vec on, rebase LEGACY 6265 6307 41 16.0 62.7 2.3X +before 1900, vec on, rebase CORRECTED 4892 4930 38 20.4 48.9 2.9X ================================================================================================ Rebasing dates/timestamps in ORC datasource ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save DATE to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 10993 10993 0 9.1 109.9 1.0X -before 1582, noop 6556 6556 0 15.3 65.6 1.7X -after 1582 14554 14554 0 6.9 145.5 0.8X -before 1582 10400 10400 0 9.6 104.0 1.1X +after 1582, noop 11339 11339 0 8.8 113.4 1.0X +before 1582, noop 5604 5604 0 17.8 56.0 2.0X +after 1582 15457 15457 0 6.5 154.6 0.7X +before 1582 10049 10049 0 10.0 100.5 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load DATE from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off 8426 8444 15 11.9 84.3 1.0X -after 1582, vec on 2381 2404 37 42.0 23.8 3.5X -before 1582, vec off 8541 8558 20 11.7 85.4 1.0X -before 1582, vec on 2527 2538 10 39.6 25.3 3.3X +after 1582, vec off 8478 8853 595 11.8 84.8 1.0X +after 1582, vec on 2380 2428 60 42.0 
23.8 3.6X +before 1582, vec off 8570 8583 14 11.7 85.7 1.0X +before 1582, vec on 2510 2518 7 39.8 25.1 3.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2216 2216 0 45.1 22.2 1.0X -before 1900, noop 2241 2241 0 44.6 22.4 1.0X -after 1900 9421 9421 0 10.6 94.2 0.2X -before 1900 11674 11674 0 8.6 116.7 0.2X +after 1900, noop 2275 2275 0 44.0 22.7 1.0X +before 1900, noop 2288 2288 0 43.7 22.9 1.0X +after 1900 9472 9472 0 10.6 94.7 0.2X +before 1900 11470 11470 0 8.7 114.7 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off 9808 10011 312 10.2 98.1 1.0X -after 1900, vec on 3963 3972 10 25.2 39.6 2.5X -before 1900, vec off 11884 11908 22 8.4 118.8 0.8X -before 1900, vec on 5435 5449 16 18.4 54.3 1.8X +after 1900, vec off 9871 9914 39 10.1 98.7 1.0X +after 1900, vec on 4138 4153 13 24.2 41.4 2.4X +before 1900, vec off 11828 11874 53 8.5 118.3 0.8X +before 1900, vec on 5976 5984 13 16.7 59.8 1.7X diff --git a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt index cd6235f177889..39d679bd8b1d9 100644 --- a/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt +++ b/sql/core/benchmarks/DateTimeRebaseBenchmark-results.txt @@ -2,153 +2,153 @@ Rebasing dates/timestamps in Parquet datasource ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save DATE to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 13431 13431 0 7.4 134.3 1.0X -before 1582, noop 7898 7898 0 12.7 79.0 1.7X -after 1582, rebase EXCEPTION 22422 22422 0 4.5 224.2 0.6X -after 1582, rebase LEGACY 21993 21993 0 4.5 219.9 0.6X -after 1582, rebase CORRECTED 21855 21855 0 4.6 218.5 0.6X -before 1582, rebase LEGACY 17426 17426 0 5.7 174.3 0.8X -before 1582, rebase CORRECTED 16427 16427 0 6.1 164.3 0.8X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +after 1582, noop 12846 12846 0 7.8 128.5 1.0X +before 1582, noop 7874 7874 0 12.7 78.7 1.6X +after 1582, rebase EXCEPTION 20968 20968 0 4.8 209.7 0.6X +after 1582, rebase LEGACY 20802 20802 0 4.8 208.0 0.6X +after 1582, rebase CORRECTED 21961 21961 0 4.6 219.6 0.6X +before 1582, rebase LEGACY 16440 16440 0 6.1 164.4 0.8X +before 1582, rebase CORRECTED 16324 16324 0 6.1 163.2 0.8X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load DATE from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off, rebase EXCEPTION 11518 11573 48 8.7 115.2 1.0X -after 1582, vec off, rebase LEGACY 11742 11775 50 8.5 117.4 1.0X -after 1582, vec off, rebase CORRECTED 11654 11718 57 8.6 116.5 1.0X -after 1582, vec on, rebase EXCEPTION 2602 2603 1 38.4 26.0 4.4X -after 1582, vec on, rebase LEGACY 2598 2607 11 38.5 26.0 4.4X -after 1582, vec on, rebase CORRECTED 2556 2577 21 39.1 25.6 4.5X -before 1582, vec off, rebase LEGACY 11959 11990 28 8.4 119.6 1.0X -before 1582, vec off, rebase CORRECTED 11738 11758 33 8.5 117.4 1.0X -before 1582, vec on, rebase LEGACY 3013 3020 10 33.2 30.1 3.8X -before 1582, vec on, rebase CORRECTED 2670 2677 6 37.4 26.7 4.3X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +after 1582, vec off, rebase EXCEPTION 11932 12128 170 8.4 119.3 1.0X +after 1582, vec off, rebase LEGACY 11902 11976 67 8.4 119.0 1.0X +after 1582, vec off, rebase CORRECTED 11866 11900 59 8.4 118.7 1.0X +after 1582, vec on, rebase EXCEPTION 2554 2578 39 39.2 25.5 4.7X +after 1582, vec on, rebase LEGACY 2550 2599 84 39.2 25.5 4.7X +after 1582, vec on, rebase CORRECTED 2529 2548 19 39.5 25.3 4.7X +before 1582, vec off, rebase LEGACY 12073 12082 10 8.3 120.7 1.0X +before 1582, vec off, rebase CORRECTED 11835 11890 47 8.4 118.4 1.0X +before 1582, vec on, rebase LEGACY 2809 2829 19 35.6 28.1 4.2X +before 1582, vec on, rebase CORRECTED 2487 2509 21 40.2 24.9 4.8X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_INT96 to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2332 2332 0 42.9 23.3 1.0X -before 1900, noop 2316 2316 0 43.2 23.2 1.0X -after 1900, rebase EXCEPTION 12731 12731 0 7.9 127.3 0.2X -after 1900, rebase LEGACY 13684 13684 0 7.3 136.8 0.2X -after 1900, rebase CORRECTED 12925 12925 0 7.7 129.3 0.2X -before 1900, rebase LEGACY 14813 14813 0 6.8 148.1 0.2X -before 1900, rebase CORRECTED 13043 13043 0 7.7 130.4 0.2X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +after 1900, noop 2307 2307 0 43.3 23.1 1.0X +before 1900, noop 2414 2414 0 41.4 24.1 1.0X +after 1900, rebase EXCEPTION 13251 13251 0 7.5 132.5 0.2X +after 1900, rebase LEGACY 13218 13218 0 7.6 132.2 0.2X +after 1900, rebase CORRECTED 12495 12495 0 8.0 124.9 0.2X +before 1900, rebase LEGACY 14825 14825 0 6.7 148.2 0.2X +before 1900, rebase CORRECTED 12741 12741 0 7.8 127.4 0.2X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_INT96 from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 15058 15125 62 6.6 150.6 1.0X -after 1900, vec off, rebase LEGACY 14938 14973 34 6.7 149.4 1.0X -after 1900, vec off, rebase CORRECTED 14827 14890 69 6.7 148.3 1.0X -after 1900, vec on, rebase EXCEPTION 5442 5498 51 18.4 54.4 2.8X -after 1900, vec on, rebase LEGACY 5500 5532 54 18.2 55.0 2.7X -after 1900, vec on, rebase CORRECTED 5493 5504 15 18.2 54.9 2.7X -before 1900, vec off, rebase LEGACY 19000 19002 2 5.3 190.0 0.8X -before 1900, vec off, rebase CORRECTED 16181 16225 41 6.2 161.8 0.9X -before 1900, vec 
on, rebase LEGACY 7514 7529 26 13.3 75.1 2.0X -before 1900, vec on, rebase CORRECTED 5677 5698 25 17.6 56.8 2.7X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +after 1900, vec off, rebase EXCEPTION 14943 14997 65 6.7 149.4 1.0X +after 1900, vec off, rebase LEGACY 15155 15243 101 6.6 151.6 1.0X +after 1900, vec off, rebase CORRECTED 14988 15008 20 6.7 149.9 1.0X +after 1900, vec on, rebase EXCEPTION 5430 5440 11 18.4 54.3 2.8X +after 1900, vec on, rebase LEGACY 5446 5458 11 18.4 54.5 2.7X +after 1900, vec on, rebase CORRECTED 5409 5440 41 18.5 54.1 2.8X +before 1900, vec off, rebase LEGACY 18150 18193 46 5.5 181.5 0.8X +before 1900, vec off, rebase CORRECTED 15954 15969 16 6.3 159.5 0.9X +before 1900, vec on, rebase LEGACY 7145 7152 8 14.0 71.5 2.1X +before 1900, vec on, rebase CORRECTED 5396 5408 11 18.5 54.0 2.8X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_MICROS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2356 2356 0 42.5 23.6 1.0X -before 1900, noop 2483 2483 0 40.3 24.8 0.9X -after 1900, rebase EXCEPTION 11597 11597 0 8.6 116.0 0.2X -after 1900, rebase LEGACY 11557 11557 0 8.7 115.6 0.2X -after 1900, rebase CORRECTED 11536 11536 0 8.7 115.4 0.2X -before 1900, rebase LEGACY 13606 13606 0 7.3 136.1 0.2X -before 1900, rebase CORRECTED 11738 11738 0 8.5 117.4 0.2X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +after 1900, noop 2344 2344 0 42.7 23.4 1.0X +before 1900, noop 2360 2360 0 42.4 23.6 1.0X +after 1900, rebase EXCEPTION 11075 11075 0 9.0 110.7 0.2X +after 1900, rebase LEGACY 11018 11018 0 9.1 110.2 0.2X +after 1900, rebase CORRECTED 15681 15681 0 6.4 156.8 0.1X +before 1900, rebase LEGACY 13002 13002 0 7.7 130.0 0.2X +before 1900, rebase CORRECTED 11179 11179 0 8.9 111.8 0.2X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_MICROS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 14878 14912 47 6.7 148.8 1.0X -after 1900, vec off, rebase LEGACY 14937 14992 49 6.7 149.4 1.0X -after 1900, vec off, rebase CORRECTED 14880 14910 39 6.7 148.8 1.0X -after 1900, vec on, rebase EXCEPTION 3768 3779 10 26.5 37.7 3.9X -after 1900, vec on, rebase LEGACY 3855 3881 34 25.9 38.6 3.9X -after 1900, vec on, rebase CORRECTED 3832 3882 44 26.1 38.3 3.9X -before 1900, vec off, rebase LEGACY 17291 17350 53 5.8 172.9 0.9X -before 1900, vec off, rebase CORRECTED 14862 14898 33 6.7 148.6 1.0X -before 1900, vec on, rebase LEGACY 5701 5724 28 17.5 57.0 2.6X -before 1900, vec on, rebase CORRECTED 3830 3844 12 26.1 38.3 3.9X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +after 1900, vec off, rebase EXCEPTION 15439 15460 29 6.5 154.4 1.0X +after 1900, vec off, rebase LEGACY 15345 15375 44 6.5 153.5 1.0X +after 1900, vec off, rebase CORRECTED 15418 15470 85 6.5 154.2 1.0X +after 1900, vec on, rebase EXCEPTION 3850 3895 51 26.0 38.5 4.0X +after 1900, vec on, rebase LEGACY 3862 3896 31 25.9 38.6 4.0X +after 1900, vec on, rebase CORRECTED 3827 3846 20 26.1 38.3 4.0X +before 1900, vec off, rebase LEGACY 17672 17726 48 5.7 176.7 0.9X +before 1900, vec off, 
rebase CORRECTED 15368 15407 45 6.5 153.7 1.0X +before 1900, vec on, rebase LEGACY 5715 5729 14 17.5 57.1 2.7X +before 1900, vec on, rebase CORRECTED 3809 3872 63 26.3 38.1 4.1X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP_MILLIS to parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2186 2186 0 45.7 21.9 1.0X -before 1900, noop 2096 2096 0 47.7 21.0 1.0X -after 1900, rebase EXCEPTION 11031 11031 0 9.1 110.3 0.2X -after 1900, rebase LEGACY 10989 10989 0 9.1 109.9 0.2X -after 1900, rebase CORRECTED 10861 10861 0 9.2 108.6 0.2X -before 1900, rebase LEGACY 13330 13330 0 7.5 133.3 0.2X -before 1900, rebase CORRECTED 11320 11320 0 8.8 113.2 0.2X - -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +after 1900, noop 2350 2350 0 42.6 23.5 1.0X +before 1900, noop 2285 2285 0 43.8 22.8 1.0X +after 1900, rebase EXCEPTION 10977 10977 0 9.1 109.8 0.2X +after 1900, rebase LEGACY 10489 10489 0 9.5 104.9 0.2X +after 1900, rebase CORRECTED 10558 10558 0 9.5 105.6 0.2X +before 1900, rebase LEGACY 12991 12991 0 7.7 129.9 0.2X +before 1900, rebase CORRECTED 10591 10591 0 9.4 105.9 0.2X + +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP_MILLIS from parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off, rebase EXCEPTION 14936 14989 88 6.7 149.4 1.0X -after 1900, vec off, rebase LEGACY 15025 15083 101 6.7 150.2 1.0X -after 1900, vec off, rebase CORRECTED 14907 14956 45 6.7 149.1 1.0X -after 1900, vec on, rebase EXCEPTION 3940 3967 28 25.4 39.4 3.8X -after 1900, vec on, rebase LEGACY 4553 4569 27 22.0 45.5 3.3X -after 1900, vec on, rebase CORRECTED 3944 3982 61 25.4 39.4 3.8X -before 1900, vec off, rebase LEGACY 17301 17340 56 5.8 173.0 0.9X -before 1900, vec off, rebase CORRECTED 14959 14998 45 6.7 149.6 1.0X -before 1900, vec on, rebase LEGACY 6380 6420 40 15.7 63.8 2.3X -before 1900, vec on, rebase CORRECTED 3963 3989 28 25.2 39.6 3.8X +after 1900, vec off, rebase EXCEPTION 15242 15326 82 6.6 152.4 1.0X +after 1900, vec off, rebase LEGACY 15273 15325 51 6.5 152.7 1.0X +after 1900, vec off, rebase CORRECTED 15273 15291 19 6.5 152.7 1.0X +after 1900, vec on, rebase EXCEPTION 3942 3964 23 25.4 39.4 3.9X +after 1900, vec on, rebase LEGACY 4558 4595 33 21.9 45.6 3.3X +after 1900, vec on, rebase CORRECTED 3912 3949 39 25.6 39.1 3.9X +before 1900, vec off, rebase LEGACY 17593 17643 44 5.7 175.9 0.9X +before 1900, vec off, rebase CORRECTED 15336 15346 11 6.5 153.4 1.0X +before 1900, vec on, rebase LEGACY 6310 6332 37 15.8 63.1 2.4X +before 1900, vec on, rebase CORRECTED 3947 3956 8 25.3 39.5 3.9X ================================================================================================ Rebasing dates/timestamps in ORC datasource ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save DATE to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -after 1582, noop 13755 13755 0 7.3 137.5 1.0X -before 1582, noop 7969 7969 0 12.5 79.7 1.7X -after 1582 17101 17101 0 5.8 171.0 0.8X -before 1582 11962 11962 0 8.4 119.6 1.1X +after 1582, noop 12923 12923 0 7.7 129.2 1.0X +before 1582, noop 7980 7980 0 12.5 79.8 1.6X +after 1582 16656 16656 0 6.0 166.6 0.8X +before 1582 11823 11823 0 8.5 118.2 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load DATE from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1582, vec off 8810 8852 38 11.4 88.1 1.0X -after 1582, vec on 2401 2441 39 41.7 24.0 3.7X -before 1582, vec off 8791 8812 19 11.4 87.9 1.0X -before 1582, vec on 2540 2547 7 39.4 25.4 3.5X +after 1582, vec off 8836 8854 18 11.3 88.4 1.0X +after 1582, vec on 2492 2520 24 40.1 24.9 3.5X +before 1582, vec off 8903 8931 25 11.2 89.0 1.0X +before 1582, vec on 2644 2652 9 37.8 26.4 3.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Save TIMESTAMP to ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, noop 2079 2079 0 48.1 20.8 1.0X -before 1900, noop 2058 2058 0 48.6 20.6 1.0X -after 1900 9653 9653 0 10.4 96.5 0.2X -before 1900 11808 11808 0 8.5 118.1 0.2X +after 1900, noop 2310 2310 0 43.3 23.1 1.0X +before 1900, noop 2237 2237 0 44.7 22.4 1.0X +after 1900 9656 9656 0 10.4 96.6 0.2X +before 1900 11859 11859 0 8.4 118.6 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Load TIMESTAMP from ORC: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -after 1900, vec off 10416 10464 45 9.6 104.2 1.0X -after 1900, vec on 4435 4440 8 22.5 44.3 2.3X -before 1900, vec off 12357 12366 14 8.1 123.6 0.8X -before 1900, vec on 6040 6050 14 16.6 60.4 1.7X +after 1900, vec off 10102 10150 47 9.9 101.0 1.0X +after 1900, vec on 4274 4296 20 23.4 42.7 2.4X +before 1900, vec off 12042 12119 76 8.3 120.4 0.8X +before 1900, vec on 5850 5859 13 17.1 58.5 1.7X diff --git a/sql/core/benchmarks/EncodeBenchmark-jdk21-results.txt b/sql/core/benchmarks/EncodeBenchmark-jdk21-results.txt new file mode 100644 index 0000000000000..0a6164bc652e1 --- /dev/null +++ b/sql/core/benchmarks/EncodeBenchmark-jdk21-results.txt @@ -0,0 +1,8 @@ +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF-32 47469 47482 19 0.2 4746.9 1.0X +UTF-16 57463 57487 35 0.2 5746.3 0.8X +UTF-8 2803 2805 3 3.6 280.3 16.9X + diff --git a/sql/core/benchmarks/EncodeBenchmark-results.txt b/sql/core/benchmarks/EncodeBenchmark-results.txt new file mode 100644 index 
0000000000000..404138db7d36d --- /dev/null +++ b/sql/core/benchmarks/EncodeBenchmark-results.txt @@ -0,0 +1,8 @@ +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure +AMD EPYC 7763 64-Core Processor +encode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------ +UTF-32 31107 31205 138 0.3 3110.7 1.0X +UTF-16 47904 47934 43 0.2 4790.4 0.6X +UTF-8 2957 2978 30 3.4 295.7 10.5X + diff --git a/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-jdk21-results.txt b/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-jdk21-results.txt index 59b806bb5d5f3..08cd0f2c47f86 100644 --- a/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-jdk21-results.txt @@ -2,44 +2,44 @@ WITHOUT SPILL ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Array with 100000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 2553 2566 18 40.1 24.9 1.0X -ExternalAppendOnlyUnsafeRowArray 3528 3545 24 29.0 34.5 0.7X +ArrayBuffer 2445 2451 10 41.9 23.9 1.0X +ExternalAppendOnlyUnsafeRowArray 3464 3489 36 29.6 33.8 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Array with 1000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 5503 5512 13 47.6 21.0 1.0X -ExternalAppendOnlyUnsafeRowArray 10635 10654 26 24.6 40.6 0.5X +ArrayBuffer 5292 5328 50 49.5 20.2 1.0X +ExternalAppendOnlyUnsafeRowArray 11921 11927 9 22.0 45.5 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Array with 30000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 10547 10586 55 46.6 21.5 1.0X -ExternalAppendOnlyUnsafeRowArray 16275 16422 208 30.2 33.1 0.6X +ArrayBuffer 10418 10422 6 47.2 21.2 1.0X +ExternalAppendOnlyUnsafeRowArray 16589 16692 145 29.6 33.8 0.6X ================================================================================================ WITH SPILL ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Spilling with 1000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeExternalSorter 8525 8546 29 30.7 32.5 1.0X -ExternalAppendOnlyUnsafeRowArray 6313 6315 3 41.5 24.1 1.4X +UnsafeExternalSorter 8507 8542 50 30.8 32.5 1.0X 
+ExternalAppendOnlyUnsafeRowArray 6301 6314 18 41.6 24.0 1.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Spilling with 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeExternalSorter 5 5 1 32.4 30.8 1.0X -ExternalAppendOnlyUnsafeRowArray 4 4 0 40.2 24.9 1.2X +UnsafeExternalSorter 5 5 0 33.0 30.3 1.0X +ExternalAppendOnlyUnsafeRowArray 4 4 0 40.1 24.9 1.2X diff --git a/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt b/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt index 8e9cc9ecbba00..10af77fdd8bb2 100644 --- a/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt +++ b/sql/core/benchmarks/ExternalAppendOnlyUnsafeRowArrayBenchmark-results.txt @@ -2,44 +2,44 @@ WITHOUT SPILL ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Array with 100000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 2532 2539 10 40.4 24.7 1.0X -ExternalAppendOnlyUnsafeRowArray 3568 3577 12 28.7 34.8 0.7X +ArrayBuffer 2330 2333 4 44.0 22.8 1.0X +ExternalAppendOnlyUnsafeRowArray 3306 3317 15 31.0 32.3 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Array with 1000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 5144 5154 13 51.0 19.6 1.0X -ExternalAppendOnlyUnsafeRowArray 10745 10770 35 24.4 41.0 0.5X +ArrayBuffer 5594 5598 6 46.9 21.3 1.0X +ExternalAppendOnlyUnsafeRowArray 12278 12332 75 21.4 46.8 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Array with 30000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -ArrayBuffer 9749 9751 3 50.4 19.8 1.0X -ExternalAppendOnlyUnsafeRowArray 17484 17526 59 28.1 35.6 0.6X +ArrayBuffer 10249 10252 4 48.0 20.9 1.0X +ExternalAppendOnlyUnsafeRowArray 16386 16397 16 30.0 33.3 0.6X ================================================================================================ WITH SPILL ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Spilling with 1000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeExternalSorter 8372 8408 50 31.3 31.9 1.0X -ExternalAppendOnlyUnsafeRowArray 6238 
6243 7 42.0 23.8 1.3X +UnsafeExternalSorter 8294 8315 30 31.6 31.6 1.0X +ExternalAppendOnlyUnsafeRowArray 6767 6797 42 38.7 25.8 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Spilling with 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -UnsafeExternalSorter 5 5 0 34.7 28.9 1.0X -ExternalAppendOnlyUnsafeRowArray 4 4 0 41.6 24.0 1.2X +UnsafeExternalSorter 5 5 0 34.2 29.2 1.0X +ExternalAppendOnlyUnsafeRowArray 4 4 0 38.8 25.8 1.1X diff --git a/sql/core/benchmarks/ExtractBenchmark-jdk21-results.txt b/sql/core/benchmarks/ExtractBenchmark-jdk21-results.txt index 736518398edb6..a1c284712c3d4 100644 --- a/sql/core/benchmarks/ExtractBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ExtractBenchmark-jdk21-results.txt @@ -1,104 +1,104 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 266 280 12 37.6 26.6 1.0X -YEAR of timestamp 749 752 4 13.3 74.9 0.4X -YEAROFWEEK of timestamp 680 684 6 14.7 68.0 0.4X -QUARTER of timestamp 698 703 6 14.3 69.8 0.4X -MONTH of timestamp 614 625 10 16.3 61.4 0.4X -WEEK of timestamp 884 888 3 11.3 88.4 0.3X -DAY of timestamp 614 621 7 16.3 61.4 0.4X -DAYOFWEEK of timestamp 781 787 8 12.8 78.1 0.3X -DOW of timestamp 795 798 2 12.6 79.5 0.3X -DOW_ISO of timestamp 748 763 22 13.4 74.8 0.4X -DAYOFWEEK_ISO of timestamp 692 698 8 14.4 69.2 0.4X -DOY of timestamp 640 643 4 15.6 64.0 0.4X -HOUR of timestamp 473 479 8 21.2 47.3 0.6X -MINUTE of timestamp 472 476 4 21.2 47.2 0.6X -SECOND of timestamp 533 535 2 18.8 53.3 0.5X +cast to timestamp 288 299 15 34.7 28.8 1.0X +YEAR of timestamp 663 672 8 15.1 66.3 0.4X +YEAROFWEEK of timestamp 638 659 25 15.7 63.8 0.5X +QUARTER of timestamp 631 633 2 15.9 63.1 0.5X +MONTH of timestamp 568 575 6 17.6 56.8 0.5X +WEEK of timestamp 858 861 4 11.7 85.8 0.3X +DAY of timestamp 573 576 2 17.4 57.3 0.5X +DAYOFWEEK of timestamp 745 748 5 13.4 74.5 0.4X +DOW of timestamp 736 747 14 13.6 73.6 0.4X +DOW_ISO of timestamp 676 681 5 14.8 67.6 0.4X +DAYOFWEEK_ISO of timestamp 680 682 3 14.7 68.0 0.4X +DOY of timestamp 591 598 6 16.9 59.1 0.5X +HOUR of timestamp 474 479 4 21.1 47.4 0.6X +MINUTE of timestamp 474 477 3 21.1 47.4 0.6X +SECOND of timestamp 534 539 6 18.7 53.4 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 245 250 5 40.9 24.5 1.0X -YEAR of timestamp 596 599 3 16.8 59.6 0.4X -YEAROFWEEK of timestamp 651 655 3 15.4 65.1 0.4X -QUARTER of timestamp 752 754 2 13.3 75.2 0.3X -MONTH of timestamp 608 614 10 16.4 60.8 0.4X -WEEK of timestamp 874 879 5 11.4 87.4 0.3X -DAY of timestamp 609 611 2 16.4 60.9 0.4X -DAYOFWEEK of timestamp 734 747 18 13.6 73.4 0.3X -DOW of timestamp 
739 753 19 13.5 73.9 0.3X -DOW_ISO of timestamp 685 687 1 14.6 68.5 0.4X -DAYOFWEEK_ISO of timestamp 682 689 10 14.7 68.2 0.4X -DOY of timestamp 638 641 4 15.7 63.8 0.4X -HOUR of timestamp 470 480 15 21.3 47.0 0.5X -MINUTE of timestamp 466 481 23 21.4 46.6 0.5X -SECOND of timestamp 532 533 1 18.8 53.2 0.5X +cast to timestamp 246 248 2 40.7 24.6 1.0X +YEAR of timestamp 532 533 1 18.8 53.2 0.5X +YEAROFWEEK of timestamp 613 616 3 16.3 61.3 0.4X +QUARTER of timestamp 615 620 6 16.3 61.5 0.4X +MONTH of timestamp 563 564 1 17.8 56.3 0.4X +WEEK of timestamp 851 855 5 11.8 85.1 0.3X +DAY of timestamp 567 568 1 17.6 56.7 0.4X +DAYOFWEEK of timestamp 731 738 8 13.7 73.1 0.3X +DOW of timestamp 730 734 5 13.7 73.0 0.3X +DOW_ISO of timestamp 668 668 1 15.0 66.8 0.4X +DAYOFWEEK_ISO of timestamp 666 678 10 15.0 66.6 0.4X +DOY of timestamp 586 591 5 17.1 58.6 0.4X +HOUR of timestamp 471 472 2 21.2 47.1 0.5X +MINUTE of timestamp 473 478 5 21.1 47.3 0.5X +SECOND of timestamp 533 534 2 18.8 53.3 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 602 605 3 16.6 60.2 1.0X -YEAR of date 596 601 7 16.8 59.6 1.0X -YEAROFWEEK of date 651 657 5 15.4 65.1 0.9X -QUARTER of date 747 750 2 13.4 74.7 0.8X -MONTH of date 600 622 39 16.7 60.0 1.0X -WEEK of date 876 882 7 11.4 87.6 0.7X -DAY of date 624 630 6 16.0 62.4 1.0X -DAYOFWEEK of date 731 735 3 13.7 73.1 0.8X -DOW of date 731 735 7 13.7 73.1 0.8X -DOW_ISO of date 677 679 2 14.8 67.7 0.9X -DAYOFWEEK_ISO of date 682 682 1 14.7 68.2 0.9X -DOY of date 630 636 7 15.9 63.0 1.0X -HOUR of date 980 983 5 10.2 98.0 0.6X -MINUTE of date 982 987 4 10.2 98.2 0.6X -SECOND of date 1037 1040 5 9.6 103.7 0.6X +cast to date 511 514 3 19.6 51.1 1.0X +YEAR of date 528 535 6 18.9 52.8 1.0X +YEAROFWEEK of date 610 615 7 16.4 61.0 0.8X +QUARTER of date 609 613 6 16.4 60.9 0.8X +MONTH of date 559 560 1 17.9 55.9 0.9X +WEEK of date 849 859 10 11.8 84.9 0.6X +DAY of date 564 568 4 17.7 56.4 0.9X +DAYOFWEEK of date 696 715 19 14.4 69.6 0.7X +DOW of date 692 693 1 14.4 69.2 0.7X +DOW_ISO of date 628 634 7 15.9 62.8 0.8X +DAYOFWEEK_ISO of date 629 633 7 15.9 62.9 0.8X +DOY of date 550 556 7 18.2 55.0 0.9X +HOUR of date 952 955 3 10.5 95.2 0.5X +MINUTE of date 953 962 12 10.5 95.3 0.5X +SECOND of date 1027 1031 6 9.7 102.7 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 599 604 6 16.7 59.9 1.0X -YEAR of date 588 595 9 17.0 58.8 1.0X -YEAROFWEEK of date 648 650 2 15.4 64.8 0.9X -QUARTER of date 754 766 18 13.3 75.4 0.8X -MONTH of date 598 601 3 16.7 59.8 1.0X -WEEK of date 875 878 5 11.4 87.5 0.7X -DAY of date 604 608 3 16.5 60.4 1.0X -DAYOFWEEK of date 734 734 0 13.6 73.4 0.8X -DOW of date 730 733 5 13.7 73.0 0.8X -DOW_ISO of date 682 684 2 14.7 68.2 0.9X -DAYOFWEEK_ISO of date 678 680 2 14.8 67.8 0.9X -DOY of date 632 634 3 15.8 63.2 0.9X -HOUR of date 978 981 3 10.2 97.8 0.6X -MINUTE of date 980 984 4 10.2 98.0 
0.6X -SECOND of date 1040 1042 1 9.6 104.0 0.6X +cast to date 481 484 5 20.8 48.1 1.0X +YEAR of date 489 495 5 20.5 48.9 1.0X +YEAROFWEEK of date 569 574 5 17.6 56.9 0.8X +QUARTER of date 573 574 1 17.5 57.3 0.8X +MONTH of date 515 518 3 19.4 51.5 0.9X +WEEK of date 816 818 3 12.3 81.6 0.6X +DAY of date 528 528 0 18.9 52.8 0.9X +DAYOFWEEK of date 694 706 18 14.4 69.4 0.7X +DOW of date 692 693 2 14.4 69.2 0.7X +DOW_ISO of date 628 630 3 15.9 62.8 0.8X +DAYOFWEEK_ISO of date 628 635 7 15.9 62.8 0.8X +DOY of date 551 557 7 18.2 55.1 0.9X +HOUR of date 954 961 9 10.5 95.4 0.5X +MINUTE of date 954 955 3 10.5 95.4 0.5X +SECOND of date 1034 1039 9 9.7 103.4 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke extract for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 758 760 2 13.2 75.8 1.0X -YEAR of interval 742 747 7 13.5 74.2 1.0X -MONTH of interval 739 742 3 13.5 73.9 1.0X -DAY of interval 738 739 2 13.6 73.8 1.0X -HOUR of interval 746 748 2 13.4 74.6 1.0X -MINUTE of interval 740 745 5 13.5 74.0 1.0X -SECOND of interval 801 810 9 12.5 80.1 0.9X +cast to interval 705 710 4 14.2 70.5 1.0X +YEAR of interval 671 673 3 14.9 67.1 1.1X +MONTH of interval 679 686 9 14.7 67.9 1.0X +DAY of interval 674 678 6 14.8 67.4 1.0X +HOUR of interval 680 684 4 14.7 68.0 1.0X +MINUTE of interval 682 688 6 14.7 68.2 1.0X +SECOND of interval 736 741 5 13.6 73.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke date_part for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 753 756 2 13.3 75.3 1.0X -YEAR of interval 742 743 1 13.5 74.2 1.0X -MONTH of interval 740 741 1 13.5 74.0 1.0X -DAY of interval 736 739 3 13.6 73.6 1.0X -HOUR of interval 738 740 2 13.5 73.8 1.0X -MINUTE of interval 741 743 4 13.5 74.1 1.0X -SECOND of interval 802 803 3 12.5 80.2 0.9X +cast to interval 708 709 0 14.1 70.8 1.0X +YEAR of interval 674 677 3 14.8 67.4 1.1X +MONTH of interval 675 677 2 14.8 67.5 1.0X +DAY of interval 670 671 3 14.9 67.0 1.1X +HOUR of interval 681 683 2 14.7 68.1 1.0X +MINUTE of interval 685 686 2 14.6 68.5 1.0X +SECOND of interval 725 735 13 13.8 72.5 1.0X diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt index 0c5d5f10880c4..e0c939c54947d 100644 --- a/sql/core/benchmarks/ExtractBenchmark-results.txt +++ b/sql/core/benchmarks/ExtractBenchmark-results.txt @@ -1,104 +1,104 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke extract for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 296 318 24 33.8 29.6 1.0X -YEAR of timestamp 805 806 1 12.4 80.5 0.4X -YEAROFWEEK of timestamp 856 880 26 11.7 85.6 0.3X -QUARTER of timestamp 823 829 6 12.1 82.3 0.4X -MONTH of timestamp 793 800 7 12.6 79.3 0.4X -WEEK 
of timestamp 1124 1132 7 8.9 112.4 0.3X -DAY of timestamp 802 806 5 12.5 80.2 0.4X -DAYOFWEEK of timestamp 945 948 2 10.6 94.5 0.3X -DOW of timestamp 945 948 4 10.6 94.5 0.3X -DOW_ISO of timestamp 886 893 7 11.3 88.6 0.3X -DAYOFWEEK_ISO of timestamp 890 894 5 11.2 89.0 0.3X -DOY of timestamp 831 831 1 12.0 83.1 0.4X -HOUR of timestamp 577 581 5 17.3 57.7 0.5X -MINUTE of timestamp 578 590 19 17.3 57.8 0.5X -SECOND of timestamp 659 664 5 15.2 65.9 0.4X +cast to timestamp 310 342 28 32.3 31.0 1.0X +YEAR of timestamp 786 795 11 12.7 78.6 0.4X +YEAROFWEEK of timestamp 847 891 52 11.8 84.7 0.4X +QUARTER of timestamp 795 800 8 12.6 79.5 0.4X +MONTH of timestamp 785 801 14 12.7 78.5 0.4X +WEEK of timestamp 1087 1091 4 9.2 108.7 0.3X +DAY of timestamp 783 784 0 12.8 78.3 0.4X +DAYOFWEEK of timestamp 919 921 2 10.9 91.9 0.3X +DOW of timestamp 923 925 2 10.8 92.3 0.3X +DOW_ISO of timestamp 982 991 10 10.2 98.2 0.3X +DAYOFWEEK_ISO of timestamp 988 993 5 10.1 98.8 0.3X +DOY of timestamp 791 793 2 12.6 79.1 0.4X +HOUR of timestamp 549 551 2 18.2 54.9 0.6X +MINUTE of timestamp 545 550 8 18.4 54.5 0.6X +SECOND of timestamp 648 652 4 15.4 64.8 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke date_part for timestamp: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to timestamp 268 279 13 37.3 26.8 1.0X -YEAR of timestamp 785 786 1 12.7 78.5 0.3X -YEAROFWEEK of timestamp 840 842 5 11.9 84.0 0.3X -QUARTER of timestamp 804 808 3 12.4 80.4 0.3X -MONTH of timestamp 787 789 2 12.7 78.7 0.3X -WEEK of timestamp 1122 1123 1 8.9 112.2 0.2X -DAY of timestamp 789 794 7 12.7 78.9 0.3X -DAYOFWEEK of timestamp 934 935 1 10.7 93.4 0.3X -DOW of timestamp 933 937 5 10.7 93.3 0.3X -DOW_ISO of timestamp 887 896 9 11.3 88.7 0.3X -DAYOFWEEK_ISO of timestamp 883 888 4 11.3 88.3 0.3X -DOY of timestamp 826 828 4 12.1 82.6 0.3X -HOUR of timestamp 579 584 5 17.3 57.9 0.5X -MINUTE of timestamp 575 584 12 17.4 57.5 0.5X -SECOND of timestamp 663 665 3 15.1 66.3 0.4X +cast to timestamp 248 250 2 40.4 24.8 1.0X +YEAR of timestamp 771 779 10 13.0 77.1 0.3X +YEAROFWEEK of timestamp 825 827 4 12.1 82.5 0.3X +QUARTER of timestamp 780 783 4 12.8 78.0 0.3X +MONTH of timestamp 779 785 8 12.8 77.9 0.3X +WEEK of timestamp 1075 1082 11 9.3 107.5 0.2X +DAY of timestamp 777 781 7 12.9 77.7 0.3X +DAYOFWEEK of timestamp 908 915 7 11.0 90.8 0.3X +DOW of timestamp 906 914 7 11.0 90.6 0.3X +DOW_ISO of timestamp 982 986 3 10.2 98.2 0.3X +DAYOFWEEK_ISO of timestamp 986 988 2 10.1 98.6 0.3X +DOY of timestamp 792 801 11 12.6 79.2 0.3X +HOUR of timestamp 546 549 3 18.3 54.6 0.5X +MINUTE of timestamp 553 557 4 18.1 55.3 0.4X +SECOND of timestamp 646 657 12 15.5 64.6 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke extract for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 716 721 4 14.0 71.6 1.0X -YEAR of date 782 783 1 12.8 78.2 0.9X -YEAROFWEEK of date 830 833 3 12.0 83.0 0.9X -QUARTER of date 801 805 4 12.5 80.1 0.9X -MONTH of date 782 791 11 12.8 78.2 0.9X -WEEK of date 1114 1116 1 9.0 111.4 0.6X -DAY of 
date 790 795 5 12.7 79.0 0.9X -DAYOFWEEK of date 934 940 6 10.7 93.4 0.8X -DOW of date 938 940 1 10.7 93.8 0.8X -DOW_ISO of date 879 883 4 11.4 87.9 0.8X -DAYOFWEEK_ISO of date 882 885 3 11.3 88.2 0.8X -DOY of date 825 830 7 12.1 82.5 0.9X -HOUR of date 1252 1255 4 8.0 125.2 0.6X -MINUTE of date 1241 1242 1 8.1 124.1 0.6X -SECOND of date 1405 1406 1 7.1 140.5 0.5X +cast to date 706 728 37 14.2 70.6 1.0X +YEAR of date 768 771 3 13.0 76.8 0.9X +YEAROFWEEK of date 821 826 5 12.2 82.1 0.9X +QUARTER of date 778 782 6 12.8 77.8 0.9X +MONTH of date 779 780 1 12.8 77.9 0.9X +WEEK of date 1074 1075 1 9.3 107.4 0.7X +DAY of date 773 777 3 12.9 77.3 0.9X +DAYOFWEEK of date 907 910 3 11.0 90.7 0.8X +DOW of date 907 910 3 11.0 90.7 0.8X +DOW_ISO of date 974 978 4 10.3 97.4 0.7X +DAYOFWEEK_ISO of date 978 979 1 10.2 97.8 0.7X +DOY of date 790 803 16 12.7 79.0 0.9X +HOUR of date 1188 1191 3 8.4 118.8 0.6X +MINUTE of date 1189 1192 4 8.4 118.9 0.6X +SECOND of date 1335 1340 6 7.5 133.5 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke date_part for date: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to date 714 718 5 14.0 71.4 1.0X -YEAR of date 783 786 5 12.8 78.3 0.9X -YEAROFWEEK of date 834 837 3 12.0 83.4 0.9X -QUARTER of date 803 806 2 12.4 80.3 0.9X -MONTH of date 780 788 11 12.8 78.0 0.9X -WEEK of date 1109 1112 3 9.0 110.9 0.6X -DAY of date 790 797 11 12.7 79.0 0.9X -DAYOFWEEK of date 931 934 3 10.7 93.1 0.8X -DOW of date 936 938 2 10.7 93.6 0.8X -DOW_ISO of date 884 894 12 11.3 88.4 0.8X -DAYOFWEEK_ISO of date 882 885 3 11.3 88.2 0.8X -DOY of date 821 837 21 12.2 82.1 0.9X -HOUR of date 1251 1251 1 8.0 125.1 0.6X -MINUTE of date 1245 1249 5 8.0 124.5 0.6X -SECOND of date 1399 1404 7 7.1 139.9 0.5X +cast to date 706 709 5 14.2 70.6 1.0X +YEAR of date 771 773 2 13.0 77.1 0.9X +YEAROFWEEK of date 820 823 4 12.2 82.0 0.9X +QUARTER of date 776 779 3 12.9 77.6 0.9X +MONTH of date 771 780 12 13.0 77.1 0.9X +WEEK of date 1075 1078 3 9.3 107.5 0.7X +DAY of date 772 774 2 13.0 77.2 0.9X +DAYOFWEEK of date 902 911 8 11.1 90.2 0.8X +DOW of date 901 912 9 11.1 90.1 0.8X +DOW_ISO of date 973 976 4 10.3 97.3 0.7X +DAYOFWEEK_ISO of date 974 976 2 10.3 97.4 0.7X +DOY of date 787 789 2 12.7 78.7 0.9X +HOUR of date 1186 1187 2 8.4 118.6 0.6X +MINUTE of date 1188 1191 3 8.4 118.8 0.6X +SECOND of date 1278 1310 51 7.8 127.8 0.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke extract for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 939 945 6 10.7 93.9 1.0X -YEAR of interval 918 923 4 10.9 91.8 1.0X -MONTH of interval 918 924 5 10.9 91.8 1.0X -DAY of interval 918 921 3 10.9 91.8 1.0X -HOUR of interval 934 937 3 10.7 93.4 1.0X -MINUTE of interval 936 937 1 10.7 93.6 1.0X -SECOND of interval 1083 1085 4 9.2 108.3 0.9X +cast to interval 1059 1064 8 9.4 105.9 1.0X +YEAR of interval 1054 1063 11 9.5 105.4 1.0X +MONTH of interval 1046 1047 2 9.6 104.6 1.0X +DAY of interval 1048 1052 4 9.5 104.8 1.0X +HOUR of interval 1042 1047 4 9.6 104.2 1.0X +MINUTE of interval 
1070 1075 5 9.4 107.0 1.0X +SECOND of interval 1142 1146 5 8.8 114.2 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Invoke date_part for interval: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -cast to interval 943 945 2 10.6 94.3 1.0X -YEAR of interval 925 929 4 10.8 92.5 1.0X -MONTH of interval 921 925 5 10.9 92.1 1.0X -DAY of interval 927 933 10 10.8 92.7 1.0X -HOUR of interval 928 936 8 10.8 92.8 1.0X -MINUTE of interval 933 937 4 10.7 93.3 1.0X -SECOND of interval 1081 1083 1 9.2 108.1 0.9X +cast to interval 1061 1065 5 9.4 106.1 1.0X +YEAR of interval 1054 1056 4 9.5 105.4 1.0X +MONTH of interval 1049 1053 5 9.5 104.9 1.0X +DAY of interval 1057 1063 6 9.5 105.7 1.0X +HOUR of interval 1048 1051 3 9.5 104.8 1.0X +MINUTE of interval 1079 1083 5 9.3 107.9 1.0X +SECOND of interval 1137 1140 3 8.8 113.7 0.9X diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-jdk21-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-jdk21-results.txt index 8ba705faddf8c..d3b677b84562e 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-jdk21-results.txt @@ -2,733 +2,733 @@ Pushdown for many distinct value case ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6365 6420 67 2.5 404.7 1.0X -Parquet Vectorized (Pushdown) 315 341 24 49.9 20.0 20.2X -Native ORC Vectorized 4984 5073 69 3.2 316.9 1.3X -Native ORC Vectorized (Pushdown) 311 321 11 50.5 19.8 20.5X +Parquet Vectorized 6309 6370 57 2.5 401.1 1.0X +Parquet Vectorized (Pushdown) 294 324 23 53.4 18.7 21.4X +Native ORC Vectorized 5129 5216 60 3.1 326.1 1.2X +Native ORC Vectorized (Pushdown) 323 330 6 48.7 20.5 19.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 string row ('7864320' < value < '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6261 6292 25 2.5 398.1 1.0X -Parquet Vectorized (Pushdown) 281 298 12 56.1 17.8 22.3X -Native ORC Vectorized 5053 5078 22 3.1 321.3 1.2X -Native ORC Vectorized (Pushdown) 300 328 35 52.4 19.1 20.9X +Parquet Vectorized 6334 6358 22 2.5 402.7 1.0X +Parquet Vectorized (Pushdown) 270 290 12 58.2 17.2 23.4X +Native ORC Vectorized 5237 5252 12 3.0 332.9 1.2X +Native ORC Vectorized (Pushdown) 318 333 11 49.4 20.2 19.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 string row (value = '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6294 6356 90 2.5 400.1 1.0X -Parquet Vectorized (Pushdown) 270 283 14 58.3 17.1 23.3X -Native ORC Vectorized 5057 5086 20 3.1 321.5 1.2X -Native ORC Vectorized (Pushdown) 289 298 8 54.4 18.4 21.8X +Parquet Vectorized 6322 6345 15 2.5 401.9 1.0X +Parquet Vectorized (Pushdown) 256 267 7 61.4 16.3 24.7X +Native ORC Vectorized 5290 5305 9 3.0 336.3 1.2X +Native ORC Vectorized (Pushdown) 297 312 9 52.9 18.9 21.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 string row (value <=> '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6252 6274 17 2.5 397.5 1.0X -Parquet Vectorized (Pushdown) 256 272 15 61.4 16.3 24.4X -Native ORC Vectorized 5036 5054 12 3.1 320.2 1.2X -Native ORC Vectorized (Pushdown) 275 291 8 57.1 17.5 22.7X +Parquet Vectorized 6313 6327 11 2.5 401.4 1.0X +Parquet Vectorized (Pushdown) 256 264 7 61.4 16.3 24.6X +Native ORC Vectorized 5262 5293 33 3.0 334.6 1.2X +Native ORC Vectorized (Pushdown) 289 306 14 54.4 18.4 21.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 string row ('7864320' <= value <= '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6241 6259 11 2.5 396.8 1.0X -Parquet Vectorized (Pushdown) 257 266 12 61.3 16.3 24.3X -Native ORC Vectorized 5038 5055 20 3.1 320.3 1.2X -Native ORC Vectorized (Pushdown) 277 290 10 56.8 17.6 22.5X +Parquet Vectorized 6370 6387 11 2.5 405.0 1.0X +Parquet Vectorized (Pushdown) 254 265 11 61.9 16.2 25.1X +Native ORC Vectorized 5284 5294 7 3.0 335.9 1.2X +Native ORC Vectorized (Pushdown) 292 306 14 53.8 18.6 21.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 12335 12564 207 1.3 784.2 1.0X -Parquet Vectorized (Pushdown) 12561 12587 20 1.3 798.6 1.0X -Native ORC Vectorized 11278 11295 12 1.4 717.0 1.1X -Native ORC Vectorized (Pushdown) 11398 11468 98 1.4 724.7 1.1X +Parquet Vectorized 12536 12596 56 1.3 797.0 1.0X +Parquet Vectorized (Pushdown) 12610 12645 26 1.2 801.7 1.0X +Native ORC Vectorized 11428 11444 16 1.4 726.6 1.1X +Native ORC Vectorized (Pushdown) 11524 11532 10 1.4 732.7 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 int row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6150 6175 26 2.6 391.0 1.0X -Parquet Vectorized 
(Pushdown) 252 276 25 62.4 16.0 24.4X -Native ORC Vectorized 4484 4503 12 3.5 285.1 1.4X -Native ORC Vectorized (Pushdown) 276 290 10 57.0 17.5 22.3X +Parquet Vectorized 6369 6487 151 2.5 404.9 1.0X +Parquet Vectorized (Pushdown) 292 381 77 53.9 18.5 21.8X +Native ORC Vectorized 4726 4808 85 3.3 300.5 1.3X +Native ORC Vectorized (Pushdown) 294 320 13 53.5 18.7 21.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 int row (7864320 < value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5983 5997 10 2.6 380.4 1.0X -Parquet Vectorized (Pushdown) 258 274 18 61.1 16.4 23.2X -Native ORC Vectorized 4470 4481 9 3.5 284.2 1.3X -Native ORC Vectorized (Pushdown) 284 289 6 55.3 18.1 21.1X +Parquet Vectorized 6121 6133 16 2.6 389.2 1.0X +Parquet Vectorized (Pushdown) 257 276 25 61.3 16.3 23.8X +Native ORC Vectorized 4735 4759 20 3.3 301.0 1.3X +Native ORC Vectorized (Pushdown) 294 309 11 53.6 18.7 20.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 int row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6050 6074 27 2.6 384.6 1.0X -Parquet Vectorized (Pushdown) 251 262 15 62.7 16.0 24.1X -Native ORC Vectorized 4539 4568 29 3.5 288.6 1.3X -Native ORC Vectorized (Pushdown) 270 286 11 58.4 17.1 22.4X +Parquet Vectorized 6166 6182 16 2.6 392.0 1.0X +Parquet Vectorized (Pushdown) 249 261 13 63.2 15.8 24.8X +Native ORC Vectorized 4797 4812 14 3.3 305.0 1.3X +Native ORC Vectorized (Pushdown) 284 296 6 55.4 18.1 21.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 int row (value <=> 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5983 6015 26 2.6 380.4 1.0X -Parquet Vectorized (Pushdown) 242 247 4 65.0 15.4 24.7X -Native ORC Vectorized 4502 4529 34 3.5 286.2 1.3X -Native ORC Vectorized (Pushdown) 267 276 6 58.8 17.0 22.4X +Parquet Vectorized 6139 6164 17 2.6 390.3 1.0X +Parquet Vectorized (Pushdown) 241 256 16 65.3 15.3 25.5X +Native ORC Vectorized 4798 4837 59 3.3 305.1 1.3X +Native ORC Vectorized (Pushdown) 285 299 9 55.3 18.1 21.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 int row (7864320 <= value <= 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5978 6000 14 2.6 380.1 1.0X -Parquet Vectorized (Pushdown) 243 254 12 64.8 15.4 24.6X -Native ORC Vectorized 4520 4532 12 3.5 287.4 1.3X -Native ORC Vectorized (Pushdown) 267 280 11 58.9 17.0 22.4X +Parquet Vectorized 6152 6176 26 2.6 391.2 1.0X +Parquet Vectorized (Pushdown) 
244 253 7 64.3 15.5 25.2X +Native ORC Vectorized 4789 4803 15 3.3 304.5 1.3X +Native ORC Vectorized (Pushdown) 285 294 7 55.2 18.1 21.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 int row (7864319 < value < 7864321): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5992 6010 17 2.6 381.0 1.0X -Parquet Vectorized (Pushdown) 250 257 9 63.0 15.9 24.0X -Native ORC Vectorized 4525 4536 12 3.5 287.7 1.3X -Native ORC Vectorized (Pushdown) 267 276 7 58.9 17.0 22.4X +Parquet Vectorized 6146 6179 43 2.6 390.8 1.0X +Parquet Vectorized (Pushdown) 239 253 13 65.9 15.2 25.7X +Native ORC Vectorized 4791 4801 10 3.3 304.6 1.3X +Native ORC Vectorized (Pushdown) 281 293 9 55.9 17.9 21.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% int rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6624 6642 20 2.4 421.1 1.0X -Parquet Vectorized (Pushdown) 1438 1450 11 10.9 91.4 4.6X -Native ORC Vectorized 5086 5101 17 3.1 323.4 1.3X -Native ORC Vectorized (Pushdown) 1293 1303 8 12.2 82.2 5.1X +Parquet Vectorized 6748 6773 25 2.3 429.1 1.0X +Parquet Vectorized (Pushdown) 1435 1445 12 11.0 91.3 4.7X +Native ORC Vectorized 5345 5351 5 2.9 339.8 1.3X +Native ORC Vectorized (Pushdown) 1328 1335 4 11.8 84.4 5.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% int rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 8735 8748 8 1.8 555.3 1.0X -Parquet Vectorized (Pushdown) 5861 5871 12 2.7 372.7 1.5X -Native ORC Vectorized 7281 7331 29 2.2 462.9 1.2X -Native ORC Vectorized (Pushdown) 5242 5250 12 3.0 333.3 1.7X +Parquet Vectorized 8886 8910 25 1.8 564.9 1.0X +Parquet Vectorized (Pushdown) 5996 6011 15 2.6 381.2 1.5X +Native ORC Vectorized 7499 7508 10 2.1 476.8 1.2X +Native ORC Vectorized (Pushdown) 5316 5330 18 3.0 338.0 1.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% int rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11127 11148 16 1.4 707.5 1.0X -Parquet Vectorized (Pushdown) 10606 10632 30 1.5 674.3 1.0X -Native ORC Vectorized 9596 9614 11 1.6 610.1 1.2X -Native ORC Vectorized (Pushdown) 9248 9257 8 1.7 588.0 1.2X +Parquet Vectorized 11048 11060 11 1.4 702.4 1.0X +Parquet Vectorized (Pushdown) 10492 10509 13 1.5 667.1 1.1X +Native ORC Vectorized 9684 9706 19 1.6 615.7 1.1X +Native ORC Vectorized (Pushdown) 9296 9311 21 1.7 591.0 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit 
Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all int rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11514 11537 32 1.4 732.0 1.0X -Parquet Vectorized (Pushdown) 11493 11514 17 1.4 730.7 1.0X -Native ORC Vectorized 10169 10181 11 1.5 646.5 1.1X -Native ORC Vectorized (Pushdown) 10303 10321 15 1.5 655.1 1.1X +Parquet Vectorized 11567 11612 50 1.4 735.4 1.0X +Parquet Vectorized (Pushdown) 11631 11642 9 1.4 739.5 1.0X +Native ORC Vectorized 10373 10388 11 1.5 659.5 1.1X +Native ORC Vectorized (Pushdown) 10450 10466 11 1.5 664.4 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all int rows (value > -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11501 11520 21 1.4 731.2 1.0X -Parquet Vectorized (Pushdown) 11549 11591 40 1.4 734.3 1.0X -Native ORC Vectorized 10206 10220 15 1.5 648.9 1.1X -Native ORC Vectorized (Pushdown) 10274 10305 30 1.5 653.2 1.1X +Parquet Vectorized 11581 11600 12 1.4 736.3 1.0X +Parquet Vectorized (Pushdown) 11623 11644 18 1.4 738.9 1.0X +Native ORC Vectorized 10326 10333 9 1.5 656.5 1.1X +Native ORC Vectorized (Pushdown) 10394 10407 14 1.5 660.8 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all int rows (value != -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11571 11635 49 1.4 735.7 1.0X -Parquet Vectorized (Pushdown) 11589 11620 31 1.4 736.8 1.0X -Native ORC Vectorized 10168 10209 45 1.5 646.4 1.1X -Native ORC Vectorized (Pushdown) 10245 10262 20 1.5 651.4 1.1X +Parquet Vectorized 11803 11819 20 1.3 750.4 1.0X +Parquet Vectorized (Pushdown) 11859 11868 9 1.3 754.0 1.0X +Native ORC Vectorized 10609 10614 4 1.5 674.5 1.1X +Native ORC Vectorized (Pushdown) 10681 10697 16 1.5 679.1 1.1X ================================================================================================ Pushdown for few distinct value case (use dictionary encoding) ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 distinct string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5490 5539 43 2.9 349.0 1.0X -Parquet Vectorized (Pushdown) 207 222 13 75.9 13.2 26.5X -Native ORC Vectorized 6053 6083 42 2.6 384.8 0.9X -Native ORC Vectorized (Pushdown) 886 893 4 17.7 56.3 6.2X +Parquet Vectorized 5546 5587 37 2.8 352.6 1.0X +Parquet Vectorized (Pushdown) 218 235 18 72.2 13.8 25.5X +Native ORC Vectorized 6190 6206 15 2.5 393.6 0.9X +Native ORC Vectorized (Pushdown) 943 957 16 16.7 60.0 5.9X -OpenJDK 
64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 distinct string row ('100' < value < '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5479 5491 18 2.9 348.3 1.0X -Parquet Vectorized (Pushdown) 208 222 11 75.7 13.2 26.4X -Native ORC Vectorized 6230 6242 13 2.5 396.1 0.9X -Native ORC Vectorized (Pushdown) 900 904 7 17.5 57.2 6.1X +Parquet Vectorized 5551 5567 13 2.8 352.9 1.0X +Parquet Vectorized (Pushdown) 215 224 7 73.0 13.7 25.8X +Native ORC Vectorized 6364 6372 6 2.5 404.6 0.9X +Native ORC Vectorized (Pushdown) 941 956 11 16.7 59.8 5.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row (value = '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5397 5434 67 2.9 343.1 1.0X -Parquet Vectorized (Pushdown) 251 273 17 62.8 15.9 21.5X -Native ORC Vectorized 6187 6225 48 2.5 393.3 0.9X -Native ORC Vectorized (Pushdown) 934 951 16 16.8 59.4 5.8X +Parquet Vectorized 5490 5498 6 2.9 349.1 1.0X +Parquet Vectorized (Pushdown) 259 271 10 60.8 16.4 21.2X +Native ORC Vectorized 6359 6377 12 2.5 404.3 0.9X +Native ORC Vectorized (Pushdown) 1003 1008 7 15.7 63.8 5.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row (value <=> '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5397 5414 10 2.9 343.1 1.0X -Parquet Vectorized (Pushdown) 271 315 68 58.1 17.2 19.9X -Native ORC Vectorized 6206 6221 16 2.5 394.6 0.9X -Native ORC Vectorized (Pushdown) 925 931 3 17.0 58.8 5.8X +Parquet Vectorized 5492 5495 4 2.9 349.2 1.0X +Parquet Vectorized (Pushdown) 256 266 8 61.5 16.3 21.5X +Native ORC Vectorized 6367 6378 16 2.5 404.8 0.9X +Native ORC Vectorized (Pushdown) 1002 1005 4 15.7 63.7 5.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row ('100' <= value <= '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5486 5497 12 2.9 348.8 1.0X -Parquet Vectorized (Pushdown) 254 264 9 62.0 16.1 21.6X -Native ORC Vectorized 6246 6272 19 2.5 397.1 0.9X -Native ORC Vectorized (Pushdown) 935 958 29 16.8 59.4 5.9X +Parquet Vectorized 5566 5582 18 2.8 353.9 1.0X +Parquet Vectorized (Pushdown) 275 278 5 57.3 17.5 20.3X +Native ORC Vectorized 6443 6451 8 2.4 409.6 0.9X +Native ORC Vectorized (Pushdown) 1003 1015 10 15.7 63.8 5.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all distinct 
string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 11760 11813 38 1.3 747.7 1.0X -Parquet Vectorized (Pushdown) 11729 11761 49 1.3 745.7 1.0X -Native ORC Vectorized 13965 14180 169 1.1 887.9 0.8X -Native ORC Vectorized (Pushdown) 14779 14998 178 1.1 939.6 0.8X +Parquet Vectorized 12200 12218 15 1.3 775.6 1.0X +Parquet Vectorized (Pushdown) 12173 12206 40 1.3 773.9 1.0X +Native ORC Vectorized 13191 13208 19 1.2 838.7 0.9X +Native ORC Vectorized (Pushdown) 13378 13410 29 1.2 850.5 0.9X ================================================================================================ Pushdown benchmark for StringStartsWith ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6784 6853 66 2.3 431.3 1.0X -Parquet Vectorized (Pushdown) 855 910 57 18.4 54.4 7.9X -Native ORC Vectorized 5741 5816 64 2.7 365.0 1.2X -Native ORC Vectorized (Pushdown) 5678 5797 81 2.8 361.0 1.2X +Parquet Vectorized 6707 6727 17 2.3 426.4 1.0X +Parquet Vectorized (Pushdown) 846 855 12 18.6 53.8 7.9X +Native ORC Vectorized 5473 5506 67 2.9 347.9 1.2X +Native ORC Vectorized (Pushdown) 5550 5556 6 2.8 352.9 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6516 6533 12 2.4 414.3 1.0X -Parquet Vectorized (Pushdown) 254 270 17 61.9 16.2 25.6X -Native ORC Vectorized 5271 5324 40 3.0 335.1 1.2X -Native ORC Vectorized (Pushdown) 5173 5308 115 3.0 328.9 1.3X +Parquet Vectorized 6438 6452 14 2.4 409.3 1.0X +Parquet Vectorized (Pushdown) 255 262 8 61.7 16.2 25.2X +Native ORC Vectorized 5314 5320 5 3.0 337.8 1.2X +Native ORC Vectorized (Pushdown) 5404 5415 15 2.9 343.6 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6412 6431 24 2.5 407.7 1.0X -Parquet Vectorized (Pushdown) 235 242 5 66.9 14.9 27.3X -Native ORC Vectorized 5068 5206 157 3.1 322.2 1.3X -Native ORC Vectorized (Pushdown) 5139 5155 14 3.1 326.7 1.2X +Parquet Vectorized 6436 6455 11 2.4 409.2 1.0X +Parquet Vectorized (Pushdown) 260 266 7 60.5 16.5 24.8X +Native ORC Vectorized 5316 5327 10 3.0 338.0 1.2X +Native ORC Vectorized (Pushdown) 5394 5403 8 2.9 343.0 1.2X ================================================================================================ Pushdown 
benchmark for StringEndsWith ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%10'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5669 5828 139 2.8 360.4 1.0X -Parquet Vectorized (Pushdown) 395 414 18 39.8 25.1 14.4X -Native ORC Vectorized 6255 6272 19 2.5 397.7 0.9X -Native ORC Vectorized (Pushdown) 6397 6410 12 2.5 406.7 0.9X +Parquet Vectorized 5500 5593 76 2.9 349.7 1.0X +Parquet Vectorized (Pushdown) 335 339 4 46.9 21.3 16.4X +Native ORC Vectorized 6373 6395 24 2.5 405.2 0.9X +Native ORC Vectorized (Pushdown) 6573 6584 15 2.4 417.9 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%1000'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5419 5429 9 2.9 344.5 1.0X -Parquet Vectorized (Pushdown) 260 276 14 60.5 16.5 20.8X -Native ORC Vectorized 6195 6223 35 2.5 393.8 0.9X -Native ORC Vectorized (Pushdown) 6519 6706 118 2.4 414.4 0.8X +Parquet Vectorized 5431 5435 5 2.9 345.3 1.0X +Parquet Vectorized (Pushdown) 246 249 3 64.0 15.6 22.1X +Native ORC Vectorized 6318 6330 9 2.5 401.7 0.9X +Native ORC Vectorized (Pushdown) 6518 6545 32 2.4 414.4 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%786432'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5401 5413 8 2.9 343.4 1.0X -Parquet Vectorized (Pushdown) 249 268 23 63.3 15.8 21.7X -Native ORC Vectorized 6177 6183 7 2.5 392.7 0.9X -Native ORC Vectorized (Pushdown) 7467 7553 141 2.1 474.7 0.7X +Parquet Vectorized 5427 5436 16 2.9 345.0 1.0X +Parquet Vectorized (Pushdown) 246 249 2 63.9 15.6 22.1X +Native ORC Vectorized 6332 6345 12 2.5 402.6 0.9X +Native ORC Vectorized (Pushdown) 6501 6507 7 2.4 413.3 0.8X ================================================================================================ Pushdown benchmark for StringContains ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5852 6100 222 2.7 372.1 1.0X -Parquet Vectorized (Pushdown) 839 899 53 18.8 53.3 7.0X -Native ORC Vectorized 7383 7538 125 2.1 469.4 0.8X -Native ORC Vectorized (Pushdown) 7667 7872 159 2.1 487.5 0.8X +Parquet Vectorized 5671 5760 67 2.8 360.6 1.0X +Parquet Vectorized 
(Pushdown) 737 745 9 21.3 46.9 7.7X +Native ORC Vectorized 6477 6515 48 2.4 411.8 0.9X +Native ORC Vectorized (Pushdown) 6669 6689 19 2.4 424.0 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5501 5540 27 2.9 349.7 1.0X -Parquet Vectorized (Pushdown) 268 286 16 58.6 17.1 20.5X -Native ORC Vectorized 7027 7112 58 2.2 446.7 0.8X -Native ORC Vectorized (Pushdown) 6321 6345 26 2.5 401.8 0.9X +Parquet Vectorized 5446 5453 5 2.9 346.2 1.0X +Parquet Vectorized (Pushdown) 245 256 12 64.2 15.6 22.2X +Native ORC Vectorized 6275 6282 4 2.5 399.0 0.9X +Native ORC Vectorized (Pushdown) 6474 6482 7 2.4 411.6 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5390 5439 56 2.9 342.7 1.0X -Parquet Vectorized (Pushdown) 250 277 27 62.9 15.9 21.5X -Native ORC Vectorized 6128 6157 28 2.6 389.6 0.9X -Native ORC Vectorized (Pushdown) 6298 6322 24 2.5 400.4 0.9X +Parquet Vectorized 5438 5442 6 2.9 345.7 1.0X +Parquet Vectorized (Pushdown) 248 256 6 63.4 15.8 21.9X +Native ORC Vectorized 6266 6273 8 2.5 398.4 0.9X +Native ORC Vectorized (Pushdown) 6462 6475 13 2.4 410.8 0.8X ================================================================================================ Pushdown benchmark for decimal ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(9, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2399 2430 24 6.6 152.5 1.0X -Parquet Vectorized (Pushdown) 66 82 17 240.1 4.2 36.6X -Native ORC Vectorized 3131 3149 25 5.0 199.0 0.8X -Native ORC Vectorized (Pushdown) 58 66 7 273.1 3.7 41.7X +Parquet Vectorized 2417 2431 19 6.5 153.7 1.0X +Parquet Vectorized (Pushdown) 64 67 4 244.0 4.1 37.5X +Native ORC Vectorized 3441 3463 31 4.6 218.7 0.7X +Native ORC Vectorized (Pushdown) 58 62 7 272.0 3.7 41.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(9, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3517 3562 46 4.5 223.6 1.0X -Parquet Vectorized (Pushdown) 1638 1653 14 9.6 104.1 2.1X -Native ORC Vectorized 4376 4397 30 3.6 278.2 0.8X -Native ORC Vectorized (Pushdown) 1913 1927 20 8.2 121.6 1.8X +Parquet Vectorized 3560 3606 62 4.4 226.3 1.0X 
+Parquet Vectorized (Pushdown) 1678 1686 10 9.4 106.7 2.1X +Native ORC Vectorized 4686 4710 24 3.4 297.9 0.8X +Native ORC Vectorized (Pushdown) 1978 1992 21 8.0 125.7 1.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(9, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7145 7165 18 2.2 454.3 1.0X -Parquet Vectorized (Pushdown) 6858 6878 17 2.3 436.0 1.0X -Native ORC Vectorized 8642 8697 98 1.8 549.5 0.8X -Native ORC Vectorized (Pushdown) 8334 8351 17 1.9 529.9 0.9X +Parquet Vectorized 7360 7379 20 2.1 468.0 1.0X +Parquet Vectorized (Pushdown) 7055 7090 28 2.2 448.5 1.0X +Native ORC Vectorized 8850 8867 21 1.8 562.7 0.8X +Native ORC Vectorized (Pushdown) 8382 8394 10 1.9 532.9 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(9, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8037 8063 24 2.0 511.0 1.0X -Parquet Vectorized (Pushdown) 8067 8094 30 1.9 512.9 1.0X -Native ORC Vectorized 9665 9685 16 1.6 614.5 0.8X -Native ORC Vectorized (Pushdown) 9798 9849 41 1.6 623.0 0.8X +Parquet Vectorized 8369 8388 20 1.9 532.1 1.0X +Parquet Vectorized (Pushdown) 8409 8420 9 1.9 534.6 1.0X +Native ORC Vectorized 9926 9960 24 1.6 631.1 0.8X +Native ORC Vectorized (Pushdown) 9974 10012 34 1.6 634.2 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(18, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2650 2672 27 5.9 168.5 1.0X -Parquet Vectorized (Pushdown) 62 70 10 253.1 4.0 42.6X -Native ORC Vectorized 3181 3202 26 4.9 202.2 0.8X -Native ORC Vectorized (Pushdown) 55 63 6 287.6 3.5 48.5X +Parquet Vectorized 2601 2612 7 6.0 165.4 1.0X +Parquet Vectorized (Pushdown) 64 69 8 247.5 4.0 40.9X +Native ORC Vectorized 3243 3261 21 4.9 206.2 0.8X +Native ORC Vectorized (Pushdown) 56 63 5 279.6 3.6 46.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(18, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3254 3305 73 4.8 206.9 1.0X -Parquet Vectorized (Pushdown) 931 945 12 16.9 59.2 3.5X -Native ORC Vectorized 3783 3803 19 4.2 240.5 0.9X -Native ORC Vectorized (Pushdown) 954 975 24 16.5 60.7 3.4X +Parquet Vectorized 3202 3240 25 4.9 203.6 1.0X +Parquet Vectorized (Pushdown) 919 952 45 17.1 58.4 3.5X +Native ORC Vectorized 3936 3954 22 4.0 250.2 0.8X +Native ORC Vectorized (Pushdown) 1001 1009 10 15.7 63.7 3.2X -OpenJDK 
64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(18, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5649 5660 6 2.8 359.2 1.0X -Parquet Vectorized (Pushdown) 4392 4412 21 3.6 279.3 1.3X -Native ORC Vectorized 6152 6161 6 2.6 391.1 0.9X -Native ORC Vectorized (Pushdown) 4552 4568 19 3.5 289.4 1.2X +Parquet Vectorized 5598 5605 8 2.8 355.9 1.0X +Parquet Vectorized (Pushdown) 4366 4389 26 3.6 277.6 1.3X +Native ORC Vectorized 6410 6424 10 2.5 407.6 0.9X +Native ORC Vectorized (Pushdown) 4843 4852 9 3.2 307.9 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(18, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7937 7962 40 2.0 504.6 1.0X -Parquet Vectorized (Pushdown) 7661 7697 46 2.1 487.1 1.0X -Native ORC Vectorized 8633 8665 25 1.8 548.9 0.9X -Native ORC Vectorized (Pushdown) 8296 8311 14 1.9 527.4 1.0X +Parquet Vectorized 7861 7918 46 2.0 499.8 1.0X +Parquet Vectorized (Pushdown) 7637 7659 22 2.1 485.5 1.0X +Native ORC Vectorized 8921 8930 9 1.8 567.2 0.9X +Native ORC Vectorized (Pushdown) 8753 8784 26 1.8 556.5 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(38, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3699 3713 14 4.3 235.2 1.0X -Parquet Vectorized (Pushdown) 67 71 7 234.7 4.3 55.2X -Native ORC Vectorized 3177 3192 19 5.0 202.0 1.2X -Native ORC Vectorized (Pushdown) 53 57 5 296.0 3.4 69.6X +Parquet Vectorized 3766 3780 20 4.2 239.4 1.0X +Parquet Vectorized (Pushdown) 70 74 5 225.8 4.4 54.1X +Native ORC Vectorized 3250 3259 10 4.8 206.6 1.2X +Native ORC Vectorized (Pushdown) 55 61 6 285.2 3.5 68.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(38, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 4523 4568 83 3.5 287.6 1.0X -Parquet Vectorized (Pushdown) 1226 1235 7 12.8 78.0 3.7X -Native ORC Vectorized 3929 3956 38 4.0 249.8 1.2X -Native ORC Vectorized (Pushdown) 1098 1106 6 14.3 69.8 4.1X +Parquet Vectorized 4596 4632 53 3.4 292.2 1.0X +Parquet Vectorized (Pushdown) 1245 1257 9 12.6 79.1 3.7X +Native ORC Vectorized 4063 4077 14 3.9 258.3 1.1X +Native ORC Vectorized (Pushdown) 1126 1133 6 14.0 71.6 4.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(38, 2) 
rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7733 7741 5 2.0 491.6 1.0X -Parquet Vectorized (Pushdown) 5937 5959 23 2.6 377.4 1.3X -Native ORC Vectorized 6909 6913 4 2.3 439.2 1.1X -Native ORC Vectorized (Pushdown) 5286 5293 8 3.0 336.1 1.5X +Parquet Vectorized 7843 7862 14 2.0 498.6 1.0X +Parquet Vectorized (Pushdown) 6005 6027 19 2.6 381.8 1.3X +Native ORC Vectorized 7116 7128 17 2.2 452.5 1.1X +Native ORC Vectorized (Pushdown) 5410 5438 19 2.9 343.9 1.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(38, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 10916 10947 34 1.4 694.0 1.0X -Parquet Vectorized (Pushdown) 10577 10588 8 1.5 672.5 1.0X -Native ORC Vectorized 9997 10028 33 1.6 635.6 1.1X -Native ORC Vectorized (Pushdown) 9623 9637 9 1.6 611.8 1.1X +Parquet Vectorized 11034 11064 18 1.4 701.5 1.0X +Parquet Vectorized (Pushdown) 10676 10694 30 1.5 678.8 1.0X +Native ORC Vectorized 10144 10166 21 1.6 644.9 1.1X +Native ORC Vectorized (Pushdown) 9838 9874 32 1.6 625.5 1.1X ================================================================================================ Pushdown benchmark for InSet -> InFilters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6258 6286 27 2.5 397.8 1.0X -Parquet Vectorized (Pushdown) 251 281 26 62.6 16.0 24.9X -Native ORC Vectorized 4559 4669 109 3.4 289.9 1.4X -Native ORC Vectorized (Pushdown) 282 302 21 55.9 17.9 22.2X +Parquet Vectorized 6250 6291 27 2.5 397.4 1.0X +Parquet Vectorized (Pushdown) 257 275 16 61.1 16.4 24.3X +Native ORC Vectorized 4812 4839 21 3.3 305.9 1.3X +Native ORC Vectorized (Pushdown) 312 320 7 50.4 19.9 20.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6098 6113 15 2.6 387.7 1.0X -Parquet Vectorized (Pushdown) 246 253 4 63.9 15.6 24.8X -Native ORC Vectorized 4549 4575 23 3.5 289.2 1.3X -Native ORC Vectorized (Pushdown) 291 300 8 54.1 18.5 21.0X +Parquet Vectorized 6162 6168 6 2.6 391.8 1.0X +Parquet Vectorized (Pushdown) 272 274 1 57.9 17.3 22.7X +Native ORC Vectorized 4794 4806 10 3.3 304.8 1.3X +Native ORC Vectorized (Pushdown) 309 316 7 50.9 19.6 19.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server 
VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6089 6112 14 2.6 387.1 1.0X -Parquet Vectorized (Pushdown) 249 254 3 63.1 15.9 24.4X -Native ORC Vectorized 4540 4550 7 3.5 288.7 1.3X -Native ORC Vectorized (Pushdown) 279 294 11 56.4 17.7 21.8X +Parquet Vectorized 6148 6156 5 2.6 390.9 1.0X +Parquet Vectorized (Pushdown) 270 271 2 58.3 17.2 22.8X +Native ORC Vectorized 4778 4786 8 3.3 303.7 1.3X +Native ORC Vectorized (Pushdown) 293 302 7 53.6 18.6 21.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6096 6122 17 2.6 387.5 1.0X -Parquet Vectorized (Pushdown) 266 271 4 59.2 16.9 22.9X -Native ORC Vectorized 4567 4578 15 3.4 290.3 1.3X -Native ORC Vectorized (Pushdown) 295 303 7 53.4 18.7 20.7X +Parquet Vectorized 6165 6182 16 2.6 391.9 1.0X +Parquet Vectorized (Pushdown) 269 280 11 58.4 17.1 22.9X +Native ORC Vectorized 4812 4821 11 3.3 306.0 1.3X +Native ORC Vectorized (Pushdown) 301 310 6 52.3 19.1 20.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6103 6131 26 2.6 388.0 1.0X -Parquet Vectorized (Pushdown) 280 282 1 56.1 17.8 21.8X -Native ORC Vectorized 4575 4589 9 3.4 290.9 1.3X -Native ORC Vectorized (Pushdown) 292 300 6 53.9 18.6 20.9X +Parquet Vectorized 6175 6178 5 2.5 392.6 1.0X +Parquet Vectorized (Pushdown) 274 281 7 57.4 17.4 22.5X +Native ORC Vectorized 4802 4830 42 3.3 305.3 1.3X +Native ORC Vectorized (Pushdown) 322 326 8 48.8 20.5 19.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6107 6115 9 2.6 388.3 1.0X -Parquet Vectorized (Pushdown) 266 270 3 59.2 16.9 23.0X -Native ORC Vectorized 4573 4583 12 3.4 290.7 1.3X -Native ORC Vectorized (Pushdown) 293 301 7 53.8 18.6 20.9X +Parquet Vectorized 6153 6187 30 2.6 391.2 1.0X +Parquet Vectorized (Pushdown) 289 289 1 54.5 18.3 21.3X +Native ORC Vectorized 4816 4819 3 3.3 306.2 1.3X +Native ORC Vectorized (Pushdown) 309 316 5 50.9 19.6 19.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 10): Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6297 6319 15 2.5 400.4 1.0X -Parquet Vectorized (Pushdown) 844 847 2 18.6 53.6 7.5X -Native ORC Vectorized 4761 4767 6 3.3 302.7 1.3X -Native ORC Vectorized (Pushdown) 382 387 4 41.1 24.3 16.5X +Parquet Vectorized 6360 6379 13 2.5 404.3 1.0X +Parquet Vectorized (Pushdown) 841 848 7 18.7 53.5 7.6X +Native ORC Vectorized 5016 5025 11 3.1 318.9 1.3X +Native ORC Vectorized (Pushdown) 429 431 2 36.6 27.3 14.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6293 6309 11 2.5 400.1 1.0X -Parquet Vectorized (Pushdown) 3247 3256 8 4.8 206.4 1.9X -Native ORC Vectorized 4785 4797 12 3.3 304.2 1.3X -Native ORC Vectorized (Pushdown) 414 420 6 38.0 26.3 15.2X +Parquet Vectorized 6363 6382 13 2.5 404.5 1.0X +Parquet Vectorized (Pushdown) 3313 3325 11 4.7 210.6 1.9X +Native ORC Vectorized 5009 5018 10 3.1 318.5 1.3X +Native ORC Vectorized (Pushdown) 438 441 3 35.9 27.8 14.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6304 6314 6 2.5 400.8 1.0X -Parquet Vectorized (Pushdown) 5548 5564 13 2.8 352.7 1.1X -Native ORC Vectorized 4768 4785 31 3.3 303.1 1.3X -Native ORC Vectorized (Pushdown) 419 421 2 37.5 26.7 15.0X +Parquet Vectorized 6370 6384 14 2.5 405.0 1.0X +Parquet Vectorized (Pushdown) 5709 5742 28 2.8 363.0 1.1X +Native ORC Vectorized 5011 5015 6 3.1 318.6 1.3X +Native ORC Vectorized (Pushdown) 433 436 2 36.3 27.5 14.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6275 6286 8 2.5 398.9 1.0X -Parquet Vectorized (Pushdown) 839 843 3 18.8 53.3 7.5X -Native ORC Vectorized 4747 4763 17 3.3 301.8 1.3X -Native ORC Vectorized (Pushdown) 474 479 5 33.2 30.1 13.2X +Parquet Vectorized 6339 6346 7 2.5 403.0 1.0X +Parquet Vectorized (Pushdown) 845 849 3 18.6 53.7 7.5X +Native ORC Vectorized 4991 4997 5 3.2 317.3 1.3X +Native ORC Vectorized (Pushdown) 513 519 6 30.7 32.6 12.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6262 6300 59 2.5 398.2 1.0X -Parquet Vectorized (Pushdown) 3230 3246 12 4.9 205.3 1.9X -Native ORC Vectorized 4739 4756 15 3.3 301.3 1.3X -Native ORC Vectorized (Pushdown) 548 550 3 28.7 34.8 11.4X +Parquet Vectorized 6328 6342 18 2.5 402.3 1.0X +Parquet Vectorized (Pushdown) 3233 3262 28 4.9 205.6 2.0X +Native ORC Vectorized 4966 4979 15 3.2 315.7 1.3X +Native ORC Vectorized (Pushdown) 566 570 3 27.8 36.0 11.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6288 6298 13 2.5 399.8 1.0X -Parquet Vectorized (Pushdown) 5416 5424 9 2.9 344.3 1.2X -Native ORC Vectorized 4761 4773 16 3.3 302.7 1.3X -Native ORC Vectorized (Pushdown) 558 561 4 28.2 35.5 11.3X +Parquet Vectorized 6319 6329 7 2.5 401.8 1.0X +Parquet Vectorized (Pushdown) 5713 5731 15 2.8 363.2 1.1X +Native ORC Vectorized 4957 4967 13 3.2 315.1 1.3X +Native ORC Vectorized (Pushdown) 576 581 5 27.3 36.6 11.0X ================================================================================================ Pushdown benchmark for tinyint ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 tinyint row (value = CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2764 2782 23 5.7 175.8 1.0X -Parquet Vectorized (Pushdown) 95 102 8 165.3 6.1 29.0X -Native ORC Vectorized 2102 2139 28 7.5 133.6 1.3X -Native ORC Vectorized (Pushdown) 111 121 12 141.1 7.1 24.8X +Parquet Vectorized 2771 2801 32 5.7 176.2 1.0X +Parquet Vectorized (Pushdown) 100 112 15 157.7 6.3 27.8X +Native ORC Vectorized 2143 2161 28 7.3 136.3 1.3X +Native ORC Vectorized (Pushdown) 115 120 4 136.8 7.3 24.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3252 3280 38 4.8 206.7 1.0X -Parquet Vectorized (Pushdown) 855 878 20 18.4 54.4 3.8X -Native ORC Vectorized 2592 2595 5 6.1 164.8 1.3X -Native ORC Vectorized (Pushdown) 782 784 1 20.1 49.7 4.2X +Parquet Vectorized 3243 3289 62 4.9 206.2 1.0X +Parquet Vectorized (Pushdown) 851 863 14 18.5 54.1 3.8X +Native ORC Vectorized 2632 2648 21 6.0 167.3 1.2X +Native ORC Vectorized (Pushdown) 791 794 3 19.9 50.3 4.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% tinyint rows (value < 
CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5499 5507 7 2.9 349.6 1.0X -Parquet Vectorized (Pushdown) 4165 4183 16 3.8 264.8 1.3X -Native ORC Vectorized 4726 4747 25 3.3 300.4 1.2X -Native ORC Vectorized (Pushdown) 3729 3733 2 4.2 237.1 1.5X +Parquet Vectorized 5304 5315 17 3.0 337.2 1.0X +Parquet Vectorized (Pushdown) 4024 4034 15 3.9 255.8 1.3X +Native ORC Vectorized 4663 4675 11 3.4 296.5 1.1X +Native ORC Vectorized (Pushdown) 3665 3675 10 4.3 233.0 1.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7817 7832 14 2.0 497.0 1.0X -Parquet Vectorized (Pushdown) 7581 7590 6 2.1 482.0 1.0X -Native ORC Vectorized 7074 7097 40 2.2 449.7 1.1X -Native ORC Vectorized (Pushdown) 6884 6889 4 2.3 437.7 1.1X +Parquet Vectorized 7478 7496 14 2.1 475.4 1.0X +Parquet Vectorized (Pushdown) 7250 7261 10 2.2 461.0 1.0X +Native ORC Vectorized 6712 6716 3 2.3 426.7 1.1X +Native ORC Vectorized (Pushdown) 6527 6538 18 2.4 415.0 1.1X ================================================================================================ Pushdown benchmark for Timestamp ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3115 3124 9 5.0 198.1 1.0X -Parquet Vectorized (Pushdown) 3101 3110 8 5.1 197.2 1.0X -Native ORC Vectorized 1984 2002 27 7.9 126.1 1.6X -Native ORC Vectorized (Pushdown) 38 42 6 413.3 2.4 81.9X +Parquet Vectorized 3103 3115 9 5.1 197.3 1.0X +Parquet Vectorized (Pushdown) 3107 3123 9 5.1 197.6 1.0X +Native ORC Vectorized 1978 1991 12 7.9 125.8 1.6X +Native ORC Vectorized (Pushdown) 40 44 5 390.9 2.6 77.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as INT96 rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3673 3688 19 4.3 233.5 1.0X -Parquet Vectorized (Pushdown) 3651 3674 20 4.3 232.1 1.0X -Native ORC Vectorized 2497 2508 16 6.3 158.8 1.5X -Native ORC Vectorized (Pushdown) 751 771 17 20.9 47.7 4.9X +Parquet Vectorized 3651 3670 20 4.3 232.1 1.0X +Parquet Vectorized (Pushdown) 3652 3660 7 4.3 232.2 1.0X +Native ORC Vectorized 2524 2535 14 6.2 160.5 1.4X +Native ORC Vectorized (Pushdown) 786 793 9 20.0 50.0 4.6X -OpenJDK 
64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as INT96 rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6070 6082 14 2.6 385.9 1.0X -Parquet Vectorized (Pushdown) 6083 6098 13 2.6 386.7 1.0X -Native ORC Vectorized 4669 4686 20 3.4 296.9 1.3X -Native ORC Vectorized (Pushdown) 3649 3671 13 4.3 232.0 1.7X +Parquet Vectorized 5953 5959 7 2.6 378.5 1.0X +Parquet Vectorized (Pushdown) 5959 5970 12 2.6 378.9 1.0X +Native ORC Vectorized 4855 4868 9 3.2 308.7 1.2X +Native ORC Vectorized (Pushdown) 3818 3823 4 4.1 242.7 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as INT96 rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8292 8309 14 1.9 527.2 1.0X -Parquet Vectorized (Pushdown) 8311 8323 20 1.9 528.4 1.0X -Native ORC Vectorized 6958 6983 31 2.3 442.4 1.2X -Native ORC Vectorized (Pushdown) 6779 6794 9 2.3 431.0 1.2X +Parquet Vectorized 8418 8436 18 1.9 535.2 1.0X +Parquet Vectorized (Pushdown) 8407 8431 25 1.9 534.5 1.0X +Native ORC Vectorized 7012 7030 17 2.2 445.8 1.2X +Native ORC Vectorized (Pushdown) 6794 6849 89 2.3 431.9 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as TIMESTAMP_MICROS row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2593 2606 14 6.1 164.8 1.0X -Parquet Vectorized (Pushdown) 62 68 8 251.9 4.0 41.5X -Native ORC Vectorized 1989 1996 4 7.9 126.5 1.3X -Native ORC Vectorized (Pushdown) 39 45 6 401.1 2.5 66.1X +Parquet Vectorized 2583 2596 17 6.1 164.2 1.0X +Parquet Vectorized (Pushdown) 63 66 3 248.8 4.0 40.9X +Native ORC Vectorized 1970 1976 4 8.0 125.2 1.3X +Native ORC Vectorized (Pushdown) 39 45 7 401.1 2.5 65.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3152 3157 6 5.0 200.4 1.0X -Parquet Vectorized (Pushdown) 888 894 9 17.7 56.5 3.5X -Native ORC Vectorized 2537 2544 5 6.2 161.3 1.2X -Native ORC Vectorized (Pushdown) 780 790 10 20.2 49.6 4.0X +Parquet Vectorized 3127 3140 8 5.0 198.8 1.0X +Parquet Vectorized (Pushdown) 885 890 4 17.8 56.3 3.5X +Native ORC Vectorized 2595 
2599 6 6.1 165.0 1.2X +Native ORC Vectorized (Pushdown) 794 799 7 19.8 50.5 3.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5491 5505 11 2.9 349.1 1.0X -Parquet Vectorized (Pushdown) 4252 4262 6 3.7 270.4 1.3X -Native ORC Vectorized 4765 4782 17 3.3 302.9 1.2X -Native ORC Vectorized (Pushdown) 3732 3746 12 4.2 237.3 1.5X +Parquet Vectorized 5492 5514 36 2.9 349.2 1.0X +Parquet Vectorized (Pushdown) 4254 4266 7 3.7 270.5 1.3X +Native ORC Vectorized 4680 4690 9 3.4 297.5 1.2X +Native ORC Vectorized (Pushdown) 3612 3618 4 4.4 229.7 1.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7482 7496 13 2.1 475.7 1.0X -Parquet Vectorized (Pushdown) 7264 7275 7 2.2 461.8 1.0X -Native ORC Vectorized 6919 6949 30 2.3 439.9 1.1X -Native ORC Vectorized (Pushdown) 6742 6748 7 2.3 428.6 1.1X +Parquet Vectorized 7631 7642 10 2.1 485.2 1.0X +Parquet Vectorized (Pushdown) 7408 7430 26 2.1 471.0 1.0X +Native ORC Vectorized 7062 7081 14 2.2 449.0 1.1X +Native ORC Vectorized (Pushdown) 6872 6890 10 2.3 436.9 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as TIMESTAMP_MILLIS row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2763 2780 29 5.7 175.6 1.0X -Parquet Vectorized (Pushdown) 64 66 3 247.6 4.0 43.5X -Native ORC Vectorized 1992 1999 14 7.9 126.6 1.4X -Native ORC Vectorized (Pushdown) 40 42 4 397.4 2.5 69.8X +Parquet Vectorized 2739 2747 9 5.7 174.1 1.0X +Parquet Vectorized (Pushdown) 64 67 5 247.5 4.0 43.1X +Native ORC Vectorized 1962 1969 8 8.0 124.8 1.4X +Native ORC Vectorized (Pushdown) 39 43 4 400.8 2.5 69.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3300 3313 9 4.8 209.8 1.0X -Parquet Vectorized (Pushdown) 891 899 11 17.7 56.6 3.7X -Native ORC Vectorized 2545 2562 15 6.2 161.8 1.3X -Native ORC Vectorized (Pushdown) 783 788 4 20.1 49.8 4.2X 
+Parquet Vectorized 3295 3309 16 4.8 209.5 1.0X +Parquet Vectorized (Pushdown) 899 910 17 17.5 57.1 3.7X +Native ORC Vectorized 2589 2598 7 6.1 164.6 1.3X +Native ORC Vectorized (Pushdown) 793 800 9 19.8 50.4 4.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5618 5627 8 2.8 357.2 1.0X -Parquet Vectorized (Pushdown) 4299 4316 18 3.7 273.3 1.3X -Native ORC Vectorized 4718 4732 13 3.3 300.0 1.2X -Native ORC Vectorized (Pushdown) 3689 3738 95 4.3 234.6 1.5X +Parquet Vectorized 5633 5660 28 2.8 358.2 1.0X +Parquet Vectorized (Pushdown) 4358 4401 53 3.6 277.1 1.3X +Native ORC Vectorized 4621 4640 18 3.4 293.8 1.2X +Native ORC Vectorized (Pushdown) 3619 3638 18 4.3 230.1 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8017 8025 8 2.0 509.7 1.0X -Parquet Vectorized (Pushdown) 7756 7769 10 2.0 493.1 1.0X -Native ORC Vectorized 6879 6900 16 2.3 437.4 1.2X -Native ORC Vectorized (Pushdown) 6700 6725 25 2.3 426.0 1.2X +Parquet Vectorized 7895 7935 54 2.0 501.9 1.0X +Parquet Vectorized (Pushdown) 7662 7702 35 2.1 487.1 1.0X +Native ORC Vectorized 7068 7091 18 2.2 449.4 1.1X +Native ORC Vectorized (Pushdown) 6868 6889 24 2.3 436.7 1.1X ================================================================================================ Pushdown benchmark with many filters ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 row with 1 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 63 84 20 0.0 63428969.0 1.0X -Parquet Vectorized (Pushdown) 60 64 7 0.0 60435812.0 1.0X -Native ORC Vectorized 55 57 3 0.0 54999306.0 1.2X -Native ORC Vectorized (Pushdown) 56 58 4 0.0 55584526.0 1.1X +Parquet Vectorized 62 80 21 0.0 62474384.0 1.0X +Parquet Vectorized (Pushdown) 63 65 3 0.0 62745406.0 1.0X +Native ORC Vectorized 56 59 5 0.0 56291738.0 1.1X +Native ORC Vectorized (Pushdown) 58 60 2 0.0 57939662.0 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 row with 250 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 371 376 6 0.0 
370725520.0 1.0X -Parquet Vectorized (Pushdown) 379 387 8 0.0 379310488.0 1.0X -Native ORC Vectorized 361 365 3 0.0 361340424.0 1.0X -Native ORC Vectorized (Pushdown) 366 373 5 0.0 366314223.0 1.0X +Parquet Vectorized 360 373 10 0.0 360453365.0 1.0X +Parquet Vectorized (Pushdown) 366 374 8 0.0 366449891.0 1.0X +Native ORC Vectorized 353 356 3 0.0 352735496.0 1.0X +Native ORC Vectorized (Pushdown) 359 369 8 0.0 358660716.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 row with 500 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 1975 2018 60 0.0 1974703729.0 1.0X -Parquet Vectorized (Pushdown) 1980 2024 53 0.0 1979551499.0 1.0X -Native ORC Vectorized 1965 2012 57 0.0 1964990387.0 1.0X -Native ORC Vectorized (Pushdown) 1968 1998 30 0.0 1967786641.0 1.0X +Parquet Vectorized 1895 1916 27 0.0 1895172425.0 1.0X +Parquet Vectorized (Pushdown) 1918 1962 56 0.0 1918148217.0 1.0X +Native ORC Vectorized 1889 1921 49 0.0 1888761721.0 1.0X +Native ORC Vectorized (Pushdown) 1903 1913 9 0.0 1902514400.0 1.0X diff --git a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt index d62d646a6564f..ef89bc72e4576 100644 --- a/sql/core/benchmarks/FilterPushdownBenchmark-results.txt +++ b/sql/core/benchmarks/FilterPushdownBenchmark-results.txt @@ -2,733 +2,733 @@ Pushdown for many distinct value case ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6742 6808 59 2.3 428.6 1.0X -Parquet Vectorized (Pushdown) 307 319 12 51.3 19.5 22.0X -Native ORC Vectorized 5062 5183 92 3.1 321.8 1.3X -Native ORC Vectorized (Pushdown) 307 327 18 51.2 19.5 21.9X +Parquet Vectorized 6787 6843 55 2.3 431.5 1.0X +Parquet Vectorized (Pushdown) 298 322 20 52.8 18.9 22.8X +Native ORC Vectorized 5201 5298 95 3.0 330.7 1.3X +Native ORC Vectorized (Pushdown) 328 337 8 47.9 20.9 20.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 string row ('7864320' < value < '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6849 6872 28 2.3 435.5 1.0X -Parquet Vectorized (Pushdown) 274 293 14 57.4 17.4 25.0X -Native ORC Vectorized 5195 5211 22 3.0 330.3 1.3X -Native ORC Vectorized (Pushdown) 309 345 47 51.0 19.6 22.2X +Parquet Vectorized 6854 6878 15 2.3 435.8 1.0X +Parquet Vectorized (Pushdown) 289 299 10 54.4 18.4 23.7X +Native ORC Vectorized 5278 5297 22 3.0 335.6 1.3X +Native ORC Vectorized (Pushdown) 331 362 34 47.5 21.0 20.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 
6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 string row (value = '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6824 6840 16 2.3 433.8 1.0X -Parquet Vectorized (Pushdown) 264 267 2 59.7 16.8 25.9X -Native ORC Vectorized 5108 5127 24 3.1 324.8 1.3X -Native ORC Vectorized (Pushdown) 291 303 8 54.1 18.5 23.5X +Parquet Vectorized 6829 6844 12 2.3 434.2 1.0X +Parquet Vectorized (Pushdown) 266 281 14 59.1 16.9 25.7X +Native ORC Vectorized 5262 5283 14 3.0 334.5 1.3X +Native ORC Vectorized (Pushdown) 308 322 9 51.1 19.6 22.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 string row (value <=> '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6848 6862 12 2.3 435.4 1.0X -Parquet Vectorized (Pushdown) 257 261 4 61.2 16.3 26.7X -Native ORC Vectorized 5099 5118 15 3.1 324.2 1.3X -Native ORC Vectorized (Pushdown) 281 288 6 56.0 17.9 24.4X +Parquet Vectorized 6819 6838 13 2.3 433.6 1.0X +Parquet Vectorized (Pushdown) 261 274 11 60.3 16.6 26.1X +Native ORC Vectorized 5231 5251 13 3.0 332.6 1.3X +Native ORC Vectorized (Pushdown) 305 316 10 51.6 19.4 22.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 string row ('7864320' <= value <= '7864320'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6831 6850 15 2.3 434.3 1.0X -Parquet Vectorized (Pushdown) 257 262 5 61.2 16.3 26.6X -Native ORC Vectorized 5122 5147 21 3.1 325.7 1.3X -Native ORC Vectorized (Pushdown) 287 298 9 54.8 18.2 23.8X +Parquet Vectorized 6859 6869 6 2.3 436.1 1.0X +Parquet Vectorized (Pushdown) 266 274 10 59.2 16.9 25.8X +Native ORC Vectorized 5284 5296 9 3.0 336.0 1.3X +Native ORC Vectorized (Pushdown) 308 330 20 51.0 19.6 22.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 12806 12858 41 1.2 814.2 1.0X -Parquet Vectorized (Pushdown) 12798 12824 24 1.2 813.7 1.0X -Native ORC Vectorized 11297 11328 31 1.4 718.2 1.1X -Native ORC Vectorized (Pushdown) 11377 11394 16 1.4 723.3 1.1X +Parquet Vectorized 12624 12760 116 1.2 802.6 1.0X +Parquet Vectorized (Pushdown) 12621 12631 11 1.2 802.4 1.0X +Native ORC Vectorized 11074 11089 10 1.4 704.1 1.1X +Native ORC Vectorized (Pushdown) 11168 11188 15 1.4 710.0 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 int row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6428 6440 13 2.4 408.7 1.0X -Parquet Vectorized (Pushdown) 242 256 11 65.1 15.4 26.6X -Native ORC Vectorized 4596 4620 24 3.4 292.2 1.4X -Native ORC Vectorized (Pushdown) 272 279 6 57.9 17.3 23.7X +Parquet Vectorized 6439 6458 16 2.4 409.4 1.0X +Parquet Vectorized (Pushdown) 250 257 7 63.0 15.9 25.8X +Native ORC Vectorized 4759 4770 8 3.3 302.6 1.4X +Native ORC Vectorized (Pushdown) 294 304 14 53.6 18.7 21.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 int row (7864320 < value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6427 6445 20 2.4 408.6 1.0X -Parquet Vectorized (Pushdown) 247 270 22 63.7 15.7 26.0X -Native ORC Vectorized 4602 4611 8 3.4 292.6 1.4X -Native ORC Vectorized (Pushdown) 273 282 8 57.7 17.3 23.6X +Parquet Vectorized 6422 6457 38 2.4 408.3 1.0X +Parquet Vectorized (Pushdown) 254 266 15 61.9 16.2 25.3X +Native ORC Vectorized 4755 4771 20 3.3 302.3 1.4X +Native ORC Vectorized (Pushdown) 300 306 4 52.4 19.1 21.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 int row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6477 6499 31 2.4 411.8 1.0X -Parquet Vectorized (Pushdown) 247 255 6 63.6 15.7 26.2X -Native ORC Vectorized 4664 4668 5 3.4 296.5 1.4X -Native ORC Vectorized (Pushdown) 276 282 8 57.0 17.5 23.5X +Parquet Vectorized 6477 6498 32 2.4 411.8 1.0X +Parquet Vectorized (Pushdown) 253 263 12 62.2 16.1 25.6X +Native ORC Vectorized 4799 4806 7 3.3 305.1 1.3X +Native ORC Vectorized (Pushdown) 295 300 3 53.3 18.8 21.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 int row (value <=> 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6476 6484 10 2.4 411.7 1.0X -Parquet Vectorized (Pushdown) 243 255 8 64.7 15.4 26.7X -Native ORC Vectorized 4644 4663 19 3.4 295.3 1.4X -Native ORC Vectorized (Pushdown) 270 279 8 58.3 17.2 24.0X +Parquet Vectorized 6457 6483 21 2.4 410.5 1.0X +Parquet Vectorized (Pushdown) 251 261 8 62.7 16.0 25.7X +Native ORC Vectorized 4787 4807 18 3.3 304.3 1.3X +Native ORC Vectorized (Pushdown) 292 302 11 53.9 18.6 22.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 int row (7864320 <= value <= 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 6461 6481 18 2.4 410.8 1.0X -Parquet Vectorized (Pushdown) 247 252 4 63.6 15.7 26.1X -Native 
ORC Vectorized 4644 4660 12 3.4 295.3 1.4X -Native ORC Vectorized (Pushdown) 268 280 8 58.7 17.0 24.1X +Parquet Vectorized 6460 6475 13 2.4 410.7 1.0X +Parquet Vectorized (Pushdown) 251 256 6 62.6 16.0 25.7X +Native ORC Vectorized 4830 4840 10 3.3 307.1 1.3X +Native ORC Vectorized (Pushdown) 292 297 4 53.8 18.6 22.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 int row (7864319 < value < 7864321): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6461 6475 9 2.4 410.8 1.0X -Parquet Vectorized (Pushdown) 243 259 9 64.6 15.5 26.6X -Native ORC Vectorized 4650 4663 13 3.4 295.7 1.4X -Native ORC Vectorized (Pushdown) 271 275 3 58.1 17.2 23.9X +Parquet Vectorized 6464 6478 12 2.4 411.0 1.0X +Parquet Vectorized (Pushdown) 250 262 6 63.0 15.9 25.9X +Native ORC Vectorized 4803 4822 12 3.3 305.4 1.3X +Native ORC Vectorized (Pushdown) 292 298 5 53.8 18.6 22.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% int rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 7051 7060 9 2.2 448.3 1.0X -Parquet Vectorized (Pushdown) 1434 1446 14 11.0 91.2 4.9X -Native ORC Vectorized 5204 5216 12 3.0 330.9 1.4X -Native ORC Vectorized (Pushdown) 1265 1280 13 12.4 80.4 5.6X +Parquet Vectorized 7055 7068 19 2.2 448.6 1.0X +Parquet Vectorized (Pushdown) 1462 1468 6 10.8 92.9 4.8X +Native ORC Vectorized 5388 5401 10 2.9 342.5 1.3X +Native ORC Vectorized (Pushdown) 1324 1328 4 11.9 84.2 5.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% int rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 9069 9075 6 1.7 576.6 1.0X -Parquet Vectorized (Pushdown) 5971 5977 6 2.6 379.6 1.5X -Native ORC Vectorized 7341 7359 15 2.1 466.7 1.2X -Native ORC Vectorized (Pushdown) 5167 5178 12 3.0 328.5 1.8X +Parquet Vectorized 9195 9226 24 1.7 584.6 1.0X +Parquet Vectorized (Pushdown) 6115 6134 14 2.6 388.8 1.5X +Native ORC Vectorized 7526 7553 19 2.1 478.5 1.2X +Native ORC Vectorized (Pushdown) 5330 5338 10 3.0 338.9 1.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% int rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11273 11286 19 1.4 716.7 1.0X -Parquet Vectorized (Pushdown) 10641 10665 27 1.5 676.5 1.1X -Native ORC Vectorized 9236 9282 61 1.7 587.2 1.2X -Native ORC Vectorized (Pushdown) 8872 8909 49 1.8 564.0 1.3X +Parquet Vectorized 11369 11390 32 1.4 722.8 1.0X +Parquet Vectorized (Pushdown) 10766 10775 10 1.5 684.5 1.1X +Native ORC 
Vectorized 9650 9667 12 1.6 613.6 1.2X +Native ORC Vectorized (Pushdown) 9267 9305 56 1.7 589.2 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all int rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11624 11629 5 1.4 739.0 1.0X -Parquet Vectorized (Pushdown) 11665 11692 25 1.3 741.6 1.0X -Native ORC Vectorized 9822 9837 14 1.6 624.5 1.2X -Native ORC Vectorized (Pushdown) 9875 9883 5 1.6 627.8 1.2X +Parquet Vectorized 11931 11947 17 1.3 758.6 1.0X +Parquet Vectorized (Pushdown) 11917 11938 15 1.3 757.7 1.0X +Native ORC Vectorized 10059 10078 25 1.6 639.6 1.2X +Native ORC Vectorized (Pushdown) 10127 10148 16 1.6 643.8 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all int rows (value > -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11543 11563 13 1.4 733.9 1.0X -Parquet Vectorized (Pushdown) 11588 11594 4 1.4 736.8 1.0X -Native ORC Vectorized 9784 9807 29 1.6 622.0 1.2X -Native ORC Vectorized (Pushdown) 9848 9876 17 1.6 626.1 1.2X +Parquet Vectorized 11818 11836 12 1.3 751.4 1.0X +Parquet Vectorized (Pushdown) 11904 11917 14 1.3 756.9 1.0X +Native ORC Vectorized 10241 10264 20 1.5 651.1 1.2X +Native ORC Vectorized (Pushdown) 10308 10332 21 1.5 655.4 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all int rows (value != -1): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 11727 11740 8 1.3 745.6 1.0X -Parquet Vectorized (Pushdown) 11781 11791 11 1.3 749.0 1.0X -Native ORC Vectorized 9776 9803 23 1.6 621.5 1.2X -Native ORC Vectorized (Pushdown) 9858 9874 19 1.6 626.8 1.2X +Parquet Vectorized 11863 11867 3 1.3 754.2 1.0X +Parquet Vectorized (Pushdown) 11916 11937 14 1.3 757.6 1.0X +Native ORC Vectorized 10068 10109 43 1.6 640.1 1.2X +Native ORC Vectorized (Pushdown) 10156 10185 37 1.5 645.7 1.2X ================================================================================================ Pushdown for few distinct value case (use dictionary encoding) ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 distinct string row (value IS NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5752 5769 18 2.7 365.7 1.0X -Parquet Vectorized (Pushdown) 207 213 9 75.9 13.2 27.8X -Native ORC Vectorized 6475 6485 8 2.4 411.7 0.9X -Native ORC Vectorized (Pushdown) 910 916 6 17.3 57.8 6.3X +Parquet Vectorized 5712 5727 29 2.8 363.2 1.0X +Parquet 
Vectorized (Pushdown) 209 215 5 75.2 13.3 27.3X +Native ORC Vectorized 6631 6661 21 2.4 421.6 0.9X +Native ORC Vectorized (Pushdown) 970 974 4 16.2 61.6 5.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 0 distinct string row ('100' < value < '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5869 5872 3 2.7 373.1 1.0X -Parquet Vectorized (Pushdown) 209 217 7 75.1 13.3 28.0X -Native ORC Vectorized 6684 6699 12 2.4 424.9 0.9X -Native ORC Vectorized (Pushdown) 921 929 13 17.1 58.6 6.4X +Parquet Vectorized 5888 5896 9 2.7 374.4 1.0X +Parquet Vectorized (Pushdown) 212 222 12 74.1 13.5 27.7X +Native ORC Vectorized 6813 6820 5 2.3 433.2 0.9X +Native ORC Vectorized (Pushdown) 971 978 5 16.2 61.8 6.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row (value = '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5871 5881 21 2.7 373.3 1.0X -Parquet Vectorized (Pushdown) 254 262 8 62.0 16.1 23.1X -Native ORC Vectorized 6618 6632 16 2.4 420.8 0.9X -Native ORC Vectorized (Pushdown) 958 971 12 16.4 60.9 6.1X +Parquet Vectorized 5826 5838 15 2.7 370.4 1.0X +Parquet Vectorized (Pushdown) 261 263 2 60.4 16.6 22.4X +Native ORC Vectorized 6763 6785 18 2.3 430.0 0.9X +Native ORC Vectorized (Pushdown) 1019 1031 19 15.4 64.8 5.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row (value <=> '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5859 5870 18 2.7 372.5 1.0X -Parquet Vectorized (Pushdown) 252 257 7 62.3 16.0 23.2X -Native ORC Vectorized 6619 6630 15 2.4 420.8 0.9X -Native ORC Vectorized (Pushdown) 956 965 9 16.4 60.8 6.1X +Parquet Vectorized 5823 5836 13 2.7 370.2 1.0X +Parquet Vectorized (Pushdown) 258 264 6 60.9 16.4 22.6X +Native ORC Vectorized 6715 6737 28 2.3 426.9 0.9X +Native ORC Vectorized (Pushdown) 1017 1029 14 15.5 64.6 5.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 distinct string row ('100' <= value <= '100'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5927 5932 6 2.7 376.8 1.0X -Parquet Vectorized (Pushdown) 257 260 4 61.3 16.3 23.1X -Native ORC Vectorized 6684 6698 10 2.4 425.0 0.9X -Native ORC Vectorized (Pushdown) 963 972 9 16.3 61.2 6.2X +Parquet Vectorized 5911 5924 12 2.7 375.8 1.0X +Parquet Vectorized (Pushdown) 260 262 1 60.4 16.6 22.7X +Native ORC Vectorized 6793 6830 45 2.3 431.9 0.9X +Native ORC Vectorized (Pushdown) 1023 1032 7 15.4 65.0 5.8X -OpenJDK 64-Bit 
Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select all distinct string rows (value IS NOT NULL): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 12265 12294 35 1.3 779.8 1.0X -Parquet Vectorized (Pushdown) 12344 12375 34 1.3 784.8 1.0X -Native ORC Vectorized 13187 13225 26 1.2 838.4 0.9X -Native ORC Vectorized (Pushdown) 13400 13431 30 1.2 852.0 0.9X +Parquet Vectorized 12245 12263 13 1.3 778.5 1.0X +Parquet Vectorized (Pushdown) 12336 12360 28 1.3 784.3 1.0X +Native ORC Vectorized 13260 13282 14 1.2 843.0 0.9X +Native ORC Vectorized (Pushdown) 13461 13478 20 1.2 855.8 0.9X ================================================================================================ Pushdown benchmark for StringStartsWith ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7041 7147 129 2.2 447.7 1.0X -Parquet Vectorized (Pushdown) 888 903 20 17.7 56.4 7.9X -Native ORC Vectorized 5292 5341 45 3.0 336.4 1.3X -Native ORC Vectorized (Pushdown) 5381 5416 33 2.9 342.1 1.3X +Parquet Vectorized 7051 7097 66 2.2 448.3 1.0X +Parquet Vectorized (Pushdown) 892 908 14 17.6 56.7 7.9X +Native ORC Vectorized 5370 5406 28 2.9 341.4 1.3X +Native ORC Vectorized (Pushdown) 5447 5458 10 2.9 346.3 1.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7185 7196 11 2.2 456.8 1.0X -Parquet Vectorized (Pushdown) 245 250 5 64.2 15.6 29.3X -Native ORC Vectorized 5172 5187 12 3.0 328.8 1.4X -Native ORC Vectorized (Pushdown) 5237 5264 23 3.0 333.0 1.4X +Parquet Vectorized 6897 6908 14 2.3 438.5 1.0X +Parquet Vectorized (Pushdown) 247 252 4 63.6 15.7 27.9X +Native ORC Vectorized 5237 5242 4 3.0 333.0 1.3X +Native ORC Vectorized (Pushdown) 5307 5329 35 3.0 337.4 1.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringStartsWith filter: (value like '786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7134 7161 27 2.2 453.6 1.0X -Parquet Vectorized (Pushdown) 239 247 9 65.9 15.2 29.9X -Native ORC Vectorized 5107 5116 9 3.1 324.7 1.4X -Native ORC Vectorized (Pushdown) 5180 5189 8 3.0 329.3 1.4X +Parquet Vectorized 6893 6917 26 2.3 438.3 1.0X +Parquet Vectorized (Pushdown) 241 246 8 65.4 15.3 28.7X +Native ORC Vectorized 5240 5249 9 3.0 333.1 1.3X 
+Native ORC Vectorized (Pushdown) 5305 5317 14 3.0 337.3 1.3X ================================================================================================ Pushdown benchmark for StringEndsWith ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%10'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5816 5829 9 2.7 369.8 1.0X -Parquet Vectorized (Pushdown) 336 345 14 46.9 21.3 17.3X -Native ORC Vectorized 6601 6617 13 2.4 419.7 0.9X -Native ORC Vectorized (Pushdown) 6768 6799 18 2.3 430.3 0.9X +Parquet Vectorized 5852 5870 15 2.7 372.1 1.0X +Parquet Vectorized (Pushdown) 338 345 9 46.6 21.5 17.3X +Native ORC Vectorized 6740 6753 12 2.3 428.5 0.9X +Native ORC Vectorized (Pushdown) 6932 6955 18 2.3 440.7 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%1000'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5783 5803 25 2.7 367.7 1.0X -Parquet Vectorized (Pushdown) 243 248 4 64.8 15.4 23.8X -Native ORC Vectorized 6557 6576 14 2.4 416.9 0.9X -Native ORC Vectorized (Pushdown) 6730 6748 13 2.3 427.9 0.9X +Parquet Vectorized 5787 5812 17 2.7 367.9 1.0X +Parquet Vectorized (Pushdown) 244 252 8 64.5 15.5 23.7X +Native ORC Vectorized 6697 6716 11 2.3 425.8 0.9X +Native ORC Vectorized (Pushdown) 6884 6900 12 2.3 437.7 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringEndsWith filter: (value like '%786432'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5762 5781 11 2.7 366.4 1.0X -Parquet Vectorized (Pushdown) 241 244 2 65.4 15.3 24.0X -Native ORC Vectorized 6567 6579 20 2.4 417.5 0.9X -Native ORC Vectorized (Pushdown) 6757 6766 11 2.3 429.6 0.9X +Parquet Vectorized 5784 5800 26 2.7 367.8 1.0X +Parquet Vectorized (Pushdown) 242 247 4 64.9 15.4 23.9X +Native ORC Vectorized 6704 6717 10 2.3 426.2 0.9X +Native ORC Vectorized (Pushdown) 6895 6911 17 2.3 438.4 0.8X ================================================================================================ Pushdown benchmark for StringContains ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%10%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5964 5980 16 2.6 379.2 1.0X -Parquet Vectorized (Pushdown) 741 745 6 21.2 47.1 8.1X -Native ORC 
Vectorized 6709 6731 13 2.3 426.6 0.9X -Native ORC Vectorized (Pushdown) 6933 6941 11 2.3 440.8 0.9X +Parquet Vectorized 6010 6037 20 2.6 382.1 1.0X +Parquet Vectorized (Pushdown) 755 758 4 20.8 48.0 8.0X +Native ORC Vectorized 6902 6918 11 2.3 438.8 0.9X +Native ORC Vectorized (Pushdown) 7113 7128 10 2.2 452.2 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%1000%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5788 5802 12 2.7 368.0 1.0X -Parquet Vectorized (Pushdown) 241 244 2 65.3 15.3 24.0X -Native ORC Vectorized 6538 6559 16 2.4 415.7 0.9X -Native ORC Vectorized (Pushdown) 6714 6734 14 2.3 426.9 0.9X +Parquet Vectorized 5791 5799 8 2.7 368.2 1.0X +Parquet Vectorized (Pushdown) 246 247 1 64.0 15.6 23.6X +Native ORC Vectorized 6700 6712 12 2.3 426.0 0.9X +Native ORC Vectorized (Pushdown) 6906 6923 18 2.3 439.1 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor StringContains filter: (value like '%786432%'): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 5786 5795 7 2.7 367.9 1.0X -Parquet Vectorized (Pushdown) 243 245 2 64.7 15.4 23.8X -Native ORC Vectorized 6558 6580 26 2.4 417.0 0.9X -Native ORC Vectorized (Pushdown) 6735 6751 13 2.3 428.2 0.9X +Parquet Vectorized 5803 5821 27 2.7 368.9 1.0X +Parquet Vectorized (Pushdown) 243 248 9 64.6 15.5 23.8X +Native ORC Vectorized 6709 6730 18 2.3 426.5 0.9X +Native ORC Vectorized (Pushdown) 6910 6921 10 2.3 439.3 0.8X ================================================================================================ Pushdown benchmark for decimal ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(9, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2818 2835 11 5.6 179.2 1.0X -Parquet Vectorized (Pushdown) 64 66 4 246.6 4.1 44.2X -Native ORC Vectorized 3385 3402 20 4.6 215.2 0.8X -Native ORC Vectorized (Pushdown) 56 60 4 280.6 3.6 50.3X +Parquet Vectorized 2857 2879 29 5.5 181.6 1.0X +Parquet Vectorized (Pushdown) 65 68 5 241.6 4.1 43.9X +Native ORC Vectorized 3410 3429 17 4.6 216.8 0.8X +Native ORC Vectorized (Pushdown) 59 62 3 265.7 3.8 48.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(9, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 4033 4046 10 3.9 256.4 1.0X -Parquet Vectorized (Pushdown) 1791 1799 9 8.8 
113.9 2.3X -Native ORC Vectorized 4672 4695 33 3.4 297.0 0.9X -Native ORC Vectorized (Pushdown) 1931 1940 20 8.1 122.8 2.1X +Parquet Vectorized 4091 4111 24 3.8 260.1 1.0X +Parquet Vectorized (Pushdown) 1849 1853 5 8.5 117.6 2.2X +Native ORC Vectorized 4743 4778 46 3.3 301.6 0.9X +Native ORC Vectorized (Pushdown) 1983 1991 7 7.9 126.1 2.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(9, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7745 7749 3 2.0 492.4 1.0X -Parquet Vectorized (Pushdown) 7436 7451 17 2.1 472.8 1.0X -Native ORC Vectorized 8812 8828 11 1.8 560.3 0.9X -Native ORC Vectorized (Pushdown) 8370 8398 27 1.9 532.2 0.9X +Parquet Vectorized 8000 8018 20 2.0 508.6 1.0X +Parquet Vectorized (Pushdown) 7633 7645 7 2.1 485.3 1.0X +Native ORC Vectorized 8981 8991 9 1.8 571.0 0.9X +Native ORC Vectorized (Pushdown) 8512 8613 192 1.8 541.1 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(9, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8527 8541 19 1.8 542.2 1.0X -Parquet Vectorized (Pushdown) 8546 8565 23 1.8 543.3 1.0X -Native ORC Vectorized 9948 10010 73 1.6 632.5 0.9X -Native ORC Vectorized (Pushdown) 9983 9991 9 1.6 634.7 0.9X +Parquet Vectorized 8941 8969 32 1.8 568.5 1.0X +Parquet Vectorized (Pushdown) 8968 9032 115 1.8 570.2 1.0X +Native ORC Vectorized 10136 10159 28 1.6 644.4 0.9X +Native ORC Vectorized (Pushdown) 10162 10196 33 1.5 646.1 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(18, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2968 3005 22 5.3 188.7 1.0X -Parquet Vectorized (Pushdown) 62 65 4 253.1 4.0 47.8X -Native ORC Vectorized 3458 3464 8 4.5 219.9 0.9X -Native ORC Vectorized (Pushdown) 53 56 4 296.9 3.4 56.0X +Parquet Vectorized 2995 3006 9 5.3 190.4 1.0X +Parquet Vectorized (Pushdown) 63 67 4 248.3 4.0 47.3X +Native ORC Vectorized 3431 3441 13 4.6 218.1 0.9X +Native ORC Vectorized (Pushdown) 56 60 3 279.2 3.6 53.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(18, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3608 3617 12 4.4 229.4 1.0X -Parquet Vectorized (Pushdown) 917 927 14 17.2 58.3 3.9X -Native ORC Vectorized 4100 4110 9 3.8 260.6 0.9X -Native ORC Vectorized (Pushdown) 1005 1007 2 15.7 63.9 3.6X +Parquet Vectorized 3718 3727 17 4.2 236.4 1.0X 
+Parquet Vectorized (Pushdown) 1012 1016 2 15.5 64.4 3.7X +Native ORC Vectorized 4142 4181 26 3.8 263.4 0.9X +Native ORC Vectorized (Pushdown) 1056 1063 7 14.9 67.1 3.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(18, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5916 5933 15 2.7 376.1 1.0X -Parquet Vectorized (Pushdown) 4418 4425 6 3.6 280.9 1.3X -Native ORC Vectorized 6642 6683 49 2.4 422.3 0.9X -Native ORC Vectorized (Pushdown) 4902 4908 4 3.2 311.6 1.2X +Parquet Vectorized 6331 6343 17 2.5 402.5 1.0X +Parquet Vectorized (Pushdown) 4843 4855 10 3.2 307.9 1.3X +Native ORC Vectorized 6859 6864 10 2.3 436.1 0.9X +Native ORC Vectorized (Pushdown) 5112 5123 10 3.1 325.0 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(18, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8185 8206 28 1.9 520.4 1.0X -Parquet Vectorized (Pushdown) 7907 7920 11 2.0 502.7 1.0X -Native ORC Vectorized 9169 9182 15 1.7 582.9 0.9X -Native ORC Vectorized (Pushdown) 8834 8848 10 1.8 561.7 0.9X +Parquet Vectorized 8837 8842 4 1.8 561.8 1.0X +Parquet Vectorized (Pushdown) 8563 8571 6 1.8 544.4 1.0X +Native ORC Vectorized 9504 9540 31 1.7 604.2 0.9X +Native ORC Vectorized (Pushdown) 9203 9209 7 1.7 585.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 decimal(38, 2) row (value = 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 4245 4253 12 3.7 269.9 1.0X -Parquet Vectorized (Pushdown) 68 70 3 230.2 4.3 62.1X -Native ORC Vectorized 3501 3524 16 4.5 222.6 1.2X -Native ORC Vectorized (Pushdown) 53 55 3 298.7 3.3 80.6X +Parquet Vectorized 4207 4222 14 3.7 267.5 1.0X +Parquet Vectorized (Pushdown) 69 72 3 227.7 4.4 60.9X +Native ORC Vectorized 3482 3502 19 4.5 221.4 1.2X +Native ORC Vectorized (Pushdown) 56 58 2 283.2 3.5 75.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% decimal(38, 2) rows (value < 1572864): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5051 5064 11 3.1 321.1 1.0X -Parquet Vectorized (Pushdown) 1272 1276 2 12.4 80.9 4.0X -Native ORC Vectorized 4282 4293 18 3.7 272.3 1.2X -Native ORC Vectorized (Pushdown) 1150 1155 6 13.7 73.1 4.4X +Parquet Vectorized 5073 5086 12 3.1 322.5 1.0X +Parquet Vectorized (Pushdown) 1319 1322 3 11.9 83.9 3.8X +Native ORC Vectorized 4294 4312 24 3.7 273.0 1.2X +Native ORC Vectorized (Pushdown) 1158 1164 5 13.6 73.7 4.4X -OpenJDK 
64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% decimal(38, 2) rows (value < 7864320): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8224 8255 60 1.9 522.9 1.0X -Parquet Vectorized (Pushdown) 6149 6159 9 2.6 391.0 1.3X -Native ORC Vectorized 7372 7389 23 2.1 468.7 1.1X -Native ORC Vectorized (Pushdown) 5581 5589 10 2.8 354.9 1.5X +Parquet Vectorized 8420 8438 11 1.9 535.3 1.0X +Parquet Vectorized (Pushdown) 6373 6378 5 2.5 405.2 1.3X +Native ORC Vectorized 7440 7449 13 2.1 473.0 1.1X +Native ORC Vectorized (Pushdown) 5638 5662 14 2.8 358.5 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% decimal(38, 2) rows (value < 14155776): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 11359 11379 15 1.4 722.2 1.0X -Parquet Vectorized (Pushdown) 10971 10975 2 1.4 697.5 1.0X -Native ORC Vectorized 10366 10385 26 1.5 659.1 1.1X -Native ORC Vectorized (Pushdown) 9993 10016 19 1.6 635.3 1.1X +Parquet Vectorized 11729 11741 9 1.3 745.7 1.0X +Parquet Vectorized (Pushdown) 11358 11369 8 1.4 722.1 1.0X +Native ORC Vectorized 10556 10591 25 1.5 671.1 1.1X +Native ORC Vectorized (Pushdown) 10164 10192 18 1.5 646.2 1.2X ================================================================================================ Pushdown benchmark for InSet -> InFilters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6439 6534 103 2.4 409.4 1.0X -Parquet Vectorized (Pushdown) 250 257 8 62.9 15.9 25.7X -Native ORC Vectorized 4639 4653 12 3.4 294.9 1.4X -Native ORC Vectorized (Pushdown) 279 285 5 56.3 17.7 23.1X +Parquet Vectorized 6436 6463 34 2.4 409.2 1.0X +Parquet Vectorized (Pushdown) 255 260 4 61.6 16.2 25.2X +Native ORC Vectorized 4805 4811 6 3.3 305.5 1.3X +Native ORC Vectorized (Pushdown) 296 304 5 53.2 18.8 21.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6443 6456 16 2.4 409.7 1.0X -Parquet Vectorized (Pushdown) 251 255 2 62.6 16.0 25.6X -Native ORC Vectorized 4648 4672 29 3.4 295.5 1.4X -Native ORC Vectorized (Pushdown) 280 286 5 56.3 17.8 23.0X +Parquet Vectorized 6437 6443 6 2.4 409.2 1.0X +Parquet Vectorized (Pushdown) 254 262 7 61.9 16.2 25.3X +Native ORC 
Vectorized 4803 4813 14 3.3 305.3 1.3X +Native ORC Vectorized (Pushdown) 299 305 6 52.6 19.0 21.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 5, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6446 6452 4 2.4 409.8 1.0X -Parquet Vectorized (Pushdown) 254 260 7 62.0 16.1 25.4X -Native ORC Vectorized 4637 4643 4 3.4 294.8 1.4X -Native ORC Vectorized (Pushdown) 279 285 7 56.4 17.7 23.1X +Parquet Vectorized 6431 6444 8 2.4 408.9 1.0X +Parquet Vectorized (Pushdown) 255 259 3 61.8 16.2 25.2X +Native ORC Vectorized 4802 4814 12 3.3 305.3 1.3X +Native ORC Vectorized (Pushdown) 296 300 3 53.1 18.8 21.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6455 6467 10 2.4 410.4 1.0X -Parquet Vectorized (Pushdown) 271 274 3 58.1 17.2 23.8X -Native ORC Vectorized 4651 4666 17 3.4 295.7 1.4X -Native ORC Vectorized (Pushdown) 292 297 4 53.9 18.5 22.1X +Parquet Vectorized 6453 6463 12 2.4 410.2 1.0X +Parquet Vectorized (Pushdown) 275 279 4 57.1 17.5 23.4X +Native ORC Vectorized 4794 4807 13 3.3 304.8 1.3X +Native ORC Vectorized (Pushdown) 310 314 3 50.8 19.7 20.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6451 6460 6 2.4 410.2 1.0X -Parquet Vectorized (Pushdown) 270 277 8 58.3 17.2 23.9X -Native ORC Vectorized 4646 4656 11 3.4 295.4 1.4X -Native ORC Vectorized (Pushdown) 296 300 3 53.2 18.8 21.8X +Parquet Vectorized 6445 6456 7 2.4 409.8 1.0X +Parquet Vectorized (Pushdown) 271 282 13 58.1 17.2 23.8X +Native ORC Vectorized 4809 4828 19 3.3 305.8 1.3X +Native ORC Vectorized (Pushdown) 314 317 2 50.1 20.0 20.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 10, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6462 6471 9 2.4 410.8 1.0X -Parquet Vectorized (Pushdown) 270 280 9 58.3 17.2 23.9X -Native ORC Vectorized 4648 4655 6 3.4 295.5 1.4X -Native ORC Vectorized (Pushdown) 293 298 5 53.7 18.6 22.1X +Parquet Vectorized 6457 6464 13 2.4 410.5 1.0X +Parquet Vectorized (Pushdown) 272 276 3 57.8 17.3 23.7X +Native ORC Vectorized 4811 4837 17 3.3 305.9 1.3X +Native ORC Vectorized (Pushdown) 309 314 3 50.9 19.6 20.9X -OpenJDK 64-Bit Server VM 
17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6665 6681 17 2.4 423.8 1.0X -Parquet Vectorized (Pushdown) 875 882 6 18.0 55.6 7.6X -Native ORC Vectorized 4869 4883 13 3.2 309.6 1.4X -Native ORC Vectorized (Pushdown) 395 397 2 39.8 25.1 16.9X +Parquet Vectorized 6651 6690 50 2.4 422.8 1.0X +Parquet Vectorized (Pushdown) 855 858 5 18.4 54.4 7.8X +Native ORC Vectorized 5035 5071 26 3.1 320.1 1.3X +Native ORC Vectorized (Pushdown) 414 417 4 38.0 26.3 16.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6659 6670 10 2.4 423.4 1.0X -Parquet Vectorized (Pushdown) 3157 3199 50 5.0 200.7 2.1X -Native ORC Vectorized 4859 4874 14 3.2 308.9 1.4X -Native ORC Vectorized (Pushdown) 424 425 1 37.1 27.0 15.7X +Parquet Vectorized 6652 6657 5 2.4 422.9 1.0X +Parquet Vectorized (Pushdown) 3465 3482 15 4.5 220.3 1.9X +Native ORC Vectorized 5025 5033 5 3.1 319.5 1.3X +Native ORC Vectorized (Pushdown) 441 442 2 35.7 28.0 15.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 50, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6667 6679 18 2.4 423.9 1.0X -Parquet Vectorized (Pushdown) 5982 5991 7 2.6 380.3 1.1X -Native ORC Vectorized 4879 4887 12 3.2 310.2 1.4X -Native ORC Vectorized (Pushdown) 423 426 2 37.2 26.9 15.8X +Parquet Vectorized 6656 6671 12 2.4 423.2 1.0X +Parquet Vectorized (Pushdown) 5728 5738 11 2.7 364.2 1.2X +Native ORC Vectorized 5040 5055 10 3.1 320.5 1.3X +Native ORC Vectorized (Pushdown) 444 447 4 35.4 28.3 15.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6599 6606 10 2.4 419.5 1.0X -Parquet Vectorized (Pushdown) 887 891 5 17.7 56.4 7.4X -Native ORC Vectorized 4798 4809 7 3.3 305.1 1.4X -Native ORC Vectorized (Pushdown) 482 486 6 32.7 30.6 13.7X +Parquet Vectorized 6596 6605 8 2.4 419.4 1.0X +Parquet Vectorized (Pushdown) 887 890 3 17.7 56.4 7.4X +Native ORC Vectorized 4967 4974 7 3.2 315.8 1.3X +Native ORC Vectorized (Pushdown) 511 515 4 30.8 32.5 12.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 
64-Core Processor InSet -> InFilters (values count: 100, distribution: 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6611 6628 16 2.4 420.3 1.0X -Parquet Vectorized (Pushdown) 3286 3292 10 4.8 208.9 2.0X -Native ORC Vectorized 4801 4806 5 3.3 305.2 1.4X -Native ORC Vectorized (Pushdown) 558 564 6 28.2 35.4 11.9X +Parquet Vectorized 6618 6626 7 2.4 420.8 1.0X +Parquet Vectorized (Pushdown) 3375 3385 8 4.7 214.6 2.0X +Native ORC Vectorized 4988 4995 9 3.2 317.1 1.3X +Native ORC Vectorized (Pushdown) 587 591 7 26.8 37.3 11.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor InSet -> InFilters (values count: 100, distribution: 90): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 6616 6630 20 2.4 420.6 1.0X -Parquet Vectorized (Pushdown) 5983 5988 3 2.6 380.4 1.1X -Native ORC Vectorized 4808 4815 7 3.3 305.7 1.4X -Native ORC Vectorized (Pushdown) 564 567 2 27.9 35.8 11.7X +Parquet Vectorized 6604 6626 19 2.4 419.9 1.0X +Parquet Vectorized (Pushdown) 5909 5920 8 2.7 375.7 1.1X +Native ORC Vectorized 4987 4991 4 3.2 317.0 1.3X +Native ORC Vectorized (Pushdown) 601 604 2 26.2 38.2 11.0X ================================================================================================ Pushdown benchmark for tinyint ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 tinyint row (value = CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3117 3209 82 5.0 198.2 1.0X -Parquet Vectorized (Pushdown) 100 102 5 157.7 6.3 31.3X -Native ORC Vectorized 2264 2271 9 6.9 144.0 1.4X -Native ORC Vectorized (Pushdown) 112 115 3 140.3 7.1 27.8X +Parquet Vectorized 3095 3112 16 5.1 196.8 1.0X +Parquet Vectorized (Pushdown) 101 103 3 156.4 6.4 30.8X +Native ORC Vectorized 2387 2394 5 6.6 151.8 1.3X +Native ORC Vectorized (Pushdown) 119 121 3 132.5 7.5 26.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% tinyint rows (value < CAST(12 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3624 3632 9 4.3 230.4 1.0X -Parquet Vectorized (Pushdown) 880 881 1 17.9 55.9 4.1X -Native ORC Vectorized 2736 2742 4 5.7 173.9 1.3X -Native ORC Vectorized (Pushdown) 775 783 11 20.3 49.3 4.7X +Parquet Vectorized 3619 3637 15 4.3 230.1 1.0X +Parquet Vectorized (Pushdown) 879 886 10 17.9 55.9 4.1X +Native ORC Vectorized 2876 2885 5 5.5 182.9 1.3X +Native ORC Vectorized (Pushdown) 808 809 2 19.5 51.4 4.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% tinyint rows (value < CAST(63 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5679 5688 10 2.8 361.0 1.0X -Parquet Vectorized (Pushdown) 4159 4167 7 3.8 264.4 1.4X -Native ORC Vectorized 4832 4846 18 3.3 307.2 1.2X -Native ORC Vectorized (Pushdown) 3744 3753 12 4.2 238.0 1.5X +Parquet Vectorized 5707 5724 18 2.8 362.8 1.0X +Parquet Vectorized (Pushdown) 4215 4219 3 3.7 268.0 1.4X +Native ORC Vectorized 4873 4884 9 3.2 309.8 1.2X +Native ORC Vectorized (Pushdown) 3724 3748 14 4.2 236.8 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% tinyint rows (value < CAST(114 AS tinyint)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7666 7681 14 2.1 487.4 1.0X -Parquet Vectorized (Pushdown) 7380 7403 34 2.1 469.2 1.0X -Native ORC Vectorized 6779 6792 12 2.3 431.0 1.1X -Native ORC Vectorized (Pushdown) 6594 6606 23 2.4 419.3 1.2X +Parquet Vectorized 7806 7815 9 2.0 496.3 1.0X +Parquet Vectorized (Pushdown) 7548 7552 3 2.1 479.9 1.0X +Native ORC Vectorized 7017 7034 30 2.2 446.1 1.1X +Native ORC Vectorized (Pushdown) 6797 6808 13 2.3 432.2 1.1X ================================================================================================ Pushdown benchmark for Timestamp ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as INT96 row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3202 3216 9 4.9 203.6 1.0X -Parquet Vectorized (Pushdown) 3218 3226 9 4.9 204.6 1.0X -Native ORC Vectorized 2223 2233 6 7.1 141.4 1.4X -Native ORC Vectorized (Pushdown) 38 41 4 413.1 2.4 84.1X +Parquet Vectorized 3215 3233 16 4.9 204.4 1.0X +Parquet Vectorized (Pushdown) 3202 3213 11 4.9 203.6 1.0X +Native ORC Vectorized 2269 2275 6 6.9 144.2 1.4X +Native ORC Vectorized (Pushdown) 40 43 3 392.1 2.6 80.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as INT96 rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3746 3770 19 4.2 238.2 1.0X -Parquet Vectorized (Pushdown) 3744 3760 18 4.2 238.0 1.0X -Native ORC Vectorized 2757 2768 14 5.7 175.3 1.4X -Native ORC Vectorized (Pushdown) 782 786 4 20.1 49.7 4.8X +Parquet Vectorized 3776 3799 32 4.2 240.1 1.0X +Parquet Vectorized 
(Pushdown) 3783 3786 4 4.2 240.5 1.0X +Native ORC Vectorized 2818 2837 17 5.6 179.1 1.3X +Native ORC Vectorized (Pushdown) 807 812 3 19.5 51.3 4.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as INT96 rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5956 5971 15 2.6 378.7 1.0X -Parquet Vectorized (Pushdown) 5948 5964 19 2.6 378.1 1.0X -Native ORC Vectorized 4949 4980 44 3.2 314.6 1.2X -Native ORC Vectorized (Pushdown) 3785 3790 5 4.2 240.6 1.6X +Parquet Vectorized 6080 6082 2 2.6 386.6 1.0X +Parquet Vectorized (Pushdown) 6072 6103 30 2.6 386.0 1.0X +Native ORC Vectorized 5071 5086 18 3.1 322.4 1.2X +Native ORC Vectorized (Pushdown) 3911 3920 6 4.0 248.6 1.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as INT96 rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 8129 8135 8 1.9 516.8 1.0X -Parquet Vectorized (Pushdown) 8126 8143 28 1.9 516.6 1.0X -Native ORC Vectorized 7117 7153 25 2.2 452.5 1.1X -Native ORC Vectorized (Pushdown) 6898 6936 28 2.3 438.5 1.2X +Parquet Vectorized 8437 8445 7 1.9 536.4 1.0X +Parquet Vectorized (Pushdown) 8452 8458 4 1.9 537.4 1.0X +Native ORC Vectorized 7424 7504 90 2.1 472.0 1.1X +Native ORC Vectorized (Pushdown) 7202 7230 25 2.2 457.9 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as TIMESTAMP_MICROS row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 2990 2994 3 5.3 190.1 1.0X -Parquet Vectorized (Pushdown) 61 64 4 256.0 3.9 48.7X -Native ORC Vectorized 2229 2236 8 7.1 141.7 1.3X -Native ORC Vectorized (Pushdown) 38 41 4 416.9 2.4 79.3X +Parquet Vectorized 2975 2983 9 5.3 189.1 1.0X +Parquet Vectorized (Pushdown) 63 68 6 247.8 4.0 46.9X +Native ORC Vectorized 2264 2282 35 6.9 144.0 1.3X +Native ORC Vectorized (Pushdown) 40 43 4 392.7 2.5 74.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3529 3538 14 4.5 224.3 1.0X -Parquet Vectorized (Pushdown) 900 906 11 17.5 57.2 3.9X -Native ORC Vectorized 2754 2757 3 5.7 175.1 1.3X -Native ORC Vectorized 
(Pushdown) 783 784 2 20.1 49.8 4.5X +Parquet Vectorized 3552 3568 25 4.4 225.8 1.0X +Parquet Vectorized (Pushdown) 914 917 2 17.2 58.1 3.9X +Native ORC Vectorized 2827 2832 7 5.6 179.8 1.3X +Native ORC Vectorized (Pushdown) 813 816 2 19.3 51.7 4.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5727 5733 11 2.7 364.1 1.0X -Parquet Vectorized (Pushdown) 4263 4269 5 3.7 271.0 1.3X -Native ORC Vectorized 4941 4979 67 3.2 314.1 1.2X -Native ORC Vectorized (Pushdown) 3776 3792 19 4.2 240.1 1.5X +Parquet Vectorized 5843 5849 6 2.7 371.5 1.0X +Parquet Vectorized (Pushdown) 4410 4440 54 3.6 280.4 1.3X +Native ORC Vectorized 5066 5081 16 3.1 322.1 1.2X +Native ORC Vectorized (Pushdown) 3908 3916 6 4.0 248.5 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as TIMESTAMP_MICROS rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7900 7907 6 2.0 502.3 1.0X -Parquet Vectorized (Pushdown) 7632 7636 5 2.1 485.2 1.0X -Native ORC Vectorized 7124 7140 19 2.2 452.9 1.1X -Native ORC Vectorized (Pushdown) 6902 6917 10 2.3 438.8 1.1X +Parquet Vectorized 8214 8218 4 1.9 522.2 1.0X +Parquet Vectorized (Pushdown) 7953 8002 63 2.0 505.6 1.0X +Native ORC Vectorized 7436 7453 25 2.1 472.8 1.1X +Native ORC Vectorized (Pushdown) 7215 7248 46 2.2 458.7 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 timestamp stored as TIMESTAMP_MILLIS row (value = timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3008 3060 72 5.2 191.3 1.0X -Parquet Vectorized (Pushdown) 62 63 2 255.5 3.9 48.9X -Native ORC Vectorized 2227 2238 15 7.1 141.6 1.4X -Native ORC Vectorized (Pushdown) 38 42 5 418.2 2.4 80.0X +Parquet Vectorized 3003 3009 7 5.2 190.9 1.0X +Parquet Vectorized (Pushdown) 63 67 4 248.8 4.0 47.5X +Native ORC Vectorized 2258 2262 4 7.0 143.6 1.3X +Native ORC Vectorized (Pushdown) 40 42 3 395.7 2.5 75.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 10% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(1572864)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 3551 3559 7 4.4 225.7 
1.0X -Parquet Vectorized (Pushdown) 901 907 5 17.5 57.3 3.9X -Native ORC Vectorized 2758 2762 4 5.7 175.3 1.3X -Native ORC Vectorized (Pushdown) 780 783 3 20.2 49.6 4.5X +Parquet Vectorized 3586 3599 11 4.4 228.0 1.0X +Parquet Vectorized (Pushdown) 916 919 3 17.2 58.2 3.9X +Native ORC Vectorized 2836 2850 19 5.5 180.3 1.3X +Native ORC Vectorized (Pushdown) 811 815 3 19.4 51.6 4.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 50% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(7864320)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 5737 5751 19 2.7 364.8 1.0X -Parquet Vectorized (Pushdown) 4269 4273 4 3.7 271.4 1.3X -Native ORC Vectorized 4939 4943 3 3.2 314.0 1.2X -Native ORC Vectorized (Pushdown) 3784 3787 2 4.2 240.6 1.5X +Parquet Vectorized 5866 5881 27 2.7 372.9 1.0X +Parquet Vectorized (Pushdown) 4410 4417 8 3.6 280.4 1.3X +Native ORC Vectorized 5077 5089 19 3.1 322.8 1.2X +Native ORC Vectorized (Pushdown) 3911 3919 8 4.0 248.6 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 90% timestamp stored as TIMESTAMP_MILLIS rows (value < timestamp_seconds(14155776)): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -Parquet Vectorized 7918 7942 20 2.0 503.4 1.0X -Parquet Vectorized (Pushdown) 7645 7650 5 2.1 486.1 1.0X -Native ORC Vectorized 7120 7138 19 2.2 452.7 1.1X -Native ORC Vectorized (Pushdown) 6908 6931 17 2.3 439.2 1.1X +Parquet Vectorized 8221 8229 7 1.9 522.7 1.0X +Parquet Vectorized (Pushdown) 7970 7981 9 2.0 506.7 1.0X +Native ORC Vectorized 7426 7442 23 2.1 472.1 1.1X +Native ORC Vectorized (Pushdown) 7193 7204 9 2.2 457.3 1.1X ================================================================================================ Pushdown benchmark with many filters ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 row with 1 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 61 63 3 0.0 60809084.0 1.0X -Parquet Vectorized (Pushdown) 62 65 4 0.0 62249744.0 1.0X -Native ORC Vectorized 55 58 3 0.0 54905858.0 1.1X -Native ORC Vectorized (Pushdown) 56 61 5 0.0 56310471.0 1.1X +Parquet Vectorized 63 65 3 0.0 63314623.0 1.0X +Parquet Vectorized (Pushdown) 64 66 3 0.0 64051869.0 1.0X +Native ORC Vectorized 58 59 2 0.0 57520375.0 1.1X +Native ORC Vectorized (Pushdown) 60 64 8 0.0 59737469.0 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 row with 250 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 377 386 7 0.0 377453808.0 1.0X -Parquet Vectorized (Pushdown) 388 398 10 0.0 387673530.0 1.0X -Native ORC Vectorized 373 376 3 0.0 372643927.0 1.0X -Native ORC Vectorized (Pushdown) 377 388 8 0.0 377351458.0 1.0X +Parquet Vectorized 423 431 11 0.0 422883307.0 1.0X +Parquet Vectorized (Pushdown) 427 431 2 0.0 427230106.0 1.0X +Native ORC Vectorized 407 416 8 0.0 406712827.0 1.0X +Native ORC Vectorized (Pushdown) 418 423 4 0.0 418468099.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select 1 row with 500 filters: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Parquet Vectorized 2008 2050 47 0.0 2007664554.0 1.0X -Parquet Vectorized (Pushdown) 2028 2059 34 0.0 2028080865.0 1.0X -Native ORC Vectorized 2007 2015 8 0.0 2007404672.0 1.0X -Native ORC Vectorized (Pushdown) 2022 2072 56 0.0 2021625278.0 1.0X +Parquet Vectorized 2351 2379 30 0.0 2351073582.0 1.0X +Parquet Vectorized (Pushdown) 2359 2383 14 0.0 2358892376.0 1.0X +Native ORC Vectorized 2349 2376 19 0.0 2348555337.0 1.0X +Native ORC Vectorized (Pushdown) 2351 2372 17 0.0 2350854713.0 1.0X diff --git a/sql/core/benchmarks/GenerateExecBenchmark-jdk21-results.txt b/sql/core/benchmarks/GenerateExecBenchmark-jdk21-results.txt index 5f36f7900c51e..c2e7f658a4dc6 100644 --- a/sql/core/benchmarks/GenerateExecBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/GenerateExecBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ GenerateExec benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor GenerateExec Benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -GenerateExec Benchmark wholestage off 70956 71507 779 1.4 709.6 1.0X -GenerateExec Benchmark wholestage on 20836 20862 25 4.8 208.4 3.4X +GenerateExec Benchmark wholestage off 88754 89024 381 1.1 887.5 1.0X +GenerateExec Benchmark wholestage on 26904 27017 173 3.7 269.0 3.3X diff --git a/sql/core/benchmarks/GenerateExecBenchmark-results.txt b/sql/core/benchmarks/GenerateExecBenchmark-results.txt index 309b7691c7e57..8398bfdefbb40 100644 --- a/sql/core/benchmarks/GenerateExecBenchmark-results.txt +++ b/sql/core/benchmarks/GenerateExecBenchmark-results.txt @@ -2,11 +2,11 @@ GenerateExec benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor GenerateExec Benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -GenerateExec Benchmark wholestage off 68787 69054 378 1.5 687.9 1.0X -GenerateExec Benchmark wholestage on 22124 22203 69 4.5 221.2 3.1X +GenerateExec Benchmark wholestage off 
85096 85378 398 1.2 851.0 1.0X +GenerateExec Benchmark wholestage on 25729 25905 115 3.9 257.3 3.3X diff --git a/sql/core/benchmarks/HashedRelationMetricsBenchmark-jdk21-results.txt b/sql/core/benchmarks/HashedRelationMetricsBenchmark-jdk21-results.txt index 22ebdc256a5c2..b60eba694717c 100644 --- a/sql/core/benchmarks/HashedRelationMetricsBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/HashedRelationMetricsBenchmark-jdk21-results.txt @@ -2,10 +2,10 @@ LongToUnsafeRowMap metrics ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LongToUnsafeRowMap metrics: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -LongToUnsafeRowMap 261 266 6 1.9 522.3 1.0X +LongToUnsafeRowMap 361 363 2 1.4 721.9 1.0X diff --git a/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt index 791e6d3af1c0e..d66030f047958 100644 --- a/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt +++ b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt @@ -2,10 +2,10 @@ LongToUnsafeRowMap metrics ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor LongToUnsafeRowMap metrics: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -LongToUnsafeRowMap 252 256 4 2.0 503.3 1.0X +LongToUnsafeRowMap 380 390 7 1.3 760.6 1.0X diff --git a/sql/core/benchmarks/InExpressionBenchmark-jdk21-results.txt b/sql/core/benchmarks/InExpressionBenchmark-jdk21-results.txt index 28468f39d226a..02eef14d6c991 100644 --- a/sql/core/benchmarks/InExpressionBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/InExpressionBenchmark-jdk21-results.txt @@ -2,739 +2,739 @@ In Expression Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 50 67 12 198.3 5.0 1.0X -InSet expression 33 41 5 300.1 3.3 1.5X +In expression 28 34 7 358.3 2.8 1.0X +InSet expression 51 65 15 194.7 5.1 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 50 55 5 199.0 5.0 1.0X -InSet expression 35 41 7 289.5 3.5 1.5X +In expression 31 35 5 326.8 3.1 1.0X +InSet expression 50 54 3 200.1 5.0 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure 
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 82 88 7 122.3 8.2 1.0X -InSet expression 45 48 3 223.8 4.5 1.8X +In expression 59 62 2 168.2 5.9 1.0X +InSet expression 66 75 11 150.9 6.6 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 136 140 3 73.3 13.6 1.0X -InSet expression 64 67 4 157.3 6.4 2.1X +In expression 97 103 4 103.0 9.7 1.0X +InSet expression 79 83 3 126.7 7.9 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 247 250 3 40.6 24.7 1.0X -InSet expression 105 107 2 95.3 10.5 2.4X +In expression 171 176 4 58.4 17.1 1.0X +InSet expression 107 112 4 93.5 10.7 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 410 416 5 24.4 41.0 1.0X -InSet expression 183 188 3 54.7 18.3 2.2X +In expression 387 403 13 25.8 38.7 1.0X +InSet expression 188 196 8 53.3 18.8 2.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 26 30 5 380.3 2.6 1.0X -InSet expression 20 22 4 508.5 2.0 1.3X +In expression 26 31 4 379.5 2.6 1.0X +InSet expression 73 78 4 137.2 7.3 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 34 39 4 290.0 3.4 1.0X -InSet expression 21 25 4 472.2 2.1 1.6X +In expression 36 40 4 278.6 3.6 1.0X +InSet expression 87 91 3 114.3 8.7 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 57 60 4 175.0 5.7 1.0X -InSet expression 21 25 5 466.1 2.1 2.7X +In expression 59 63 6 170.4 5.9 
1.0X +InSet expression 87 91 3 115.0 8.7 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 101 106 8 98.6 10.1 1.0X -InSet expression 21 24 3 465.5 2.1 4.7X +In expression 99 103 3 101.2 9.9 1.0X +InSet expression 119 125 9 84.2 11.9 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 183 186 3 54.6 18.3 1.0X -InSet expression 22 24 3 458.7 2.2 8.4X +In expression 182 186 4 54.8 18.2 1.0X +InSet expression 105 111 4 95.2 10.5 1.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 364 370 4 27.4 36.4 1.0X -InSet expression 23 26 4 427.7 2.3 15.6X +In expression 343 356 14 29.1 34.3 1.0X +InSet expression 117 123 3 85.1 11.7 2.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 886 902 12 11.3 88.6 1.0X -InSet expression 25 27 3 398.9 2.5 35.4X +In expression 523 540 10 19.1 52.3 1.0X +InSet expression 129 136 4 77.6 12.9 4.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 656 707 61 15.2 65.6 1.0X -InSet expression 26 30 4 383.1 2.6 25.1X +In expression 683 709 21 14.6 68.3 1.0X +InSet expression 142 149 6 70.7 14.2 4.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 800 802 3 12.5 80.0 1.0X -InSet expression 193 195 4 51.9 19.3 4.2X +In expression 1370 1396 18 7.3 137.0 1.0X +InSet expression 161 167 4 62.1 16.1 8.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 23 25 3 435.4 2.3 1.0X -InSet expression 25 27 3 404.5 2.5 0.9X +In expression 25 28 3 394.1 2.5 1.0X +InSet expression 69 72 3 145.8 6.9 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 33 6 339.8 2.9 1.0X -InSet expression 27 30 6 368.3 2.7 1.1X +In expression 31 35 5 318.9 3.1 1.0X +InSet expression 89 92 3 112.2 8.9 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 61 65 4 163.6 6.1 1.0X -InSet expression 34 37 5 296.4 3.4 1.8X +In expression 57 61 4 176.3 5.7 1.0X +InSet expression 112 116 6 89.6 11.2 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 106 109 4 94.7 10.6 1.0X -InSet expression 35 38 3 282.5 3.5 3.0X +In expression 101 104 2 99.0 10.1 1.0X +InSet expression 122 128 3 81.8 12.2 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 200 202 3 50.0 20.0 1.0X -InSet expression 41 43 3 245.9 4.1 4.9X +In expression 178 183 5 56.3 17.8 1.0X +InSet expression 109 114 3 91.7 10.9 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 380 383 4 26.3 38.0 1.0X -InSet expression 46 51 6 215.3 4.6 8.2X +In expression 315 323 9 31.7 31.5 1.0X +InSet expression 115 119 2 86.8 11.5 2.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 565 567 4 17.7 56.5 1.0X -InSet expression 50 54 5 200.4 5.0 11.3X +In expression 478 483 4 20.9 47.8 1.0X +InSet expression 125 131 4 80.3 12.5 3.8X -OpenJDK 64-Bit Server VM 
21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 756 757 1 13.2 75.6 1.0X -InSet expression 54 56 3 185.5 5.4 14.0X +In expression 628 649 12 15.9 62.8 1.0X +InSet expression 140 145 8 71.6 14.0 4.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 883 885 2 11.3 88.3 1.0X -InSet expression 191 194 2 52.5 19.1 4.6X +In expression 999 1005 5 10.0 99.9 1.0X +InSet expression 153 157 4 65.2 15.3 6.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 19 20 2 527.4 1.9 1.0X -InSet expression 16 18 3 635.6 1.6 1.2X +In expression 23 25 3 427.9 2.3 1.0X +InSet expression 72 76 2 138.3 7.2 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 26 30 3 390.3 2.6 1.0X -InSet expression 16 18 3 637.0 1.6 1.6X +In expression 31 34 4 324.0 3.1 1.0X +InSet expression 88 92 3 113.2 8.8 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 53 59 3 189.4 5.3 1.0X -InSet expression 16 17 2 639.7 1.6 3.4X +In expression 62 66 3 161.3 6.2 1.0X +InSet expression 88 92 3 113.7 8.8 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 98 103 3 102.1 9.8 1.0X -InSet expression 16 18 3 626.5 1.6 6.1X +In expression 104 108 3 96.2 10.4 1.0X +InSet expression 123 127 3 81.5 12.3 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 171 173 3 58.5 17.1 1.0X -InSet 
expression 17 18 2 602.2 1.7 10.3X +In expression 179 184 4 55.9 17.9 1.0X +InSet expression 105 111 8 95.4 10.5 1.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 328 332 5 30.4 32.8 1.0X -InSet expression 19 22 3 516.2 1.9 17.0X +In expression 327 330 3 30.6 32.7 1.0X +InSet expression 113 118 3 88.2 11.3 2.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 486 491 5 20.6 48.6 1.0X -InSet expression 20 23 3 490.2 2.0 23.8X +In expression 473 477 4 21.1 47.3 1.0X +InSet expression 123 129 4 81.0 12.3 3.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 643 650 5 15.6 64.3 1.0X -InSet expression 23 25 4 442.7 2.3 28.5X +In expression 614 635 19 16.3 61.4 1.0X +InSet expression 137 142 3 72.8 13.7 4.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 801 804 6 12.5 80.1 1.0X -InSet expression 168 171 3 59.6 16.8 4.8X +In expression 780 788 9 12.8 78.0 1.0X +InSet expression 151 157 3 66.2 15.1 5.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 14 16 2 707.0 1.4 1.0X -InSet expression 11 12 2 897.1 1.1 1.3X +In expression 19 21 3 530.8 1.9 1.0X +InSet expression 70 74 2 143.5 7.0 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 22 24 2 447.6 2.2 1.0X -InSet expression 21 23 3 465.2 2.1 1.0X +In expression 26 29 4 387.9 2.6 1.0X +InSet expression 90 93 2 111.5 9.0 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 47 49 3 212.5 4.7 1.0X -InSet expression 18 20 3 544.5 1.8 2.6X +In expression 52 56 4 193.0 5.2 1.0X +InSet expression 99 103 2 100.8 9.9 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 87 88 2 115.0 8.7 1.0X -InSet expression 19 21 3 527.5 1.9 4.6X +In expression 92 96 3 108.4 9.2 1.0X +InSet expression 121 125 4 82.9 12.1 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 168 169 3 59.7 16.8 1.0X -InSet expression 19 21 3 516.5 1.9 8.7X +In expression 168 174 5 59.5 16.8 1.0X +InSet expression 110 115 3 90.8 11.0 1.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 323 328 4 30.9 32.3 1.0X -InSet expression 21 22 3 480.7 2.1 15.6X +In expression 318 329 8 31.4 31.8 1.0X +InSet expression 114 118 3 87.6 11.4 2.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 487 490 5 20.5 48.7 1.0X -InSet expression 25 27 3 394.8 2.5 19.2X +In expression 467 473 5 21.4 46.7 1.0X +InSet expression 124 128 3 80.7 12.4 3.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 644 647 2 15.5 64.4 1.0X -InSet expression 25 27 3 404.5 2.5 26.0X +In expression 620 628 5 16.1 62.0 1.0X +InSet expression 135 143 7 74.1 13.5 4.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 800 837 69 12.5 80.0 1.0X -InSet expression 167 172 5 60.0 16.7 4.8X +In expression 772 804 60 12.9 77.2 1.0X +InSet expression 147 153 4 68.1 14.7 5.3X -OpenJDK 64-Bit Server VM 
21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 18 19 2 564.4 1.8 1.0X -InSet expression 81 83 2 123.3 8.1 0.2X +In expression 17 19 3 576.6 1.7 1.0X +InSet expression 67 70 2 149.6 6.7 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 24 26 2 414.0 2.4 1.0X -InSet expression 96 100 5 104.3 9.6 0.3X +In expression 27 29 3 377.2 2.7 1.0X +InSet expression 85 88 2 117.5 8.5 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 50 52 3 201.2 5.0 1.0X -InSet expression 97 100 2 102.8 9.7 0.5X +In expression 47 51 3 212.0 4.7 1.0X +InSet expression 83 87 3 120.8 8.3 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 89 93 3 112.8 8.9 1.0X -InSet expression 129 132 2 77.6 12.9 0.7X +In expression 86 90 3 116.7 8.6 1.0X +InSet expression 117 122 2 85.4 11.7 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 168 175 5 59.7 16.8 1.0X -InSet expression 115 117 2 86.8 11.5 1.5X +In expression 160 166 3 62.4 16.0 1.0X +InSet expression 100 104 3 100.4 10.0 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 328 332 6 30.5 32.8 1.0X -InSet expression 123 125 2 81.5 12.3 2.7X +In expression 309 321 10 32.3 30.9 1.0X +InSet expression 106 109 2 94.6 10.6 2.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 35 39 4 284.2 3.5 1.0X -InSet expression 104 106 3 96.5 
10.4 0.3X +In expression 34 37 3 294.2 3.4 1.0X +InSet expression 70 74 2 142.0 7.0 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 59 62 3 168.5 5.9 1.0X -InSet expression 133 135 2 75.2 13.3 0.4X +In expression 55 59 3 180.6 5.5 1.0X +InSet expression 89 94 3 112.4 8.9 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 147 149 3 68.1 14.7 1.0X -InSet expression 130 132 2 76.8 13.0 1.1X +In expression 135 138 2 74.2 13.5 1.0X +InSet expression 91 96 3 110.1 9.1 1.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 226 229 4 44.3 22.6 1.0X -InSet expression 186 189 3 53.8 18.6 1.2X +In expression 225 228 3 44.4 22.5 1.0X +InSet expression 131 137 5 76.4 13.1 1.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 380 385 4 26.3 38.0 1.0X -InSet expression 146 148 3 68.7 14.6 2.6X +In expression 378 387 8 26.5 37.8 1.0X +InSet expression 108 112 3 92.8 10.8 3.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 2090 2139 28 4.8 209.0 1.0X -InSet expression 150 152 1 66.6 15.0 13.9X +In expression 1764 1808 77 5.7 176.4 1.0X +InSet expression 110 113 2 91.0 11.0 16.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 33 35 3 306.5 3.3 1.0X -InSet expression 117 119 1 85.5 11.7 0.3X +In expression 31 34 3 318.3 3.1 1.0X +InSet expression 73 78 6 137.3 7.3 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 60 63 3 167.6 6.0 1.0X -InSet expression 145 146 1 68.9 14.5 0.4X +In expression 57 61 3 175.1 5.7 1.0X +InSet expression 91 96 3 109.3 9.1 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 150 153 3 66.5 15.0 1.0X -InSet expression 141 143 2 71.0 14.1 1.1X +In expression 138 143 4 72.4 13.8 1.0X +InSet expression 92 95 3 108.5 9.2 1.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 226 229 3 44.2 22.6 1.0X -InSet expression 193 195 2 51.7 19.3 1.2X +In expression 211 215 4 47.4 21.1 1.0X +InSet expression 136 140 4 73.3 13.6 1.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 383 387 4 26.1 38.3 1.0X -InSet expression 157 158 1 63.7 15.7 2.4X +In expression 366 374 7 27.3 36.6 1.0X +InSet expression 111 115 2 90.1 11.1 3.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 2213 2250 73 4.5 221.3 1.0X -InSet expression 159 162 2 62.8 15.9 13.9X +In expression 2083 2171 91 4.8 208.3 1.0X +InSet expression 111 116 3 90.2 11.1 18.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 19 20 1 52.4 19.1 1.0X -InSet expression 50 52 2 20.1 49.7 0.4X +In expression 18 20 2 55.8 17.9 1.0X +InSet expression 53 56 2 18.9 52.9 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 25 26 1 40.4 24.7 1.0X -InSet expression 51 54 5 19.4 51.5 0.5X +In expression 23 25 2 42.9 23.3 1.0X +InSet expression 55 57 2 18.3 54.7 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server 
VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 39 41 1 25.6 39.1 1.0X -InSet expression 52 55 1 19.1 52.5 0.7X +In expression 37 39 2 27.0 37.0 1.0X +InSet expression 56 58 2 17.9 55.9 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 77 78 1 13.0 76.8 1.0X -InSet expression 58 60 1 17.2 58.1 1.3X +In expression 73 78 3 13.7 73.0 1.0X +InSet expression 61 64 3 16.5 60.6 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 240 241 1 4.2 239.5 1.0X -InSet expression 56 58 1 17.8 56.0 4.3X +In expression 224 227 3 4.5 223.7 1.0X +InSet expression 59 61 1 16.9 59.1 3.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 570 585 31 1.8 569.9 1.0X -InSet expression 57 59 1 17.5 57.2 10.0X +In expression 538 545 4 1.9 538.4 1.0X +InSet expression 60 63 2 16.5 60.5 8.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 4 6 2 222.2 4.5 1.0X -InSet expression 4 5 2 227.8 4.4 1.0X +In expression 5 6 2 218.3 4.6 1.0X +InSet expression 4 6 2 224.6 4.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 2 217.8 4.6 1.0X -InSet expression 4 5 2 225.6 4.4 1.0X +In expression 5 6 2 219.5 4.6 1.0X +InSet expression 5 6 2 220.7 4.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 2 204.1 4.9 1.0X -InSet expression 5 6 2 201.2 5.0 1.0X +In expression 5 6 2 201.5 
5.0 1.0X +InSet expression 5 6 2 203.5 4.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 6 7 2 172.2 5.8 1.0X -InSet expression 6 7 2 174.6 5.7 1.0X +In expression 6 7 2 174.7 5.7 1.0X +InSet expression 6 6 2 175.7 5.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 8 9 2 133.3 7.5 1.0X -InSet expression 8 9 2 131.1 7.6 1.0X +In expression 7 9 2 137.7 7.3 1.0X +InSet expression 7 8 2 137.6 7.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 11 12 2 91.7 10.9 1.0X -InSet expression 11 13 2 91.9 10.9 1.0X +In expression 11 12 2 93.7 10.7 1.0X +InSet expression 11 12 1 94.2 10.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 28 29 1 35.5 28.2 1.0X -InSet expression 39 40 1 25.5 39.2 0.7X +In expression 27 29 3 36.9 27.1 1.0X +InSet expression 37 39 1 26.8 37.4 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 33 35 3 30.3 33.0 1.0X -InSet expression 42 43 1 23.9 41.9 0.8X +In expression 32 33 1 31.4 31.9 1.0X +InSet expression 40 42 2 25.0 40.0 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 42 45 4 23.7 42.3 1.0X -InSet expression 46 48 1 21.6 46.2 0.9X +In expression 40 42 2 25.1 39.9 1.0X +InSet expression 44 46 1 22.5 44.5 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In 
expression 57 59 1 17.4 57.4 1.0X -InSet expression 51 52 1 19.8 50.6 1.1X +In expression 54 56 2 18.4 54.3 1.0X +InSet expression 47 49 1 21.2 47.3 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 88 89 1 11.4 87.7 1.0X -InSet expression 47 49 1 21.2 47.2 1.9X +In expression 102 109 4 9.8 102.3 1.0X +InSet expression 44 47 3 22.5 44.4 2.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 378 379 1 2.6 377.6 1.0X -InSet expression 48 49 1 20.9 47.8 7.9X +In expression 367 372 5 2.7 366.5 1.0X +InSet expression 45 46 2 22.4 44.7 8.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 17 19 2 585.1 1.7 1.0X -InSet expression 90 92 2 111.0 9.0 0.2X +In expression 15 17 2 647.4 1.5 1.0X +InSet expression 79 81 2 127.2 7.9 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 23 24 2 439.2 2.3 1.0X -InSet expression 101 103 1 99.5 10.1 0.2X +In expression 21 22 2 482.1 2.1 1.0X +InSet expression 89 91 2 112.7 8.9 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 48 50 3 210.5 4.8 1.0X -InSet expression 129 132 3 77.5 12.9 0.4X +In expression 44 48 4 227.0 4.4 1.0X +InSet expression 114 118 3 87.5 11.4 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 81 83 2 123.0 8.1 1.0X -InSet expression 153 156 2 65.2 15.3 0.5X +In expression 75 77 2 133.7 7.5 1.0X +InSet expression 138 141 2 72.2 13.8 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 153 154 2 65.5 15.3 1.0X -InSet expression 134 135 1 74.7 13.4 1.1X +In expression 140 143 3 71.5 14.0 1.0X +InSet expression 121 128 5 82.4 12.1 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 295 307 11 33.9 29.5 1.0X -InSet expression 132 134 1 75.7 13.2 2.2X +In expression 286 296 9 35.0 28.6 1.0X +InSet expression 120 124 3 83.3 12.0 2.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 376 377 2 26.6 37.6 1.0X -InSet expression 374 379 3 26.7 37.4 1.0X +In expression 297 300 3 33.6 29.7 1.0X +InSet expression 299 303 6 33.5 29.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 374 377 2 26.7 37.4 1.0X -InSet expression 373 377 4 26.8 37.3 1.0X +In expression 302 306 3 33.1 30.2 1.0X +InSet expression 296 301 5 33.8 29.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 406 413 5 24.6 40.6 1.0X -InSet expression 373 376 2 26.8 37.3 1.1X +In expression 329 344 18 30.4 32.9 1.0X +InSet expression 294 300 4 34.0 29.4 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 477 482 3 21.0 47.7 1.0X -InSet expression 371 376 4 27.0 37.1 1.3X +In expression 375 378 2 26.7 37.5 1.0X +InSet expression 297 306 5 33.7 29.7 1.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 564 568 3 17.7 56.4 1.0X -InSet expression 373 377 3 26.8 37.3 1.5X +In expression 461 467 6 21.7 46.1 1.0X +InSet expression 294 307 8 34.1 29.4 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit 
Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 716 722 6 14.0 71.6 1.0X -InSet expression 378 380 2 26.4 37.8 1.9X +In expression 609 622 9 16.4 60.9 1.0X +InSet expression 296 300 4 33.8 29.6 2.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 877 880 2 11.4 87.7 1.0X -InSet expression 378 381 3 26.5 37.8 2.3X +In expression 775 785 8 12.9 77.5 1.0X +InSet expression 296 299 3 33.8 29.6 2.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1033 1039 6 9.7 103.3 1.0X -InSet expression 382 384 2 26.2 38.2 2.7X +In expression 925 940 14 10.8 92.5 1.0X +InSet expression 300 308 6 33.3 30.0 3.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1198 1203 4 8.4 119.8 1.0X -InSet expression 468 469 1 21.4 46.8 2.6X +In expression 1079 1103 15 9.3 107.9 1.0X +InSet expression 396 401 5 25.3 39.6 2.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 25 27 2 39.5 25.3 1.0X -InSet expression 58 61 5 17.3 57.9 0.4X +In expression 23 25 1 42.8 23.3 1.0X +InSet expression 53 56 2 18.8 53.3 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 40 42 2 25.2 39.7 1.0X -InSet expression 85 87 2 11.7 85.4 0.5X +In expression 37 38 2 27.4 36.5 1.0X +InSet expression 80 83 2 12.4 80.4 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 173 174 2 5.8 172.8 1.0X -InSet expression 100 102 1 10.0 100.2 1.7X +In expression 158 164 4 
6.3 157.9 1.0X +InSet expression 95 99 3 10.5 95.4 1.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 377 383 5 2.6 377.4 1.0X -InSet expression 130 132 2 7.7 129.6 2.9X +In expression 355 361 5 2.8 355.2 1.0X +InSet expression 123 128 4 8.1 123.1 2.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 818 821 3 1.2 818.0 1.0X -InSet expression 145 147 2 6.9 144.6 5.7X +In expression 763 766 3 1.3 763.2 1.0X +InSet expression 139 141 2 7.2 138.8 5.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1689 1871 325 0.6 1689.0 1.0X -InSet expression 162 164 2 6.2 162.0 10.4X +In expression 1623 1803 325 0.6 1623.3 1.0X +InSet expression 155 163 6 6.5 154.8 10.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 17 18 2 60.4 16.6 1.0X -InSet expression 87 88 2 11.5 86.9 0.2X +In expression 15 17 2 65.4 15.3 1.0X +InSet expression 76 81 2 13.2 76.0 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 22 25 2 44.5 22.5 1.0X -InSet expression 134 136 2 7.5 133.6 0.2X +In expression 21 22 1 47.3 21.1 1.0X +InSet expression 115 121 3 8.7 115.0 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 71 73 4 14.1 71.0 1.0X -InSet expression 158 160 2 6.3 157.7 0.5X +In expression 65 72 3 15.5 64.6 1.0X +InSet expression 134 143 6 7.4 134.3 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 217 219 2 4.6 217.4 1.0X -InSet expression 205 210 6 4.9 204.9 1.1X +In expression 195 197 2 5.1 194.6 1.0X +InSet expression 179 189 10 5.6 179.2 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 554 555 2 1.8 553.7 1.0X -InSet expression 229 232 2 4.4 229.4 2.4X +In expression 516 523 6 1.9 516.2 1.0X +InSet expression 204 209 3 4.9 203.5 2.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1378 1482 221 0.7 1378.1 1.0X -InSet expression 266 270 4 3.8 266.2 5.2X +In expression 1322 1432 221 0.8 1321.9 1.0X +InSet expression 225 230 4 4.5 224.7 5.9X diff --git a/sql/core/benchmarks/InExpressionBenchmark-results.txt b/sql/core/benchmarks/InExpressionBenchmark-results.txt index 8a432b1657356..e3529cd7f9cda 100644 --- a/sql/core/benchmarks/InExpressionBenchmark-results.txt +++ b/sql/core/benchmarks/InExpressionBenchmark-results.txt @@ -2,739 +2,739 @@ In Expression Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 49 71 13 205.7 4.9 1.0X -InSet expression 33 40 5 298.9 3.3 1.5X +In expression 30 36 6 334.4 3.0 1.0X +InSet expression 69 73 5 145.1 6.9 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 45 52 9 222.3 4.5 1.0X -InSet expression 34 40 6 292.1 3.4 1.3X +In expression 34 37 4 295.6 3.4 1.0X +InSet expression 67 71 3 148.6 6.7 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 82 85 2 122.6 8.2 1.0X -InSet expression 42 46 3 235.8 4.2 1.9X +In expression 63 65 2 159.9 6.3 1.0X +InSet expression 75 79 3 133.0 7.5 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 bytes: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 140 143 2 71.2 14.0 1.0X -InSet expression 62 65 3 160.4 6.2 2.3X +In expression 103 108 4 96.8 10.3 1.0X +InSet expression 83 85 2 120.5 8.3 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 249 251 1 40.1 24.9 1.0X -InSet expression 104 107 2 96.1 10.4 2.4X +In expression 192 195 3 52.0 19.2 1.0X +InSet expression 98 99 2 102.5 9.8 2.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 bytes: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 410 412 3 24.4 41.0 1.0X -InSet expression 179 182 2 55.9 17.9 2.3X +In expression 396 402 7 25.2 39.6 1.0X +InSet expression 205 211 5 48.9 20.5 1.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 25 29 4 392.7 2.5 1.0X -InSet expression 21 23 2 469.8 2.1 1.2X +In expression 29 31 2 346.8 2.9 1.0X +InSet expression 95 98 2 105.0 9.5 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 34 37 4 290.8 3.4 1.0X -InSet expression 21 24 4 467.3 2.1 1.6X +In expression 35 38 2 282.3 3.5 1.0X +InSet expression 109 112 2 91.5 10.9 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 57 59 3 176.2 5.7 1.0X -InSet expression 21 25 5 466.0 2.1 2.6X +In expression 61 63 3 164.9 6.1 1.0X +InSet expression 109 113 2 91.5 10.9 0.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 98 100 2 102.1 9.8 1.0X -InSet expression 22 24 4 461.8 2.2 4.5X +In expression 106 109 4 94.5 10.6 1.0X +InSet expression 141 144 3 70.9 14.1 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 
64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 192 193 2 52.0 19.2 1.0X -InSet expression 22 24 2 448.5 2.2 8.6X +In expression 185 188 5 54.1 18.5 1.0X +InSet expression 127 130 3 78.8 12.7 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 367 370 2 27.2 36.7 1.0X -InSet expression 23 25 3 437.4 2.3 16.1X +In expression 365 366 1 27.4 36.5 1.0X +InSet expression 138 141 3 72.2 13.8 2.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 545 549 4 18.4 54.5 1.0X -InSet expression 26 28 3 387.9 2.6 21.1X +In expression 549 553 5 18.2 54.9 1.0X +InSet expression 154 156 3 65.1 15.4 3.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 709 714 4 14.1 70.9 1.0X -InSet expression 26 30 4 382.2 2.6 27.1X +In expression 669 676 4 15.0 66.9 1.0X +InSet expression 167 170 3 60.0 16.7 4.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 shorts: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 882 886 5 11.3 88.2 1.0X -InSet expression 163 165 1 61.4 16.3 5.4X +In expression 931 932 2 10.7 93.1 1.0X +InSet expression 182 184 1 55.0 18.2 5.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 23 24 2 435.1 2.3 1.0X -InSet expression 24 27 5 416.2 2.4 1.0X +In expression 28 31 3 355.0 2.8 1.0X +InSet expression 93 95 2 107.8 9.3 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 32 35 2 308.7 3.2 1.0X -InSet expression 29 31 3 
346.1 2.9 1.1X +In expression 34 36 2 290.2 3.4 1.0X +InSet expression 110 112 2 90.9 11.0 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 57 59 1 174.2 5.7 1.0X -InSet expression 32 34 2 311.2 3.2 1.8X +In expression 63 65 2 157.6 6.3 1.0X +InSet expression 131 132 1 76.6 13.1 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 99 100 1 101.0 9.9 1.0X -InSet expression 39 40 2 257.5 3.9 2.5X +In expression 115 117 2 86.8 11.5 1.0X +InSet expression 147 149 1 68.0 14.7 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 187 190 6 53.3 18.7 1.0X -InSet expression 45 47 4 224.0 4.5 4.2X +In expression 195 198 6 51.3 19.5 1.0X +InSet expression 130 133 2 76.8 13.0 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 366 367 1 27.4 36.6 1.0X -InSet expression 46 47 2 219.6 4.6 8.0X +In expression 354 356 2 28.3 35.4 1.0X +InSet expression 135 138 2 73.9 13.5 2.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 539 542 3 18.6 53.9 1.0X -InSet expression 51 52 2 197.6 5.1 10.7X +In expression 501 509 10 19.9 50.1 1.0X +InSet expression 151 153 2 66.4 15.1 3.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 shorts (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 713 716 3 14.0 71.3 1.0X -InSet expression 56 58 2 177.6 5.6 12.7X +In expression 666 669 4 15.0 66.6 1.0X +InSet expression 165 167 3 60.7 16.5 4.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 shorts (non-compact): Best Time(ms) 
Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1355 1373 20 7.4 135.5 1.0X -InSet expression 163 165 1 61.3 16.3 8.3X +In expression 858 861 4 11.7 85.8 1.0X +InSet expression 178 181 3 56.2 17.8 4.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 22 24 1 448.3 2.2 1.0X -InSet expression 19 20 1 529.9 1.9 1.2X +In expression 25 27 3 397.2 2.5 1.0X +InSet expression 94 97 4 106.1 9.4 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 30 1 350.0 2.9 1.0X -InSet expression 19 20 2 527.6 1.9 1.5X +In expression 34 35 2 294.5 3.4 1.0X +InSet expression 109 111 1 91.4 10.9 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 54 58 3 183.9 5.4 1.0X -InSet expression 19 20 2 528.7 1.9 2.9X +In expression 60 61 1 166.1 6.0 1.0X +InSet expression 112 114 1 89.2 11.2 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 92 99 4 108.1 9.2 1.0X -InSet expression 20 22 4 511.6 2.0 4.7X +In expression 114 115 1 87.6 11.4 1.0X +InSet expression 144 146 1 69.2 14.4 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 175 176 2 57.3 17.5 1.0X -InSet expression 20 21 2 499.8 2.0 8.7X +In expression 194 195 1 51.5 19.4 1.0X +InSet expression 126 128 1 79.5 12.6 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 329 341 27 30.4 32.9 1.0X -InSet expression 21 23 2 466.6 2.1 15.3X +In expression 353 357 6 28.3 35.3 1.0X +InSet expression 136 138 2 73.6 13.6 2.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure 
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 486 487 2 20.6 48.6 1.0X -InSet expression 23 25 3 441.1 2.3 21.4X +In expression 511 513 3 19.6 51.1 1.0X +InSet expression 149 151 2 67.2 14.9 3.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 644 651 6 15.5 64.4 1.0X -InSet expression 24 26 3 420.0 2.4 27.0X +In expression 660 677 25 15.2 66.0 1.0X +InSet expression 162 165 3 61.6 16.2 4.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 800 827 56 12.5 80.0 1.0X -InSet expression 156 159 3 63.9 15.6 5.1X +In expression 842 849 10 11.9 84.2 1.0X +InSet expression 175 178 2 57.2 17.5 4.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 14 16 2 702.0 1.4 1.0X -InSet expression 12 14 2 819.4 1.2 1.2X +In expression 22 24 2 459.4 2.2 1.0X +InSet expression 91 94 2 109.4 9.1 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 23 24 1 441.9 2.3 1.0X -InSet expression 18 19 2 545.1 1.8 1.2X +In expression 28 29 2 354.7 2.8 1.0X +InSet expression 107 109 1 93.5 10.7 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 47 48 1 212.6 4.7 1.0X -InSet expression 16 17 2 644.2 1.6 3.0X +In expression 56 57 2 179.5 5.6 1.0X +InSet expression 122 124 1 82.0 12.2 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 88 90 1 113.2 8.8 1.0X -InSet 
expression 17 19 3 575.2 1.7 5.1X +In expression 100 102 2 99.8 10.0 1.0X +InSet expression 142 144 1 70.4 14.2 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 168 168 1 59.6 16.8 1.0X -InSet expression 20 20 1 511.4 2.0 8.6X +In expression 180 182 3 55.6 18.0 1.0X +InSet expression 129 132 3 77.5 12.9 1.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 325 328 2 30.7 32.5 1.0X -InSet expression 21 22 2 474.6 2.1 15.4X +In expression 346 348 2 28.9 34.6 1.0X +InSet expression 134 138 4 74.4 13.4 2.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 488 490 3 20.5 48.8 1.0X -InSet expression 26 27 1 391.7 2.6 19.1X +In expression 507 508 1 19.7 50.7 1.0X +InSet expression 148 150 1 67.7 14.8 3.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 648 651 2 15.4 64.8 1.0X -InSet expression 30 32 3 332.4 3.0 21.5X +In expression 666 669 2 15.0 66.6 1.0X +InSet expression 161 163 1 62.1 16.1 4.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 ints (non-compact): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 800 832 64 12.5 80.0 1.0X -InSet expression 155 157 1 64.3 15.5 5.1X +In expression 824 861 78 12.1 82.4 1.0X +InSet expression 173 176 2 57.7 17.3 4.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 20 21 1 506.8 2.0 1.0X -InSet expression 88 90 2 114.1 8.8 0.2X +In expression 21 22 2 486.8 2.1 1.0X +InSet expression 86 88 2 115.8 8.6 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 longs: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 28 29 1 361.9 2.8 1.0X -InSet expression 102 104 1 98.2 10.2 0.3X +In expression 28 30 2 350.9 2.8 1.0X +InSet expression 101 104 4 98.7 10.1 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 55 69 21 181.0 5.5 1.0X -InSet expression 105 106 1 95.6 10.5 0.5X +In expression 53 55 2 188.7 5.3 1.0X +InSet expression 104 106 2 96.3 10.4 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 95 96 2 105.3 9.5 1.0X -InSet expression 134 136 1 74.6 13.4 0.7X +In expression 92 95 2 108.7 9.2 1.0X +InSet expression 134 136 2 74.9 13.4 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 170 171 1 58.7 17.0 1.0X -InSet expression 116 118 1 86.4 11.6 1.5X +In expression 172 174 4 58.2 17.2 1.0X +InSet expression 117 119 2 85.5 11.7 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 328 342 13 30.5 32.8 1.0X -InSet expression 127 128 1 78.9 12.7 2.6X +In expression 329 334 5 30.4 32.9 1.0X +InSet expression 126 127 1 79.5 12.6 2.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 35 36 1 285.5 3.5 1.0X -InSet expression 95 97 2 105.4 9.5 0.4X +In expression 35 36 1 282.0 3.5 1.0X +InSet expression 98 100 2 102.5 9.8 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 62 64 2 162.3 6.2 1.0X -InSet expression 114 116 1 87.6 11.4 0.5X +In expression 64 66 5 157.0 6.4 1.0X +InSet expression 118 119 1 85.1 11.8 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure 
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 126 127 1 79.4 12.6 1.0X -InSet expression 116 117 2 86.6 11.6 1.1X +In expression 127 129 2 78.7 12.7 1.0X +InSet expression 118 120 1 84.7 11.8 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 205 206 1 48.8 20.5 1.0X -InSet expression 157 161 4 63.6 15.7 1.3X +In expression 209 211 2 47.8 20.9 1.0X +InSet expression 159 161 4 63.0 15.9 1.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 365 367 5 27.4 36.5 1.0X -InSet expression 128 130 3 78.2 12.8 2.9X +In expression 366 379 26 27.3 36.6 1.0X +InSet expression 132 135 4 75.8 13.2 2.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 floats: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1676 1879 169 6.0 167.6 1.0X -InSet expression 133 135 1 74.9 13.3 12.6X +In expression 1675 1744 125 6.0 167.5 1.0X +InSet expression 135 137 3 74.3 13.5 12.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 34 36 3 292.7 3.4 1.0X -InSet expression 96 97 2 104.5 9.6 0.4X +In expression 39 41 2 254.3 3.9 1.0X +InSet expression 114 116 1 87.6 11.4 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 61 62 1 164.3 6.1 1.0X -InSet expression 116 118 2 86.6 11.6 0.5X +In expression 62 63 1 161.3 6.2 1.0X +InSet expression 143 147 8 69.9 14.3 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 127 129 1 78.5 12.7 1.0X -InSet expression 118 120 2 
84.7 11.8 1.1X +In expression 128 129 2 78.0 12.8 1.0X +InSet expression 143 147 6 69.8 14.3 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 207 208 1 48.2 20.7 1.0X -InSet expression 157 160 2 63.5 15.7 1.3X +In expression 209 211 3 47.9 20.9 1.0X +InSet expression 195 197 3 51.2 19.5 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 365 365 1 27.4 36.5 1.0X -InSet expression 128 129 1 78.3 12.8 2.9X +In expression 366 368 2 27.3 36.6 1.0X +InSet expression 152 154 1 65.8 15.2 2.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 doubles: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1961 2009 82 5.1 196.1 1.0X -InSet expression 136 138 1 73.6 13.6 14.4X +In expression 1922 1995 100 5.2 192.2 1.0X +InSet expression 155 157 1 64.7 15.5 12.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 19 20 1 52.1 19.2 1.0X -InSet expression 55 57 2 18.0 55.4 0.3X +In expression 20 21 1 50.0 20.0 1.0X +InSet expression 60 61 1 16.7 59.7 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 24 25 2 41.4 24.2 1.0X -InSet expression 57 59 1 17.4 57.4 0.4X +In expression 25 28 3 39.3 25.5 1.0X +InSet expression 62 64 1 16.2 61.9 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 39 40 2 25.7 38.9 1.0X -InSet expression 58 59 1 17.3 57.9 0.7X +In expression 40 42 3 24.7 40.4 1.0X +InSet expression 62 65 3 16.1 62.0 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -In expression 74 76 3 13.4 74.5 1.0X -InSet expression 62 64 2 16.1 62.1 1.2X +In expression 78 80 1 12.8 78.3 1.0X +InSet expression 67 69 1 14.9 67.0 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 236 238 1 4.2 236.3 1.0X -InSet expression 61 62 1 16.5 60.5 3.9X +In expression 236 333 252 4.2 235.9 1.0X +InSet expression 66 70 7 15.1 66.3 3.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 small decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 548 605 122 1.8 547.8 1.0X -InSet expression 63 65 1 15.9 62.8 8.7X +In expression 558 613 121 1.8 558.2 1.0X +InSet expression 67 69 1 15.0 66.8 8.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 1 213.0 4.7 1.0X -InSet expression 5 5 1 220.4 4.5 1.0X +In expression 5 6 1 186.8 5.4 1.0X +InSet expression 5 6 1 195.2 5.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 5 1 216.1 4.6 1.0X -InSet expression 5 5 1 216.8 4.6 1.0X +In expression 5 6 1 192.0 5.2 1.0X +InSet expression 5 6 1 191.5 5.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 5 6 1 196.3 5.1 1.0X -InSet expression 5 5 1 197.0 5.1 1.0X +In expression 6 6 1 176.0 5.7 1.0X +InSet expression 6 6 1 178.0 5.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 6 7 1 168.4 5.9 1.0X -InSet expression 6 6 1 167.7 6.0 1.0X +In expression 6 7 1 155.1 6.4 1.0X +InSet expression 6 7 1 155.7 6.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on 
Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 8 8 1 130.3 7.7 1.0X -InSet expression 8 8 1 130.9 7.6 1.0X +In expression 8 9 1 121.9 8.2 1.0X +InSet expression 8 9 1 122.1 8.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 large decimals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 11 12 1 89.4 11.2 1.0X -InSet expression 11 12 1 88.5 11.3 1.0X +In expression 12 12 1 86.0 11.6 1.0X +InSet expression 12 12 1 86.3 11.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 30 1 34.7 28.9 1.0X -InSet expression 42 44 1 23.8 42.1 0.7X +In expression 29 30 1 34.6 28.9 1.0X +InSet expression 43 44 1 23.2 43.0 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 34 35 1 29.1 34.3 1.0X -InSet expression 44 45 1 22.6 44.2 0.8X +In expression 34 35 1 29.2 34.3 1.0X +InSet expression 46 47 2 22.0 45.5 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 42 44 1 23.8 42.1 1.0X -InSet expression 48 50 1 20.6 48.4 0.9X +In expression 43 44 1 23.2 43.1 1.0X +InSet expression 50 52 4 20.0 50.0 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 57 61 8 17.7 56.6 1.0X -InSet expression 53 54 2 19.0 52.6 1.1X +In expression 58 59 1 17.3 57.7 1.0X +InSet expression 54 55 1 18.6 53.8 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 89 90 2 11.3 88.8 1.0X -InSet expression 50 52 3 20.1 49.7 1.8X +In expression 89 90 2 11.3 88.6 1.0X +InSet expression 52 53 2 19.4 51.7 
1.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 390 392 2 2.6 390.0 1.0X -InSet expression 50 52 1 19.8 50.4 7.7X +In expression 383 387 3 2.6 383.3 1.0X +InSet expression 51 54 2 19.5 51.3 7.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 15 16 1 646.9 1.5 1.0X -InSet expression 90 91 1 111.7 9.0 0.2X +In expression 16 17 1 636.1 1.6 1.0X +InSet expression 92 94 2 108.7 9.2 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 22 23 2 457.4 2.2 1.0X -InSet expression 99 100 1 101.0 9.9 0.2X +In expression 22 23 1 447.6 2.2 1.0X +InSet expression 102 103 1 98.4 10.2 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 45 46 1 221.5 4.5 1.0X -InSet expression 125 127 1 79.9 12.5 0.4X +In expression 46 47 1 218.8 4.6 1.0X +InSet expression 127 130 1 78.5 12.7 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 81 82 1 123.8 8.1 1.0X -InSet expression 153 155 3 65.5 15.3 0.5X +In expression 81 83 2 123.6 8.1 1.0X +InSet expression 151 153 2 66.1 15.1 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 153 155 3 65.4 15.3 1.0X -InSet expression 131 140 19 76.2 13.1 1.2X +In expression 153 155 5 65.4 15.3 1.0X +InSet expression 132 135 1 75.5 13.2 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ 
-In expression 297 307 10 33.7 29.7 1.0X -InSet expression 127 130 1 78.4 12.7 2.3X +In expression 297 307 10 33.6 29.7 1.0X +InSet expression 131 132 2 76.6 13.1 2.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 452 455 5 22.1 45.2 1.0X -InSet expression 455 458 5 22.0 45.5 1.0X +In expression 450 455 6 22.2 45.0 1.0X +InSet expression 445 447 2 22.5 44.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 460 461 1 21.7 46.0 1.0X -InSet expression 448 449 1 22.3 44.8 1.0X +In expression 451 456 4 22.2 45.1 1.0X +InSet expression 445 449 3 22.5 44.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 485 487 3 20.6 48.5 1.0X -InSet expression 452 456 4 22.1 45.2 1.1X +In expression 484 487 2 20.6 48.4 1.0X +InSet expression 445 447 2 22.5 44.5 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 544 548 6 18.4 54.4 1.0X -InSet expression 452 454 2 22.1 45.2 1.2X +In expression 533 536 3 18.7 53.3 1.0X +InSet expression 449 450 2 22.3 44.9 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 622 624 2 16.1 62.2 1.0X -InSet expression 454 457 4 22.0 45.4 1.4X +In expression 619 623 6 16.2 61.9 1.0X +InSet expression 447 448 1 22.4 44.7 1.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 783 788 8 12.8 78.3 1.0X -InSet expression 450 455 6 22.2 45.0 1.7X +In expression 779 790 12 12.8 77.9 1.0X +InSet expression 447 454 10 22.3 44.7 1.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 300 dates: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 949 952 3 10.5 94.9 1.0X -InSet expression 455 457 4 22.0 45.5 2.1X +In expression 944 948 4 10.6 94.4 1.0X +InSet expression 455 456 1 22.0 45.5 2.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 400 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1115 1119 5 9.0 111.5 1.0X -InSet expression 462 463 1 21.6 46.2 2.4X +In expression 1105 1107 2 9.0 110.5 1.0X +InSet expression 451 454 3 22.2 45.1 2.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 500 dates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1279 1284 3 7.8 127.9 1.0X -InSet expression 543 544 1 18.4 54.3 2.4X +In expression 1279 1289 6 7.8 127.9 1.0X +InSet expression 542 544 3 18.4 54.2 2.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ In expression 25 26 2 39.7 25.2 1.0X -InSet expression 56 57 1 17.9 55.8 0.5X +InSet expression 57 58 2 17.6 56.7 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 40 41 1 25.3 39.5 1.0X -InSet expression 84 85 1 11.9 83.8 0.5X +In expression 40 41 1 25.0 40.0 1.0X +InSet expression 85 87 3 11.7 85.1 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 148 151 2 6.7 148.5 1.0X -InSet expression 99 101 4 10.1 98.5 1.5X +In expression 158 161 3 6.3 158.3 1.0X +InSet expression 100 103 4 10.0 99.9 1.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 368 370 2 2.7 367.9 1.0X -InSet expression 126 129 4 7.9 126.5 2.9X +In expression 400 401 1 2.5 400.1 1.0X +InSet expression 128 131 2 7.8 128.3 3.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 
17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 753 759 7 1.3 753.5 1.0X -InSet expression 143 145 1 7.0 142.9 5.3X +In expression 751 754 4 1.3 751.0 1.0X +InSet expression 145 147 2 6.9 144.5 5.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 200 arrays: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 1611 1858 317 0.6 1610.9 1.0X -InSet expression 159 161 1 6.3 159.3 10.1X +In expression 1672 1862 278 0.6 1671.8 1.0X +InSet expression 162 164 2 6.2 162.4 10.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 5 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 20 22 2 49.2 20.3 1.0X -InSet expression 81 83 2 12.4 80.8 0.3X +In expression 20 22 2 48.9 20.5 1.0X +InSet expression 81 84 2 12.3 81.3 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 10 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 29 31 2 34.2 29.3 1.0X -InSet expression 122 125 1 8.2 122.5 0.2X +In expression 30 32 3 32.9 30.4 1.0X +InSet expression 124 126 2 8.0 124.5 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 25 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 69 71 3 14.5 69.2 1.0X -InSet expression 144 147 3 6.9 143.9 0.5X +In expression 69 71 1 14.4 69.4 1.0X +InSet expression 146 148 1 6.8 146.5 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 50 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 205 207 3 4.9 204.6 1.0X -InSet expression 186 192 9 5.4 186.2 1.1X +In expression 212 214 3 4.7 212.1 1.0X +InSet expression 190 193 3 5.3 189.9 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 100 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -In expression 501 503 2 2.0 501.2 1.0X -InSet expression 211 212 2 4.7 211.3 2.4X +In expression 
482 492 6 2.1 482.2 1.0X
+InSet expression 214 216 1 4.7 213.8 2.3X

-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
200 structs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-In expression 1209 1412 252 0.8 1208.5 1.0X
-InSet expression 245 246 2 4.1 244.6 4.9X
+In expression 1209 1433 281 0.8 1209.2 1.0X
+InSet expression 245 247 2 4.1 245.0 4.9X

diff --git a/sql/core/benchmarks/InMemoryColumnarBenchmark-jdk21-results.txt b/sql/core/benchmarks/InMemoryColumnarBenchmark-jdk21-results.txt
index bb16529424567..a6cadf2a57c1a 100644
--- a/sql/core/benchmarks/InMemoryColumnarBenchmark-jdk21-results.txt
+++ b/sql/core/benchmarks/InMemoryColumnarBenchmark-jdk21-results.txt
@@ -2,11 +2,11 @@ Int In-memory with 1000000 rows
================================================================================================
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
Int In-Memory scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------
-columnar deserialization + columnar-to-row 175 200 28 5.7 175.0 1.0X
-row-based deserialization 132 142 9 7.6 132.3 1.3X
+columnar deserialization + columnar-to-row 156 169 18 6.4 155.6 1.0X
+row-based deserialization 125 176 46 8.0 125.1 1.2X

diff --git a/sql/core/benchmarks/InMemoryColumnarBenchmark-results.txt b/sql/core/benchmarks/InMemoryColumnarBenchmark-results.txt
index 69ef98c2e5f25..1774d114da136 100644
--- a/sql/core/benchmarks/InMemoryColumnarBenchmark-results.txt
+++ b/sql/core/benchmarks/InMemoryColumnarBenchmark-results.txt
@@ -2,11 +2,11 @@ Int In-memory with 1000000 rows
================================================================================================
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
Int In-Memory scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------
-columnar deserialization + columnar-to-row 177 218 64 5.7 176.6 1.0X
-row-based deserialization 132 190 81 7.6 132.2 1.3X
+columnar deserialization + columnar-to-row 195 199 8 5.1 194.6 1.0X
+row-based deserialization 127 128 1 7.9 127.3 1.5X

diff --git a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk21-results.txt b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk21-results.txt
index bf27e9ce51ead..b2a1034782f1d 100644
--- a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk21-results.txt
+++ b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-jdk21-results.txt
@@ -1,8 +1,8 @@
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
dynamic insert table benchmark, totalRows = 200000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------
-one partition column, 100 partitions 8818 8868 71 0.0 44088.8 1.0X
-two partition columns, 500 partitions 24601 24662 86 0.0 123006.4 0.4X
-three partition columns, 2000 partitions 66361 66397 51 0.0 331804.1 0.1X
+one partition column, 100 partitions 9816 9873 80 0.0 49079.3 1.0X
+two partition columns, 500 partitions 26057 26309 355 0.0 130285.9 0.4X
+three partition columns, 2000 partitions 72728 72816 124 0.0 363640.3 0.1X

diff --git a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt
index cc4a7661dd8da..b360d3f5d5270 100644
--- a/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt
+++ b/sql/core/benchmarks/InsertTableWithDynamicPartitionsBenchmark-results.txt
@@ -1,8 +1,8 @@
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
dynamic insert table benchmark, totalRows = 200000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
----------------------------------------------------------------------------------------------------------------------------------
-one partition column, 100 partitions 8858 8907 70 0.0 44289.2 1.0X
-two partition columns, 500 partitions 24244 25085 1189 0.0 121220.9 0.4X
-three partition columns, 2000 partitions 65616 67508 2676 0.0 328079.4 0.1X
+one partition column, 100 partitions 9772 9823 71 0.0 48861.7 1.0X
+two partition columns, 500 partitions 25719 25897 252 0.0 128594.9 0.4X
+three partition columns, 2000 partitions 72019 72199 254 0.0 360097.0 0.1X

diff --git a/sql/core/benchmarks/IntervalBenchmark-jdk21-results.txt b/sql/core/benchmarks/IntervalBenchmark-jdk21-results.txt
index 2384e04fd5647..06f16fec2065d 100644
--- a/sql/core/benchmarks/IntervalBenchmark-jdk21-results.txt
+++ b/sql/core/benchmarks/IntervalBenchmark-jdk21-results.txt
@@ -1,40 +1,40 @@
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-prepare string w/ interval 415 449 38 2.4 414.7 1.0X
-prepare string w/o interval 376 383 9 2.7 376.3 1.1X
-1 units w/ interval 326 338 15 3.1 326.1 1.3X
-1 units w/o interval 320 321 2 3.1 319.9 1.3X
-2 units w/ interval 474 483 9 2.1 474.2 0.9X
-2 units w/o interval 474 474 1 2.1 473.5 0.9X
-3 units w/ interval 1035 1045 9 1.0 1035.4 0.4X
-3 units w/o interval 1035 1041 8 1.0 1035.2 0.4X
-4 units w/ interval 1287 1292 6 0.8 1287.1 0.3X
-4 units w/o interval 1297 1305 13 0.8 1296.8 0.3X
-5 units w/ interval 1447 1455 7 0.7 1446.8 0.3X
-5 units w/o interval 1445 1451 6 0.7 1445.3 0.3X
-6 units w/ interval 1600 1604 5 0.6 1600.3 0.3X
-6 units w/o interval 1596 1600 7 0.6 1595.9 0.3X
-7 units w/ interval 1839 1848 9 0.5 1838.6 0.2X
-7 units w/o interval 1821 1826 8 0.5 1821.0 0.2X
-8 units w/ interval 2021 2038 26 0.5 2021.3 0.2X
-8 units w/o interval 2095 2109 20 0.5 2095.5 0.2X
-9 units w/ interval 2273 2290 19 0.4 2272.6 0.2X
-9 units w/o interval 2286 2326 47 0.4 2285.8 0.2X
-10 units w/ interval 2884 2899 14 0.3 2884.4 0.1X
-10 units w/o interval 2882 2889 10 0.3 2882.1 0.1X
-11 units w/ interval 3155 3190 50 0.3 3155.2 0.1X
-11 units w/o interval 3032 3076 49 0.3 3032.3 0.1X
+prepare string w/ interval 399 402 5 2.5 399.2 1.0X
+prepare string w/o interval 390 397 13 2.6 389.7 1.0X
+1 units w/ interval 319 320 2 3.1 318.8 1.3X
+1 units w/o interval 327 329 2 3.1 326.8 1.2X
+2 units w/ interval 503 509 6 2.0 503.5 0.8X
+2 units w/o interval 498 500 2 2.0 497.7 0.8X
+3 units w/ interval 1080 1089 15 0.9 1079.9 0.4X
+3 units w/o interval 1110 1113 5 0.9 1109.8 0.4X
+4 units w/ interval 1369 1371 4 0.7 1368.5 0.3X
+4 units w/o interval 1378 1386 12 0.7 1377.7 0.3X
+5 units w/ interval 1529 1531 2 0.7 1528.8 0.3X
+5 units w/o interval 1545 1549 5 0.6 1545.2 0.3X
+6 units w/ interval 1698 1706 7 0.6 1698.0 0.2X
+6 units w/o interval 1700 1707 7 0.6 1700.2 0.2X
+7 units w/ interval 2028 2040 11 0.5 2027.5 0.2X
+7 units w/o interval 2044 2046 3 0.5 2043.9 0.2X
+8 units w/ interval 2261 2271 12 0.4 2260.7 0.2X
+8 units w/o interval 2249 2258 9 0.4 2249.3 0.2X
+9 units w/ interval 2705 2710 4 0.4 2705.2 0.1X
+9 units w/o interval 2713 2722 8 0.4 2713.3 0.1X
+10 units w/ interval 2784 2789 7 0.4 2784.0 0.1X
+10 units w/o interval 2785 2790 5 0.4 2784.9 0.1X
+11 units w/ interval 3123 3148 31 0.3 3122.7 0.1X
+11 units w/o interval 3136 3144 10 0.3 3136.2 0.1X

-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
make_interval(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
--------------------------------------------------------------------------------------------------------------------------
-prepare make_interval() 323 329 6 3.1 323.5 1.0X
-make_interval(0, 1, 2, 3, 4, 5, 50.123456) 37 39 2 26.8 37.3 8.7X
-make_interval(*, *, 2, 3, 4, 5, 50.123456) 59 64 4 16.9 59.2 5.5X
-make_interval(0, 1, *, *, 4, 5, 50.123456) 60 63 5 16.7 59.8 5.4X
-make_interval(0, 1, 2, 3, *, *, *) 308 313 8 3.2 308.0 1.1X
-make_interval(*, *, *, *, *, *, *) 335 343 7 3.0 334.7 1.0X
+prepare make_interval() 346 352 5 2.9 346.3 1.0X
+make_interval(0, 1, 2, 3, 4, 5, 50.123456) 39 43 4 25.3 39.5 8.8X
+make_interval(*, *, 2, 3, 4, 5, 50.123456) 51 57 9 19.5 51.3 6.7X
+make_interval(0, 1, *, *, 4, 5, 50.123456) 57 59 3 17.7 56.6 6.1X
+make_interval(0, 1, 2, 3, *, *, *) 356 358 2 2.8 355.9 1.0X
+make_interval(*, *, *, *, *, *, *) 344 347 4 2.9 344.4 1.0X

diff --git a/sql/core/benchmarks/IntervalBenchmark-results.txt b/sql/core/benchmarks/IntervalBenchmark-results.txt
index 73ccbdbfaac23..86cfcdf96dbd7 100644
--- a/sql/core/benchmarks/IntervalBenchmark-results.txt
+++ b/sql/core/benchmarks/IntervalBenchmark-results.txt
@@ -1,40 +1,40 @@
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
cast strings to intervals: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-prepare string w/ interval 396 448 74 2.5 396.2 1.0X
-prepare string w/o interval 373 383 9 2.7 372.6 1.1X
-1 units w/ interval 332 341 11 3.0 331.8 1.2X
-1 units w/o interval 371 377 7 2.7 371.0 1.1X
-2 units w/ interval 499 502 3 2.0 499.3 0.8X
-2 units w/o interval 474 481 7 2.1 474.4 0.8X
-3 units w/ interval 1122 1124 4 0.9 1122.0 0.4X
-3 units w/o interval 1105 1118 12 0.9 1104.6 0.4X
-4 units w/ interval 1418 1425 8 0.7 1418.1 0.3X
-4 units w/o interval 1397 1401 4 0.7 1397.1 0.3X -5 units w/ interval 1568 1576 10 0.6 1568.0 0.3X -5 units w/o interval 1564 1566 2 0.6 1563.7 0.3X -6 units w/ interval 1748 1754 6 0.6 1748.5 0.2X -6 units w/o interval 1736 1737 1 0.6 1735.7 0.2X -7 units w/ interval 2148 2159 10 0.5 2147.5 0.2X -7 units w/o interval 2176 2176 0 0.5 2175.7 0.2X -8 units w/ interval 2404 2412 7 0.4 2404.5 0.2X -8 units w/o interval 2381 2387 6 0.4 2381.1 0.2X -9 units w/ interval 2632 2649 15 0.4 2631.9 0.2X -9 units w/o interval 2636 2648 16 0.4 2635.7 0.2X -10 units w/ interval 2849 2852 4 0.4 2849.3 0.1X -10 units w/o interval 2836 2848 20 0.4 2835.8 0.1X -11 units w/ interval 3049 3058 16 0.3 3048.6 0.1X -11 units w/o interval 3052 3062 9 0.3 3051.8 0.1X +prepare string w/ interval 375 379 4 2.7 375.4 1.0X +prepare string w/o interval 365 367 2 2.7 364.8 1.0X +1 units w/ interval 321 329 8 3.1 321.1 1.2X +1 units w/o interval 291 302 12 3.4 291.2 1.3X +2 units w/ interval 435 441 7 2.3 434.9 0.9X +2 units w/o interval 416 418 2 2.4 415.7 0.9X +3 units w/ interval 1019 1024 4 1.0 1019.3 0.4X +3 units w/o interval 1000 1006 9 1.0 1000.1 0.4X +4 units w/ interval 1319 1326 5 0.8 1319.4 0.3X +4 units w/o interval 1317 1321 4 0.8 1317.3 0.3X +5 units w/ interval 1457 1467 9 0.7 1457.4 0.3X +5 units w/o interval 1461 1467 5 0.7 1461.2 0.3X +6 units w/ interval 1631 1635 4 0.6 1630.7 0.2X +6 units w/o interval 1614 1620 6 0.6 1614.4 0.2X +7 units w/ interval 2270 2282 12 0.4 2270.2 0.2X +7 units w/o interval 2252 2255 3 0.4 2252.0 0.2X +8 units w/ interval 2238 2247 13 0.4 2237.6 0.2X +8 units w/o interval 2237 2239 3 0.4 2236.6 0.2X +9 units w/ interval 2478 2484 7 0.4 2478.2 0.2X +9 units w/o interval 2455 2458 4 0.4 2455.2 0.2X +10 units w/ interval 2628 2635 6 0.4 2628.5 0.1X +10 units w/o interval 2618 2633 13 0.4 2618.4 0.1X +11 units w/ interval 2907 2915 8 0.3 2907.3 0.1X +11 units w/o interval 2905 2914 10 0.3 2905.1 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor make_interval(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -prepare make_interval() 344 348 4 2.9 344.4 1.0X -make_interval(0, 1, 2, 3, 4, 5, 50.123456) 45 50 8 22.0 45.4 7.6X -make_interval(*, *, 2, 3, 4, 5, 50.123456) 55 56 1 18.1 55.3 6.2X -make_interval(0, 1, *, *, 4, 5, 50.123456) 56 59 4 17.8 56.2 6.1X -make_interval(0, 1, 2, 3, *, *, *) 329 331 3 3.0 328.7 1.0X -make_interval(*, *, *, *, *, *, *) 340 343 3 2.9 339.9 1.0X +prepare make_interval() 344 347 3 2.9 344.3 1.0X +make_interval(0, 1, 2, 3, 4, 5, 50.123456) 44 45 1 22.8 44.0 7.8X +make_interval(*, *, 2, 3, 4, 5, 50.123456) 51 51 1 19.7 50.8 6.8X +make_interval(0, 1, *, *, 4, 5, 50.123456) 55 60 9 18.2 54.9 6.3X +make_interval(0, 1, 2, 3, *, *, *) 340 341 1 2.9 340.0 1.0X +make_interval(*, *, *, *, *, *, *) 333 335 2 3.0 333.3 1.0X diff --git a/sql/core/benchmarks/JoinBenchmark-jdk21-results.txt b/sql/core/benchmarks/JoinBenchmark-jdk21-results.txt index 02744d00178b8..d7bb196bb7144 100644 --- a/sql/core/benchmarks/JoinBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/JoinBenchmark-jdk21-results.txt @@ -2,81 +2,81 @@ Join Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit 
Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w long wholestage off 2064 2065 2 10.2 98.4 1.0X -Join w long wholestage on 930 961 29 22.6 44.3 2.2X +Join w long wholestage off 2147 2166 27 9.8 102.4 1.0X +Join w long wholestage on 944 990 40 22.2 45.0 2.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w long duplicated: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w long duplicated wholestage off 2006 2018 18 10.5 95.7 1.0X -Join w long duplicated wholestage on 917 932 27 22.9 43.7 2.2X +Join w long duplicated wholestage off 2214 2248 48 9.5 105.6 1.0X +Join w long duplicated wholestage on 996 1005 10 21.1 47.5 2.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w 2 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 ints wholestage off 147152 147206 77 0.1 7016.7 1.0X -Join w 2 ints wholestage on 105544 105604 67 0.2 5032.7 1.4X +Join w 2 ints wholestage off 148982 149062 112 0.1 7104.0 1.0X +Join w 2 ints wholestage on 105434 105515 63 0.2 5027.5 1.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w 2 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 longs wholestage off 3414 3420 9 6.1 162.8 1.0X -Join w 2 longs wholestage on 2117 2154 32 9.9 100.9 1.6X +Join w 2 longs wholestage off 3442 3459 23 6.1 164.1 1.0X +Join w 2 longs wholestage on 2179 2191 11 9.6 103.9 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w 2 longs duplicated: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 longs duplicated wholestage off 8996 9037 58 2.3 429.0 1.0X -Join w 2 longs duplicated wholestage on 5567 5636 95 3.8 265.5 1.6X +Join w 2 longs duplicated wholestage off 10326 10385 84 2.0 492.4 1.0X +Join w 2 longs duplicated wholestage on 6246 6271 22 3.4 297.8 1.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor outer join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -outer join w long wholestage off 1605 1633 39 13.1 76.5 1.0X -outer join w long wholestage on 983 993 9 21.3 46.9 1.6X +outer join w long wholestage off 1711 1713 3 
12.3 81.6 1.0X +outer join w long wholestage on 1045 1056 8 20.1 49.8 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor semi join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -semi join w long wholestage off 1062 1075 19 19.8 50.6 1.0X -semi join w long wholestage on 558 580 14 37.6 26.6 1.9X +semi join w long wholestage off 1207 1210 4 17.4 57.6 1.0X +semi join w long wholestage on 682 701 14 30.7 32.5 1.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sort merge join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort merge join wholestage off 517 527 15 4.1 246.5 1.0X -sort merge join wholestage on 446 464 12 4.7 212.4 1.2X +sort merge join wholestage off 548 573 35 3.8 261.3 1.0X +sort merge join wholestage on 489 520 28 4.3 233.1 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sort merge join with duplicates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -sort merge join with duplicates wholestage off 1010 1020 14 2.1 481.6 1.0X -sort merge join with duplicates wholestage on 904 927 17 2.3 431.2 1.1X +sort merge join with duplicates wholestage off 1054 1091 52 2.0 502.8 1.0X +sort merge join with duplicates wholestage on 934 961 24 2.2 445.2 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor shuffle hash join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -shuffle hash join wholestage off 516 524 11 8.1 123.1 1.0X -shuffle hash join wholestage on 377 419 34 11.1 89.9 1.4X +shuffle hash join wholestage off 501 514 18 8.4 119.4 1.0X +shuffle hash join wholestage on 427 469 32 9.8 101.7 1.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor broadcast nested loop join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -broadcast nested loop join wholestage off 25325 25376 73 0.8 1207.6 1.0X -broadcast nested loop join wholestage on 19304 19559 319 1.1 920.5 1.3X +broadcast nested loop join wholestage off 26497 26499 2 0.8 1263.5 1.0X +broadcast nested loop join wholestage on 18614 18698 61 1.1 887.6 1.4X diff --git a/sql/core/benchmarks/JoinBenchmark-results.txt b/sql/core/benchmarks/JoinBenchmark-results.txt index a3c026eb02fab..a8e057371664b 100644 --- a/sql/core/benchmarks/JoinBenchmark-results.txt +++ b/sql/core/benchmarks/JoinBenchmark-results.txt @@ -2,81 +2,81 @@ Join Benchmark 
================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w long wholestage off 2121 2143 31 9.9 101.2 1.0X -Join w long wholestage on 943 963 17 22.2 45.0 2.2X +Join w long wholestage off 2138 2142 6 9.8 101.9 1.0X +Join w long wholestage on 944 958 15 22.2 45.0 2.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w long duplicated: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w long duplicated wholestage off 2151 2153 3 9.7 102.6 1.0X -Join w long duplicated wholestage on 882 907 23 23.8 42.1 2.4X +Join w long duplicated wholestage off 2278 2326 68 9.2 108.6 1.0X +Join w long duplicated wholestage on 1080 1084 2 19.4 51.5 2.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w 2 ints: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 ints wholestage off 144107 144246 197 0.1 6871.6 1.0X -Join w 2 ints wholestage on 108436 109016 343 0.2 5170.6 1.3X +Join w 2 ints wholestage off 149192 149209 24 0.1 7114.0 1.0X +Join w 2 ints wholestage on 111484 111555 82 0.2 5316.0 1.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w 2 longs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 longs wholestage off 3146 3165 27 6.7 150.0 1.0X -Join w 2 longs wholestage on 2023 2035 14 10.4 96.4 1.6X +Join w 2 longs wholestage off 3463 3507 62 6.1 165.1 1.0X +Join w 2 longs wholestage on 2116 2133 25 9.9 100.9 1.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Join w 2 longs duplicated: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Join w 2 longs duplicated wholestage off 11530 11630 142 1.8 549.8 1.0X -Join w 2 longs duplicated wholestage on 5228 5248 24 4.0 249.3 2.2X +Join w 2 longs duplicated wholestage off 10496 10500 6 2.0 500.5 1.0X +Join w 2 longs duplicated wholestage on 6203 6227 32 3.4 295.8 1.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor outer join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -outer join w long wholestage off 1760 1771 16 11.9 83.9 1.0X -outer join w long wholestage on 931 951 17 22.5 44.4 1.9X +outer join w long wholestage off 1834 1840 9 11.4 87.5 1.0X +outer join w long wholestage on 1078 1083 4 19.5 51.4 1.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor semi join w long: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -semi join w long wholestage off 1158 1161 4 18.1 55.2 1.0X -semi join w long wholestage on 527 548 24 39.8 25.1 2.2X +semi join w long wholestage off 1326 1332 8 15.8 63.2 1.0X +semi join w long wholestage on 711 716 7 29.5 33.9 1.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sort merge join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort merge join wholestage off 510 522 17 4.1 243.1 1.0X -sort merge join wholestage on 461 478 13 4.6 219.6 1.1X +sort merge join wholestage off 541 551 14 3.9 258.2 1.0X +sort merge join wholestage on 487 493 4 4.3 232.2 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sort merge join with duplicates: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -sort merge join with duplicates wholestage off 1020 1022 3 2.1 486.2 1.0X -sort merge join with duplicates wholestage on 915 921 9 2.3 436.1 1.1X +sort merge join with duplicates wholestage off 1092 1096 7 1.9 520.6 1.0X +sort merge join with duplicates wholestage on 965 977 14 2.2 460.3 1.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor shuffle hash join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -shuffle hash join wholestage off 524 528 6 8.0 124.8 1.0X -shuffle hash join wholestage on 372 378 4 11.3 88.7 1.4X +shuffle hash join wholestage off 551 558 10 7.6 131.4 1.0X +shuffle hash join wholestage on 397 404 6 10.6 94.7 1.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor broadcast nested loop join: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -broadcast nested loop join wholestage off 28415 28455 57 0.7 1354.9 1.0X -broadcast nested loop join wholestage on 17648 17700 36 1.2 841.5 1.6X +broadcast nested loop join wholestage off 27651 27728 109 0.8 1318.5 1.0X +broadcast nested loop join wholestage on 19162 19202 33 1.1 913.7 1.4X diff --git 
a/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt b/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt index 4cb72c9b02fed..80448f80df486 100644 --- a/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/JsonBenchmark-jdk21-results.txt @@ -3,128 +3,128 @@ Benchmark for performance of JSON parsing ================================================================================================ Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2293 2316 37 2.2 458.6 1.0X -UTF-8 is set 3389 3399 14 1.5 677.8 0.7X +No encoding 2459 2482 39 2.0 491.9 1.0X +UTF-8 is set 3337 3360 20 1.5 667.4 0.7X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 1900 1931 36 2.6 380.1 1.0X -UTF-8 is set 3049 3055 6 1.6 609.7 0.6X +No encoding 2195 2205 11 2.3 439.1 1.0X +UTF-8 is set 3159 3169 9 1.6 631.7 0.7X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 4662 4674 20 0.2 4661.7 1.0X -UTF-8 is set 4492 4508 20 0.2 4491.8 1.0X +No encoding 4837 4914 116 0.2 4837.1 1.0X +UTF-8 is set 4384 4417 30 0.2 4383.6 1.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 9989 10251 226 0.0 199788.0 1.0X -UTF-8 is set 10872 10943 93 0.0 217437.4 0.9X +No encoding 9775 9911 129 0.0 195491.4 1.0X +UTF-8 is set 10824 10845 31 0.0 216478.6 0.9X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 1724 1740 17 0.6 1723.6 1.0X -Select 1 column 1345 1349 7 0.7 1344.6 1.3X +Select 10 columns 1606 1614 8 0.6 1606.2 1.0X +Select 1 column 1334 1341 7 0.7 1333.7 1.2X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 633 640 9 1.6 632.9 1.0X -Short column with UTF-8 872 886 22 1.1 872.1 0.7X -Wide column without encoding 5266 5277 12 0.2 5266.2 0.1X -Wide column with UTF-8 6953 6959 8 0.1 6953.0 0.1X +Short column without encoding 595 596 2 1.7 594.9 1.0X +Short column with UTF-8 819 828 10 1.2 819.2 0.7X +Wide column without encoding 5442 5464 28 0.2 5442.1 0.1X +Wide column with UTF-8 6442 6454 12 0.2 6442.0 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 58 61 4 17.2 58.2 1.0X -from_json 1235 1257 21 0.8 1235.4 0.0X -json_tuple 1101 1110 10 0.9 1100.5 0.1X -get_json_object wholestage off 1063 1068 6 0.9 1062.6 0.1X -get_json_object wholestage on 989 989 1 1.0 988.7 0.1X +Text read 55 56 1 18.2 55.0 1.0X +from_json 1152 1156 3 0.9 1152.1 0.0X +json_tuple 1185 1188 4 0.8 1185.0 0.0X +get_json_object wholestage off 1093 1099 10 0.9 1093.3 0.1X +get_json_object wholestage on 1017 1019 1 1.0 1017.3 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 230 236 8 21.7 46.1 1.0X -schema inferring 1914 1921 11 2.6 382.7 0.1X -parsing 2849 2856 9 1.8 569.8 0.1X +Text read 236 238 2 21.2 47.2 1.0X +schema inferring 2018 2025 8 2.5 403.6 0.1X +parsing 2730 2737 10 1.8 546.1 0.1X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 536 548 14 9.3 107.1 1.0X -Schema inferring 2366 2374 7 2.1 473.2 0.2X -Parsing without charset 2908 2911 3 1.7 581.6 0.2X -Parsing with UTF-8 4059 4064 8 1.2 811.8 0.1X +Text read 549 552 4 9.1 109.9 1.0X +Schema inferring 2522 2525 4 2.0 504.4 0.2X +Parsing without charset 2921 2933 17 1.7 584.2 0.2X +Parsing with UTF-8 3873 3881 13 1.3 774.7 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 106 112 9 9.4 105.9 1.0X -to_json(timestamp) 744 747 4 1.3 743.6 0.1X -write timestamps to files 633 637 4 1.6 633.4 0.2X -Create a dataset of dates 124 128 5 8.1 123.8 0.9X -to_json(date) 560 561 1 1.8 559.9 0.2X -write dates to files 453 466 12 2.2 452.7 0.2X +Create a dataset of timestamps 103 107 7 9.7 103.1 1.0X +to_json(timestamp) 737 742 5 1.4 736.5 0.1X +write timestamps to files 644 646 2 1.6 643.9 0.2X +Create a dataset of dates 111 117 6 9.0 110.7 0.9X +to_json(date) 557 562 6 1.8 556.6 0.2X +write dates to files 434 436 2 2.3 434.1 0.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 146 154 8 6.8 146.2 1.0X -read timestamps from files 1079 1084 5 0.9 1079.0 0.1X -infer timestamps from files 1977 1986 11 0.5 1976.8 0.1X -read date text from files 142 144 2 7.0 142.4 1.0X -read date from files 718 725 6 1.4 718.1 0.2X -timestamp strings 137 138 1 7.3 137.2 1.1X -parse timestamps from Dataset[String] 1258 1275 14 0.8 1258.0 0.1X -infer timestamps from Dataset[String] 2182 2186 6 0.5 2182.4 0.1X -date strings 196 201 5 5.1 195.5 0.7X -parse dates from Dataset[String] 1016 1025 7 1.0 1016.3 0.1X -from_json(timestamp) 1924 1953 38 0.5 1924.2 0.1X -from_json(date) 1644 1696 74 0.6 1644.1 0.1X -infer error timestamps from Dataset[String] with default format 1463 1473 9 0.7 1463.1 0.1X -infer error timestamps from Dataset[String] with user-provided format 1451 1459 12 0.7 1450.6 0.1X -infer error timestamps from Dataset[String] with legacy format 1486 1494 8 0.7 1486.3 0.1X +read timestamp text from files 151 157 8 6.6 150.7 1.0X +read timestamps from files 1071 1086 13 0.9 1071.1 0.1X +infer timestamps from files 2021 2025 5 0.5 2020.8 0.1X +read date text from files 137 147 11 7.3 136.5 1.1X +read date from files 699 705 9 1.4 698.7 0.2X +timestamp strings 143 149 5 7.0 143.4 1.1X +parse timestamps from Dataset[String] 1251 1255 3 0.8 1251.1 0.1X +infer timestamps from Dataset[String] 2181 2186 5 0.5 2181.1 0.1X +date strings 226 234 13 4.4 225.7 0.7X +parse 
dates from Dataset[String] 974 977 4 1.0 973.8 0.2X +from_json(timestamp) 1758 1764 9 0.6 1758.2 0.1X +from_json(date) 1470 1473 3 0.7 1469.7 0.1X +infer error timestamps from Dataset[String] with default format 1436 1438 3 0.7 1436.1 0.1X +infer error timestamps from Dataset[String] with user-provided format 1437 1444 8 0.7 1437.4 0.1X +infer error timestamps from Dataset[String] with legacy format 1448 1450 3 0.7 1448.2 0.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 5708 5724 17 0.0 57078.7 1.0X -pushdown disabled 5625 5646 20 0.0 56254.5 1.0X -w/ filters 742 770 38 0.1 7418.9 7.7X +w/o filters 5891 5911 22 0.0 58911.2 1.0X +pushdown disabled 5547 5560 11 0.0 55470.8 1.1X +w/ filters 618 626 10 0.2 6177.6 9.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Partial JSON results: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -parse invalid JSON 2551 2628 90 0.0 255064.5 1.0X +parse invalid JSON 2319 2338 26 0.0 231898.9 1.0X diff --git a/sql/core/benchmarks/JsonBenchmark-results.txt b/sql/core/benchmarks/JsonBenchmark-results.txt index 20388dc756fb8..3f4b9e435b06d 100644 --- a/sql/core/benchmarks/JsonBenchmark-results.txt +++ b/sql/core/benchmarks/JsonBenchmark-results.txt @@ -3,128 +3,128 @@ Benchmark for performance of JSON parsing ================================================================================================ Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor JSON schema inferring: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2456 2513 59 2.0 491.2 1.0X -UTF-8 is set 3355 3365 15 1.5 671.1 0.7X +No encoding 2406 2422 16 2.1 481.1 1.0X +UTF-8 is set 3323 3335 10 1.5 664.5 0.7X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor count a short column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 2162 2201 34 2.3 432.4 1.0X -UTF-8 is set 3168 3178 17 1.6 633.5 0.7X +No encoding 2005 2037 29 2.5 401.0 1.0X +UTF-8 is set 3138 3143 5 1.6 627.6 0.6X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor count a wide column: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 3185 3258 122 0.3 3185.0 1.0X -UTF-8 is set 4058 4093 42 0.2 4058.1 0.8X +No encoding 3337 3406 64 0.3 3336.8 1.0X +UTF-8 is set 4383 4411 27 0.2 4383.4 0.8X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor select wide row: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -No encoding 9244 9334 132 0.0 184884.5 1.0X -UTF-8 is set 10249 10258 10 0.0 204988.0 0.9X +No encoding 9364 9394 35 0.0 187287.2 1.0X +UTF-8 is set 10402 10439 42 0.0 208036.3 0.9X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Select a subset of 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Select 10 columns 1641 1650 7 0.6 1641.4 1.0X -Select 1 column 1118 1120 3 0.9 1117.9 1.5X +Select 10 columns 1657 1663 5 0.6 1657.2 1.0X +Select 1 column 1328 1331 4 0.8 1327.7 1.2X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor creation of JSON parser per line: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Short column without encoding 627 635 7 1.6 626.7 1.0X -Short column with UTF-8 819 834 15 1.2 819.5 0.8X -Wide column without encoding 5191 5227 39 0.2 5191.4 0.1X -Wide column with UTF-8 6490 6506 17 0.2 6489.9 0.1X +Short column without encoding 656 656 1 1.5 655.8 1.0X +Short column with UTF-8 844 858 23 1.2 843.5 0.8X +Wide column without encoding 5501 5529 26 0.2 5501.5 0.1X +Wide column with UTF-8 6440 6460 25 0.2 6440.0 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor JSON functions: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 57 58 0 17.4 57.4 1.0X -from_json 1105 1118 17 0.9 1105.2 0.1X -json_tuple 1151 1152 1 0.9 1151.2 0.0X -get_json_object wholestage off 1080 1081 2 0.9 1079.8 0.1X -get_json_object wholestage on 1018 1024 7 1.0 1018.3 0.1X +Text read 51 52 1 19.8 50.6 1.0X +from_json 1134 1142 7 0.9 1134.4 0.0X +json_tuple 1117 1121 4 0.9 1116.9 0.0X +get_json_object wholestage off 1036 1042 7 1.0 1036.3 0.0X +get_json_object wholestage on 944 945 1 1.1 944.3 0.1X Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Dataset of json strings: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 255 257 1 19.6 51.1 1.0X -schema inferring 1775 1776 2 2.8 355.0 0.1X -parsing 2833 2835 3 1.8 566.6 0.1X +Text read 227 230 3 22.0 45.5 1.0X +schema inferring 1835 1836 1 2.7 367.1 0.1X +parsing 2831 2843 15 1.8 566.3 0.1X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Json files in the per-line mode: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Text read 581 583 2 8.6 116.2 1.0X -Schema inferring 2391 2397 6 2.1 478.2 0.2X -Parsing without charset 2973 2975 3 1.7 594.6 0.2X -Parsing with UTF-8 3956 3969 17 1.3 791.2 0.1X +Text read 588 594 6 8.5 117.5 1.0X +Schema inferring 2444 2449 5 2.0 488.8 0.2X +Parsing without charset 3046 3052 5 1.6 609.2 0.2X +Parsing with UTF-8 3937 3940 4 1.3 787.4 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Create a dataset of timestamps 109 119 13 9.2 109.3 1.0X -to_json(timestamp) 795 800 6 1.3 794.8 0.1X -write timestamps to files 730 734 3 1.4 730.1 0.1X -Create a dataset of dates 133 143 8 7.5 133.3 0.8X -to_json(date) 598 601 4 1.7 598.1 0.2X -write dates to files 475 478 3 2.1 474.6 0.2X +Create a dataset of timestamps 100 105 6 10.0 100.5 1.0X +to_json(timestamp) 815 820 4 1.2 815.2 0.1X +write timestamps to files 734 745 14 1.4 733.6 0.1X +Create a dataset of dates 112 118 6 8.9 111.9 0.9X +to_json(date) 606 608 3 1.6 606.3 0.2X +write dates to files 472 480 8 2.1 472.3 0.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read dates and timestamps: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -read timestamp text from files 149 153 4 6.7 148.5 1.0X -read timestamps from files 1049 1057 8 1.0 1048.6 0.1X -infer timestamps from files 1942 1969 37 0.5 1942.4 0.1X -read date text from files 141 143 3 7.1 140.7 1.1X -read date from files 712 718 7 1.4 712.4 0.2X -timestamp strings 142 158 14 7.0 142.4 1.0X -parse timestamps from Dataset[String] 1286 1291 5 0.8 1285.9 0.1X -infer timestamps from Dataset[String] 2139 2145 6 0.5 2138.6 0.1X -date strings 209 210 1 4.8 209.0 0.7X -parse dates from Dataset[String] 1019 1026 6 1.0 1019.0 0.1X -from_json(timestamp) 1738 1741 5 0.6 1737.8 0.1X -from_json(date) 1477 1482 6 0.7 1477.3 0.1X -infer error timestamps from Dataset[String] with default format 1380 1387 6 0.7 1380.4 0.1X -infer error timestamps from 
Dataset[String] with user-provided format 1380 1388 7 0.7 1380.5 0.1X -infer error timestamps from Dataset[String] with legacy format 1442 1450 9 0.7 1442.3 0.1X +read timestamp text from files 154 156 3 6.5 153.9 1.0X +read timestamps from files 1048 1055 6 1.0 1048.1 0.1X +infer timestamps from files 1962 1967 6 0.5 1961.6 0.1X +read date text from files 139 148 12 7.2 139.1 1.1X +read date from files 728 736 7 1.4 727.8 0.2X +timestamp strings 135 140 5 7.4 134.7 1.1X +parse timestamps from Dataset[String] 1193 1197 3 0.8 1192.9 0.1X +infer timestamps from Dataset[String] 2042 2046 4 0.5 2042.3 0.1X +date strings 203 204 1 4.9 203.1 0.8X +parse dates from Dataset[String] 991 996 8 1.0 990.6 0.2X +from_json(timestamp) 1670 1679 11 0.6 1669.9 0.1X +from_json(date) 1459 1460 1 0.7 1458.6 0.1X +infer error timestamps from Dataset[String] with default format 1393 1400 7 0.7 1392.6 0.1X +infer error timestamps from Dataset[String] with user-provided format 1384 1388 5 0.7 1383.6 0.1X +infer error timestamps from Dataset[String] with legacy format 1418 1419 2 0.7 1418.3 0.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Filters pushdown: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -w/o filters 5828 5845 19 0.0 58278.4 1.0X -pushdown disabled 5515 5536 32 0.0 55146.3 1.1X -w/ filters 685 691 7 0.1 6845.1 8.5X +w/o filters 5964 5972 10 0.0 59641.7 1.0X +pushdown disabled 5780 5785 6 0.0 57798.8 1.0X +w/ filters 701 702 1 0.1 7010.4 8.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Partial JSON results: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -parse invalid JSON 2346 2495 227 0.0 234637.6 1.0X +parse invalid JSON 2429 2545 138 0.0 242888.8 1.0X diff --git a/sql/core/benchmarks/MakeDateTimeBenchmark-jdk21-results.txt b/sql/core/benchmarks/MakeDateTimeBenchmark-jdk21-results.txt index 558c1887d63fe..7ff49dc2c3d73 100644 --- a/sql/core/benchmarks/MakeDateTimeBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/MakeDateTimeBenchmark-jdk21-results.txt @@ -1,22 +1,22 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor make_date(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare make_date() 1981 2010 32 50.5 19.8 1.0X -make_date(2019, 9, 16) 1833 1849 14 54.5 18.3 1.1X -make_date(*, *, *) 4011 4035 26 24.9 40.1 0.5X +prepare make_date() 2324 2379 51 43.0 23.2 1.0X +make_date(2019, 9, 16) 1859 1869 11 53.8 18.6 1.3X +make_date(*, *, *) 2884 2914 29 34.7 28.8 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor make_timestamp(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
--------------------------------------------------------------------------------------------------------------------------- -prepare make_timestamp() 351 361 11 2.8 351.3 1.0X -make_timestamp(2019, 1, 2, 3, 4, 50.123456) 42 55 12 23.9 41.9 8.4X -make_timestamp(2019, 1, 2, 3, 4, 60.000000) 36 40 4 28.0 35.8 9.8X -make_timestamp(2019, 12, 31, 23, 59, 60.00) 34 43 12 29.7 33.7 10.4X -make_timestamp(*, *, *, 3, 4, 50.123456) 164 166 2 6.1 163.9 2.1X -make_timestamp(*, *, *, *, *, 0) 101 108 10 9.9 101.3 3.5X -make_timestamp(*, *, *, *, *, 60.0) 145 147 2 6.9 145.5 2.4X -make_timestamp(2019, 1, 2, *, *, *) 460 462 2 2.2 460.3 0.8X -make_timestamp(*, *, *, *, *, *) 477 480 4 2.1 476.5 0.7X +prepare make_timestamp() 312 318 6 3.2 312.2 1.0X +make_timestamp(2019, 1, 2, 3, 4, 50.123456) 48 49 1 20.7 48.3 6.5X +make_timestamp(2019, 1, 2, 3, 4, 60.000000) 33 38 5 30.0 33.3 9.4X +make_timestamp(2019, 12, 31, 23, 59, 60.00) 32 35 3 30.9 32.3 9.7X +make_timestamp(*, *, *, 3, 4, 50.123456) 165 169 6 6.1 164.7 1.9X +make_timestamp(*, *, *, *, *, 0) 107 110 2 9.3 107.3 2.9X +make_timestamp(*, *, *, *, *, 60.0) 149 159 14 6.7 149.2 2.1X +make_timestamp(2019, 1, 2, *, *, *) 476 477 1 2.1 475.8 0.7X +make_timestamp(*, *, *, *, *, *) 495 503 9 2.0 495.5 0.6X diff --git a/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt b/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt index 8f53cd7815be2..02c43e519ff81 100644 --- a/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt +++ b/sql/core/benchmarks/MakeDateTimeBenchmark-results.txt @@ -1,22 +1,22 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor make_date(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -prepare make_date() 2009 2031 35 49.8 20.1 1.0X -make_date(2019, 9, 16) 1855 1866 10 53.9 18.6 1.1X -make_date(*, *, *) 3980 4011 27 25.1 39.8 0.5X +prepare make_date() 2144 2161 29 46.6 21.4 1.0X +make_date(2019, 9, 16) 1807 1812 5 55.3 18.1 1.2X +make_date(*, *, *) 2860 2876 22 35.0 28.6 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor make_timestamp(): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -prepare make_timestamp() 362 369 8 2.8 361.7 1.0X -make_timestamp(2019, 1, 2, 3, 4, 50.123456) 45 49 5 22.0 45.4 8.0X -make_timestamp(2019, 1, 2, 3, 4, 60.000000) 38 40 2 26.0 38.5 9.4X -make_timestamp(2019, 12, 31, 23, 59, 60.00) 42 50 9 23.8 42.0 8.6X -make_timestamp(*, *, *, 3, 4, 50.123456) 158 162 6 6.3 158.4 2.3X -make_timestamp(*, *, *, *, *, 0) 106 113 8 9.5 105.5 3.4X -make_timestamp(*, *, *, *, *, 60.0) 144 146 2 6.9 144.1 2.5X -make_timestamp(2019, 1, 2, *, *, *) 471 473 3 2.1 470.9 0.8X -make_timestamp(*, *, *, *, *, *) 450 455 6 2.2 449.8 0.8X +prepare make_timestamp() 327 329 3 3.1 326.9 1.0X +make_timestamp(2019, 1, 2, 3, 4, 50.123456) 34 34 1 29.8 33.6 9.7X +make_timestamp(2019, 1, 2, 3, 4, 60.000000) 34 40 5 29.4 34.1 9.6X +make_timestamp(2019, 12, 31, 23, 59, 60.00) 34 38 4 29.6 33.8 9.7X +make_timestamp(*, *, *, 3, 4, 50.123456) 171 176 5 5.9 170.5 1.9X +make_timestamp(*, *, *, *, *, 0) 101 108 10 9.9 101.0 3.2X 
+make_timestamp(*, *, *, *, *, 60.0) 144 146 3 7.0 143.6 2.3X +make_timestamp(2019, 1, 2, *, *, *) 429 430 1 2.3 428.8 0.8X +make_timestamp(*, *, *, *, *, *) 481 488 6 2.1 481.2 0.7X diff --git a/sql/core/benchmarks/MetadataStructBenchmark-jdk21-results.txt b/sql/core/benchmarks/MetadataStructBenchmark-jdk21-results.txt index be07b82eba912..0d974239430df 100644 --- a/sql/core/benchmarks/MetadataStructBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/MetadataStructBenchmark-jdk21-results.txt @@ -2,45 +2,45 @@ Metadata Struct Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Vectorized Parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 378 399 21 13.2 75.6 1.0X -_metadata.file_path 476 507 15 10.5 95.3 0.8X -_metadata.file_name 490 504 8 10.2 97.9 0.8X -_metadata.file_size 390 417 8 12.8 78.0 1.0X -_metadata.file_block_start 406 417 10 12.3 81.1 0.9X -_metadata.file_block_length 384 413 10 13.0 76.9 1.0X -_metadata.file_modification_time 404 416 6 12.4 80.7 0.9X -_metadata.row_index 440 468 10 11.4 88.0 0.9X -_metadata 727 744 14 6.9 145.5 0.5X +no metadata columns 631 647 9 7.9 126.3 1.0X +_metadata.file_path 704 737 10 7.1 140.9 0.9X +_metadata.file_name 700 739 15 7.1 139.9 0.9X +_metadata.file_size 623 666 15 8.0 124.7 1.0X +_metadata.file_block_start 630 665 12 7.9 126.0 1.0X +_metadata.file_block_length 622 661 17 8.0 124.3 1.0X +_metadata.file_modification_time 629 664 13 7.9 125.8 1.0X +_metadata.row_index 669 713 17 7.5 133.8 0.9X +_metadata 961 993 20 5.2 192.2 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Parquet-mr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 1702 1724 15 2.9 340.5 1.0X -_metadata.file_path 2177 2218 32 2.3 435.4 0.8X -_metadata.file_name 2200 2226 28 2.3 440.0 0.8X -_metadata.file_size 1995 2037 19 2.5 399.0 0.9X -_metadata.file_block_start 2015 2044 17 2.5 403.0 0.8X -_metadata.file_block_length 2021 2044 14 2.5 404.2 0.8X -_metadata.file_modification_time 2000 2042 20 2.5 399.9 0.9X -_metadata.row_index 2095 2136 17 2.4 418.9 0.8X -_metadata 3039 3088 32 1.6 607.9 0.6X +no metadata columns 2687 2714 20 1.9 537.3 1.0X +_metadata.file_path 3372 3402 21 1.5 674.4 0.8X +_metadata.file_name 3370 3402 23 1.5 673.9 0.8X +_metadata.file_size 3227 3256 61 1.5 645.4 0.8X +_metadata.file_block_start 3196 3225 21 1.6 639.1 0.8X +_metadata.file_block_length 3198 3246 30 1.6 639.7 0.8X +_metadata.file_modification_time 3189 3239 19 1.6 637.9 0.8X +_metadata.row_index 3643 3686 25 1.4 728.5 0.7X +_metadata 4684 4710 20 1.1 936.9 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor JSON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 
4770 4852 44 1.0 953.9 1.0X -_metadata.file_path 5356 5374 10 0.9 1071.1 0.9X -_metadata.file_name 5372 5397 21 0.9 1074.4 0.9X -_metadata.file_size 5130 5167 13 1.0 1025.9 0.9X -_metadata.file_block_start 5143 5165 11 1.0 1028.7 0.9X -_metadata.file_block_length 5136 5162 25 1.0 1027.2 0.9X -_metadata.file_modification_time 5146 5158 8 1.0 1029.1 0.9X -_metadata 5864 5886 15 0.9 1172.8 0.8X +no metadata columns 6920 6947 27 0.7 1384.1 1.0X +_metadata.file_path 7691 7716 17 0.7 1538.2 0.9X +_metadata.file_name 7694 7728 32 0.6 1538.8 0.9X +_metadata.file_size 7502 7538 26 0.7 1500.5 0.9X +_metadata.file_block_start 7513 7536 19 0.7 1502.6 0.9X +_metadata.file_block_length 7504 7525 13 0.7 1500.9 0.9X +_metadata.file_modification_time 7501 7520 11 0.7 1500.2 0.9X +_metadata 8293 8310 10 0.6 1658.6 0.8X diff --git a/sql/core/benchmarks/MetadataStructBenchmark-results.txt b/sql/core/benchmarks/MetadataStructBenchmark-results.txt index 2be20cea649de..b74cc469b2c8d 100644 --- a/sql/core/benchmarks/MetadataStructBenchmark-results.txt +++ b/sql/core/benchmarks/MetadataStructBenchmark-results.txt @@ -2,45 +2,45 @@ Metadata Struct Benchmark ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Vectorized Parquet: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 380 405 22 13.1 76.0 1.0X -_metadata.file_path 489 503 7 10.2 97.8 0.8X -_metadata.file_name 487 495 7 10.3 97.5 0.8X -_metadata.file_size 410 418 6 12.2 82.1 0.9X -_metadata.file_block_start 403 409 4 12.4 80.6 0.9X -_metadata.file_block_length 406 412 5 12.3 81.2 0.9X -_metadata.file_modification_time 406 414 8 12.3 81.1 0.9X -_metadata.row_index 451 458 8 11.1 90.2 0.8X -_metadata 764 773 6 6.5 152.8 0.5X +no metadata columns 649 669 23 7.7 129.8 1.0X +_metadata.file_path 750 765 10 6.7 150.0 0.9X +_metadata.file_name 776 793 20 6.4 155.2 0.8X +_metadata.file_size 687 706 24 7.3 137.5 0.9X +_metadata.file_block_start 684 697 11 7.3 136.7 0.9X +_metadata.file_block_length 686 703 13 7.3 137.3 0.9X +_metadata.file_modification_time 686 702 15 7.3 137.2 0.9X +_metadata.row_index 732 757 28 6.8 146.4 0.9X +_metadata 1048 1065 16 4.8 209.5 0.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Parquet-mr: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 2084 2104 15 2.4 416.7 1.0X -_metadata.file_path 2577 2611 34 1.9 515.3 0.8X -_metadata.file_name 2596 2624 29 1.9 519.3 0.8X -_metadata.file_size 2430 2468 61 2.1 486.0 0.9X -_metadata.file_block_start 2383 2407 31 2.1 476.7 0.9X -_metadata.file_block_length 2430 2457 21 2.1 486.1 0.9X -_metadata.file_modification_time 2427 2456 30 2.1 485.4 0.9X -_metadata.row_index 2898 2926 22 1.7 579.6 0.7X -_metadata 3924 3965 26 1.3 784.8 0.5X +no metadata columns 2565 2605 42 1.9 512.9 1.0X +_metadata.file_path 3396 3431 32 1.5 679.3 0.8X +_metadata.file_name 3391 3429 30 1.5 678.2 0.8X +_metadata.file_size 3174 3205 25 1.6 634.8 0.8X +_metadata.file_block_start 3187 3243 
61 1.6 637.4 0.8X +_metadata.file_block_length 3228 3266 39 1.5 645.6 0.8X +_metadata.file_modification_time 3220 3261 31 1.6 644.0 0.8X +_metadata.row_index 3609 3644 23 1.4 721.7 0.7X +_metadata 4854 4910 27 1.0 970.8 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor JSON: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -no metadata columns 5252 5270 18 1.0 1050.3 1.0X -_metadata.file_path 5861 5880 15 0.9 1172.2 0.9X -_metadata.file_name 5877 5899 11 0.9 1175.5 0.9X -_metadata.file_size 5610 5631 13 0.9 1122.1 0.9X -_metadata.file_block_start 5590 5619 26 0.9 1118.0 0.9X -_metadata.file_block_length 5600 5617 11 0.9 1120.1 0.9X -_metadata.file_modification_time 5598 5618 17 0.9 1119.6 0.9X -_metadata 6512 6555 29 0.8 1302.3 0.8X +no metadata columns 6808 6932 123 0.7 1361.5 1.0X +_metadata.file_path 7560 7591 18 0.7 1512.0 0.9X +_metadata.file_name 7594 7619 42 0.7 1518.9 0.9X +_metadata.file_size 7372 7392 10 0.7 1474.3 0.9X +_metadata.file_block_start 7369 7393 18 0.7 1473.8 0.9X +_metadata.file_block_length 7370 7389 15 0.7 1474.1 0.9X +_metadata.file_modification_time 7376 7393 13 0.7 1475.1 0.9X +_metadata 8188 8211 23 0.6 1637.5 0.8X diff --git a/sql/core/benchmarks/MetricsAggregationBenchmark-jdk21-results.txt b/sql/core/benchmarks/MetricsAggregationBenchmark-jdk21-results.txt index c97c0059404ef..6cc4668711283 100644 --- a/sql/core/benchmarks/MetricsAggregationBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/MetricsAggregationBenchmark-jdk21-results.txt @@ -1,12 +1,12 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor metrics aggregation (50 metrics, 100000 tasks per stage): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -1 stage(s) 681 733 49 0.0 680971074.0 1.0X -2 stage(s) 1345 1443 139 0.0 1345060505.0 0.5X -3 stage(s) 1872 1995 174 0.0 1871758987.0 0.4X +1 stage(s) 757 787 33 0.0 756863015.0 1.0X +2 stage(s) 1448 1611 230 0.0 1447967154.0 0.5X +3 stage(s) 2313 2394 115 0.0 2312633108.0 0.3X Stage Count Stage Proc. Time Aggreg. 
Time - 1 388 66 - 2 395 159 - 3 384 229 + 1 375 73 + 2 348 230 + 3 393 279 diff --git a/sql/core/benchmarks/MetricsAggregationBenchmark-results.txt b/sql/core/benchmarks/MetricsAggregationBenchmark-results.txt index 2b2d73d682521..14203a6d527e4 100644 --- a/sql/core/benchmarks/MetricsAggregationBenchmark-results.txt +++ b/sql/core/benchmarks/MetricsAggregationBenchmark-results.txt @@ -1,12 +1,12 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor metrics aggregation (50 metrics, 100000 tasks per stage): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ---------------------------------------------------------------------------------------------------------------------------------------- -1 stage(s) 614 653 49 0.0 613999403.0 1.0X -2 stage(s) 1306 1324 25 0.0 1306014655.0 0.5X -3 stage(s) 1977 2002 35 0.0 1977274301.0 0.3X +1 stage(s) 759 804 53 0.0 759264298.0 1.0X +2 stage(s) 1571 1604 48 0.0 1570666996.0 0.5X +3 stage(s) 2364 2417 75 0.0 2363843200.0 0.3X Stage Count Stage Proc. Time Aggreg. Time - 1 394 65 - 2 385 168 - 3 374 251 + 1 481 66 + 2 403 198 + 3 443 292 diff --git a/sql/core/benchmarks/MiscBenchmark-jdk21-results.txt b/sql/core/benchmarks/MiscBenchmark-jdk21-results.txt index 20e23bb049b39..d48562a67ddfe 100644 --- a/sql/core/benchmarks/MiscBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/MiscBenchmark-jdk21-results.txt @@ -2,126 +2,126 @@ filter & aggregate without group ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor range/filter/sum: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -range/filter/sum wholestage off 34570 35516 1338 60.7 16.5 1.0X -range/filter/sum wholestage on 2343 2443 192 895.2 1.1 14.8X +range/filter/sum wholestage off 39354 39673 451 53.3 18.8 1.0X +range/filter/sum wholestage on 3472 3578 64 604.1 1.7 11.3X ================================================================================================ range/limit/sum ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor range/limit/sum: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -range/limit/sum wholestage off 66 72 10 8003.4 0.1 1.0X -range/limit/sum wholestage on 54 65 7 9624.5 0.1 1.2X +range/limit/sum wholestage off 59 62 4 8881.1 0.1 1.0X +range/limit/sum wholestage on 60 67 7 8666.1 0.1 1.0X ================================================================================================ sample ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sample with replacement: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -sample with replacement wholestage off 7862 8003 199 16.7 60.0 1.0X -sample with replacement wholestage on 5047 5075 18 26.0 38.5 1.6X +sample with replacement wholestage off 8093 8118 35 16.2 61.7 1.0X +sample with replacement wholestage on 5100 5122 18 25.7 38.9 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sample without replacement: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -sample without replacement wholestage off 1844 1854 14 71.1 14.1 1.0X -sample without replacement wholestage on 637 649 10 205.8 4.9 2.9X +sample without replacement wholestage off 2716 2724 12 48.3 20.7 1.0X +sample without replacement wholestage on 652 659 8 200.9 5.0 4.2X ================================================================================================ collect ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor collect: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -collect 1 million 161 230 72 6.5 153.9 1.0X -collect 2 millions 356 427 54 2.9 339.4 0.5X -collect 4 millions 766 822 65 1.4 730.4 0.2X +collect 1 million 149 227 52 7.0 142.0 1.0X +collect 2 millions 288 413 105 3.6 274.5 0.5X +collect 4 millions 780 831 85 1.3 743.8 0.2X ================================================================================================ collect limit ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor collect limit: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -collect limit 1 million 153 228 82 6.8 146.0 1.0X -collect limit 2 millions 300 417 128 3.5 285.9 0.5X +collect limit 1 million 156 222 55 6.7 148.8 1.0X +collect limit 2 millions 322 441 83 3.3 307.4 0.5X ================================================================================================ generate explode ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate explode array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate explode array wholestage off 13081 13094 19 1.3 779.7 1.0X -generate explode array wholestage on 3028 3137 75 5.5 180.5 4.3X +generate explode array wholestage off 12087 12259 244 1.4 720.4 1.0X +generate explode array wholestage on 3313 3473 99 5.1 197.5 3.6X -OpenJDK 64-Bit Server VM 
21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate explode map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate explode map wholestage off 24054 24141 123 0.7 1433.7 1.0X -generate explode map wholestage on 9148 9370 220 1.8 545.2 2.6X +generate explode map wholestage off 24473 24489 22 0.7 1458.7 1.0X +generate explode map wholestage on 9589 9743 163 1.7 571.6 2.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate posexplode array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate posexplode array wholestage off 13193 13206 18 1.3 786.4 1.0X -generate posexplode array wholestage on 2898 2982 73 5.8 172.8 4.6X +generate posexplode array wholestage off 12779 12830 72 1.3 761.7 1.0X +generate posexplode array wholestage on 3340 3492 89 5.0 199.1 3.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate inline array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate inline array wholestage off 6456 6502 65 2.6 384.8 1.0X -generate inline array wholestage on 2340 2491 86 7.2 139.5 2.8X +generate inline array wholestage off 7039 7047 12 2.4 419.5 1.0X +generate inline array wholestage on 2712 2806 80 6.2 161.6 2.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate big struct array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate big struct array wholestage off 192 198 9 0.3 3198.6 1.0X -generate big struct array wholestage on 151 157 6 0.4 2518.7 1.3X +generate big struct array wholestage off 188 196 11 0.3 3127.5 1.0X +generate big struct array wholestage on 149 169 16 0.4 2484.0 1.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate big nested struct array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -generate big nested struct array wholestage off 18698 20695 2824 0.0 311636.5 1.0X -generate big nested struct array wholestage on 147 172 22 0.4 2449.1 127.2X +generate big nested struct array wholestage off 21562 21565 4 0.0 359373.5 1.0X +generate big nested struct array wholestage on 143 161 17 0.4 2378.5 151.1X ================================================================================================ generate regular generator ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate stack: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate stack wholestage off 13964 14158 274 1.2 832.3 1.0X -generate stack wholestage on 3062 3085 21 5.5 182.5 4.6X +generate stack wholestage off 13383 13385 2 1.3 797.7 1.0X +generate stack wholestage on 3139 3149 11 5.3 187.1 4.3X diff --git a/sql/core/benchmarks/MiscBenchmark-results.txt b/sql/core/benchmarks/MiscBenchmark-results.txt index 113839ab5cfba..bc6376495bc1d 100644 --- a/sql/core/benchmarks/MiscBenchmark-results.txt +++ b/sql/core/benchmarks/MiscBenchmark-results.txt @@ -2,126 +2,126 @@ filter & aggregate without group ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor range/filter/sum: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -range/filter/sum wholestage off 33468 36665 4522 62.7 16.0 1.0X -range/filter/sum wholestage on 2352 2437 160 891.6 1.1 14.2X +range/filter/sum wholestage off 39427 39448 31 53.2 18.8 1.0X +range/filter/sum wholestage on 3452 3698 149 607.6 1.6 11.4X ================================================================================================ range/limit/sum ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor range/limit/sum: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -range/limit/sum wholestage off 72 79 10 7264.4 0.1 1.0X -range/limit/sum wholestage on 64 69 6 8179.1 0.1 1.1X +range/limit/sum wholestage off 91 106 22 5753.7 0.2 1.0X +range/limit/sum wholestage on 75 84 10 6966.3 0.1 1.2X ================================================================================================ sample ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sample with replacement: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sample with replacement wholestage off 8125 8280 219 16.1 62.0 1.0X -sample with replacement wholestage on 4990 5012 30 26.3 38.1 1.6X +sample with replacement wholestage off 8030 8041 16 16.3 61.3 1.0X +sample with replacement wholestage on 4992 5008 26 26.3 38.1 1.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor sample without replacement: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------- -sample without replacement wholestage off 1912 1920 12 68.6 14.6 1.0X -sample without replacement wholestage on 646 652 6 202.9 4.9 3.0X +sample without replacement wholestage off 3093 3100 9 42.4 23.6 1.0X +sample without replacement wholestage on 630 660 28 208.1 4.8 4.9X ================================================================================================ collect ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor collect: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -collect 1 million 165 211 64 6.4 157.2 1.0X -collect 2 millions 284 401 112 3.7 271.1 0.6X -collect 4 millions 696 772 122 1.5 664.0 0.2X +collect 1 million 147 194 72 7.2 139.8 1.0X +collect 2 millions 274 394 87 3.8 261.2 0.5X +collect 4 millions 758 817 60 1.4 722.7 0.2X ================================================================================================ collect limit ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor collect limit: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -collect limit 1 million 153 211 70 6.9 145.5 1.0X -collect limit 2 millions 302 430 85 3.5 287.8 0.5X +collect limit 1 million 149 216 81 7.0 142.0 1.0X +collect limit 2 millions 283 397 87 3.7 269.6 0.5X ================================================================================================ generate explode ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate explode array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate explode array wholestage off 13376 13504 181 1.3 797.3 1.0X -generate explode array wholestage on 2842 2931 70 5.9 169.4 4.7X +generate explode array wholestage off 13755 13765 14 1.2 819.9 1.0X +generate explode array wholestage on 2847 2938 80 5.9 169.7 4.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate explode map: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate explode map wholestage off 24432 24452 28 0.7 1456.2 1.0X -generate explode map wholestage on 9206 9290 93 1.8 548.7 2.7X +generate explode map wholestage off 24921 24985 91 0.7 1485.4 1.0X +generate explode map wholestage on 9283 9399 83 1.8 553.3 2.7X -OpenJDK 64-Bit Server VM 
17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate posexplode array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate posexplode array wholestage off 13900 13921 29 1.2 828.5 1.0X -generate posexplode array wholestage on 2891 3015 71 5.8 172.3 4.8X +generate posexplode array wholestage off 14332 14431 139 1.2 854.3 1.0X +generate posexplode array wholestage on 2909 3002 52 5.8 173.4 4.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate inline array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate inline array wholestage off 6752 6784 45 2.5 402.5 1.0X -generate inline array wholestage on 2495 2563 86 6.7 148.7 2.7X +generate inline array wholestage off 7138 7205 96 2.4 425.5 1.0X +generate inline array wholestage on 2369 2489 116 7.1 141.2 3.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate big struct array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate big struct array wholestage off 227 235 13 0.3 3776.5 1.0X -generate big struct array wholestage on 167 185 26 0.4 2791.6 1.4X +generate big struct array wholestage off 194 211 25 0.3 3229.0 1.0X +generate big struct array wholestage on 165 173 10 0.4 2750.7 1.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate big nested struct array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------- -generate big nested struct array wholestage off 18438 20607 3067 0.0 307296.7 1.0X -generate big nested struct array wholestage on 167 178 9 0.4 2787.1 110.3X +generate big nested struct array wholestage off 17295 17757 654 0.0 288246.5 1.0X +generate big nested struct array wholestage on 163 174 11 0.4 2709.5 106.4X ================================================================================================ generate regular generator ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor generate stack: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -generate stack wholestage off 15293 15312 28 1.1 911.5 1.0X -generate stack wholestage on 3068 3096 26 5.5 182.9 5.0X +generate stack wholestage off 14910 14980 99 1.1 888.7 1.0X +generate stack wholestage on 3115 3129 14 5.4 185.7 4.8X diff --git a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-jdk21-results.txt 
b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-jdk21-results.txt index 41107864c1ad9..e5f8398d72d7a 100644 --- a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-jdk21-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For ORC v1 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 60 78 12 16.6 60.2 1.0X -Nested column 56 64 6 17.7 56.4 1.1X -Nested column in array 164 171 5 6.1 164.3 0.4X +Top-level column 49 60 11 20.5 48.8 1.0X +Nested column 51 55 5 19.8 50.6 1.0X +Nested column in array 159 165 5 6.3 159.4 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 245 256 10 4.1 244.8 1.0X -Nested column 239 255 13 4.2 238.5 1.0X -Nested column in array 525 543 14 1.9 524.5 0.5X +Top-level column 242 260 19 4.1 241.8 1.0X +Nested column 230 250 18 4.3 230.0 1.1X +Nested column in array 498 543 22 2.0 497.5 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 216 231 20 4.6 216.4 1.0X -Nested column 216 227 11 4.6 215.8 1.0X -Nested column in array 472 482 7 2.1 472.5 0.5X +Top-level column 208 219 9 4.8 207.9 1.0X +Nested column 214 218 6 4.7 214.1 1.0X +Nested column in array 477 492 10 2.1 476.6 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 210 220 6 4.8 210.0 1.0X -Nested column 233 245 9 4.3 233.0 0.9X -Nested column in array 501 509 5 2.0 501.0 0.4X +Top-level column 207 219 8 4.8 206.8 1.0X +Nested column 236 252 22 4.2 236.2 0.9X +Nested column in array 498 513 20 2.0 497.9 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 65 75 8 15.4 65.1 1.0X -Nested column 73 91 16 13.8 72.6 0.9X -Nested column in array 205 245 54 4.9 205.0 0.3X +Top-level column 63 73 9 15.9 62.8 1.0X +Nested column 70 83 13 14.3 70.1 0.9X +Nested column in array 200 
224 17 5.0 200.3 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 297 311 21 3.4 296.8 1.0X -Nested column 351 369 25 2.8 351.0 0.8X -Nested column in array 719 783 47 1.4 718.7 0.4X +Top-level column 288 302 14 3.5 287.6 1.0X +Nested column 344 356 15 2.9 344.0 0.8X +Nested column in array 712 760 44 1.4 711.8 0.4X diff --git a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt index e8e79b7b32039..f9a3f229b7ff5 100644 --- a/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt +++ b/sql/core/benchmarks/OrcNestedSchemaPruningBenchmark-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For ORC v1 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 70 90 12 14.2 70.3 1.0X -Nested column 60 68 6 16.7 60.0 1.2X -Nested column in array 162 169 5 6.2 161.9 0.4X +Top-level column 46 54 8 21.8 45.9 1.0X +Nested column 49 55 5 20.2 49.5 0.9X +Nested column in array 152 157 7 6.6 151.6 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 240 258 12 4.2 240.2 1.0X -Nested column 245 256 7 4.1 244.8 1.0X -Nested column in array 510 537 21 2.0 509.6 0.5X +Top-level column 243 257 21 4.1 243.3 1.0X +Nested column 238 263 15 4.2 238.3 1.0X +Nested column in array 508 531 18 2.0 507.9 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 216 223 4 4.6 216.2 1.0X -Nested column 221 230 5 4.5 220.8 1.0X -Nested column in array 475 479 5 2.1 474.6 0.5X +Top-level column 218 226 7 4.6 217.7 1.0X +Nested column 222 227 3 4.5 221.6 1.0X +Nested column in array 471 479 6 2.1 471.2 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 214 222 7 4.7 213.7 1.0X -Nested column 237 250 11 4.2 236.9 0.9X -Nested column in array 506 512 5 2.0 505.8 0.4X +Top-level column 216 
223 3 4.6 215.9 1.0X +Nested column 242 248 11 4.1 242.0 0.9X +Nested column in array 508 517 17 2.0 508.1 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 69 80 9 14.6 68.6 1.0X -Nested column 77 92 24 13.0 76.7 0.9X -Nested column in array 203 231 18 4.9 203.4 0.3X +Top-level column 56 73 13 17.7 56.5 1.0X +Nested column 68 82 17 14.8 67.6 0.8X +Nested column in array 216 226 9 4.6 216.3 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 311 324 11 3.2 311.0 1.0X -Nested column 359 369 7 2.8 359.0 0.9X -Nested column in array 715 727 13 1.4 714.7 0.4X +Top-level column 307 344 41 3.3 306.7 1.0X +Nested column 361 401 28 2.8 361.3 0.8X +Nested column in array 771 793 15 1.3 771.2 0.4X diff --git a/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-jdk21-results.txt b/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-jdk21-results.txt index b29fc02ad669d..2a303225d57b9 100644 --- a/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-jdk21-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For ORC v2 ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 67 81 12 15.0 66.6 1.0X -Nested column 59 65 4 17.1 58.6 1.1X -Nested column in array 167 178 9 6.0 167.1 0.4X +Top-level column 61 83 14 16.3 61.3 1.0X +Nested column 59 67 7 17.1 58.6 1.0X +Nested column in array 169 176 4 5.9 169.4 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 242 253 8 4.1 241.7 1.0X -Nested column 234 258 17 4.3 234.5 1.0X -Nested column in array 522 536 9 1.9 522.1 0.5X +Top-level column 242 263 16 4.1 242.4 1.0X +Nested column 237 260 10 4.2 236.6 1.0X +Nested column in array 513 535 14 1.9 513.5 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 212 225 9 4.7 212.0 1.0X -Nested 
column 218 228 9 4.6 217.5 1.0X -Nested column in array 473 480 5 2.1 473.4 0.4X +Top-level column 217 229 9 4.6 216.6 1.0X +Nested column 222 236 8 4.5 222.1 1.0X +Nested column in array 477 484 7 2.1 477.2 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 214 222 8 4.7 213.8 1.0X -Nested column 235 250 14 4.3 235.0 0.9X -Nested column in array 502 508 3 2.0 501.6 0.4X +Top-level column 214 225 9 4.7 214.3 1.0X +Nested column 240 254 14 4.2 239.8 0.9X +Nested column in array 508 520 8 2.0 508.4 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 64 79 13 15.7 63.7 1.0X -Nested column 75 92 14 13.3 75.2 0.8X -Nested column in array 217 254 54 4.6 216.5 0.3X +Top-level column 56 71 8 18.0 55.7 1.0X +Nested column 73 92 18 13.7 73.2 0.8X +Nested column in array 209 237 22 4.8 208.6 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 294 309 21 3.4 293.7 1.0X -Nested column 348 363 19 2.9 348.2 0.8X -Nested column in array 719 808 33 1.4 718.6 0.4X +Top-level column 300 315 21 3.3 299.8 1.0X +Nested column 353 366 17 2.8 353.4 0.8X +Nested column in array 809 838 18 1.2 809.3 0.4X diff --git a/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-results.txt b/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-results.txt index 8824e3aaa0d2c..80519cb6b28bc 100644 --- a/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-results.txt +++ b/sql/core/benchmarks/OrcV2NestedSchemaPruningBenchmark-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For ORC v2 ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 66 85 10 15.1 66.1 1.0X -Nested column 59 67 6 17.0 58.9 1.1X -Nested column in array 165 170 4 6.1 165.2 0.4X +Top-level column 80 103 13 12.5 79.8 1.0X +Nested column 70 81 9 14.3 69.9 1.1X +Nested column in array 183 193 6 5.5 182.6 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Top-level column 233 259 29 4.3 232.7 1.0X -Nested column 251 267 21 4.0 250.6 0.9X -Nested column in array 506 531 17 2.0 505.5 0.5X +Top-level column 269 283 10 3.7 268.9 1.0X +Nested column 259 282 16 3.9 259.4 1.0X +Nested column in array 567 581 15 1.8 567.0 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 214 220 7 4.7 214.3 1.0X -Nested column 219 225 6 4.6 218.6 1.0X -Nested column in array 470 476 4 2.1 470.4 0.5X +Top-level column 242 247 5 4.1 242.0 1.0X +Nested column 245 254 7 4.1 245.0 1.0X +Nested column in array 517 523 4 1.9 516.9 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 212 220 9 4.7 212.0 1.0X -Nested column 237 244 5 4.2 236.9 0.9X -Nested column in array 505 512 4 2.0 505.0 0.4X +Top-level column 234 246 10 4.3 234.2 1.0X +Nested column 259 270 7 3.9 258.7 0.9X +Nested column in array 547 559 9 1.8 546.8 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 69 84 18 14.4 69.3 1.0X -Nested column 76 92 15 13.2 76.0 0.9X -Nested column in array 206 229 16 4.8 206.2 0.3X +Top-level column 78 89 16 12.8 78.0 1.0X +Nested column 86 96 8 11.6 86.0 0.9X +Nested column in array 225 263 18 4.4 224.8 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 302 314 6 3.3 302.3 1.0X -Nested column 361 377 22 2.8 361.2 0.8X -Nested column in array 718 743 32 1.4 718.3 0.4X +Top-level column 344 359 10 2.9 344.1 1.0X +Nested column 385 409 14 2.6 384.6 0.9X +Nested column in array 769 781 10 1.3 768.9 0.4X diff --git a/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-jdk21-results.txt b/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-jdk21-results.txt index cd071f54b6e4d..2b5f80423b41d 100644 --- a/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-jdk21-results.txt @@ -1,21 +1,21 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Can skip all row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) 
Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 6478 6539 38 16.2 61.8 1.0X -With nested predicate Pushdown 54 74 20 1929.3 0.5 119.2X +Without nested predicate Pushdown 6507 6573 42 16.1 62.1 1.0X +With nested predicate Pushdown 51 66 15 2074.7 0.5 128.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Can skip some row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 6966 7033 67 15.1 66.4 1.0X -With nested predicate Pushdown 46 59 9 2286.9 0.4 151.9X +Without nested predicate Pushdown 6994 7049 34 15.0 66.7 1.0X +With nested predicate Pushdown 41 58 13 2582.3 0.4 172.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Can skip no row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 13440 13517 51 7.8 128.2 1.0X -With nested predicate Pushdown 13430 13499 67 7.8 128.1 1.0X +Without nested predicate Pushdown 13276 13334 59 7.9 126.6 1.0X +With nested predicate Pushdown 13267 13393 117 7.9 126.5 1.0X diff --git a/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-results.txt b/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-results.txt index 556da31a7c424..b2a9464e8f4d6 100644 --- a/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-results.txt +++ b/sql/core/benchmarks/ParquetNestedPredicatePushDownBenchmark-results.txt @@ -1,21 +1,21 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Can skip all row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 7164 7228 37 14.6 68.3 1.0X -With nested predicate Pushdown 67 87 15 1560.0 0.6 106.6X +Without nested predicate Pushdown 7218 7266 26 14.5 68.8 1.0X +With nested predicate Pushdown 49 72 14 2136.4 0.5 147.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Can skip some row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 7767 7799 36 13.5 74.1 1.0X -With nested predicate Pushdown 46 60 10 2287.6 0.4 169.5X +Without nested predicate Pushdown 7799 7864 49 13.4 74.4 1.0X +With nested predicate Pushdown 48 60 9 2194.5 0.5 163.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Can skip no row groups: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Without nested predicate Pushdown 14168 14233 35 7.4 135.1 1.0X -With nested predicate Pushdown 14156 14242 84 7.4 135.0 1.0X +Without nested predicate Pushdown 14137 14228 42 7.4 134.8 1.0X +With nested predicate Pushdown 14176 14233 49 7.4 135.2 1.0X diff --git a/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-jdk21-results.txt b/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-jdk21-results.txt index efe69ca7b5e2d..f3a5ff49939b7 100644 --- a/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-jdk21-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For Parquet ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 67 78 12 15.0 66.7 1.0X -Nested column 66 79 10 15.2 66.0 1.0X -Nested column in array 222 229 6 4.5 222.3 0.3X +Top-level column 64 77 8 15.6 64.1 1.0X +Nested column 65 74 9 15.3 65.3 1.0X +Nested column in array 245 251 6 4.1 244.6 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 243 264 17 4.1 242.5 1.0X -Nested column 245 271 18 4.1 244.8 1.0X -Nested column in array 565 614 28 1.8 565.4 0.4X +Top-level column 232 252 12 4.3 231.7 1.0X +Nested column 242 259 16 4.1 242.2 1.0X +Nested column in array 578 609 23 1.7 578.0 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 221 233 13 4.5 220.7 1.0X -Nested column 222 238 13 4.5 222.4 1.0X -Nested column in array 535 544 6 1.9 535.4 0.4X +Top-level column 214 226 9 4.7 213.5 1.0X +Nested column 219 229 9 4.6 219.0 1.0X +Nested column in array 540 551 9 1.9 540.2 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 213 222 8 4.7 212.6 1.0X -Nested column 236 255 11 4.2 236.1 0.9X -Nested column in array 559 590 24 1.8 559.4 0.4X +Top-level column 212 225 8 4.7 211.6 1.0X +Nested column 236 245 6 4.2 235.8 0.9X +Nested column in array 576 586 10 1.7 576.0 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 
6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 70 85 12 14.2 70.5 1.0X -Nested column 72 87 10 13.8 72.3 1.0X -Nested column in array 259 307 28 3.9 259.2 0.3X +Top-level column 75 85 11 13.4 74.7 1.0X +Nested column 77 100 17 12.9 77.2 1.0X +Nested column in array 283 300 10 3.5 283.1 0.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 305 317 15 3.3 305.1 1.0X -Nested column 359 384 37 2.8 359.0 0.8X -Nested column in array 771 822 34 1.3 770.7 0.4X +Top-level column 303 318 18 3.3 302.5 1.0X +Nested column 352 373 20 2.8 352.1 0.9X +Nested column in array 813 850 33 1.2 812.8 0.4X diff --git a/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-results.txt b/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-results.txt index 51e2fb1081aa5..e30af4fa62c56 100644 --- a/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-results.txt +++ b/sql/core/benchmarks/ParquetNestedSchemaPruningBenchmark-results.txt @@ -2,52 +2,52 @@ Nested Schema Pruning Benchmark For Parquet ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Selection: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 68 84 11 14.7 68.1 1.0X -Nested column 68 75 5 14.8 67.5 1.0X -Nested column in array 222 230 7 4.5 221.5 0.3X +Top-level column 75 87 10 13.3 74.9 1.0X +Nested column 72 80 8 13.9 72.0 1.0X +Nested column in array 243 248 5 4.1 242.7 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Limiting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 245 262 14 4.1 244.6 1.0X -Nested column 241 267 23 4.2 240.8 1.0X -Nested column in array 558 582 20 1.8 557.8 0.4X +Top-level column 251 283 41 4.0 250.7 1.0X +Nested column 258 275 11 3.9 258.5 1.0X +Nested column in array 586 622 33 1.7 586.3 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 219 224 4 4.6 218.9 1.0X -Nested column 224 231 5 4.5 224.4 1.0X -Nested column in array 523 528 5 1.9 523.2 0.4X +Top-level column 226 234 6 4.4 226.4 1.0X +Nested column 231 240 8 4.3 230.7 1.0X +Nested column in array 550 560 18 1.8 549.8 0.4X -OpenJDK 64-Bit Server VM 
17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repartitioning by exprs: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 216 220 3 4.6 215.7 1.0X -Nested column 238 245 5 4.2 238.4 0.9X -Nested column in array 552 562 6 1.8 552.3 0.4X +Top-level column 224 229 6 4.5 224.1 1.0X +Nested column 252 259 5 4.0 252.0 0.9X +Nested column in array 595 604 9 1.7 595.3 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sample: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 72 81 5 13.8 72.5 1.0X -Nested column 79 96 12 12.7 78.8 0.9X -Nested column in array 258 276 19 3.9 258.2 0.3X +Top-level column 75 86 9 13.4 74.8 1.0X +Nested column 76 98 21 13.1 76.4 1.0X +Nested column in array 286 300 11 3.5 286.1 0.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Sorting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Top-level column 307 312 3 3.3 306.9 1.0X -Nested column 351 359 6 2.9 350.6 0.9X -Nested column in array 767 777 12 1.3 766.6 0.4X +Top-level column 319 333 14 3.1 318.6 1.0X +Nested column 376 388 10 2.7 375.6 0.8X +Nested column in array 820 824 5 1.2 819.7 0.4X diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-jdk21-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-jdk21-results.txt index 6c4ad608aa760..42f2d9349d24a 100644 --- a/sql/core/benchmarks/PrimitiveArrayBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ Write primitive arrays in dataset ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write an array in Dataset: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 225 250 20 37.2 26.9 1.0X -Double 279 291 8 30.0 33.3 0.8X +Int 168 186 15 49.9 20.0 1.0X +Double 269 286 13 31.2 32.1 0.6X diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt index 918ea9b063ea0..eb5e87109dabc 100644 --- a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt +++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt @@ -2,11 +2,11 @@ Write primitive arrays in dataset ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write an array in Dataset: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Int 259 276 13 32.4 30.8 1.0X -Double 301 313 10 27.9 35.9 0.9X +Int 157 173 10 53.4 18.7 1.0X +Double 248 269 13 33.8 29.6 0.6X diff --git a/sql/core/benchmarks/RangeBenchmark-jdk21-results.txt b/sql/core/benchmarks/RangeBenchmark-jdk21-results.txt index 041e51290affc..16f19bd9bce4c 100644 --- a/sql/core/benchmarks/RangeBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/RangeBenchmark-jdk21-results.txt @@ -2,14 +2,14 @@ range ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor range: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -full scan 10045 10154 180 52.2 19.2 1.0X -limit after range 40 43 4 13223.6 0.1 253.4X -filter after range 1004 1028 16 522.3 1.9 10.0X -count after range 35 39 5 15096.5 0.1 289.2X -count after limit after range 40 41 1 13172.1 0.1 252.4X +full scan 12772 12920 156 41.1 24.4 1.0X +limit after range 19 19 0 27834.0 0.0 678.0X +filter after range 1020 1042 34 514.1 1.9 12.5X +count after range 334 339 6 1572.0 0.6 38.3X +count after limit after range 28 31 3 18729.5 0.1 456.2X diff --git a/sql/core/benchmarks/RangeBenchmark-results.txt b/sql/core/benchmarks/RangeBenchmark-results.txt index 0fb11bf11525f..d6a426e804efd 100644 --- a/sql/core/benchmarks/RangeBenchmark-results.txt +++ b/sql/core/benchmarks/RangeBenchmark-results.txt @@ -2,14 +2,14 @@ range ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor range: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -full scan 9808 10162 239 53.5 18.7 1.0X -limit after range 47 48 1 11171.4 0.1 209.0X -filter after range 1000 1016 20 524.1 1.9 9.8X -count after range 35 39 4 15014.2 0.1 280.9X -count after limit after range 34 39 5 15248.4 0.1 285.3X +full scan 13257 13466 272 39.5 25.3 1.0X +limit after range 16 17 1 31952.5 0.0 808.0X +filter after range 1029 1044 23 509.6 2.0 12.9X +count after range 187 191 2 2803.4 0.4 70.9X +count after limit after range 27 32 4 19467.1 0.1 492.3X diff --git a/sql/core/benchmarks/SortBenchmark-jdk21-results.txt b/sql/core/benchmarks/SortBenchmark-jdk21-results.txt index 1f7f55a5c5b81..0950e46adcf30 100644 --- a/sql/core/benchmarks/SortBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/SortBenchmark-jdk21-results.txt @@ -2,15 +2,15 @@ radix sort ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor radix sort 25000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -reference TimSort key prefix array 8388 8420 45 3.0 335.5 1.0X 
-reference Arrays.sort 2044 2069 35 12.2 81.8 4.1X -radix sort one byte 66 69 3 379.7 2.6 127.4X -radix sort two bytes 123 126 3 204.0 4.9 68.4X -radix sort eight bytes 467 482 14 53.6 18.7 18.0X -radix sort key prefix array 556 561 6 45.0 22.2 15.1X +reference TimSort key prefix array 8459 8536 108 3.0 338.4 1.0X +reference Arrays.sort 2076 2089 17 12.0 83.1 4.1X +radix sort one byte 67 70 2 372.3 2.7 126.0X +radix sort two bytes 125 130 4 200.1 5.0 67.7X +radix sort eight bytes 470 481 6 53.1 18.8 18.0X +radix sort key prefix array 566 572 4 44.2 22.6 14.9X diff --git a/sql/core/benchmarks/SortBenchmark-results.txt b/sql/core/benchmarks/SortBenchmark-results.txt index 7d6805987bcc9..68f6eed9b84cc 100644 --- a/sql/core/benchmarks/SortBenchmark-results.txt +++ b/sql/core/benchmarks/SortBenchmark-results.txt @@ -2,15 +2,15 @@ radix sort ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor radix sort 25000000: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -reference TimSort key prefix array 8372 8465 132 3.0 334.9 1.0X -reference Arrays.sort 2043 2071 40 12.2 81.7 4.1X -radix sort one byte 64 72 5 390.2 2.6 130.7X -radix sort two bytes 119 130 8 209.6 4.8 70.2X -radix sort eight bytes 476 508 27 52.5 19.0 17.6X -radix sort key prefix array 913 917 5 27.4 36.5 9.2X +reference TimSort key prefix array 8170 8294 175 3.1 326.8 1.0X +reference Arrays.sort 2059 2085 37 12.1 82.4 4.0X +radix sort one byte 64 71 5 391.0 2.6 127.8X +radix sort two bytes 117 126 5 213.2 4.7 69.7X +radix sort eight bytes 477 493 9 52.4 19.1 17.1X +radix sort key prefix array 912 920 7 27.4 36.5 9.0X diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk21-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk21-results.txt index 0317e61163752..6a42c7b283b7e 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk21-results.txt @@ -2,141 +2,143 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 1 1.1 936.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4068.9 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1500.4 0.6X +In-memory 10 11 1 1.0 968.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 42 2 0.2 4033.5 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1502.0 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 11 1 1.1 929.8 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 41 1 0.3 3955.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1497.3 0.6X +In-memory 9 11 1 1.1 943.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 48 50 1 0.2 4817.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1499.9 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 1 1.1 907.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 39 40 1 0.3 3886.5 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1497.2 0.6X +In-memory 9 10 1 1.1 906.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 54 56 1 0.2 5418.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1535.8 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 1 1.1 904.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 39 40 1 0.3 3859.8 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1497.2 0.6X +In-memory 10 11 1 1.1 951.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 57 58 1 0.2 5680.0 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 16 1 0.6 1563.7 0.6X ================================================================================================ merge rows ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -RocksDB (trackTotalNumberOfRows: true) 519 533 7 0.0 51916.6 1.0X -RocksDB (trackTotalNumberOfRows: false) 171 177 3 0.1 17083.9 3.0X +RocksDB (trackTotalNumberOfRows: true) 531 550 7 0.0 53076.7 1.0X +RocksDB (trackTotalNumberOfRows: false) 175 183 3 0.1 17475.3 3.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -RocksDB (trackTotalNumberOfRows: true) 506 521 7 0.0 50644.0 1.0X -RocksDB (trackTotalNumberOfRows: false) 170 176 3 0.1 17022.0 3.0X +RocksDB (trackTotalNumberOfRows: true) 522 538 8 0.0 52183.0 1.0X 
+RocksDB (trackTotalNumberOfRows: false) 171 177 3 0.1 17100.7 3.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -RocksDB (trackTotalNumberOfRows: true) 493 508 6 0.0 49319.3 1.0X -RocksDB (trackTotalNumberOfRows: false) 169 175 3 0.1 16897.6 2.9X +RocksDB (trackTotalNumberOfRows: true) 518 534 7 0.0 51827.6 1.0X +RocksDB (trackTotalNumberOfRows: false) 174 179 2 0.1 17358.9 3.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------- -RocksDB (trackTotalNumberOfRows: true) 495 508 6 0.0 49462.5 1.0X -RocksDB (trackTotalNumberOfRows: false) 169 175 3 0.1 16896.6 2.9X +RocksDB (trackTotalNumberOfRows: true) 522 537 6 0.0 52162.9 1.0X +RocksDB (trackTotalNumberOfRows: false) 173 181 3 0.1 17259.8 3.0X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 1 0 26.3 38.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 39 41 1 0.3 3942.0 0.0X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1529.2 0.0X +In-memory 1 1 0 17.5 57.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 56 58 1 0.2 5647.4 0.0X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1525.5 0.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.3 790.4 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 41 1 0.2 4036.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1536.9 0.5X +In-memory 8 9 0 1.2 826.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 50 51 1 0.2 4955.2 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1533.6 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(1000 rows 
are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 10 1 1.2 847.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4099.8 0.2X -RocksDB (trackTotalNumberOfRows: false) 16 16 0 0.6 1563.3 0.5X +In-memory 9 11 1 1.1 892.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 44 45 1 0.2 4351.5 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1526.5 0.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 1 1.2 859.4 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4118.9 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1507.8 0.6X +In-memory 9 10 1 1.1 894.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4142.6 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1509.7 0.6X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.2 831.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 40 1 0.3 3956.6 0.2X -RocksDB (trackTotalNumberOfRows: false) 16 16 0 0.6 1571.3 0.5X +In-memory 9 9 1 1.2 851.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 41 1 0.2 4030.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 16 17 1 0.6 1632.1 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 8 8 1 1.3 787.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 21 22 0 0.5 2112.6 0.4X -RocksDB (trackTotalNumberOfRows: false) 9 9 0 1.1 932.9 0.8X +In-memory 8 9 1 1.2 807.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 21 22 1 0.5 2124.6 0.4X +RocksDB (trackTotalNumberOfRows: false) 9 10 0 1.1 940.9 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.4 715.7 1.0X -RocksDB (trackTotalNumberOfRows: true) 7 7 0 1.5 676.3 1.1X -RocksDB (trackTotalNumberOfRows: false) 4 5 0 2.3 442.3 1.6X +In-memory 7 8 1 1.4 739.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 7 7 0 1.4 697.1 1.1X +RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.2 460.0 1.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 0 0 23.8 41.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 3 3 0 3.2 309.5 0.1X -RocksDB (trackTotalNumberOfRows: false) 3 3 0 3.2 309.9 0.1X +In-memory 0 1 0 23.9 41.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 3 3 0 3.0 328.1 0.1X +RocksDB (trackTotalNumberOfRows: false) 3 3 0 3.0 329.5 0.1X + + diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt index d2aa646d5ec1d..9c99d86e3bfd1 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt @@ -2,141 +2,143 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 10 12 1 1.0 960.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 42 43 2 0.2 4173.9 0.2X -RocksDB (trackTotalNumberOfRows: false) 16 16 1 0.6 1551.6 0.6X +In-memory 9 10 0 1.1 927.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 41 42 2 0.2 4063.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1500.5 0.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 10 12 1 1.0 970.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4095.8 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 17 1 0.6 1544.6 0.6X +In-memory 9 10 1 1.1 926.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 49 50 1 0.2 4853.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1495.9 0.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 11 1 1.1 933.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 41 1 0.3 3966.2 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.6 1540.2 0.6X +In-memory 9 10 0 1.1 900.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 54 55 1 0.2 5359.6 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 15 1 0.7 1491.9 0.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 11 1 1.1 936.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 39 41 1 0.3 3942.4 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1530.1 0.6X +In-memory 9 10 0 1.1 899.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 55 56 1 0.2 5500.9 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 15 0 0.7 1493.8 0.6X ================================================================================================ merge rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -RocksDB (trackTotalNumberOfRows: true) 525 538 6 0.0 52516.4 1.0X -RocksDB (trackTotalNumberOfRows: false) 170 177 4 0.1 16960.4 3.1X +RocksDB (trackTotalNumberOfRows: true) 515 526 6 0.0 51507.8 1.0X +RocksDB (trackTotalNumberOfRows: false) 167 175 3 0.1 16747.6 3.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -RocksDB (trackTotalNumberOfRows: true) 514 528 6 0.0 51351.9 1.0X -RocksDB (trackTotalNumberOfRows: false) 168 174 4 0.1 16794.0 3.1X +RocksDB (trackTotalNumberOfRows: true) 516 526 4 0.0 51588.3 1.0X +RocksDB (trackTotalNumberOfRows: false) 166 171 3 0.1 16579.3 3.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -RocksDB (trackTotalNumberOfRows: true) 500 513 6 0.0 49955.1 1.0X -RocksDB (trackTotalNumberOfRows: false) 169 174 2 0.1 16867.1 3.0X +RocksDB (trackTotalNumberOfRows: true) 513 523 5 0.0 51287.0 1.0X 
+RocksDB (trackTotalNumberOfRows: false) 165 170 2 0.1 16532.2 3.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor merging 10000 rows with 10 values per key (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------- -RocksDB (trackTotalNumberOfRows: true) 492 508 8 0.0 49225.8 1.0X -RocksDB (trackTotalNumberOfRows: false) 168 173 3 0.1 16757.2 2.9X +RocksDB (trackTotalNumberOfRows: true) 513 521 4 0.0 51288.3 1.0X +RocksDB (trackTotalNumberOfRows: false) 165 169 2 0.1 16482.6 3.1X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 1 0 26.1 38.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 38 40 1 0.3 3835.6 0.0X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1455.7 0.0X +In-memory 0 0 0 27.9 35.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 54 56 1 0.2 5448.6 0.0X +RocksDB (trackTotalNumberOfRows: false) 15 15 0 0.7 1458.7 0.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.3 793.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 41 1 0.2 4018.1 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.7 1505.6 0.5X +In-memory 8 8 0 1.3 772.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 48 49 1 0.2 4773.0 0.2X +RocksDB (trackTotalNumberOfRows: false) 14 15 0 0.7 1445.6 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 10 1 1.2 837.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4073.9 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 16 1 0.7 1470.6 0.6X +In-memory 8 9 0 1.2 826.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 42 43 1 0.2 4198.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 15 0 0.7 1460.1 0.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 
64-Core Processor trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 0 1.2 843.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4088.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 15 15 0 0.7 1466.1 0.6X +In-memory 8 9 0 1.2 833.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 41 1 0.2 4043.6 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 15 0 0.7 1457.1 0.6X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 0 1.2 833.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 41 0 0.3 3976.5 0.2X -RocksDB (trackTotalNumberOfRows: false) 16 16 0 0.6 1588.1 0.5X +In-memory 8 9 0 1.2 835.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 40 0 0.3 3972.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 15 16 0 0.6 1547.2 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 8 8 0 1.3 784.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 22 22 0 0.5 2155.1 0.4X -RocksDB (trackTotalNumberOfRows: false) 10 10 0 1.0 986.9 0.8X +In-memory 8 8 0 1.3 775.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 21 22 1 0.5 2130.5 0.4X +RocksDB (trackTotalNumberOfRows: false) 10 10 0 1.0 973.2 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.4 722.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 7 7 0 1.4 718.8 1.0X -RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.0 488.7 1.5X +In-memory 7 8 0 1.4 704.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 7 7 0 1.4 717.5 1.0X +RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.1 482.3 1.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1022-azure AMD EPYC 7763 64-Core Processor evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 1 0 21.3 46.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 4 4 0 2.8 358.9 0.1X -RocksDB (trackTotalNumberOfRows: false) 4 4 0 2.8 358.7 0.1X +In-memory 0 0 0 23.0 43.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 4 4 0 2.8 352.8 0.1X +RocksDB (trackTotalNumberOfRows: false) 4 4 0 2.8 353.7 0.1X + + diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk21-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk21-results.txt index 1ecc3156431a3..73165e6568854 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-jdk21-results.txt @@ -3,23 +3,23 @@ Benchmark for performance of subexpression elimination ================================================================================================ Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 6301 6360 57 0.0 63006990.7 1.0X -subExprElimination false, codegen: false 6059 6228 248 0.0 60587697.9 1.0X -subExprElimination true, codegen: true 1194 1265 92 0.0 11936424.6 5.3X -subExprElimination true, codegen: false 1184 1244 62 0.0 11839767.3 5.3X +subExprElimination false, codegen: true 5718 5952 306 0.0 57180602.0 1.0X +subExprElimination false, codegen: false 5691 5724 36 0.0 56912726.3 1.0X +subExprElimination true, codegen: true 1296 1353 56 0.0 12955974.7 4.4X +subExprElimination true, codegen: false 1195 1274 71 0.0 11946584.3 4.8X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 6555 6609 65 0.0 65552511.3 1.0X -subExprElimination false, codegen: false 6432 6501 92 0.0 64321921.4 1.0X -subExprElimination true, codegen: true 1871 1890 27 0.0 18708460.4 3.5X -subExprElimination true, codegen: false 1853 1894 37 0.0 18527264.1 3.5X +subExprElimination false, codegen: true 6036 6207 176 0.0 60362284.0 1.0X +subExprElimination false, codegen: false 6027 6111 106 0.0 60270452.3 1.0X +subExprElimination true, codegen: true 1975 2005 50 0.0 19751387.6 3.1X +subExprElimination true, codegen: false 1844 1969 108 0.0 18442635.2 3.3X diff --git a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt index 0ad0b3fdcc6cc..42f2df1de0337 100644 --- a/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt +++ b/sql/core/benchmarks/SubExprEliminationBenchmark-results.txt @@ -3,23 +3,23 @@ Benchmark for performance of subexpression elimination ================================================================================================ Preparing data for benchmarking ... 
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor from_json as subExpr in Project: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 6634 6827 265 0.0 66342414.3 1.0X -subExprElimination false, codegen: false 6492 6677 172 0.0 64915975.3 1.0X -subExprElimination true, codegen: true 1306 1328 31 0.0 13062245.2 5.1X -subExprElimination true, codegen: false 1253 1292 39 0.0 12527565.4 5.3X +subExprElimination false, codegen: true 6332 6606 239 0.0 63318653.1 1.0X +subExprElimination false, codegen: false 6178 6270 117 0.0 61782941.5 1.0X +subExprElimination true, codegen: true 1438 1497 64 0.0 14383249.6 4.4X +subExprElimination true, codegen: false 1382 1415 48 0.0 13817508.7 4.6X Preparing data for benchmarking ... -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor from_json as subExpr in Filter: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -subExprElimination false, codegen: true 6880 7088 206 0.0 68799261.9 1.0X -subExprElimination false, codegen: false 6806 6929 123 0.0 68063401.4 1.0X -subExprElimination true, codegen: true 1838 1928 93 0.0 18380916.3 3.7X -subExprElimination true, codegen: false 1847 1920 64 0.0 18467889.5 3.7X +subExprElimination false, codegen: true 6539 6660 105 0.0 65387594.7 1.0X +subExprElimination false, codegen: false 6548 6584 49 0.0 65477566.0 1.0X +subExprElimination true, codegen: true 2032 2093 66 0.0 20323994.4 3.2X +subExprElimination true, codegen: false 2016 2078 69 0.0 20155395.9 3.2X diff --git a/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt b/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt index 614381dc3e578..0d13b70e5682e 100644 --- a/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/TPCDSQueryBenchmark-jdk21-results.txt @@ -1,810 +1,810 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q1 550 699 210 0.8 1191.6 1.0X +q1 405 522 130 1.1 878.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q2 792 809 24 2.8 355.0 1.0X +q2 676 726 47 3.3 302.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q3 221 261 43 13.5 74.3 1.0X +q3 
184 201 10 16.1 62.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q4 4185 4574 550 1.2 802.9 1.0X +q4 4172 4480 435 1.2 800.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q5 1066 1325 366 5.3 189.4 1.0X +q5 1050 1064 20 5.4 186.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q6 1063 1110 67 2.9 340.5 1.0X +q6 989 1051 88 3.2 316.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q7 549 573 26 8.9 112.2 1.0X +q7 498 515 25 9.8 101.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q8 460 515 49 6.8 148.1 1.0X +q8 402 426 20 7.7 129.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q9 863 882 25 0.0 24667420.2 1.0X +q9 872 873 1 0.0 24921608.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10 1814 1934 170 1.1 875.8 1.0X +q10 1859 1959 140 1.1 897.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11 1614 1820 290 2.3 428.0 1.0X +q11 1675 1908 330 2.3 444.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD 
EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12 163 199 47 5.0 200.9 1.0X +q12 143 167 24 5.7 176.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q13 726 762 51 6.8 147.2 1.0X +q13 724 762 48 6.8 146.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14a 5344 5424 114 1.0 1041.7 1.0X +q14a 4892 5087 277 1.0 953.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14b 3893 4053 226 1.3 758.8 1.0X +q14b 3769 3856 123 1.4 734.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q15 408 443 27 4.1 245.5 1.0X +q15 404 441 43 4.1 242.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q16 564 627 42 2.8 360.8 1.0X +q16 587 645 53 2.7 375.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q17 1246 1299 74 3.8 265.1 1.0X +q17 1372 1376 5 3.4 292.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18 944 1043 140 3.8 262.2 1.0X +q18 926 1142 304 3.9 257.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q19 292 321 31 10.7 93.5 1.0X +q19 279 304 36 11.2 89.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20 173 187 21 8.9 112.8 1.0X +q20 160 183 25 9.6 104.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q21 661 672 10 17.9 55.8 1.0X +q21 623 650 28 19.0 52.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22 3223 3302 112 3.7 272.3 1.0X +q22 3106 3138 46 3.8 262.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23a 5979 6048 98 0.9 1143.3 1.0X +q23a 6166 6171 6 0.8 1179.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23b 6074 6126 74 0.9 1161.4 1.0X +q23b 6289 6440 214 0.8 1202.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24a 154 248 48 21.7 46.1 1.0X +q24a 206 235 24 16.2 61.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24b 208 249 30 16.0 62.5 1.0X +q24b 158 232 46 21.1 47.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q25 1401 1419 26 3.4 298.1 1.0X +q25 
1285 1317 46 3.7 273.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q26 300 332 35 11.5 86.8 1.0X +q26 292 314 23 11.8 84.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q27 477 504 28 10.2 97.6 1.0X +q27 516 553 38 9.5 105.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q28 1136 1207 100 2.5 394.5 1.0X +q28 1176 1179 5 2.4 408.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q29 1244 1413 239 3.8 264.7 1.0X +q29 1292 1294 2 3.6 275.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q30 388 406 33 0.8 1315.6 1.0X +q30 387 415 33 0.8 1313.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q31 749 775 31 5.0 201.3 1.0X +q31 740 843 125 5.0 198.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q32 183 218 34 8.3 119.8 1.0X +q32 183 198 20 8.3 119.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q33 379 421 47 13.7 73.2 1.0X +q33 401 432 25 12.9 77.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 
64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34 341 379 50 9.0 111.4 1.0X +q34 336 375 32 9.1 109.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35 1145 1212 94 1.8 552.9 1.0X +q35 1231 1240 13 1.7 594.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36 499 531 29 5.9 168.1 1.0X +q36 494 531 31 6.0 166.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q37 817 864 44 16.3 61.5 1.0X +q37 793 811 24 16.7 59.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q38 634 690 52 8.2 121.7 1.0X +q38 645 690 43 8.1 123.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39a 1401 1529 180 8.4 118.4 1.0X +q39a 1410 1507 136 8.4 119.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39b 1356 1362 8 8.7 114.5 1.0X +q39b 1362 1375 19 8.7 115.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q40 288 338 43 5.8 172.2 1.0X +q40 269 296 29 6.2 160.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q41 151 168 19 0.1 8388.1 1.0X +q41 147 167 20 0.1 8166.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q42 142 156 24 20.9 48.0 1.0X +q42 146 164 23 20.4 49.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q43 275 300 24 10.8 93.0 1.0X +q43 283 305 26 10.4 96.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q44 302 338 39 9.6 104.2 1.0X +q44 329 380 62 8.8 113.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q45 182 212 34 5.3 189.9 1.0X +q45 167 201 23 5.7 174.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q46 418 447 42 7.4 134.3 1.0X +q46 472 504 27 6.6 151.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47 1518 1551 48 2.0 510.8 1.0X +q47 1488 1654 235 2.0 501.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q48 874 891 19 5.6 177.4 1.0X +q48 877 906 27 5.6 178.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49 521 632 105 10.8 92.8 1.0X +q49 647 693 58 8.7 115.2 
1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q50 565 599 27 5.7 174.4 1.0X +q50 643 676 28 5.0 198.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51 2469 2610 199 1.5 672.5 1.0X +q51 2677 2903 319 1.4 729.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q52 140 160 21 21.2 47.1 1.0X +q52 145 157 16 20.5 48.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q53 276 300 23 10.8 92.8 1.0X +q53 276 292 14 10.8 92.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q54 1168 1204 50 4.5 221.2 1.0X +q54 1223 1256 47 4.3 231.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q55 139 151 17 21.3 46.9 1.0X +q55 145 168 24 20.5 48.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q56 415 431 20 12.5 80.2 1.0X +q56 397 426 29 13.0 76.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q57 772 822 73 2.0 504.5 1.0X +q57 752 812 62 2.0 491.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: 
Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q58 394 426 35 13.0 76.8 1.0X +q58 396 437 38 13.0 77.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q59 583 626 41 5.1 197.4 1.0X +q59 582 633 61 5.1 197.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q60 383 440 55 13.5 74.0 1.0X +q60 405 467 59 12.8 78.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q61 530 642 191 5.9 169.8 1.0X +q61 567 695 169 5.5 181.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q62 162 181 23 4.9 204.9 1.0X +q62 166 185 25 4.8 210.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q63 269 305 27 11.1 90.4 1.0X +q63 267 308 25 11.1 90.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64 2129 2334 290 3.3 307.6 1.0X +q64 2323 2600 392 3.0 335.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q65 614 646 39 4.8 206.6 1.0X +q65 680 704 35 4.4 228.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q66 509 575 60 4.6 219.4 1.0X +q66 518 579 65 4.5 223.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67 5168 5274 149 0.6 1739.7 1.0X +q67 5487 5527 57 0.5 1847.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q68 416 455 40 7.5 133.6 1.0X +q68 485 505 22 6.4 155.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q69 1601 1617 22 1.3 773.3 1.0X +q69 1592 1605 17 1.3 768.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q70 548 569 25 5.4 185.6 1.0X +q70 557 594 34 5.3 188.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q71 345 377 38 15.1 66.1 1.0X +q71 361 389 31 14.5 69.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q72 115881 116420 762 0.1 7550.3 1.0X +q72 111274 114140 4054 0.1 7250.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q73 304 342 30 10.1 99.3 1.0X +q73 315 352 36 9.7 103.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q74 1079 1503 600 3.5 286.2 1.0X 
+q74 1104 1493 550 3.4 292.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q75 1369 1575 292 4.1 242.9 1.0X +q75 1291 1479 266 4.4 229.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q76 269 305 42 19.1 52.5 1.0X +q76 286 304 25 18.0 55.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q77 546 755 182 10.3 97.3 1.0X +q77 452 545 83 12.4 80.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q78 2326 2453 179 2.4 414.3 1.0X +q78 1995 2312 450 2.8 355.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q79 403 432 33 7.6 131.7 1.0X +q79 427 454 38 7.2 139.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q80 1534 1569 49 3.7 271.7 1.0X +q80 1196 1286 127 4.7 211.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q81 293 359 43 1.3 798.5 1.0X +q81 335 364 39 1.1 914.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q82 1067 1080 19 13.8 72.5 1.0X +q82 1066 1075 13 13.8 72.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure 
AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q83 232 285 56 2.6 389.5 1.0X +q83 239 267 18 2.5 401.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q84 640 688 70 3.7 270.5 1.0X +q84 629 657 32 3.8 265.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q85 1729 2002 386 1.6 610.1 1.0X +q85 1810 2045 333 1.6 638.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q86 178 207 30 4.6 219.3 1.0X +q86 176 194 23 4.6 217.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q87 660 717 61 7.9 126.7 1.0X +q87 650 705 54 8.0 124.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q88 1121 1286 234 2.7 376.9 1.0X +q88 1182 1328 205 2.5 397.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q89 292 335 35 10.2 98.4 1.0X +q89 284 314 33 10.5 95.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q90 110 130 16 7.4 135.4 1.0X +q90 118 143 23 6.9 145.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q91 304 339 35 7.6 132.4 1.0X +q91 324 346 20 7.1 141.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q92 133 156 23 6.1 163.8 1.0X +q92 125 151 24 6.5 154.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q93 424 453 39 7.5 134.0 1.0X +q93 380 405 28 8.3 120.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q94 296 345 53 2.8 352.1 1.0X +q94 312 341 38 2.7 371.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q95 5098 5262 232 0.2 6054.1 1.0X +q95 5317 5518 285 0.2 6314.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q96 155 167 15 19.1 52.2 1.0X +q96 163 181 24 18.3 54.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q97 1147 1178 44 3.8 261.1 1.0X +q97 1110 1176 93 4.0 252.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q98 257 297 48 11.6 86.4 1.0X +q98 260 277 24 11.4 87.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q99 240 259 24 6.3 158.5 1.0X +q99 245 265 24 6.2 
161.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q5a-v2.7 968 1233 375 5.8 172.0 1.0X +q5a-v2.7 1019 1185 234 5.5 181.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q6-v2.7 917 940 33 3.4 294.0 1.0X +q6-v2.7 909 937 26 3.4 291.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10a-v2.7 1654 1690 51 1.3 798.9 1.0X +q10a-v2.7 1665 1721 79 1.2 803.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11-v2.7 1472 1902 607 2.6 390.3 1.0X +q11-v2.7 1547 1844 419 2.4 410.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12-v2.7 124 139 21 6.5 153.3 1.0X +q12-v2.7 125 139 21 6.5 153.9 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14-v2.7 3734 3804 99 1.4 728.0 1.0X +q14-v2.7 3522 3706 261 1.5 686.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14a-v2.7 7026 7251 319 0.7 1369.6 1.0X +q14a-v2.7 7188 7299 156 0.7 1401.2 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18a-v2.7 1874 2095 312 1.9 520.2 1.0X +q18a-v2.7 1854 1861 10 1.9 514.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 
6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20-v2.7 150 170 25 10.2 98.2 1.0X +q20-v2.7 149 172 35 10.3 97.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22-v2.7 12490 12574 120 0.9 1055.2 1.0X +q22-v2.7 12659 12738 112 0.9 1069.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22a-v2.7 1930 1938 12 6.1 163.0 1.0X +q22a-v2.7 1850 1901 72 6.4 156.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24-v2.7 101 230 75 33.2 30.2 1.0X +q24-v2.7 197 239 29 17.0 59.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q27a-v2.7 1145 1236 129 4.3 234.2 1.0X +q27a-v2.7 1122 1150 40 4.4 229.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34-v2.7 311 348 35 9.8 101.6 1.0X +q34-v2.7 338 357 30 9.1 110.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35-v2.7 1202 1244 58 1.7 580.6 1.0X +q35-v2.7 1221 1238 23 1.7 589.8 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35a-v2.7 1154 1161 10 1.8 557.5 1.0X +q35a-v2.7 1173 1208 49 1.8 566.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 
6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36a-v2.7 454 483 42 6.5 152.7 1.0X +q36a-v2.7 462 493 42 6.4 155.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47-v2.7 1474 1651 250 2.0 496.2 1.0X +q47-v2.7 1492 1637 205 2.0 502.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49-v2.7 513 572 99 10.9 91.4 1.0X +q49-v2.7 541 612 77 10.4 96.4 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51a-v2.7 15031 15419 550 0.2 4093.6 1.0X +q51a-v2.7 14021 14459 619 0.3 3818.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q57-v2.7 748 836 127 2.0 488.5 1.0X +q57-v2.7 749 822 81 2.0 489.0 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64-v2.7 2164 2350 262 3.2 312.7 1.0X +q64-v2.7 2085 2377 413 3.3 301.3 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67a-v2.7 6595 6800 289 0.5 2219.9 1.0X +q67a-v2.7 6711 7040 464 0.4 2259.1 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q70a-v2.7 618 630 13 4.8 209.4 1.0X +q70a-v2.7 598 643 49 4.9 202.7 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best 
Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q72-v2.7 107039 107499 650 0.1 6974.1 1.0X
+q72-v2.7 112833 114390 2202 0.1 7351.7 1.0X
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q74-v2.7 977 1034 81 3.9 259.0 1.0X
+q74-v2.7 1067 1068 0 3.5 283.0 1.0X
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q75-v2.7 1130 1379 352 5.0 200.6 1.0X
+q75-v2.7 1182 1411 325 4.8 209.8 1.0X
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q77a-v2.7 656 906 366 8.6 116.7 1.0X
+q77a-v2.7 1302 1356 77 4.3 231.8 1.0X
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q78-v2.7 2148 2283 191 2.6 382.5 1.0X
+q78-v2.7 1772 2057 403 3.2 315.5 1.0X
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q80a-v2.7 1388 1606 308 4.1 245.9 1.0X
+q80a-v2.7 1412 1593 256 4.0 250.2 1.0X
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q86a-v2.7 222 269 49 3.6 274.3 1.0X
+q86a-v2.7 218 251 35 3.7 268.8 1.0X
-OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q98-v2.7 246 263 11 12.1 82.7 1.0X
+q98-v2.7 258 276 22 11.5 86.7 1.0X
diff --git a/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt b/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt
index 4b8893f4ab7c4..d66ea7b619588 100644
--- a/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt
+++ b/sql/core/benchmarks/TPCDSQueryBenchmark-results.txt
@@ -1,810 +1,810 @@
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q1 625 722 101 0.7 1354.8 1.0X
+q1 338 403 38 1.4 733.2 1.0X
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q2 794 870 67 2.8 355.6 1.0X
+q2 768 824 74 2.9 344.2 1.0X
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q3 256 278 23 11.6 86.0 1.0X
+q3 214 232 16 13.9 71.9 1.0X
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q4 4141 4545 571 1.3 794.6 1.0X
+q4 4002 4315 442 1.3 767.9 1.0X
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q5 1061 1329 380 5.3 188.5 1.0X
+q5 1343 1497 218 4.2 238.6 1.0X
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q6 1069 1091 31 2.9 342.6 1.0X
+q6 953 1004 72 3.3 305.4 1.0X
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q7 578 640 66 8.5 118.2 1.0X
+q7 554 568 14 8.8 113.3 1.0X
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure
AMD EPYC 7763 64-Core Processor
TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative
------------------------------------------------------------------------------------------------------------------------
-q8 535 584 36 5.8 172.4 1.0X
+q8 475 498 28 6.5 153.1 1.0X
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure
+OpenJDK 64-Bit Server VM
17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q9 1052 1072 29 0.0 30044514.0 1.0X +q9 818 930 101 0.0 23364476.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10 1818 1929 157 1.1 877.7 1.0X +q10 1842 2015 245 1.1 889.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11 1881 2011 185 2.0 498.6 1.0X +q11 1749 1996 349 2.2 463.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12 204 255 28 4.0 252.2 1.0X +q12 196 226 17 4.1 241.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q13 775 830 63 6.4 157.2 1.0X +q13 752 814 54 6.6 152.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14a 4566 5052 687 1.1 890.2 1.0X +q14a 4993 5656 938 1.0 973.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14b 3764 3924 226 1.4 733.8 1.0X +q14b 3721 3867 207 1.4 725.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q15 391 409 13 4.3 235.0 1.0X +q15 411 441 20 4.0 247.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative ------------------------------------------------------------------------------------------------------------------------ -q16 618 691 83 2.5 395.1 1.0X +q16 552 635 94 2.8 353.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q17 1306 1367 86 3.6 278.0 1.0X +q17 1419 1443 33 3.3 302.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18 1161 1278 167 3.1 322.2 1.0X +q18 1100 1167 95 3.3 305.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q19 345 380 29 9.1 110.4 1.0X +q19 327 343 18 9.6 104.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20 212 244 17 7.2 138.6 1.0X +q20 195 238 31 7.8 127.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q21 700 717 22 16.9 59.1 1.0X +q21 567 600 28 20.9 47.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22 3286 3429 202 3.6 277.6 1.0X +q22 3145 3283 195 3.8 265.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23a 5545 5893 493 0.9 1060.2 1.0X +q23a 5658 5959 425 0.9 1081.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q23b 5399 5526 179 1.0 
1032.4 1.0X +q23b 5790 5814 34 0.9 1107.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24a 161 264 75 20.7 48.4 1.0X +q24a 105 224 54 31.7 31.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24b 285 310 20 11.7 85.4 1.0X +q24b 218 277 40 15.3 65.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q25 1326 1436 155 3.5 282.3 1.0X +q25 1298 1321 32 3.6 276.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q26 403 434 26 8.6 116.7 1.0X +q26 335 362 26 10.3 97.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q27 553 565 17 8.8 113.0 1.0X +q27 523 550 26 9.4 106.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q28 1506 1533 39 1.9 522.9 1.0X +q28 1210 1300 126 2.4 420.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q29 1385 1392 9 3.4 294.8 1.0X +q29 1229 1245 23 3.8 261.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q30 458 484 21 0.6 1554.1 1.0X +q30 437 476 28 0.7 1481.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on 
Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q31 967 1188 313 3.8 259.9 1.0X +q31 963 1225 371 3.9 258.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q32 241 292 57 6.3 157.7 1.0X +q32 226 248 13 6.8 147.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q33 506 537 29 10.2 97.8 1.0X +q33 395 461 49 13.1 76.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34 371 412 39 8.2 121.3 1.0X +q34 327 350 26 9.4 106.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35 1388 1409 29 1.5 670.5 1.0X +q35 1228 1234 7 1.7 593.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36 542 560 21 5.5 182.3 1.0X +q36 549 558 13 5.4 184.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q37 917 921 7 14.5 69.1 1.0X +q37 772 813 37 17.2 58.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q38 790 794 3 6.6 151.7 1.0X +q38 731 907 202 7.1 140.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q39a 1582 1743 228 7.5 133.6 1.0X +q39a 1287 1442 219 9.2 108.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q39b 1571 1599 39 7.5 132.7 1.0X +q39b 1247 1289 60 9.5 105.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q40 344 355 14 4.9 205.3 1.0X +q40 323 348 23 5.2 192.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q41 172 204 22 0.1 9531.6 1.0X +q41 171 193 17 0.1 9511.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q42 155 189 25 19.2 52.2 1.0X +q42 171 187 14 17.3 57.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q43 317 333 16 9.3 107.2 1.0X +q43 302 323 19 9.8 102.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q44 382 406 21 7.6 131.8 1.0X +q44 313 335 19 9.3 108.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q45 235 264 23 4.1 244.6 1.0X +q45 189 233 32 5.1 196.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q46 506 512 6 6.1 162.6 1.0X +q46 498 
512 10 6.2 160.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47 1561 1780 310 1.9 525.4 1.0X +q47 1538 1758 310 1.9 517.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q48 834 860 26 5.9 169.4 1.0X +q48 847 873 23 5.8 172.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49 629 670 50 8.9 112.0 1.0X +q49 672 741 59 8.4 119.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q50 650 710 59 5.0 200.4 1.0X +q50 668 686 24 4.9 206.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51 2555 2704 211 1.4 695.8 1.0X +q51 2662 2825 231 1.4 725.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q52 149 178 25 19.9 50.3 1.0X +q52 151 173 12 19.7 50.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q53 338 350 11 8.8 113.6 1.0X +q53 255 287 33 11.6 85.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q54 1365 1368 4 3.9 258.6 1.0X +q54 1366 1388 31 3.9 258.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 
7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q55 160 182 8 18.6 53.9 1.0X +q55 153 170 17 19.5 51.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q56 485 529 45 10.7 93.7 1.0X +q56 445 501 50 11.6 85.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q57 890 948 73 1.7 581.1 1.0X +q57 744 798 47 2.1 486.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q58 489 527 46 10.5 95.4 1.0X +q58 474 529 60 10.8 92.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q59 657 692 43 4.5 222.4 1.0X +q59 636 673 36 4.6 215.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q60 563 577 12 9.2 108.8 1.0X +q60 537 627 141 9.6 103.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q61 614 646 24 5.1 196.8 1.0X +q61 612 630 17 5.1 196.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q62 169 184 13 4.7 213.5 1.0X +q62 185 204 11 4.3 233.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q63 272 289 12 10.9 91.7 1.0X +q63 256 289 49 11.6 86.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64 2573 2662 126 2.7 371.8 1.0X +q64 2327 2744 590 3.0 336.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q65 625 655 40 4.8 210.2 1.0X +q65 588 606 30 5.1 197.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q66 591 720 148 3.9 254.8 1.0X +q66 510 538 34 4.5 220.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67 5192 5284 130 0.6 1747.6 1.0X +q67 5165 5225 84 0.6 1738.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q68 476 514 49 6.5 153.1 1.0X +q68 480 504 21 6.5 154.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q69 1523 1543 28 1.4 735.4 1.0X +q69 1648 1648 1 1.3 795.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q70 533 561 35 5.5 180.6 1.0X +q70 621 634 10 4.8 210.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q71 409 426 16 12.8 78.4 1.0X +q71 425 
446 21 12.3 81.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q72 91047 91388 482 0.2 5932.2 1.0X +q72 109565 110431 1224 0.1 7138.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q73 372 395 24 8.2 121.4 1.0X +q73 371 384 11 8.2 121.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q74 1321 1672 496 2.9 350.3 1.0X +q74 1243 1519 390 3.0 329.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q75 1578 1746 237 3.6 280.2 1.0X +q75 1365 1649 402 4.1 242.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q76 332 349 18 15.5 64.7 1.0X +q76 320 344 23 16.0 62.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q77 528 794 237 10.6 94.0 1.0X +q77 530 827 277 10.6 94.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q78 2017 2368 497 2.8 359.1 1.0X +q78 1870 2048 251 3.0 333.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q79 430 448 14 7.1 140.6 1.0X +q79 431 443 11 7.1 140.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 
6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q80 1481 1583 145 3.8 262.4 1.0X +q80 1172 1306 191 4.8 207.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q81 371 391 19 1.0 1012.4 1.0X +q81 366 386 25 1.0 997.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q82 1193 1197 6 12.3 81.1 1.0X +q82 1096 1113 25 13.4 74.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q83 303 354 34 2.0 509.2 1.0X +q83 265 288 24 2.2 444.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q84 759 769 10 3.1 320.6 1.0X +q84 763 782 24 3.1 322.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q85 2197 2337 198 1.3 775.0 1.0X +q85 1875 2089 301 1.5 661.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q86 200 220 10 4.0 247.5 1.0X +q86 195 220 13 4.2 240.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q87 733 756 23 7.1 140.7 1.0X +q87 745 772 34 7.0 142.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q88 1342 1562 310 2.2 451.5 1.0X +q88 1239 1371 187 2.4 416.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q89 294 318 22 10.1 99.0 1.0X +q89 324 363 34 9.2 109.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q90 150 169 18 5.4 184.2 1.0X +q90 128 144 16 6.4 157.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q91 347 367 17 6.6 151.4 1.0X +q91 303 327 20 7.6 131.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q92 147 185 23 5.5 180.9 1.0X +q92 125 149 22 6.5 154.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q93 429 438 14 7.4 135.4 1.0X +q93 365 375 10 8.7 115.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q94 373 397 14 2.3 442.8 1.0X +q94 288 299 6 2.9 342.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q95 5202 5314 158 0.2 6178.1 1.0X +q95 5528 5648 169 0.2 6565.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q96 170 191 14 17.4 57.3 1.0X +q96 168 194 
22 17.7 56.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q97 1149 1159 14 3.8 261.6 1.0X +q97 1164 1192 39 3.8 265.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q98 297 314 11 10.0 100.0 1.0X +q98 289 311 12 10.3 97.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q99 283 310 14 5.3 187.4 1.0X +q99 257 280 14 5.9 169.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q5a-v2.7 1412 1633 312 4.0 250.9 1.0X +q5a-v2.7 1275 1493 309 4.4 226.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q6-v2.7 988 995 8 3.2 316.6 1.0X +q6-v2.7 1008 1012 4 3.1 323.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q10a-v2.7 1815 1823 11 1.1 876.3 1.0X +q10a-v2.7 1754 1765 16 1.2 846.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q11-v2.7 1614 1887 387 2.3 427.9 1.0X +q11-v2.7 1667 1798 186 2.3 442.0 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q12-v2.7 145 163 12 5.6 179.5 1.0X +q12-v2.7 140 165 17 5.8 172.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server 
VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14-v2.7 3903 4064 226 1.3 761.0 1.0X +q14-v2.7 3930 4153 315 1.3 766.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q14a-v2.7 7517 7870 499 0.7 1465.3 1.0X +q14a-v2.7 6341 6841 707 0.8 1236.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q18a-v2.7 1889 2147 365 1.9 524.4 1.0X +q18a-v2.7 1690 1928 336 2.1 469.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q20-v2.7 178 204 24 8.6 116.2 1.0X +q20-v2.7 149 163 17 10.2 97.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22-v2.7 12979 13026 66 0.9 1096.6 1.0X +q22-v2.7 13001 13080 111 0.9 1098.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q22a-v2.7 2017 2153 192 5.9 170.4 1.0X +q22a-v2.7 1890 1968 110 6.3 159.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q24-v2.7 144 270 77 23.1 43.3 1.0X +q24-v2.7 200 228 25 16.7 59.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q27a-v2.7 1470 1847 533 3.3 300.5 1.0X +q27a-v2.7 1274 1474 283 3.8 260.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD 
EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q34-v2.7 365 402 30 8.4 119.4 1.0X +q34-v2.7 362 377 16 8.4 118.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35-v2.7 1297 1305 12 1.6 626.2 1.0X +q35-v2.7 1324 1346 31 1.6 639.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q35a-v2.7 1265 1294 41 1.6 611.0 1.0X +q35a-v2.7 1271 1288 24 1.6 613.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q36a-v2.7 543 550 11 5.5 182.8 1.0X +q36a-v2.7 535 543 5 5.6 180.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q47-v2.7 1448 1619 241 2.1 487.4 1.0X +q47-v2.7 1562 1638 107 1.9 525.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q49-v2.7 592 637 83 9.5 105.4 1.0X +q49-v2.7 614 651 52 9.1 109.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q51a-v2.7 13982 14156 247 0.3 3807.9 1.0X +q51a-v2.7 14597 14929 469 0.3 3975.4 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q57-v2.7 839 868 49 1.8 548.0 1.0X +q57-v2.7 738 777 61 2.1 482.1 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) 
Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q64-v2.7 2523 2775 357 2.7 364.5 1.0X +q64-v2.7 2239 2674 615 3.1 323.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q67a-v2.7 6557 6809 356 0.5 2207.3 1.0X +q67a-v2.7 7426 7591 233 0.4 2499.5 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q70a-v2.7 674 689 13 4.4 228.3 1.0X +q70a-v2.7 652 711 60 4.5 220.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q72-v2.7 88701 89378 957 0.2 5779.4 1.0X +q72-v2.7 108642 111301 3761 0.1 7078.6 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q74-v2.7 1198 1493 416 3.1 317.7 1.0X +q74-v2.7 1000 1158 223 3.8 265.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q75-v2.7 1386 1697 441 4.1 246.0 1.0X +q75-v2.7 1435 1692 363 3.9 254.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q77a-v2.7 831 1151 454 6.8 147.9 1.0X +q77a-v2.7 803 813 17 7.0 142.9 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q78-v2.7 2326 2451 176 2.4 414.3 1.0X +q78-v2.7 2051 2419 520 2.7 365.2 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -q80a-v2.7 1772 1791 26 3.2 313.9 1.0X +q80a-v2.7 1670 1814 204 3.4 295.8 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q86a-v2.7 262 284 16 3.1 324.1 1.0X +q86a-v2.7 258 282 16 3.1 318.7 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TPCDS: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -q98-v2.7 284 300 13 10.5 95.6 1.0X +q98-v2.7 288 302 16 10.3 96.8 1.0X diff --git a/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-jdk21-results.txt b/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-jdk21-results.txt index 257288226675f..c725476b53778 100644 --- a/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-jdk21-results.txt @@ -2,11 +2,11 @@ TakeOrderedAndProject ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TakeOrderedAndProject with SMJ: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -TakeOrderedAndProject with SMJ for doExecute 213 227 20 0.0 21332.0 1.0X -TakeOrderedAndProject with SMJ for executeCollect 94 109 17 0.1 9447.1 2.3X +TakeOrderedAndProject with SMJ for doExecute 87 91 4 0.1 8677.0 1.0X +TakeOrderedAndProject with SMJ for executeCollect 63 70 8 0.2 6290.5 1.4X diff --git a/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-results.txt b/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-results.txt index 7e8a7436320d3..d3b09bc5d8958 100644 --- a/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-results.txt +++ b/sql/core/benchmarks/TakeOrderedAndProjectBenchmark-results.txt @@ -2,11 +2,11 @@ TakeOrderedAndProject ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor TakeOrderedAndProject with SMJ: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -TakeOrderedAndProject with SMJ for doExecute 245 276 31 0.0 24456.5 1.0X -TakeOrderedAndProject with SMJ for executeCollect 113 121 8 0.1 11305.6 2.2X +TakeOrderedAndProject with SMJ for doExecute 107 108 1 0.1 10711.2 1.0X +TakeOrderedAndProject with SMJ for executeCollect 76 80 5 0.1 7647.4 1.4X diff --git a/sql/core/benchmarks/TopKBenchmark-jdk21-results.txt b/sql/core/benchmarks/TopKBenchmark-jdk21-results.txt index c0d786b8f8f04..edd607e86e0f4 100644 --- 
a/sql/core/benchmarks/TopKBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/TopKBenchmark-jdk21-results.txt @@ -2,21 +2,21 @@ Top-K Computation ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark Top-K: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------- -ROW_NUMBER (PARTITION: , WindowGroupLimit: false) 8527 8649 123 2.5 406.6 1.0X -ROW_NUMBER (PARTITION: , WindowGroupLimit: true) 1687 1769 57 12.4 80.5 5.1X -ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11123 11209 57 1.9 530.4 0.8X -ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4977 5016 30 4.2 237.3 1.7X -RANK (PARTITION: , WindowGroupLimit: false) 9299 9573 159 2.3 443.4 0.9X -RANK (PARTITION: , WindowGroupLimit: true) 1794 1953 123 11.7 85.5 4.8X -RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11622 11881 149 1.8 554.2 0.7X -RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4864 5029 68 4.3 232.0 1.8X -DENSE_RANK (PARTITION: , WindowGroupLimit: false) 9101 9293 72 2.3 434.0 0.9X -DENSE_RANK (PARTITION: , WindowGroupLimit: true) 1796 1939 117 11.7 85.7 4.7X -DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11532 11581 38 1.8 549.9 0.7X -DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4955 4997 39 4.2 236.3 1.7X +ROW_NUMBER (PARTITION: , WindowGroupLimit: false) 9338 9444 88 2.2 445.3 1.0X +ROW_NUMBER (PARTITION: , WindowGroupLimit: true) 1602 1622 12 13.1 76.4 5.8X +ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11523 11814 140 1.8 549.5 0.8X +ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4612 4824 102 4.5 219.9 2.0X +RANK (PARTITION: , WindowGroupLimit: false) 9780 9938 73 2.1 466.4 1.0X +RANK (PARTITION: , WindowGroupLimit: true) 1780 1937 122 11.8 84.9 5.2X +RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11823 12111 147 1.8 563.8 0.8X +RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4739 4857 78 4.4 226.0 2.0X +DENSE_RANK (PARTITION: , WindowGroupLimit: false) 9565 9822 134 2.2 456.1 1.0X +DENSE_RANK (PARTITION: , WindowGroupLimit: true) 1765 1937 116 11.9 84.1 5.3X +DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11830 12062 157 1.8 564.1 0.8X +DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4751 4899 67 4.4 226.5 2.0X diff --git a/sql/core/benchmarks/TopKBenchmark-results.txt b/sql/core/benchmarks/TopKBenchmark-results.txt index 8b77fd0a90051..8df7b646b3a69 100644 --- a/sql/core/benchmarks/TopKBenchmark-results.txt +++ b/sql/core/benchmarks/TopKBenchmark-results.txt @@ -2,21 +2,21 @@ Top-K Computation ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Benchmark Top-K: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------- -ROW_NUMBER (PARTITION: , WindowGroupLimit: false) 8973 9156 258 2.3 427.9 1.0X -ROW_NUMBER (PARTITION: , 
WindowGroupLimit: true) 1686 1695 8 12.4 80.4 5.3X -ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: false) 10830 10981 154 1.9 516.4 0.8X -ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4550 4673 79 4.6 217.0 2.0X -RANK (PARTITION: , WindowGroupLimit: false) 9397 9624 166 2.2 448.1 1.0X -RANK (PARTITION: , WindowGroupLimit: true) 1778 1905 97 11.8 84.8 5.0X -RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11454 11605 132 1.8 546.2 0.8X -RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4569 4698 83 4.6 217.9 2.0X -DENSE_RANK (PARTITION: , WindowGroupLimit: false) 9465 9531 50 2.2 451.3 0.9X -DENSE_RANK (PARTITION: , WindowGroupLimit: true) 1804 1920 79 11.6 86.0 5.0X -DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 11416 11592 185 1.8 544.4 0.8X -DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4549 4693 105 4.6 216.9 2.0X +ROW_NUMBER (PARTITION: , WindowGroupLimit: false) 9300 9429 180 2.3 443.5 1.0X +ROW_NUMBER (PARTITION: , WindowGroupLimit: true) 1665 1676 11 12.6 79.4 5.6X +ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12100 12186 61 1.7 577.0 0.8X +ROW_NUMBER (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4751 4805 36 4.4 226.6 2.0X +RANK (PARTITION: , WindowGroupLimit: false) 9883 9971 74 2.1 471.3 0.9X +RANK (PARTITION: , WindowGroupLimit: true) 1919 1960 31 10.9 91.5 4.8X +RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12946 13013 36 1.6 617.3 0.7X +RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4751 4809 45 4.4 226.5 2.0X +DENSE_RANK (PARTITION: , WindowGroupLimit: false) 9882 9953 57 2.1 471.2 0.9X +DENSE_RANK (PARTITION: , WindowGroupLimit: true) 1932 1974 47 10.9 92.1 4.8X +DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: false) 12891 12989 143 1.6 614.7 0.7X +DENSE_RANK (PARTITION: PARTITION BY b, WindowGroupLimit: true) 4773 4812 23 4.4 227.6 1.9X diff --git a/sql/core/benchmarks/UDFBenchmark-jdk21-results.txt b/sql/core/benchmarks/UDFBenchmark-jdk21-results.txt index 2c7fd8805fddd..c4126410e8f38 100644 --- a/sql/core/benchmarks/UDFBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/UDFBenchmark-jdk21-results.txt @@ -2,58 +2,58 @@ UDF with mixed input types ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to string: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -long/nullable int/string to string wholestage off 108 135 38 0.9 1082.5 1.0X -long/nullable int/string to string wholestage on 69 79 9 1.5 689.4 1.6X +long/nullable int/string to string wholestage off 29 30 1 3.4 290.3 1.0X +long/nullable int/string to string wholestage on 31 34 5 3.3 305.7 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to option: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -long/nullable int/string to option wholestage off 44 47 4 2.3 441.4 1.0X -long/nullable int/string to option wholestage on 38 41 
3 2.6 378.1 1.2X +long/nullable int/string to option wholestage off 22 28 8 4.5 221.8 1.0X +long/nullable int/string to option wholestage on 23 33 7 4.3 230.5 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to primitive: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -long/nullable int/string to primitive wholestage off 30 31 2 3.3 298.8 1.0X -long/nullable int/string to primitive wholestage on 28 31 2 3.6 281.4 1.1X +long/nullable int/string to primitive wholestage off 16 18 3 6.3 158.0 1.0X +long/nullable int/string to primitive wholestage on 17 18 2 6.0 165.8 1.0X ================================================================================================ UDF with primitive types ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int to string: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -long/nullable int to string wholestage off 37 39 3 2.7 370.0 1.0X -long/nullable int to string wholestage on 31 40 8 3.2 311.5 1.2X +long/nullable int to string wholestage off 21 21 0 4.8 207.8 1.0X +long/nullable int to string wholestage on 24 29 5 4.3 235.0 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int to option: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -long/nullable int to option wholestage off 22 26 6 4.6 216.7 1.0X -long/nullable int to option wholestage on 22 23 0 4.5 224.6 1.0X +long/nullable int to option wholestage off 14 15 1 6.9 144.0 1.0X +long/nullable int to option wholestage on 15 17 2 6.8 146.6 1.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int to primitive: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -long/nullable int to primitive wholestage off 21 22 1 4.7 214.3 1.0X -long/nullable int to primitive wholestage on 18 19 0 5.4 184.0 1.2X +long/nullable int to primitive wholestage off 11 15 6 9.5 105.3 1.0X +long/nullable int to primitive wholestage on 12 13 2 8.3 120.6 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor UDF identity overhead: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Baseline 13 19 8 7.8 128.7 1.0X -With identity UDF 16 17 1 6.2 161.5 0.8X +Baseline 7 7 0 14.0 71.3 1.0X +With identity 
UDF 10 12 2 10.1 99.4 0.7X diff --git a/sql/core/benchmarks/UDFBenchmark-results.txt b/sql/core/benchmarks/UDFBenchmark-results.txt index 84ea01c12a80b..3208259e29b3f 100644 --- a/sql/core/benchmarks/UDFBenchmark-results.txt +++ b/sql/core/benchmarks/UDFBenchmark-results.txt @@ -2,58 +2,58 @@ UDF with mixed input types ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to string: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -long/nullable int/string to string wholestage off 108 138 43 0.9 1075.9 1.0X -long/nullable int/string to string wholestage on 68 82 9 1.5 679.2 1.6X +long/nullable int/string to string wholestage off 32 35 4 3.1 318.8 1.0X +long/nullable int/string to string wholestage on 31 41 8 3.2 314.3 1.0X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to option: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -long/nullable int/string to option wholestage off 53 54 1 1.9 532.8 1.0X -long/nullable int/string to option wholestage on 35 41 5 2.8 354.3 1.5X +long/nullable int/string to option wholestage off 23 27 6 4.4 226.1 1.0X +long/nullable int/string to option wholestage on 27 35 6 3.7 272.8 0.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int/string to primitive: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------ -long/nullable int/string to primitive wholestage off 38 39 2 2.6 378.4 1.0X -long/nullable int/string to primitive wholestage on 29 32 3 3.5 288.1 1.3X +long/nullable int/string to primitive wholestage off 18 18 0 5.5 181.5 1.0X +long/nullable int/string to primitive wholestage on 20 21 2 5.1 196.4 0.9X ================================================================================================ UDF with primitive types ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int to string: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -long/nullable int to string wholestage off 28 28 0 3.6 279.2 1.0X -long/nullable int to string wholestage on 30 39 7 3.4 296.0 0.9X +long/nullable int to string wholestage off 22 22 1 4.6 218.7 1.0X +long/nullable int to string wholestage on 23 23 0 4.3 232.0 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor 
long/nullable int to option: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------- -long/nullable int to option wholestage off 20 20 0 5.1 196.0 1.0X -long/nullable int to option wholestage on 22 23 1 4.5 224.4 0.9X +long/nullable int to option wholestage off 14 15 0 7.0 143.4 1.0X +long/nullable int to option wholestage on 15 16 2 6.5 153.0 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor long/nullable int to primitive: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------- -long/nullable int to primitive wholestage off 16 16 0 6.3 159.8 1.0X -long/nullable int to primitive wholestage on 17 18 0 5.7 174.6 0.9X +long/nullable int to primitive wholestage off 12 12 0 8.3 121.0 1.0X +long/nullable int to primitive wholestage on 13 13 1 7.7 129.5 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor UDF identity overhead: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Baseline 11 15 7 8.8 114.3 1.0X -With identity UDF 13 15 2 7.5 134.0 0.9X +Baseline 8 8 0 13.1 76.3 1.0X +With identity UDF 11 11 0 9.0 110.7 0.7X diff --git a/sql/core/benchmarks/UnsafeArrayDataBenchmark-jdk21-results.txt b/sql/core/benchmarks/UnsafeArrayDataBenchmark-jdk21-results.txt index 8b70ff9fd40c4..5283d13abce43 100644 --- a/sql/core/benchmarks/UnsafeArrayDataBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/UnsafeArrayDataBenchmark-jdk21-results.txt @@ -2,32 +2,32 @@ Benchmark UnsafeArrayData ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 80 80 0 2100.7 0.5 1.0X -Double 158 158 0 1061.9 0.9 0.5X +Int 73 74 1 2292.6 0.4 1.0X +Double 158 158 0 1063.2 0.9 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 12 13 1 1813.0 0.6 1.0X -Double 32 37 3 662.4 1.5 0.4X +Int 13 15 2 1608.6 0.6 1.0X +Double 30 34 4 696.9 1.4 0.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Get primitive array from UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 19 26 9 3238.9 0.3 
1.0X -Double 40 52 19 1578.9 0.6 0.5X +Int 20 23 2 3090.1 0.3 1.0X +Double 42 47 2 1508.0 0.7 0.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Create UnsafeArrayData from primitive array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Int 20 21 1 3178.0 0.3 1.0X -Double 42 43 1 1502.2 0.7 0.5X +Int 22 24 2 2892.7 0.3 1.0X +Double 44 47 2 1425.5 0.7 0.5X diff --git a/sql/core/benchmarks/UnsafeArrayDataBenchmark-results.txt b/sql/core/benchmarks/UnsafeArrayDataBenchmark-results.txt index 9c677148c4f57..af96712ae368f 100644 --- a/sql/core/benchmarks/UnsafeArrayDataBenchmark-results.txt +++ b/sql/core/benchmarks/UnsafeArrayDataBenchmark-results.txt @@ -2,32 +2,32 @@ Benchmark UnsafeArrayData ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Read UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 76 76 0 2216.5 0.5 1.0X -Double 158 158 0 1063.8 0.9 0.5X +Int 73 73 0 2313.3 0.4 1.0X +Double 152 156 2 1106.9 0.9 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Write UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 12 13 1 1822.0 0.5 1.0X -Double 29 34 3 724.2 1.4 0.4X +Int 12 14 2 1744.6 0.6 1.0X +Double 28 33 3 738.4 1.4 0.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Get primitive array from UnsafeArrayData: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Int 19 21 1 3317.8 0.3 1.0X -Double 37 42 1 1686.1 0.6 0.5X +Int 19 22 2 3335.4 0.3 1.0X +Double 39 44 2 1594.2 0.6 0.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Create UnsafeArrayData from primitive array: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -Int 19 21 1 3263.0 0.3 1.0X -Double 43 46 2 1474.1 0.7 0.5X +Int 20 22 2 3128.0 0.3 1.0X +Double 42 46 2 1481.7 0.7 0.5X diff --git a/sql/core/benchmarks/UpdateFieldsBenchmark-jdk21-results.txt b/sql/core/benchmarks/UpdateFieldsBenchmark-jdk21-results.txt index 71d5b1fe490fe..e289715a15dc9 100644 --- a/sql/core/benchmarks/UpdateFieldsBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/UpdateFieldsBenchmark-jdk21-results.txt @@ -2,25 +2,25 @@ Add 2 columns and drop 2 columns at 3 different depths of nesting 
================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Add 2 columns and drop 2 columns at 3 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -To non-nullable StructTypes using performant method 1 2 1 0.0 Infinity 1.0X -To nullable StructTypes using performant method 1 1 0 0.0 Infinity 1.2X -To non-nullable StructTypes using non-performant method 18 19 2 0.0 Infinity 0.1X -To nullable StructTypes using non-performant method 800 877 68 0.0 Infinity 0.0X +To non-nullable StructTypes using performant method 2 3 1 0.0 Infinity 1.0X +To nullable StructTypes using performant method 1 1 0 0.0 Infinity 1.4X +To non-nullable StructTypes using non-performant method 18 18 1 0.0 Infinity 0.1X +To nullable StructTypes using non-performant method 789 799 16 0.0 Infinity 0.0X ================================================================================================ Add 50 columns and drop 50 columns at 100 different depths of nesting ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Add 50 columns and drop 50 columns at 100 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -To non-nullable StructTypes using performant method 1390 1390 0 0.0 Infinity 1.0X -To nullable StructTypes using performant method 1336 1381 64 0.0 Infinity 1.0X +To non-nullable StructTypes using performant method 1182 1264 115 0.0 Infinity 1.0X +To nullable StructTypes using performant method 1280 1280 1 0.0 Infinity 0.9X diff --git a/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt b/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt index 75e0f806b0aae..b05804969c1b6 100644 --- a/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt +++ b/sql/core/benchmarks/UpdateFieldsBenchmark-results.txt @@ -2,25 +2,25 @@ Add 2 columns and drop 2 columns at 3 different depths of nesting ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Add 2 columns and drop 2 columns at 3 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- To non-nullable StructTypes using performant method 2 3 1 0.0 Infinity 1.0X -To nullable StructTypes using performant method 1 1 0 0.0 Infinity 1.4X -To non-nullable StructTypes using non-performant method 19 19 1 0.0 Infinity 0.1X -To nullable StructTypes using non-performant method 796 852 54 0.0 Infinity 0.0X +To nullable StructTypes using performant method 1 2 0 0.0 Infinity 1.3X +To non-nullable StructTypes using 
non-performant method 19 20 2 0.0 Infinity 0.1X +To nullable StructTypes using non-performant method 867 899 30 0.0 Infinity 0.0X ================================================================================================ Add 50 columns and drop 50 columns at 100 different depths of nesting ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Add 50 columns and drop 50 columns at 100 different depths of nesting: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -To non-nullable StructTypes using performant method 1643 1656 18 0.0 Infinity 1.0X -To nullable StructTypes using performant method 1502 1623 171 0.0 Infinity 1.1X +To non-nullable StructTypes using performant method 1554 1575 30 0.0 Infinity 1.0X +To nullable StructTypes using performant method 1666 1704 54 0.0 Infinity 0.9X diff --git a/sql/core/benchmarks/V2FunctionBenchmark-jdk21-results.txt b/sql/core/benchmarks/V2FunctionBenchmark-jdk21-results.txt index 8904d36b19a56..def6739a917fa 100644 --- a/sql/core/benchmarks/V2FunctionBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/V2FunctionBenchmark-jdk21-results.txt @@ -1,44 +1,44 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = true codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -native_long_add 9467 9517 55 52.8 18.9 1.0X -java_long_add_default 21990 22037 72 22.7 44.0 0.4X -java_long_add_magic 11660 11741 102 42.9 23.3 0.8X -java_long_add_static_magic 11334 11348 18 44.1 22.7 0.8X -scala_long_add_default 22748 23088 305 22.0 45.5 0.4X -scala_long_add_magic 11839 11875 44 42.2 23.7 0.8X +native_long_add 9638 9709 87 51.9 19.3 1.0X +java_long_add_default 27727 27753 27 18.0 55.5 0.3X +java_long_add_magic 11740 11767 31 42.6 23.5 0.8X +java_long_add_static_magic 11578 11647 85 43.2 23.2 0.8X +scala_long_add_default 23241 23295 73 21.5 46.5 0.4X +scala_long_add_magic 11729 11805 107 42.6 23.5 0.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = false codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 10114 10166 54 49.4 20.2 1.0X -java_long_add_default 22353 22379 28 22.4 44.7 0.5X -java_long_add_magic 11948 11985 51 41.8 23.9 0.8X -java_long_add_static_magic 10076 10102 26 49.6 20.2 1.0X -scala_long_add_default 22141 22150 9 22.6 44.3 0.5X -scala_long_add_magic 11858 11897 50 42.2 23.7 0.9X +native_long_add 10259 10290 34 48.7 20.5 1.0X +java_long_add_default 22285 22378 127 22.4 44.6 0.5X +java_long_add_magic 11725 11813 83 42.6 23.5 0.9X +java_long_add_static_magic 
9877 9966 116 50.6 19.8 1.0X +scala_long_add_default 22320 22495 187 22.4 44.6 0.5X +scala_long_add_magic 11742 11827 77 42.6 23.5 0.9X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = true codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 22606 22631 37 22.1 45.2 1.0X -java_long_add_default 28087 28189 102 17.8 56.2 0.8X -java_long_add_magic 32639 32846 298 15.3 65.3 0.7X -java_long_add_static_magic 30810 31179 628 16.2 61.6 0.7X -scala_long_add_default 26433 26511 106 18.9 52.9 0.9X -scala_long_add_magic 32777 32875 99 15.3 65.6 0.7X +native_long_add 22577 22649 123 22.1 45.2 1.0X +java_long_add_default 27897 27935 59 17.9 55.8 0.8X +java_long_add_magic 32443 32564 110 15.4 64.9 0.7X +java_long_add_static_magic 31297 31408 107 16.0 62.6 0.7X +scala_long_add_default 26280 26438 200 19.0 52.6 0.9X +scala_long_add_magic 32608 32625 17 15.3 65.2 0.7X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = false codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 22794 22845 68 21.9 45.6 1.0X -java_long_add_default 26502 26650 148 18.9 53.0 0.9X -java_long_add_magic 32876 32962 80 15.2 65.8 0.7X -java_long_add_static_magic 30909 31054 168 16.2 61.8 0.7X -scala_long_add_default 26483 26489 5 18.9 53.0 0.9X -scala_long_add_magic 32883 32899 15 15.2 65.8 0.7X +native_long_add 21616 21652 43 23.1 43.2 1.0X +java_long_add_default 25274 25397 209 19.8 50.5 0.9X +java_long_add_magic 31544 31592 53 15.9 63.1 0.7X +java_long_add_static_magic 30400 30965 492 16.4 60.8 0.7X +scala_long_add_default 25277 25394 138 19.8 50.6 0.9X +scala_long_add_magic 31560 31711 261 15.8 63.1 0.7X diff --git a/sql/core/benchmarks/V2FunctionBenchmark-results.txt b/sql/core/benchmarks/V2FunctionBenchmark-results.txt index 2663912b4769b..69bcb6ca79de0 100644 --- a/sql/core/benchmarks/V2FunctionBenchmark-results.txt +++ b/sql/core/benchmarks/V2FunctionBenchmark-results.txt @@ -1,44 +1,44 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = true codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------ -native_long_add 9352 9422 68 53.5 18.7 1.0X -java_long_add_default 21996 22071 125 22.7 44.0 0.4X -java_long_add_magic 10666 10693 24 46.9 21.3 0.9X -java_long_add_static_magic 10534 10585 45 47.5 21.1 0.9X -scala_long_add_default 22996 23055 91 21.7 46.0 0.4X -scala_long_add_magic 10698 10765 68 46.7 21.4 0.9X +native_long_add 9469 10166 728 52.8 18.9 1.0X +java_long_add_default 22104 22180 123 22.6 44.2 0.4X +java_long_add_magic 10681 
10726 53 46.8 21.4 0.9X +java_long_add_static_magic 10526 10622 84 47.5 21.1 0.9X +scala_long_add_default 22671 23034 438 22.1 45.3 0.4X +scala_long_add_magic 10662 10703 39 46.9 21.3 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = false codegen = true: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 9897 9931 29 50.5 19.8 1.0X -java_long_add_default 21890 21944 49 22.8 43.8 0.5X -java_long_add_magic 10699 10803 173 46.7 21.4 0.9X -java_long_add_static_magic 9882 10183 464 50.6 19.8 1.0X -scala_long_add_default 21844 21920 103 22.9 43.7 0.5X -scala_long_add_magic 10715 10722 6 46.7 21.4 0.9X +native_long_add 9914 9941 27 50.4 19.8 1.0X +java_long_add_default 21984 22016 45 22.7 44.0 0.5X +java_long_add_magic 10683 10700 25 46.8 21.4 0.9X +java_long_add_static_magic 9884 9941 60 50.6 19.8 1.0X +scala_long_add_default 21936 22057 180 22.8 43.9 0.5X +scala_long_add_magic 10677 10997 538 46.8 21.4 0.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = true codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 22708 22769 69 22.0 45.4 1.0X -java_long_add_default 25849 26143 389 19.3 51.7 0.9X -java_long_add_magic 32210 32379 256 15.5 64.4 0.7X -java_long_add_static_magic 31705 31755 62 15.8 63.4 0.7X -scala_long_add_default 26389 26548 170 18.9 52.8 0.9X -scala_long_add_magic 32369 32429 63 15.4 64.7 0.7X +native_long_add 22579 22718 163 22.1 45.2 1.0X +java_long_add_default 25854 25927 124 19.3 51.7 0.9X +java_long_add_magic 32272 32342 69 15.5 64.5 0.7X +java_long_add_static_magic 30215 30835 987 16.5 60.4 0.7X +scala_long_add_default 26500 26616 161 18.9 53.0 0.9X +scala_long_add_magic 32366 32583 317 15.4 64.7 0.7X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor scalar function (long + long) -> long, result_nullable = false codegen = false: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative -------------------------------------------------------------------------------------------------------------------------------------------------------------- -native_long_add 21779 21955 211 23.0 43.6 1.0X -java_long_add_default 25989 26116 206 19.2 52.0 0.8X -java_long_add_magic 31604 31648 48 15.8 63.2 0.7X -java_long_add_static_magic 31273 31340 109 16.0 62.5 0.7X -scala_long_add_default 25860 25913 48 19.3 51.7 0.8X -scala_long_add_magic 31568 31669 90 15.8 63.1 0.7X +native_long_add 21710 21832 159 23.0 43.4 1.0X +java_long_add_default 25610 25663 49 19.5 51.2 0.8X +java_long_add_magic 31550 31580 45 15.8 63.1 0.7X +java_long_add_static_magic 29780 29820 49 16.8 59.6 0.7X +scala_long_add_default 25753 26613 1063 19.4 51.5 0.8X +scala_long_add_magic 31546 31702 184 15.8 63.1 0.7X diff --git 
a/sql/core/benchmarks/WideSchemaBenchmark-jdk21-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-jdk21-results.txt index 712436e4c1353..f8b9e3744bf27 100644 --- a/sql/core/benchmarks/WideSchemaBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/WideSchemaBenchmark-jdk21-results.txt @@ -2,157 +2,157 @@ parsing large select expressions ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor parsing large select: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 select expressions 1 2 1 0.0 1010700.0 1.0X -100 select expressions 2 3 1 0.0 2025948.0 0.5X -2500 select expressions 39 42 4 0.0 39031401.0 0.0X +1 select expressions 1 1 0 0.0 618123.0 1.0X +100 select expressions 2 3 1 0.0 2251962.0 0.3X +2500 select expressions 46 48 4 0.0 46311762.0 0.0X ================================================================================================ optimize large select expressions ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor optimize large select: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -100 columns 5 5 1 0.0 4506568.0 1.0X -1000 columns 31 33 2 0.0 31376169.0 0.1X -10000 columns 328 344 14 0.0 327627136.0 0.0X +100 columns 5 5 1 0.0 4594183.0 1.0X +1000 columns 34 35 4 0.0 33513952.0 0.1X +10000 columns 359 388 38 0.0 359145545.0 0.0X ================================================================================================ many column field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor many column field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 cols x 100000 rows (read in-mem) 15 21 5 6.7 149.8 1.0X -1 cols x 100000 rows (exec in-mem) 15 20 5 6.7 149.0 1.0X -1 cols x 100000 rows (read parquet) 26 33 8 3.8 261.7 0.6X -1 cols x 100000 rows (write parquet) 91 100 7 1.1 906.8 0.2X -100 cols x 1000 rows (read in-mem) 12 16 4 8.4 118.8 1.3X -100 cols x 1000 rows (exec in-mem) 16 21 6 6.2 160.5 0.9X -100 cols x 1000 rows (read parquet) 22 28 6 4.5 223.4 0.7X -100 cols x 1000 rows (write parquet) 89 96 9 1.1 894.8 0.2X -2500 cols x 40 rows (read in-mem) 71 75 8 1.4 708.2 0.2X -2500 cols x 40 rows (exec in-mem) 130 135 7 0.8 1297.2 0.1X -2500 cols x 40 rows (read parquet) 308 318 7 0.3 3084.1 0.0X -2500 cols x 40 rows (write parquet) 144 149 4 0.7 1441.9 0.1X +1 cols x 100000 rows (read in-mem) 13 18 5 7.5 134.2 1.0X +1 cols x 100000 rows (exec in-mem) 14 17 4 7.4 135.4 1.0X +1 cols x 100000 rows (read parquet) 25 34 9 4.1 246.6 0.5X +1 cols x 100000 rows (write parquet) 95 106 13 1.1 950.3 0.1X +100 cols x 1000 
rows (read in-mem) 12 16 5 8.4 118.8 1.1X +100 cols x 1000 rows (exec in-mem) 16 19 5 6.3 158.4 0.8X +100 cols x 1000 rows (read parquet) 22 28 8 4.6 217.5 0.6X +100 cols x 1000 rows (write parquet) 93 102 15 1.1 934.8 0.1X +2500 cols x 40 rows (read in-mem) 74 83 11 1.4 739.5 0.2X +2500 cols x 40 rows (exec in-mem) 132 150 15 0.8 1324.5 0.1X +2500 cols x 40 rows (read parquet) 289 318 32 0.3 2894.9 0.0X +2500 cols x 40 rows (write parquet) 152 176 26 0.7 1522.8 0.1X ================================================================================================ wide shallowly nested struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor wide shallowly nested struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 19 24 6 5.3 187.3 1.0X -1 wide x 100000 rows (exec in-mem) 20 24 5 4.9 204.1 0.9X -1 wide x 100000 rows (read parquet) 21 26 6 4.8 208.0 0.9X -1 wide x 100000 rows (write parquet) 95 103 9 1.1 952.3 0.2X -100 wide x 1000 rows (read in-mem) 14 17 4 7.0 143.3 1.3X -100 wide x 1000 rows (exec in-mem) 22 25 4 4.5 220.4 0.8X -100 wide x 1000 rows (read parquet) 21 23 5 4.8 206.5 0.9X -100 wide x 1000 rows (write parquet) 90 96 7 1.1 900.7 0.2X -2500 wide x 40 rows (read in-mem) 20 23 4 5.0 201.5 0.9X -2500 wide x 40 rows (exec in-mem) 203 213 10 0.5 2027.7 0.1X -2500 wide x 40 rows (read parquet) 63 67 5 1.6 627.6 0.3X -2500 wide x 40 rows (write parquet) 96 102 5 1.0 964.9 0.2X +1 wide x 100000 rows (read in-mem) 19 24 7 5.4 186.4 1.0X +1 wide x 100000 rows (exec in-mem) 20 25 7 4.9 204.7 0.9X +1 wide x 100000 rows (read parquet) 21 25 6 4.8 207.1 0.9X +1 wide x 100000 rows (write parquet) 100 108 11 1.0 1000.3 0.2X +100 wide x 1000 rows (read in-mem) 14 17 5 7.0 143.8 1.3X +100 wide x 1000 rows (exec in-mem) 23 28 7 4.3 230.8 0.8X +100 wide x 1000 rows (read parquet) 21 25 6 4.8 206.8 0.9X +100 wide x 1000 rows (write parquet) 96 101 7 1.0 961.9 0.2X +2500 wide x 40 rows (read in-mem) 21 24 5 4.8 210.2 0.9X +2500 wide x 40 rows (exec in-mem) 233 254 23 0.4 2325.6 0.1X +2500 wide x 40 rows (read parquet) 62 68 9 1.6 617.9 0.3X +2500 wide x 40 rows (write parquet) 102 111 19 1.0 1022.9 0.2X ================================================================================================ deeply nested struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor deeply nested struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 deep x 100000 rows (read in-mem) 15 19 5 6.8 147.7 1.0X -1 deep x 100000 rows (exec in-mem) 17 21 5 5.9 169.5 0.9X -1 deep x 100000 rows (read parquet) 17 20 5 5.9 169.7 0.9X -1 deep x 100000 rows (write parquet) 90 96 7 1.1 902.1 0.2X -100 deep x 1000 rows (read in-mem) 42 44 4 2.4 415.3 0.4X -100 deep x 1000 rows (exec in-mem) 463 464 1 0.2 4629.9 0.0X -100 deep x 1000 rows 
(read parquet) 440 447 6 0.2 4402.6 0.0X -100 deep x 1000 rows (write parquet) 118 122 4 0.8 1182.7 0.1X -250 deep x 400 rows (read in-mem) 190 193 3 0.5 1898.3 0.1X -250 deep x 400 rows (exec in-mem) 2955 2961 9 0.0 29549.9 0.0X -250 deep x 400 rows (read parquet) 2657 2661 6 0.0 26566.2 0.0X -250 deep x 400 rows (write parquet) 266 272 6 0.4 2655.3 0.1X +1 deep x 100000 rows (read in-mem) 15 19 6 6.5 154.6 1.0X +1 deep x 100000 rows (exec in-mem) 17 20 5 5.7 174.1 0.9X +1 deep x 100000 rows (read parquet) 17 23 7 5.7 174.6 0.9X +1 deep x 100000 rows (write parquet) 96 106 14 1.0 961.6 0.2X +100 deep x 1000 rows (read in-mem) 47 54 9 2.1 466.7 0.3X +100 deep x 1000 rows (exec in-mem) 521 538 16 0.2 5211.5 0.0X +100 deep x 1000 rows (read parquet) 500 510 13 0.2 5001.6 0.0X +100 deep x 1000 rows (write parquet) 128 134 6 0.8 1278.6 0.1X +250 deep x 400 rows (read in-mem) 221 231 8 0.5 2210.9 0.1X +250 deep x 400 rows (exec in-mem) 3301 3306 7 0.0 33011.6 0.0X +250 deep x 400 rows (read parquet) 3049 3073 34 0.0 30491.4 0.0X +250 deep x 400 rows (write parquet) 298 307 11 0.3 2982.8 0.1X ================================================================================================ bushy struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor bushy struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -1 x 1 deep x 100000 rows (read in-mem) 13 15 3 7.8 127.9 1.0X -1 x 1 deep x 100000 rows (exec in-mem) 14 16 4 7.1 141.1 0.9X -1 x 1 deep x 100000 rows (read parquet) 16 19 5 6.2 160.4 0.8X -1 x 1 deep x 100000 rows (write parquet) 89 92 5 1.1 888.1 0.1X -128 x 8 deep x 1000 rows (read in-mem) 13 14 3 7.8 128.4 1.0X -128 x 8 deep x 1000 rows (exec in-mem) 25 28 4 4.0 249.1 0.5X -128 x 8 deep x 1000 rows (read parquet) 20 22 5 5.1 197.6 0.6X -128 x 8 deep x 1000 rows (write parquet) 87 94 8 1.1 873.8 0.1X -1024 x 11 deep x 100 rows (read in-mem) 18 20 3 5.6 178.4 0.7X -1024 x 11 deep x 100 rows (exec in-mem) 138 143 8 0.7 1375.0 0.1X -1024 x 11 deep x 100 rows (read parquet) 33 36 5 3.0 334.0 0.4X -1024 x 11 deep x 100 rows (write parquet) 93 97 7 1.1 925.2 0.1X +1 x 1 deep x 100000 rows (read in-mem) 13 17 5 7.6 131.7 1.0X +1 x 1 deep x 100000 rows (exec in-mem) 15 19 5 6.7 148.2 0.9X +1 x 1 deep x 100000 rows (read parquet) 16 19 5 6.1 164.3 0.8X +1 x 1 deep x 100000 rows (write parquet) 94 99 6 1.1 935.0 0.1X +128 x 8 deep x 1000 rows (read in-mem) 13 15 4 7.6 131.4 1.0X +128 x 8 deep x 1000 rows (exec in-mem) 27 31 5 3.7 269.8 0.5X +128 x 8 deep x 1000 rows (read parquet) 20 22 4 4.9 202.9 0.6X +128 x 8 deep x 1000 rows (write parquet) 93 99 8 1.1 933.8 0.1X +1024 x 11 deep x 100 rows (read in-mem) 18 20 4 5.6 179.2 0.7X +1024 x 11 deep x 100 rows (exec in-mem) 143 154 10 0.7 1429.6 0.1X +1024 x 11 deep x 100 rows (read parquet) 34 37 5 2.9 344.1 0.4X +1024 x 11 deep x 100 rows (write parquet) 98 102 4 1.0 977.9 0.1X ================================================================================================ wide array field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure 
+OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor wide array field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 14 16 4 7.1 141.0 1.0X -1 wide x 100000 rows (exec in-mem) 16 19 4 6.2 162.5 0.9X -1 wide x 100000 rows (read parquet) 16 20 5 6.1 164.8 0.9X -1 wide x 100000 rows (write parquet) 89 95 6 1.1 888.9 0.2X -100 wide x 1000 rows (read in-mem) 11 13 4 9.3 107.3 1.3X -100 wide x 1000 rows (exec in-mem) 12 14 4 8.3 119.9 1.2X -100 wide x 1000 rows (read parquet) 16 19 6 6.3 157.7 0.9X -100 wide x 1000 rows (write parquet) 86 93 9 1.2 857.0 0.2X -2500 wide x 40 rows (read in-mem) 11 12 3 9.5 105.1 1.3X -2500 wide x 40 rows (exec in-mem) 12 13 3 8.3 120.3 1.2X -2500 wide x 40 rows (read parquet) 16 18 4 6.3 158.2 0.9X -2500 wide x 40 rows (write parquet) 85 92 7 1.2 854.1 0.2X +1 wide x 100000 rows (read in-mem) 15 19 5 6.6 151.2 1.0X +1 wide x 100000 rows (exec in-mem) 17 20 5 5.8 172.4 0.9X +1 wide x 100000 rows (read parquet) 17 19 5 5.8 171.0 0.9X +1 wide x 100000 rows (write parquet) 95 105 6 1.0 952.9 0.2X +100 wide x 1000 rows (read in-mem) 11 13 4 8.9 112.9 1.3X +100 wide x 1000 rows (exec in-mem) 13 15 4 7.8 128.6 1.2X +100 wide x 1000 rows (read parquet) 17 20 5 6.0 166.7 0.9X +100 wide x 1000 rows (write parquet) 92 101 7 1.1 920.3 0.2X +2500 wide x 40 rows (read in-mem) 11 13 4 9.0 110.9 1.4X +2500 wide x 40 rows (exec in-mem) 13 14 3 7.9 127.4 1.2X +2500 wide x 40 rows (read parquet) 16 19 4 6.1 164.4 0.9X +2500 wide x 40 rows (write parquet) 91 98 6 1.1 909.1 0.2X ================================================================================================ wide map field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor wide map field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 11 12 3 8.9 112.0 1.0X -1 wide x 100000 rows (exec in-mem) 14 15 3 7.1 140.1 0.8X -1 wide x 100000 rows (read parquet) 20 21 4 5.1 197.2 0.6X -1 wide x 100000 rows (write parquet) 86 90 5 1.2 858.3 0.1X -100 wide x 1000 rows (read in-mem) 7 8 2 15.0 66.6 1.7X -100 wide x 1000 rows (exec in-mem) 9 11 3 11.1 90.3 1.2X -100 wide x 1000 rows (read parquet) 17 20 5 5.8 172.7 0.6X -100 wide x 1000 rows (write parquet) 82 85 5 1.2 815.3 0.1X -2500 wide x 40 rows (read in-mem) 9 10 2 11.5 86.9 1.3X -2500 wide x 40 rows (exec in-mem) 11 12 3 9.4 106.7 1.0X -2500 wide x 40 rows (read parquet) 17 19 4 5.8 172.4 0.6X -2500 wide x 40 rows (write parquet) 84 91 7 1.2 840.4 0.1X +1 wide x 100000 rows (read in-mem) 12 14 3 8.5 117.9 1.0X +1 wide x 100000 rows (exec in-mem) 15 17 2 6.5 154.8 0.8X +1 wide x 100000 rows (read parquet) 20 23 5 4.9 202.6 0.6X +1 wide x 100000 rows (write parquet) 92 97 5 1.1 918.0 0.1X +100 wide x 1000 rows (read in-mem) 7 8 2 13.5 74.1 1.6X +100 wide x 1000 rows (exec in-mem) 9 10 2 10.7 93.1 1.3X +100 wide x 1000 rows (read parquet) 18 21 5 5.5 181.3 0.7X +100 wide x 1000 rows (write parquet) 88 91 2 1.1 881.6 0.1X +2500 wide x 40 rows (read in-mem) 9 
10 2 10.9 91.3 1.3X +2500 wide x 40 rows (exec in-mem) 11 12 2 9.1 109.7 1.1X +2500 wide x 40 rows (read parquet) 18 20 4 5.6 179.7 0.7X +2500 wide x 40 rows (write parquet) 89 97 6 1.1 892.1 0.1X diff --git a/sql/core/benchmarks/WideSchemaBenchmark-results.txt b/sql/core/benchmarks/WideSchemaBenchmark-results.txt index 51a5792330a13..3272e7a72fcc4 100644 --- a/sql/core/benchmarks/WideSchemaBenchmark-results.txt +++ b/sql/core/benchmarks/WideSchemaBenchmark-results.txt @@ -2,157 +2,157 @@ parsing large select expressions ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor parsing large select: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 select expressions 1 2 1 0.0 1037637.0 1.0X -100 select expressions 2 3 1 0.0 2276460.0 0.5X -2500 select expressions 44 48 4 0.0 44445446.0 0.0X +1 select expressions 1 1 0 0.0 665640.0 1.0X +100 select expressions 3 3 1 0.0 2542608.0 0.3X +2500 select expressions 53 56 4 0.0 53485744.0 0.0X ================================================================================================ optimize large select expressions ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor optimize large select: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -100 columns 5 6 1 0.0 4963948.0 1.0X -1000 columns 35 36 1 0.0 35350989.0 0.1X -10000 columns 348 367 21 0.0 348345246.0 0.0X +100 columns 5 6 1 0.0 5225216.0 1.0X +1000 columns 38 42 4 0.0 37975149.0 0.1X +10000 columns 395 424 30 0.0 394705382.0 0.0X ================================================================================================ many column field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor many column field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 cols x 100000 rows (read in-mem) 17 23 3 5.7 174.0 1.0X -1 cols x 100000 rows (exec in-mem) 14 18 3 7.1 140.4 1.2X -1 cols x 100000 rows (read parquet) 27 34 5 3.7 270.5 0.6X -1 cols x 100000 rows (write parquet) 90 97 5 1.1 896.4 0.2X -100 cols x 1000 rows (read in-mem) 11 15 3 8.7 114.9 1.5X -100 cols x 1000 rows (exec in-mem) 16 20 4 6.4 155.7 1.1X -100 cols x 1000 rows (read parquet) 21 26 4 4.7 212.8 0.8X -100 cols x 1000 rows (write parquet) 85 92 6 1.2 854.2 0.2X -2500 cols x 40 rows (read in-mem) 72 76 6 1.4 716.3 0.2X -2500 cols x 40 rows (exec in-mem) 132 138 6 0.8 1319.7 0.1X -2500 cols x 40 rows (read parquet) 273 282 8 0.4 2734.5 0.1X -2500 cols x 40 rows (write parquet) 142 155 9 0.7 1418.2 0.1X +1 cols x 100000 rows (read in-mem) 15 22 5 6.8 146.6 1.0X +1 cols x 100000 
rows (exec in-mem) 17 25 6 5.8 171.7 0.9X +1 cols x 100000 rows (read parquet) 28 38 7 3.6 275.8 0.5X +1 cols x 100000 rows (write parquet) 103 123 12 1.0 1034.1 0.1X +100 cols x 1000 rows (read in-mem) 14 21 5 7.1 141.6 1.0X +100 cols x 1000 rows (exec in-mem) 18 25 6 5.6 178.2 0.8X +100 cols x 1000 rows (read parquet) 24 34 7 4.1 243.6 0.6X +100 cols x 1000 rows (write parquet) 106 129 14 0.9 1064.4 0.1X +2500 cols x 40 rows (read in-mem) 84 106 10 1.2 842.2 0.2X +2500 cols x 40 rows (exec in-mem) 155 170 15 0.6 1546.3 0.1X +2500 cols x 40 rows (read parquet) 295 328 41 0.3 2946.7 0.0X +2500 cols x 40 rows (write parquet) 165 183 18 0.6 1651.4 0.1X ================================================================================================ wide shallowly nested struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor wide shallowly nested struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 18 21 3 5.5 180.4 1.0X -1 wide x 100000 rows (exec in-mem) 19 23 3 5.2 192.4 0.9X -1 wide x 100000 rows (read parquet) 19 23 3 5.2 194.0 0.9X -1 wide x 100000 rows (write parquet) 91 97 5 1.1 905.2 0.2X -100 wide x 1000 rows (read in-mem) 14 17 3 7.3 136.6 1.3X -100 wide x 1000 rows (exec in-mem) 21 23 3 4.8 208.6 0.9X -100 wide x 1000 rows (read parquet) 19 21 3 5.2 194.1 0.9X -100 wide x 1000 rows (write parquet) 87 91 6 1.2 867.4 0.2X -2500 wide x 40 rows (read in-mem) 19 22 3 5.2 194.1 0.9X -2500 wide x 40 rows (exec in-mem) 199 207 7 0.5 1994.6 0.1X -2500 wide x 40 rows (read parquet) 60 63 3 1.7 604.7 0.3X -2500 wide x 40 rows (write parquet) 91 97 4 1.1 914.6 0.2X +1 wide x 100000 rows (read in-mem) 21 28 6 4.7 214.5 1.0X +1 wide x 100000 rows (exec in-mem) 23 30 6 4.4 229.7 0.9X +1 wide x 100000 rows (read parquet) 21 25 4 4.7 210.8 1.0X +1 wide x 100000 rows (write parquet) 104 110 5 1.0 1036.3 0.2X +100 wide x 1000 rows (read in-mem) 15 18 3 6.6 151.7 1.4X +100 wide x 1000 rows (exec in-mem) 23 27 4 4.3 233.9 0.9X +100 wide x 1000 rows (read parquet) 21 24 3 4.7 211.7 1.0X +100 wide x 1000 rows (write parquet) 98 101 4 1.0 979.6 0.2X +2500 wide x 40 rows (read in-mem) 21 24 3 4.7 212.7 1.0X +2500 wide x 40 rows (exec in-mem) 223 233 7 0.4 2227.4 0.1X +2500 wide x 40 rows (read parquet) 65 69 3 1.5 654.4 0.3X +2500 wide x 40 rows (write parquet) 104 108 4 1.0 1035.7 0.2X ================================================================================================ deeply nested struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor deeply nested struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 deep x 100000 rows (read in-mem) 14 16 2 7.2 139.0 1.0X -1 deep x 100000 rows (exec in-mem) 16 18 3 6.3 158.7 0.9X -1 deep x 100000 rows (read parquet) 16 18 3 6.2 162.4 0.9X -1 deep x 100000 
rows (write parquet) 86 90 4 1.2 859.6 0.2X -100 deep x 1000 rows (read in-mem) 43 45 2 2.3 429.0 0.3X -100 deep x 1000 rows (exec in-mem) 519 528 6 0.2 5188.6 0.0X -100 deep x 1000 rows (read parquet) 507 517 12 0.2 5068.3 0.0X -100 deep x 1000 rows (write parquet) 116 121 5 0.9 1163.8 0.1X -250 deep x 400 rows (read in-mem) 196 203 6 0.5 1963.7 0.1X -250 deep x 400 rows (exec in-mem) 3290 3294 6 0.0 32897.5 0.0X -250 deep x 400 rows (read parquet) 3044 3044 1 0.0 30435.7 0.0X -250 deep x 400 rows (write parquet) 269 277 7 0.4 2688.8 0.1X +1 deep x 100000 rows (read in-mem) 16 18 3 6.2 161.8 1.0X +1 deep x 100000 rows (exec in-mem) 18 22 3 5.4 183.6 0.9X +1 deep x 100000 rows (read parquet) 19 22 3 5.2 192.1 0.8X +1 deep x 100000 rows (write parquet) 99 103 4 1.0 992.0 0.2X +100 deep x 1000 rows (read in-mem) 31 34 4 3.2 314.9 0.5X +100 deep x 1000 rows (exec in-mem) 479 486 8 0.2 4794.2 0.0X +100 deep x 1000 rows (read parquet) 464 469 5 0.2 4643.8 0.0X +100 deep x 1000 rows (write parquet) 115 119 3 0.9 1146.1 0.1X +250 deep x 400 rows (read in-mem) 122 125 2 0.8 1219.8 0.1X +250 deep x 400 rows (exec in-mem) 3018 3025 11 0.0 30175.6 0.0X +250 deep x 400 rows (read parquet) 2818 2822 6 0.0 28178.4 0.0X +250 deep x 400 rows (write parquet) 204 211 5 0.5 2042.2 0.1X ================================================================================================ bushy struct field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor bushy struct field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -1 x 1 deep x 100000 rows (read in-mem) 12 14 3 8.5 117.6 1.0X -1 x 1 deep x 100000 rows (exec in-mem) 13 16 4 7.5 133.5 0.9X -1 x 1 deep x 100000 rows (read parquet) 15 17 3 6.6 151.2 0.8X -1 x 1 deep x 100000 rows (write parquet) 85 88 3 1.2 848.9 0.1X -128 x 8 deep x 1000 rows (read in-mem) 12 14 2 8.2 121.3 1.0X -128 x 8 deep x 1000 rows (exec in-mem) 23 26 2 4.3 231.4 0.5X -128 x 8 deep x 1000 rows (read parquet) 19 21 3 5.3 189.3 0.6X -128 x 8 deep x 1000 rows (write parquet) 84 87 5 1.2 838.3 0.1X -1024 x 11 deep x 100 rows (read in-mem) 17 18 2 5.9 168.3 0.7X -1024 x 11 deep x 100 rows (exec in-mem) 131 139 5 0.8 1308.0 0.1X -1024 x 11 deep x 100 rows (read parquet) 33 37 4 3.0 329.1 0.4X -1024 x 11 deep x 100 rows (write parquet) 89 93 3 1.1 888.8 0.1X +1 x 1 deep x 100000 rows (read in-mem) 14 15 2 7.2 138.1 1.0X +1 x 1 deep x 100000 rows (exec in-mem) 16 19 3 6.4 155.6 0.9X +1 x 1 deep x 100000 rows (read parquet) 17 19 3 5.9 169.3 0.8X +1 x 1 deep x 100000 rows (write parquet) 95 99 5 1.1 950.2 0.1X +128 x 8 deep x 1000 rows (read in-mem) 14 15 3 7.4 135.2 1.0X +128 x 8 deep x 1000 rows (exec in-mem) 27 30 4 3.6 274.2 0.5X +128 x 8 deep x 1000 rows (read parquet) 21 23 3 4.9 205.5 0.7X +128 x 8 deep x 1000 rows (write parquet) 95 99 3 1.1 950.6 0.1X +1024 x 11 deep x 100 rows (read in-mem) 18 20 3 5.6 178.7 0.8X +1024 x 11 deep x 100 rows (exec in-mem) 152 161 6 0.7 1518.3 0.1X +1024 x 11 deep x 100 rows (read parquet) 35 37 3 2.9 345.0 0.4X +1024 x 11 deep x 100 rows (write parquet) 100 106 4 1.0 1003.5 0.1X ================================================================================================ wide array field 
read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor wide array field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 14 15 3 7.3 136.2 1.0X -1 wide x 100000 rows (exec in-mem) 16 17 2 6.4 155.5 0.9X -1 wide x 100000 rows (read parquet) 15 17 3 6.5 153.4 0.9X -1 wide x 100000 rows (write parquet) 86 93 6 1.2 860.3 0.2X -100 wide x 1000 rows (read in-mem) 10 12 2 10.0 99.7 1.4X -100 wide x 1000 rows (exec in-mem) 11 13 2 8.8 113.8 1.2X -100 wide x 1000 rows (read parquet) 15 17 3 6.6 151.1 0.9X -100 wide x 1000 rows (write parquet) 83 88 7 1.2 828.2 0.2X -2500 wide x 40 rows (read in-mem) 10 11 2 10.1 98.6 1.4X -2500 wide x 40 rows (exec in-mem) 11 12 2 8.8 113.6 1.2X -2500 wide x 40 rows (read parquet) 15 16 3 6.8 147.9 0.9X -2500 wide x 40 rows (write parquet) 82 84 2 1.2 816.2 0.2X +1 wide x 100000 rows (read in-mem) 16 19 4 6.3 159.2 1.0X +1 wide x 100000 rows (exec in-mem) 18 21 5 5.6 179.4 0.9X +1 wide x 100000 rows (read parquet) 18 21 4 5.5 180.5 0.9X +1 wide x 100000 rows (write parquet) 99 105 6 1.0 990.6 0.2X +100 wide x 1000 rows (read in-mem) 13 14 2 7.9 127.3 1.3X +100 wide x 1000 rows (exec in-mem) 14 16 3 7.4 135.0 1.2X +100 wide x 1000 rows (read parquet) 17 19 3 5.7 174.5 0.9X +100 wide x 1000 rows (write parquet) 96 101 4 1.0 957.0 0.2X +2500 wide x 40 rows (read in-mem) 12 13 2 8.4 118.5 1.3X +2500 wide x 40 rows (exec in-mem) 13 14 2 7.7 130.2 1.2X +2500 wide x 40 rows (read parquet) 17 20 3 5.8 173.7 0.9X +2500 wide x 40 rows (write parquet) 94 99 3 1.1 935.0 0.2X ================================================================================================ wide map field read and write ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor wide map field r/w: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -1 wide x 100000 rows (read in-mem) 11 12 2 9.1 109.6 1.0X -1 wide x 100000 rows (exec in-mem) 14 15 1 7.4 135.2 0.8X -1 wide x 100000 rows (read parquet) 19 22 4 5.4 185.9 0.6X -1 wide x 100000 rows (write parquet) 82 86 5 1.2 815.2 0.1X -100 wide x 1000 rows (read in-mem) 7 7 1 15.1 66.2 1.7X -100 wide x 1000 rows (exec in-mem) 9 10 2 11.7 85.6 1.3X -100 wide x 1000 rows (read parquet) 16 18 3 6.1 164.4 0.7X -100 wide x 1000 rows (write parquet) 78 82 4 1.3 776.2 0.1X -2500 wide x 40 rows (read in-mem) 8 9 1 12.2 82.2 1.3X -2500 wide x 40 rows (exec in-mem) 10 11 2 10.0 100.4 1.1X -2500 wide x 40 rows (read parquet) 16 18 3 6.1 164.9 0.7X -2500 wide x 40 rows (write parquet) 79 84 4 1.3 790.9 0.1X +1 wide x 100000 rows (read in-mem) 12 14 2 8.0 124.2 1.0X +1 wide x 100000 rows (exec in-mem) 16 17 2 6.2 160.1 0.8X +1 wide x 100000 rows (read parquet) 21 24 5 4.8 207.7 0.6X +1 wide x 100000 rows (write parquet) 97 104 7 1.0 970.5 0.1X +100 wide x 1000 rows (read in-mem) 8 9 2 12.9 77.5 1.6X +100 wide x 1000 rows (exec in-mem) 
10 12 2 10.0 99.7 1.2X +100 wide x 1000 rows (read parquet) 19 21 3 5.2 191.2 0.6X +100 wide x 1000 rows (write parquet) 91 95 3 1.1 911.0 0.1X +2500 wide x 40 rows (read in-mem) 10 11 1 10.2 98.5 1.3X +2500 wide x 40 rows (exec in-mem) 12 13 1 8.2 121.6 1.0X +2500 wide x 40 rows (read parquet) 19 21 3 5.2 190.7 0.7X +2500 wide x 40 rows (write parquet) 93 99 5 1.1 929.2 0.1X diff --git a/sql/core/benchmarks/WideTableBenchmark-jdk21-results.txt b/sql/core/benchmarks/WideTableBenchmark-jdk21-results.txt index 16d601d2f623e..b9cfa3a8bd0b4 100644 --- a/sql/core/benchmarks/WideTableBenchmark-jdk21-results.txt +++ b/sql/core/benchmarks/WideTableBenchmark-jdk21-results.txt @@ -2,16 +2,16 @@ projection on wide table ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor projection on wide table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -split threshold 10 2615 2687 80 0.4 2494.2 1.0X -split threshold 100 2150 2169 18 0.5 2050.1 1.2X -split threshold 1024 1635 1650 15 0.6 1559.3 1.6X -split threshold 2048 1611 1625 13 0.7 1536.3 1.6X -split threshold 4096 1699 1738 24 0.6 1620.2 1.5X -split threshold 8192 2319 2336 18 0.5 2211.3 1.1X -split threshold 65536 20666 21392 489 0.1 19709.0 0.1X +split threshold 10 2580 2601 18 0.4 2460.7 1.0X +split threshold 100 2137 2154 19 0.5 2038.2 1.2X +split threshold 1024 1652 1660 11 0.6 1575.0 1.6X +split threshold 2048 1586 1601 12 0.7 1512.4 1.6X +split threshold 4096 1715 1727 7 0.6 1635.7 1.5X +split threshold 8192 2359 2366 6 0.4 2250.0 1.1X +split threshold 65536 20935 21321 294 0.1 19964.9 0.1X diff --git a/sql/core/benchmarks/WideTableBenchmark-results.txt b/sql/core/benchmarks/WideTableBenchmark-results.txt index 0ae395b755163..5dc6dde967ec1 100644 --- a/sql/core/benchmarks/WideTableBenchmark-results.txt +++ b/sql/core/benchmarks/WideTableBenchmark-results.txt @@ -2,16 +2,16 @@ projection on wide table ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor projection on wide table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -split threshold 10 2531 2589 69 0.4 2413.7 1.0X -split threshold 100 2053 2069 18 0.5 1957.9 1.2X -split threshold 1024 1654 1668 13 0.6 1577.2 1.5X -split threshold 2048 1597 1630 19 0.7 1523.2 1.6X -split threshold 4096 1673 1683 10 0.6 1595.4 1.5X -split threshold 8192 2122 2146 21 0.5 2023.7 1.2X -split threshold 65536 21606 21831 217 0.0 20604.8 0.1X +split threshold 10 2477 2481 4 0.4 2362.0 1.0X +split threshold 100 1985 1996 9 0.5 1892.6 1.2X +split threshold 1024 1610 1628 14 0.7 1535.2 1.5X +split threshold 2048 1582 1596 12 0.7 1508.8 1.6X +split threshold 4096 1664 1672 9 0.6 1587.1 1.5X +split threshold 8192 2126 2131 6 0.5 2027.4 1.2X +split threshold 65536 21950 22285 234 0.0 20932.7 0.1X diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 05f906206e5e2..59d798e6e62fe 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml 
@@ -109,7 +109,7 @@ ${orc.classifier} - org.apache.hive + ${hive.group} hive-storage-api diff --git a/sql/core/src/main/java/org/apache/parquet/filter2/predicate/SparkFilterApi.java b/sql/core/src/main/java/org/apache/parquet/filter2/predicate/SparkFilterApi.java index 884042c824046..f25ae38437014 100644 --- a/sql/core/src/main/java/org/apache/parquet/filter2/predicate/SparkFilterApi.java +++ b/sql/core/src/main/java/org/apache/parquet/filter2/predicate/SparkFilterApi.java @@ -1,20 +1,18 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ package org.apache.parquet.filter2.predicate; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index 2bb0b02d4c9c4..1882d990bef55 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -218,7 +218,9 @@ public byte[] getBytes(int rowId, int count) { Platform.copyMemory(null, data + rowId, array, Platform.BYTE_ARRAY_OFFSET, count); } else { for (int i = 0; i < count; i++) { - array[i] = (byte) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = (byte) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -279,7 +281,9 @@ public short[] getShorts(int rowId, int count) { Platform.copyMemory(null, data + rowId * 2L, array, Platform.SHORT_ARRAY_OFFSET, count * 2L); } else { for (int i = 0; i < count; i++) { - array[i] = (short) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = (short) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -345,7 +349,9 @@ public int[] getInts(int rowId, int count) { Platform.copyMemory(null, data + rowId * 4L, array, Platform.INT_ARRAY_OFFSET, count * 4L); } else { for (int i = 0; i < count; i++) { - array[i] = dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -423,7 +429,9 @@ public long[] getLongs(int rowId, int count) { Platform.copyMemory(null, data + rowId * 8L, array, Platform.LONG_ARRAY_OFFSET, count * 8L); } else { for (int i = 0; i < count; i++) { - array[i] = dictionary.decodeToLong(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = dictionary.decodeToLong(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -487,7 +495,9 @@ public float[] getFloats(int rowId, int count) { Platform.copyMemory(null, data + rowId * 4L, array, Platform.FLOAT_ARRAY_OFFSET, count * 4L); } else { for (int i = 0; i < count; i++) { - array[i] = dictionary.decodeToFloat(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = dictionary.decodeToFloat(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -553,7 +563,9 @@ public double[] getDoubles(int rowId, int count) { count * 8L); } else { for (int i = 0; i < count; i++) { - array[i] = dictionary.decodeToDouble(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = dictionary.decodeToDouble(dictionaryIds.getDictId(rowId + i)); + } } } return array; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index 2bf2b8d08fcea..1908b511269a6 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -216,7 +216,9 @@ public byte[] getBytes(int rowId, int count) { System.arraycopy(byteData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = (byte) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = 
(byte) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -276,7 +278,9 @@ public short[] getShorts(int rowId, int count) { System.arraycopy(shortData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = (short) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = (short) dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -337,7 +341,9 @@ public int[] getInts(int rowId, int count) { System.arraycopy(intData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = dictionary.decodeToInt(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -409,7 +415,9 @@ public long[] getLongs(int rowId, int count) { System.arraycopy(longData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = dictionary.decodeToLong(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = dictionary.decodeToLong(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -466,7 +474,9 @@ public float[] getFloats(int rowId, int count) { System.arraycopy(floatData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = dictionary.decodeToFloat(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = dictionary.decodeToFloat(dictionaryIds.getDictId(rowId + i)); + } } } return array; @@ -525,7 +535,9 @@ public double[] getDoubles(int rowId, int count) { System.arraycopy(doubleData, rowId, array, 0, count); } else { for (int i = 0; i < count; i++) { - array[i] = dictionary.decodeToDouble(dictionaryIds.getDictId(rowId + i)); + if (!isNullAt(rowId + i)) { + array[i] = dictionary.decodeToDouble(dictionaryIds.getDictId(rowId + i)); + } } } return array; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 22c09c51c2376..2f383f45f1f2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -20,7 +20,8 @@ package org.apache.spark.sql import scala.jdk.CollectionConverters._ import org.apache.spark.annotation.Stable -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{LEFT_EXPR, RIGHT_EXPR} import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder} import org.apache.spark.sql.catalyst.expressions._ @@ -171,29 +172,6 @@ class Column(val expr: Expression) extends Logging { Column.fn(name, this, lit(other)) } - /** - * A version of the `fn` method specifically designed for binary operations in PySpark - * that require logging information. - * This method is used when the operation involves another Column. - * - * @param name The name of the operation to be performed. - * @param other The value to be used in the operation, which will be converted to a - * Column if not already one. - * @param pysparkFragment A string representing the 'fragment' of the PySpark error context, - * typically indicates the name of PySpark function. - * @param pysparkCallSite A string representing the 'callSite' of the PySpark error context, - * providing the exact location within the PySpark code where the - * operation originated. 
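
The OffHeapColumnVector and OnHeapColumnVector hunks above wrap every dictionary decode in the batch getters (getBytes, getShorts, getInts, getLongs, getFloats, getDoubles) in an isNullAt guard, so null slots are no longer decoded. Below is a minimal, self-contained Scala sketch of that guard pattern; the helper name and the functional parameters are hypothetical stand-ins for the vector's own isNullAt, dictionary-id lookup, and dictionary decode, not part of this patch.

```scala
// Hypothetical sketch of the guarded batch-get loop introduced in this patch:
// for a dictionary-encoded vector, a null slot carries no valid dictionary id,
// so it is skipped and the output array keeps its zero default.
object GuardedBatchGet {
  def getIntsGuarded(
      rowId: Int,
      count: Int,
      isNullAt: Int => Boolean,     // stand-in for the vector's isNullAt
      dictId: Int => Int,           // stand-in for dictionaryIds.getDictId
      decodeToInt: Int => Int       // stand-in for dictionary.decodeToInt
  ): Array[Int] = {
    val array = new Array[Int](count)
    var i = 0
    while (i < count) {
      if (!isNullAt(rowId + i)) {
        array(i) = decodeToInt(dictId(rowId + i))
      }
      i += 1
    }
    array
  }

  def main(args: Array[String]): Unit = {
    // Slot 1 is null; its dictionary id is garbage and must not be decoded.
    val nulls = Array(false, true, false)
    val ids = Array(2, -1, 0)
    val dict = Array(10, 20, 30)
    val result = getIntsGuarded(0, 3, nulls(_), ids(_), dict(_))
    println(result.mkString(", ")) // 30, 0, 10
  }
}
```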
- * @return A Column resulting from the operation. - */ - private def fn( - name: String, other: Any, pysparkFragment: String, pysparkCallSite: String): Column = { - val tupleInfo = (pysparkFragment, pysparkCallSite) - withOrigin(Some(tupleInfo)) { - Column.fn(name, this, lit(other)) - } - } - override def toString: String = toPrettySQL(expr) override def equals(that: Any): Boolean = that match { @@ -310,8 +288,9 @@ class Column(val expr: Expression) extends Logging { val right = lit(other).expr if (this.expr == right) { logWarning( - s"Constructing trivially true equals predicate, '${this.expr} = $right'. " + - "Perhaps you need to use aliases.") + log"Constructing trivially true equals predicate, " + + log"'${MDC(LEFT_EXPR, this.expr)} = ${MDC(RIGHT_EXPR, right)}'. " + + log"Perhaps you need to use aliases.") } fn("=", other) } @@ -516,8 +495,9 @@ class Column(val expr: Expression) extends Logging { val right = lit(other).expr if (this.expr == right) { logWarning( - s"Constructing trivially true equals predicate, '${this.expr} <=> $right'. " + - "Perhaps you need to use aliases.") + log"Constructing trivially true equals predicate, " + + log"'${MDC(LEFT_EXPR, this.expr)} <=> ${MDC(RIGHT_EXPR, right)}'. " + + log"Perhaps you need to use aliases.") } fn("<=>", other) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 9992d8cbba076..9d7a765a24c92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -77,6 +77,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { if (schema != null) { val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType] this.userSpecifiedSchema = Option(replaced) + validateSingleVariantColumn() } this } @@ -106,6 +107,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { */ def option(key: String, value: String): DataFrameReader = { this.extraOptions = this.extraOptions + (key -> value) + validateSingleVariantColumn() this } @@ -149,6 +151,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { */ def options(options: scala.collection.Map[String, String]): DataFrameReader = { this.extraOptions ++= options + validateSingleVariantColumn() this } @@ -766,6 +769,17 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { } } + /** + * Ensure that the `singleVariantColumn` option cannot be used if there is also a user specified + * schema. 
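
For the DataFrameReader change above (the validateSingleVariantColumn helper, whose body follows below), here is a hedged usage-level sketch of the behaviour it enforces: setting both a user-specified schema and the single-variant-column option is rejected as soon as both are present on the read builder. The option key spelling, the local session setup, and the expectation of an AnalysisException are illustrative assumptions; the diff itself only references the constant JSONOptions.SINGLE_VARIANT_COLUMN and the error helper QueryCompilationErrors.invalidSingleVariantColumn.

```scala
import org.apache.spark.sql.{AnalysisException, SparkSession}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

object SingleVariantColumnGuardExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")                    // illustrative local session
      .appName("single-variant-column-guard")
      .getOrCreate()

    val userSchema = StructType(Seq(StructField("v", StringType)))

    try {
      // With this patch, the second builder call trips the new validation,
      // because a user schema and singleVariantColumn are mutually exclusive.
      spark.read
        .schema(userSchema)
        .option("singleVariantColumn", "v")  // assumed option key spelling
        .json("/tmp/does-not-matter.json")
    } catch {
      case e: AnalysisException =>
        println(s"Rejected as expected: ${e.getMessage}")
    } finally {
      spark.stop()
    }
  }
}
```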
+ */ + private def validateSingleVariantColumn(): Unit = { + if (extraOptions.get(JSONOptions.SINGLE_VARIANT_COLUMN).isDefined && + userSpecifiedSchema.isDefined) { + throw QueryCompilationErrors.invalidSingleVariantColumn() + } + } + /////////////////////////////////////////////////////////////////////////////////////// // Builder pattern config options /////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataSourceRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataSourceRegistration.scala index 63cee8861c5a4..8ffdbb952b082 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataSourceRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataSourceRegistration.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.internal.SQLConf * Use `SparkSession.dataSource` to access this. */ @Evolving -private[sql] class DataSourceRegistration private[sql] (dataSourceManager: DataSourceManager) +class DataSourceRegistration private[sql] (dataSourceManager: DataSourceManager) extends Logging { protected[sql] def registerPython( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index c29fd968fc195..c7511737b2b3f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -95,10 +95,26 @@ private[sql] object Dataset { new Dataset[Row](qe, ExpressionEncoder(qe.analyzed.schema)) } + def ofRows( + sparkSession: SparkSession, + logicalPlan: LogicalPlan, + shuffleCleanupMode: ShuffleCleanupMode): DataFrame = + sparkSession.withActive { + val qe = new QueryExecution( + sparkSession, logicalPlan, shuffleCleanupMode = shuffleCleanupMode) + qe.assertAnalyzed() + new Dataset[Row](qe, ExpressionEncoder(qe.analyzed.schema)) + } + /** A variant of ofRows that allows passing in a tracker so we can track query parsing time. */ - def ofRows(sparkSession: SparkSession, logicalPlan: LogicalPlan, tracker: QueryPlanningTracker) + def ofRows( + sparkSession: SparkSession, + logicalPlan: LogicalPlan, + tracker: QueryPlanningTracker, + shuffleCleanupMode: ShuffleCleanupMode = DoNotCleanup) : DataFrame = sparkSession.withActive { - val qe = new QueryExecution(sparkSession, logicalPlan, tracker) + val qe = new QueryExecution( + sparkSession, logicalPlan, tracker, shuffleCleanupMode = shuffleCleanupMode) qe.assertAnalyzed() new Dataset[Row](qe, ExpressionEncoder(qe.analyzed.schema)) } @@ -738,7 +754,7 @@ class Dataset[T] private[sql]( * checkpoint directory. 
If false creates a local checkpoint using * the caching subsystem */ - private def checkpoint(eager: Boolean, reliableCheckpoint: Boolean): Dataset[T] = { + private[sql] def checkpoint(eager: Boolean, reliableCheckpoint: Boolean): Dataset[T] = { val actionName = if (reliableCheckpoint) "checkpoint" else "localCheckpoint" withAction(actionName, queryExecution) { physicalPlan => val internalRdd = physicalPlan.execute().map(_.copy()) @@ -3888,8 +3904,7 @@ class Dataset[T] private[sql]( * @since 1.6.0 */ def unpersist(blocking: Boolean): this.type = { - sparkSession.sharedState.cacheManager.uncacheQuery( - sparkSession, logicalPlan, cascade = false, blocking) + sparkSession.sharedState.cacheManager.uncacheQuery(this, cascade = false, blocking) this } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala index 862268eba6664..52ab633cd75a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/KeyValueGroupedDataset.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import scala.jdk.CollectionConverters._ import org.apache.spark.api.java.function._ +import org.apache.spark.sql.catalyst.analysis.{EliminateEventTimeWatermark, UnresolvedAttribute} import org.apache.spark.sql.catalyst.encoders.{encoderFor, ExpressionEncoder} import org.apache.spark.sql.catalyst.expressions.{Ascending, Attribute, Expression, SortOrder} import org.apache.spark.sql.catalyst.plans.logical._ @@ -676,6 +677,44 @@ class KeyValueGroupedDataset[K, V] private[sql]( ) } + /** + * (Scala-specific) + * Invokes methods defined in the stateful processor used in arbitrary state API v2. + * We allow the user to act on per-group set of input rows along with keyed state and the + * user can choose to output/return 0 or more rows. + * For a streaming dataframe, we will repeatedly invoke the interface methods for new rows + * in each trigger and the user's state/state variables will be stored persistently across + * invocations. + * + * Downstream operators would use specified eventTimeColumnName to calculate watermark. + * Note that TimeMode is set to EventTime to ensure correct flow of watermark. + * + * @tparam U The type of the output objects. Must be encodable to Spark SQL types. + * @param statefulProcessor Instance of statefulProcessor whose functions will + * be invoked by the operator. + * @param eventTimeColumnName eventTime column in the output dataset. Any operations after + * transformWithState will use the new eventTimeColumn. The user + * needs to ensure that the eventTime for emitted output adheres to + * the watermark boundary, otherwise streaming query will fail. + * @param outputMode The output mode of the stateful processor. + * + * See [[Encoder]] for more details on what types are encodable to Spark SQL. + */ + private[sql] def transformWithState[U: Encoder]( + statefulProcessor: StatefulProcessor[K, V, U], + eventTimeColumnName: String, + outputMode: OutputMode): Dataset[U] = { + val transformWithState = TransformWithState[K, V, U]( + groupingAttributes, + dataAttributes, + statefulProcessor, + TimeMode.EventTime(), + outputMode, + child = logicalPlan + ) + updateEventTimeColumnAfterTransformWithState(transformWithState, eventTimeColumnName) + } + /** * (Java-specific) * Invokes methods defined in the stateful processor used in arbitrary state API v2. 
@@ -702,6 +741,39 @@ class KeyValueGroupedDataset[K, V] private[sql]( transformWithState(statefulProcessor, timeMode, outputMode)(outputEncoder) } + /** + * (Java-specific) + * Invokes methods defined in the stateful processor used in arbitrary state API v2. + * We allow the user to act on per-group set of input rows along with keyed state and the + * user can choose to output/return 0 or more rows. + * + * For a streaming dataframe, we will repeatedly invoke the interface methods for new rows + * in each trigger and the user's state/state variables will be stored persistently across + * invocations. + * + * Downstream operators would use specified eventTimeColumnName to calculate watermark. + * Note that TimeMode is set to EventTime to ensure correct flow of watermark. + * + * @tparam U The type of the output objects. Must be encodable to Spark SQL types. + * @param statefulProcessor Instance of statefulProcessor whose functions will be invoked by the + * operator. + * @param eventTimeColumnName eventTime column in the output dataset. Any operations after + * transformWithState will use the new eventTimeColumn. The user + * needs to ensure that the eventTime for emitted output adheres to + * the watermark boundary, otherwise streaming query will fail. + * @param outputMode The output mode of the stateful processor. + * @param outputEncoder Encoder for the output type. + * + * See [[Encoder]] for more details on what types are encodable to Spark SQL. + */ + private[sql] def transformWithState[U: Encoder]( + statefulProcessor: StatefulProcessor[K, V, U], + eventTimeColumnName: String, + outputMode: OutputMode, + outputEncoder: Encoder[U]): Dataset[U] = { + transformWithState(statefulProcessor, eventTimeColumnName, outputMode)(outputEncoder) + } + /** * (Scala-specific) * Invokes methods defined in the stateful processor used in arbitrary state API v2. @@ -739,19 +811,98 @@ class KeyValueGroupedDataset[K, V] private[sql]( ) } + /** + * (Scala-specific) + * Invokes methods defined in the stateful processor used in arbitrary state API v2. + * Functions as the function above, but with additional eventTimeColumnName for output. + * + * @tparam U The type of the output objects. Must be encodable to Spark SQL types. + * @tparam S The type of initial state objects. Must be encodable to Spark SQL types. + * + * Downstream operators would use specified eventTimeColumnName to calculate watermark. + * Note that TimeMode is set to EventTime to ensure correct flow of watermark. + * + * @param statefulProcessor Instance of statefulProcessor whose functions will + * be invoked by the operator. + * @param eventTimeColumnName eventTime column in the output dataset. Any operations after + * transformWithState will use the new eventTimeColumn. The user + * needs to ensure that the eventTime for emitted output adheres to + * the watermark boundary, otherwise streaming query will fail. + * @param outputMode The output mode of the stateful processor. + * @param initialState User provided initial state that will be used to initiate state for + * the query in the first batch. + * + * See [[Encoder]] for more details on what types are encodable to Spark SQL. 
+ */ + private[sql] def transformWithState[U: Encoder, S: Encoder]( + statefulProcessor: StatefulProcessorWithInitialState[K, V, U, S], + eventTimeColumnName: String, + outputMode: OutputMode, + initialState: KeyValueGroupedDataset[K, S]): Dataset[U] = { + val transformWithState = TransformWithState[K, V, U, S]( + groupingAttributes, + dataAttributes, + statefulProcessor, + TimeMode.EventTime(), + outputMode, + child = logicalPlan, + initialState.groupingAttributes, + initialState.dataAttributes, + initialState.queryExecution.analyzed + ) + + updateEventTimeColumnAfterTransformWithState(transformWithState, eventTimeColumnName) + } + /** * (Java-specific) * Invokes methods defined in the stateful processor used in arbitrary state API v2. - * Functions as the function above, but with additional initial state. + * Functions as the function above, but with additional initialStateEncoder for state encoding. + * + * @tparam U The type of the output objects. Must be encodable to Spark SQL types. + * @tparam S The type of initial state objects. Must be encodable to Spark SQL types. + * @param statefulProcessor Instance of statefulProcessor whose functions will + * be invoked by the operator. + * @param timeMode The time mode semantics of the stateful processor for + * timers and TTL. + * @param outputMode The output mode of the stateful processor. + * @param initialState User provided initial state that will be used to initiate state for + * the query in the first batch. + * @param outputEncoder Encoder for the output type. + * @param initialStateEncoder Encoder for the initial state type. + * + * See [[Encoder]] for more details on what types are encodable to Spark SQL. + */ + private[sql] def transformWithState[U: Encoder, S: Encoder]( + statefulProcessor: StatefulProcessorWithInitialState[K, V, U, S], + timeMode: TimeMode, + outputMode: OutputMode, + initialState: KeyValueGroupedDataset[K, S], + outputEncoder: Encoder[U], + initialStateEncoder: Encoder[S]): Dataset[U] = { + transformWithState(statefulProcessor, timeMode, + outputMode, initialState)(outputEncoder, initialStateEncoder) + } + + /** + * (Java-specific) + * Invokes methods defined in the stateful processor used in arbitrary state API v2. + * Functions as the function above, but with additional eventTimeColumnName for output. + * + * Downstream operators would use specified eventTimeColumnName to calculate watermark. + * Note that TimeMode is set to EventTime to ensure correct flow of watermark. * * @tparam U The type of the output objects. Must be encodable to Spark SQL types. * @tparam S The type of initial state objects. Must be encodable to Spark SQL types. * @param statefulProcessor Instance of statefulProcessor whose functions will * be invoked by the operator. - * @param timeMode The time mode semantics of the stateful processor for timers and TTL. * @param outputMode The output mode of the stateful processor. * @param initialState User provided initial state that will be used to initiate state for * the query in the first batch. + * @param eventTimeColumnName event column in the output dataset. Any operations after + * transformWithState will use the new eventTimeColumn. The user + * needs to ensure that the eventTime for emitted output adheres to + * the watermark boundary, otherwise streaming query will fail. * @param outputEncoder Encoder for the output type. * @param initialStateEncoder Encoder for the initial state type. 
* @@ -759,15 +910,34 @@ class KeyValueGroupedDataset[K, V] private[sql]( */ private[sql] def transformWithState[U: Encoder, S: Encoder]( statefulProcessor: StatefulProcessorWithInitialState[K, V, U, S], - timeMode: TimeMode, outputMode: OutputMode, initialState: KeyValueGroupedDataset[K, S], + eventTimeColumnName: String, outputEncoder: Encoder[U], initialStateEncoder: Encoder[S]): Dataset[U] = { - transformWithState(statefulProcessor, timeMode, + transformWithState(statefulProcessor, eventTimeColumnName, outputMode, initialState)(outputEncoder, initialStateEncoder) } + /** + * Creates a new dataset with updated eventTimeColumn after the transformWithState + * logical node. + */ + private def updateEventTimeColumnAfterTransformWithState[U: Encoder]( + transformWithState: LogicalPlan, + eventTimeColumnName: String): Dataset[U] = { + val transformWithStateDataset = Dataset[U]( + sparkSession, + transformWithState + ) + + Dataset[U](sparkSession, EliminateEventTimeWatermark( + UpdateEventTimeWatermarkColumn( + UnresolvedAttribute(eventTimeColumnName), + None, + transformWithStateDataset.logicalPlan))) + } + /** * (Scala-specific) * Reduces the elements of each group of data using the specified binary function. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/MergeIntoWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/MergeIntoWriter.scala index ca04b9bfc55f0..b7f9c96f82e04 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/MergeIntoWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/MergeIntoWriter.scala @@ -32,11 +32,17 @@ import org.apache.spark.sql.functions.expr * @param table the name of the target table for the merge operation. * @param ds the source Dataset to merge into the target table. * @param on the merge condition. + * @param schemaEvolutionEnabled whether to enable automatic schema evolution for this merge + * operation. Default is `false`. * * @since 4.0.0 */ @Experimental -class MergeIntoWriter[T] private[sql] (table: String, ds: Dataset[T], on: Column) { +class MergeIntoWriter[T] private[sql] ( + table: String, + ds: Dataset[T], + on: Column, + private[sql] val schemaEvolutionEnabled: Boolean = false) { private val df: DataFrame = ds.toDF() @@ -160,6 +166,17 @@ class MergeIntoWriter[T] private[sql] (table: String, ds: Dataset[T], on: Column new WhenNotMatchedBySource[T](this, Some(condition.expr)) } + /** + * Enable automatic schema evolution for this merge operation. + * @return A `MergeIntoWriter` instance with schema evolution enabled. + */ + def withSchemaEvolution(): MergeIntoWriter[T] = { + new MergeIntoWriter[T](this.table, this.ds, this.on, schemaEvolutionEnabled = true) + .withNewMatchedActions(this.matchedActions: _*) + .withNewNotMatchedActions(this.notMatchedActions: _*) + .withNewNotMatchedBySourceActions(this.notMatchedBySourceActions: _*) + } + /** * Executes the merge operation. 
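A usage sketch for the new withSchemaEvolution() step (assuming the Dataset.mergeInto entry point; the table name `target` and the source alias are illustrative):

import org.apache.spark.sql.functions.expr

sourceDf.alias("source")
  .mergeInto("target", expr("target.id = source.id"))
  .whenMatched().updateAll()
  .whenNotMatched().insertAll()
  .withSchemaEvolution()   // returns a new writer with the flag set and the actions above carried over
  .merge()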
*/ @@ -176,23 +193,24 @@ class MergeIntoWriter[T] private[sql] (table: String, ds: Dataset[T], on: Column on.expr, matchedActions, notMatchedActions, - notMatchedBySourceActions) + notMatchedBySourceActions, + schemaEvolutionEnabled) val qe = sparkSession.sessionState.executePlan(merge) qe.assertCommandExecuted() } - private[sql] def withNewMatchedAction(action: MergeAction): MergeIntoWriter[T] = { - this.matchedActions = this.matchedActions :+ action + private[sql] def withNewMatchedActions(actions: MergeAction*): MergeIntoWriter[T] = { + this.matchedActions ++= actions this } - private[sql] def withNewNotMatchedAction(action: MergeAction): MergeIntoWriter[T] = { - this.notMatchedActions = this.notMatchedActions :+ action + private[sql] def withNewNotMatchedActions(actions: MergeAction*): MergeIntoWriter[T] = { + this.notMatchedActions ++= actions this } - private[sql] def withNewNotMatchedBySourceAction(action: MergeAction): MergeIntoWriter[T] = { - this.notMatchedBySourceActions = this.notMatchedBySourceActions :+ action + private[sql] def withNewNotMatchedBySourceActions(actions: MergeAction*): MergeIntoWriter[T] = { + this.notMatchedBySourceActions ++= actions this } } @@ -219,7 +237,7 @@ case class WhenMatched[T] private[sql]( * @return The MergeIntoWriter instance with the update all action configured. */ def updateAll(): MergeIntoWriter[T] = { - mergeIntoWriter.withNewMatchedAction(UpdateStarAction(condition)) + mergeIntoWriter.withNewMatchedActions(UpdateStarAction(condition)) } /** @@ -230,7 +248,7 @@ case class WhenMatched[T] private[sql]( * @return The MergeIntoWriter instance with the update action configured. */ def update(map: Map[String, Column]): MergeIntoWriter[T] = { - mergeIntoWriter.withNewMatchedAction( + mergeIntoWriter.withNewMatchedActions( UpdateAction(condition, map.map(x => Assignment(expr(x._1).expr, x._2.expr)).toSeq)) } @@ -240,7 +258,7 @@ case class WhenMatched[T] private[sql]( * @return The MergeIntoWriter instance with the delete action configured. */ def delete(): MergeIntoWriter[T] = { - mergeIntoWriter.withNewMatchedAction(DeleteAction(condition)) + mergeIntoWriter.withNewMatchedActions(DeleteAction(condition)) } } @@ -266,7 +284,7 @@ case class WhenNotMatched[T] private[sql]( * @return The MergeIntoWriter instance with the insert all action configured. */ def insertAll(): MergeIntoWriter[T] = { - mergeIntoWriter.withNewNotMatchedAction(InsertStarAction(condition)) + mergeIntoWriter.withNewNotMatchedActions(InsertStarAction(condition)) } /** @@ -277,7 +295,7 @@ case class WhenNotMatched[T] private[sql]( * @return The MergeIntoWriter instance with the insert action configured. */ def insert(map: Map[String, Column]): MergeIntoWriter[T] = { - mergeIntoWriter.withNewNotMatchedAction( + mergeIntoWriter.withNewNotMatchedActions( InsertAction(condition, map.map(x => Assignment(expr(x._1).expr, x._2.expr)).toSeq)) } } @@ -302,7 +320,7 @@ case class WhenNotMatchedBySource[T] private[sql]( * @return The MergeIntoWriter instance with the update all action configured. */ def updateAll(): MergeIntoWriter[T] = { - mergeIntoWriter.withNewNotMatchedBySourceAction(UpdateStarAction(condition)) + mergeIntoWriter.withNewNotMatchedBySourceActions(UpdateStarAction(condition)) } /** @@ -313,7 +331,7 @@ case class WhenNotMatchedBySource[T] private[sql]( * @return The MergeIntoWriter instance with the update action configured. 
*/ def update(map: Map[String, Column]): MergeIntoWriter[T] = { - mergeIntoWriter.withNewNotMatchedBySourceAction( + mergeIntoWriter.withNewNotMatchedBySourceActions( UpdateAction(condition, map.map(x => Assignment(expr(x._1).expr, x._2.expr)).toSeq)) } @@ -324,6 +342,6 @@ case class WhenNotMatchedBySource[T] private[sql]( * @return The MergeIntoWriter instance with the delete action configured. */ def delete(): MergeIntoWriter[T] = { - mergeIntoWriter.withNewNotMatchedBySourceAction(DeleteAction(condition)) + mergeIntoWriter.withNewNotMatchedBySourceActions(DeleteAction(condition)) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Observation.scala b/sql/core/src/main/scala/org/apache/spark/sql/Observation.scala index 104e7c101fd1c..30d5943c60922 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Observation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Observation.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql import java.util.UUID -import scala.jdk.CollectionConverters.MapHasAsJava - import org.apache.spark.sql.catalyst.plans.logical.CollectMetrics import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.util.QueryExecutionListener @@ -47,9 +45,7 @@ import org.apache.spark.util.ArrayImplicits._ * @param name name of the metric * @since 3.3.0 */ -class Observation(val name: String) { - - if (name.isEmpty) throw new IllegalArgumentException("Name must not be empty") +class Observation(name: String) extends ObservationBase(name) { /** * Create an Observation instance without providing a name. This generates a random name. @@ -60,8 +56,6 @@ class Observation(val name: String) { @volatile private var dataframeId: Option[(SparkSession, Long)] = None - @volatile private var metrics: Option[Map[String, Any]] = None - /** * Attach this observation to the given [[Dataset]] to observe aggregation expressions. * @@ -83,55 +77,6 @@ class Observation(val name: String) { ds.observe(name, expr, exprs: _*) } - /** - * (Scala-specific) Get the observed metrics. This waits for the observed dataset to finish - * its first action. Only the result of the first action is available. Subsequent actions do not - * modify the result. - * - * @return the observed metrics as a `Map[String, Any]` - * @throws InterruptedException interrupted while waiting - */ - @throws[InterruptedException] - def get: Map[String, _] = { - synchronized { - // we need to loop as wait might return without us calling notify - // https://en.wikipedia.org/w/index.php?title=Spurious_wakeup&oldid=992601610 - while (this.metrics.isEmpty) { - wait() - } - } - - this.metrics.get - } - - /** - * (Java-specific) Get the observed metrics. This waits for the observed dataset to finish - * its first action. Only the result of the first action is available. Subsequent actions do not - * modify the result. - * - * @return the observed metrics as a `java.util.Map[String, Object]` - * @throws InterruptedException interrupted while waiting - */ - @throws[InterruptedException] - def getAsJava: java.util.Map[String, AnyRef] = { - get.map { case (key, value) => (key, value.asInstanceOf[Object])}.asJava - } - - /** - * Get the observed metrics. This returns the metrics if they are available, otherwise an empty. 
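Assuming the Observation refactoring above is behavior-preserving for callers, a minimal usage sketch of the public API (column names are illustrative):

import org.apache.spark.sql.Observation
import org.apache.spark.sql.functions._

val observation = Observation("stats")
val observed = df.observe(observation, count(lit(1)).as("rows"), max(col("value")).as("maxValue"))
observed.collect()                   // the first action on `observed` materializes the metrics
val metrics = observation.get        // blocks until that first action finishes
val rowCount = metrics("rows")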
- * - * @return the observed metrics as a `Map[String, Any]` - */ - @throws[InterruptedException] - private[sql] def getOrEmpty: Map[String, _] = { - synchronized { - if (metrics.isEmpty) { - wait(100) // Wait for 100ms to see if metrics are available - } - metrics.getOrElse(Map.empty) - } - } - private[sql] def register(sparkSession: SparkSession, dataframeId: Long): Unit = { // makes this class thread-safe: // only the first thread entering this block can set sparkSession @@ -158,9 +103,8 @@ class Observation(val name: String) { case _ => false }) { val row = qe.observedMetrics.get(name) - this.metrics = row.map(r => r.getValuesMap[Any](r.schema.fieldNames.toImmutableArraySeq)) - if (metrics.isDefined) { - notifyAll() + val metrics = row.map(r => r.getValuesMap[Any](r.schema.fieldNames.toImmutableArraySeq)) + if (setMetricsAndNotify(metrics)) { unregister() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index d257a6b771b93..56f13994277d1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -130,54 +130,63 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits { * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newIntSeqEncoder: Encoder[Seq[Int]] = ExpressionEncoder() /** * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newLongSeqEncoder: Encoder[Seq[Long]] = ExpressionEncoder() /** * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newDoubleSeqEncoder: Encoder[Seq[Double]] = ExpressionEncoder() /** * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newFloatSeqEncoder: Encoder[Seq[Float]] = ExpressionEncoder() /** * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newByteSeqEncoder: Encoder[Seq[Byte]] = ExpressionEncoder() /** * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newShortSeqEncoder: Encoder[Seq[Short]] = ExpressionEncoder() /** * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newBooleanSeqEncoder: Encoder[Seq[Boolean]] = ExpressionEncoder() /** * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newStringSeqEncoder: Encoder[Seq[String]] = ExpressionEncoder() /** * @since 1.6.1 * @deprecated use [[newSequenceEncoder]] */ + @deprecated("Use newSequenceEncoder instead", "2.2.0") def newProductSeqEncoder[A <: Product : TypeTag]: Encoder[Seq[A]] = ExpressionEncoder() /** @since 2.2.0 */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 15eeca87dcf65..466e4cf813185 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -29,7 +29,8 @@ import scala.util.control.NonFatal import org.apache.spark.{SPARK_VERSION, SparkConf, SparkContext, SparkException, TaskContext} import org.apache.spark.annotation.{DeveloperApi, Experimental, Stable, Unstable} 
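The next several hunks (SparkSession, PythonSQLUtils, SQLUtils, and others below) migrate plain string interpolation to the structured-logging `log` interpolator with MDC keys; a condensed sketch of that pattern (hypothetical Example class, existing CONFIG key from LogKeys):

import org.apache.spark.internal.{Logging, MDC}
import org.apache.spark.internal.LogKeys.CONFIG

class Example extends Logging {
  def warnDeprecated(key: String): Unit = {
    // Before: logWarning(s"Config $key is deprecated.")
    logWarning(log"Config ${MDC(CONFIG, key)} is deprecated.")
  }
}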
import org.apache.spark.api.java.JavaRDD -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CALL_SITE_LONG_FORM, CLASS_NAME} import org.apache.spark.internal.config.{ConfigEntry, EXECUTOR_ALLOW_SPARK_CONTEXT} import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} @@ -232,8 +233,12 @@ class SparkSession private( /** * A collection of methods for registering user-defined data sources. + * + * @since 4.0.0 */ - private[sql] def dataSource: DataSourceRegistration = sessionState.dataSourceRegistration + @Experimental + @Unstable + def dataSource: DataSourceRegistration = sessionState.dataSourceRegistration /** * Returns a `StreamingQueryManager` that allows managing all the @@ -1358,13 +1363,13 @@ object SparkSession extends Logging { val session = getActiveSession.orElse(getDefaultSession) if (session.isDefined) { logWarning( - s"""An existing Spark session exists as the active or default session. - |This probably means another suite leaked it. Attempting to stop it before continuing. - |This existing Spark session was created at: - | - |${session.get.creationSite.longForm} - | - """.stripMargin) + log"""An existing Spark session exists as the active or default session. + |This probably means another suite leaked it. Attempting to stop it before continuing. + |This existing Spark session was created at: + | + |${MDC(CALL_SITE_LONG_FORM, session.get.creationSite.longForm)} + | + """.stripMargin) session.get.stop() SparkSession.clearActiveSession() SparkSession.clearDefaultSession() @@ -1391,7 +1396,8 @@ object SparkSession extends Logging { case e@(_: ClassCastException | _: ClassNotFoundException | _: NoClassDefFoundError) => - logWarning(s"Cannot use $extensionConfClassName to configure session extensions.", e) + logWarning(log"Cannot use ${MDC(CLASS_NAME, extensionConfClassName)} to configure " + + log"session extensions.", e) } } extensions diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index 62e6cc07b3e92..eb8c1d65a8b53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -26,7 +26,8 @@ import net.razorvine.pickle.{Pickler, Unpickler} import org.apache.spark.SparkException import org.apache.spark.api.python.DechunkedInputStream -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.CLASS_LOADER import org.apache.spark.security.SocketAuthServer import org.apache.spark.sql.{Column, DataFrame, Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} @@ -136,8 +137,8 @@ private[sql] object PythonSQLUtils extends Logging { def addJarToCurrentClassLoader(path: String): Unit = { Utils.getContextOrSparkClassLoader match { case cl: MutableURLClassLoader => cl.addURL(Utils.resolveURI(path).toURL) - case cl => logWarning( - s"Unsupported class loader $cl will not update jars in the thread class loader.") + case cl => logWarning(log"Unsupported class loader ${MDC(CLASS_LOADER, cl)} will not " + + log"update jars in the thread class loader.") } } @@ -164,10 +165,6 @@ private[sql] object PythonSQLUtils extends Logging { } } - def timestampDiff(unit: String, start: Column, end: Column): Column = { - 
Column(TimestampDiff(unit, start.expr, end.expr)) - } - def pandasProduct(e: Column, ignoreNA: Boolean): Column = { Column(PandasProduct(e.expr, ignoreNA).toAggregateExpression(false)) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 97b701b7380d5..ecbc57f25ad44 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -27,7 +27,8 @@ import org.apache.spark.TaskContext import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} import org.apache.spark.api.r.SerDe import org.apache.spark.broadcast.Broadcast -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.CONFIG import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions.{ExprUtils, GenericRowWithSchema, Literal} @@ -58,9 +59,9 @@ private[sql] object SQLUtils extends Logging { SparkSession.builder().enableHiveSupport().sparkContext(jsc.sc).getOrCreate() } else { if (enableHiveSupport) { - logWarning("SparkR: enableHiveSupport is requested for SparkSession but " + - s"Spark is not built with Hive or ${CATALOG_IMPLEMENTATION.key} is not set to " + - "'hive', falling back to without Hive support.") + logWarning(log"SparkR: enableHiveSupport is requested for SparkSession but " + + log"Spark is not built with Hive or ${MDC(CONFIG, CATALOG_IMPLEMENTATION.key)} " + + log"is not set to 'hive', falling back to without Hive support.") } SparkSession.builder().sparkContext(jsc.sc).getOrCreate() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index 36221d728066e..169aad2f234d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.commons.lang3.StringUtils import org.apache.spark.SparkException +import org.apache.spark.internal.LogKeys.CONFIG +import org.apache.spark.internal.MDC import org.apache.spark.sql.SaveMode import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, CatalogUtils, ClusterBySpec} @@ -379,8 +381,11 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) case AlterViewAs(ResolvedViewIdentifier(ident), originalText, query) => AlterViewAsCommand(ident, originalText, query) + case AlterViewSchemaBinding(ResolvedViewIdentifier(ident), viewSchemaMode) => + AlterViewSchemaBindingCommand(ident, viewSchemaMode) + case CreateView(ResolvedV1Identifier(ident), userSpecifiedColumns, comment, - properties, originalText, child, allowExisting, replace) => + properties, originalText, child, allowExisting, replace, viewSchemaMode) => CreateViewCommand( name = ident, userSpecifiedColumns = userSpecifiedColumns, @@ -390,9 +395,10 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) plan = child, allowExisting = allowExisting, replace = replace, - viewType = PersistedView) + viewType = PersistedView, + viewSchemaMode = viewSchemaMode) - case CreateView(ResolvedIdentifier(catalog, _), _, _, _, _, _, _, _) => + 
case CreateView(ResolvedIdentifier(catalog, _), _, _, _, _, _, _, _, _) => throw QueryCompilationErrors.missingCatalogAbilityError(catalog, "views") case ShowViews(ns: ResolvedNamespace, pattern, output) => @@ -524,9 +530,10 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) if (!createHiveTableByDefault || (ctas && conf.convertCTAS)) { (nonHiveStorageFormat, conf.defaultDataSourceName) } else { - logWarning("A Hive serde table will be created as there is no table provider " + - s"specified. You can set ${SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key} to false " + - "so that native data source table will be created instead.") + logWarning(log"A Hive serde table will be created as there is no table provider " + + log"specified. You can set " + + log"${MDC(CONFIG, SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT.key)} to false so that " + + log"native data source table will be created instead.") (defaultHiveStorage, DDLUtils.HIVE_PROVIDER) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala new file mode 100644 index 0000000000000..8ae0341e5646c --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/SQLFunction.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.catalog.UserDefinedFunction._ +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.types.{DataType, StructType} + +/** + * Represent a SQL function. 
+ * + * @param name qualified name of the SQL function + * @param inputParam function input parameters + * @param returnType function return type + * @param exprText function body as an expression + * @param queryText function body as a query + * @param comment function comment + * @param deterministic whether the function is deterministic + * @param containsSQL whether the function has data access routine to be CONTAINS SQL + * @param isTableFunc whether the function is a table function + * @param properties additional properties to be serialized for the SQL function + * @param owner owner of the function + * @param createTimeMs function creation time in milliseconds + */ +case class SQLFunction( + name: FunctionIdentifier, + inputParam: Option[StructType], + returnType: Either[DataType, StructType], + exprText: Option[String], + queryText: Option[String], + comment: Option[String], + deterministic: Option[Boolean], + containsSQL: Option[Boolean], + isTableFunc: Boolean, + properties: Map[String, String], + owner: Option[String] = None, + createTimeMs: Long = System.currentTimeMillis) extends UserDefinedFunction { + + assert(exprText.nonEmpty || queryText.nonEmpty) + assert((isTableFunc && returnType.isRight) || (!isTableFunc && returnType.isLeft)) + + override val language: RoutineLanguage = LanguageSQL +} + +object SQLFunction { + + /** + * This method returns an optional DataType indicating, when present, either the return type for + * scalar user-defined functions, or a StructType indicating the names and types of the columns in + * the output schema for table functions. If the optional value is empty, this indicates that the + * CREATE FUNCTION statement did not have any RETURNS clause at all (for scalar functions), or + * that it included a RETURNS TABLE clause but without any specified output schema (for table + * functions), prompting the analyzer to infer these metadata instead. + */ + def parseReturnTypeText( + text: String, + isTableFunc: Boolean, + parser: ParserInterface): Option[Either[DataType, StructType]] = { + if (!isTableFunc) { + // This is a scalar user-defined function. + if (text.isEmpty) { + // The CREATE FUNCTION statement did not have any RETURNS clause. + Option.empty[Either[DataType, StructType]] + } else { + // The CREATE FUNCTION statement included a RETURNS clause with an explicit return type. + Some(Left(parseDataType(text, parser))) + } + } else { + // This is a table function. + if (text.equalsIgnoreCase("table")) { + // The CREATE FUNCTION statement had a RETURNS TABLE clause but without any explicit schema. + Option.empty[Either[DataType, StructType]] + } else { + // The CREATE FUNCTION statement included a RETURNS TABLE clause with an explicit schema. + Some(Right(parseTableSchema(text, parser))) + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala new file mode 100644 index 0000000000000..1473f19cb71bd --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/catalog/UserDefinedFunction.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
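Illustrative of how parseReturnTypeText above maps the raw RETURNS-clause text (the parser comes from the session; the commented results are informal and assume the inputs shown):

val parser = spark.sessionState.sqlParser                       // a ParserInterface

SQLFunction.parseReturnTypeText("INT", isTableFunc = false, parser)
//   Some(Left(IntegerType))     -- scalar function with an explicit RETURNS INT
SQLFunction.parseReturnTypeText("", isTableFunc = false, parser)
//   None                        -- scalar function without any RETURNS clause
SQLFunction.parseReturnTypeText("table", isTableFunc = true, parser)
//   None                        -- RETURNS TABLE without an explicit output schema
SQLFunction.parseReturnTypeText("a INT, b STRING", isTableFunc = true, parser)
//   Some(Right(...))            -- RETURNS TABLE with an explicit schema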
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.catalog + +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.parser.ParserInterface +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.types.{DataType, StructType} + +/** + * The base class for all user defined functions registered via SQL queries. + */ +trait UserDefinedFunction { + + /** + * Qualified name of the function + */ + def name: FunctionIdentifier + + /** + * Additional properties to be serialized for the function. + * Use this to preserve the runtime configuration that should be used during the function + * execution, such as SQL configs etc. See [[SQLConf]] for more info. + */ + def properties: Map[String, String] + + /** + * Owner of the function + */ + def owner: Option[String] + + /** + * Function creation time in milliseconds since the linux epoch + */ + def createTimeMs: Long + + /** + * The language of the user defined function. + */ + def language: RoutineLanguage +} + +object UserDefinedFunction { + def parseTableSchema(text: String, parser: ParserInterface): StructType = { + val parsed = parser.parseTableSchema(text) + CharVarcharUtils.failIfHasCharVarchar(parsed).asInstanceOf[StructType] + } + + def parseDataType(text: String, parser: ParserInterface): DataType = { + val dataType = parser.parseDataType(text) + CharVarcharUtils.failIfHasCharVarchar(dataType) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 398f21e01b806..ca04991b50fc2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -95,7 +95,7 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) { } case Cast(child, dataType, _, evalMode) if evalMode == EvalMode.ANSI || Cast.canUpCast(child.dataType, dataType) => - generateExpression(child).map(v => new V2Cast(v, dataType)) + generateExpression(child).map(v => new V2Cast(v, child.dataType, dataType)) case AggregateExpression(aggregateFunction, Complete, isDistinct, None, _) => generateAggregateFunc(aggregateFunction, isDistinct) case Abs(_, true) => generateExpressionWithName("ABS", expr, isPredicate) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala index 1113e63cab332..885ddf4110cbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/CachedBatchSerializer.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.columnar import org.apache.spark.annotation.{DeveloperApi, Since} -import org.apache.spark.internal.Logging +import 
org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{FILTER, PREDICATE} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -307,7 +308,8 @@ abstract class SimpleMetricsCachedBatchSerializer extends CachedBatchSerializer allowFailures = true)) boundFilter.foreach(_ => - filter.foreach(f => logInfo(s"Predicate $p generates partition filter: $f"))) + filter.foreach(f => logInfo(log"Predicate ${MDC(PREDICATE, p)} generates " + + log"partition filter: ${MDC(FILTER, f)}"))) // If the filter can't be resolved then we are missing required statistics. boundFilter.filter(_.resolved) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 91042b59677bf..af3a8d67e3c29 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -27,7 +27,8 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.spark.{SparkFiles, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, Cast, Expression, GenericInternalRow, JsonToStructs, Literal, StructsToJson, UnsafeProjection} @@ -185,7 +186,7 @@ trait BaseScriptTransformationExec extends UnaryExecNode { if (!proc.isAlive) { val exitCode = proc.exitValue() if (exitCode != 0) { - logError(stderrBuffer.toString) // log the stderr circular buffer + logError(log"${MDC(STDERR, stderrBuffer.toString)}") // log the stderr circular buffer throw QueryExecutionErrors.subprocessExitedError(exitCode, stderrBuffer, cause) } } @@ -329,12 +330,13 @@ abstract class BaseScriptTransformationWriterThread extends Thread with Logging // Javadoc this call will not throw an exception: _exception = t proc.destroy() - logError(s"Thread-${this.getClass.getSimpleName}-Feed exit cause by: ", t) + logError(log"Thread-${MDC(CLASS_NAME, this.getClass.getSimpleName)}-Feed " + + log"exit cause by: ", t) } finally { try { Utils.tryLogNonFatalError(outputStream.close()) if (proc.waitFor() != 0) { - logError(stderrBuffer.toString) // log the stderr circular buffer + logError(log"${MDC(STDERR, stderrBuffer.toString)}") // log the stderr circular buffer } } catch { case NonFatal(exceptionFromFinallyBlock) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 4f3cecd17894d..b96f257e6b5b6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -17,17 +17,15 @@ package org.apache.spark.sql.execution -import scala.collection.immutable.IndexedSeq - import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.{LogEntry, Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import 
org.apache.spark.sql.catalyst.expressions.{Attribute, SubqueryExpression} import org.apache.spark.sql.catalyst.optimizer.EliminateResolvedHint -import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint, SubqueryAlias, View} +import org.apache.spark.sql.catalyst.plans.logical.{IgnoreCachedData, LogicalPlan, ResolvedHint, View} import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper @@ -40,7 +38,10 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK /** Holds a cached logical plan and its data */ -case class CachedData(plan: LogicalPlan, cachedRepresentation: InMemoryRelation) { +case class CachedData( + // A normalized resolved plan (See QueryExecution#normalized). + plan: LogicalPlan, + cachedRepresentation: InMemoryRelation) { override def toString: String = s""" |CachedData( @@ -55,7 +56,9 @@ case class CachedData(plan: LogicalPlan, cachedRepresentation: InMemoryRelation) * InMemoryRelation. This relation is automatically substituted query plans that return the * `sameResult` as the originally cached query. * - * Internal to Spark SQL. + * Internal to Spark SQL. All its public APIs take analyzed plans and will normalize them before + * further usage, or take [[Dataset]] and get its normalized plan. See `QueryExecution.normalize` + * for more details about plan normalization. */ class CacheManager extends Logging with AdaptiveSparkPlanHelper { @@ -79,41 +82,43 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { cachedData.isEmpty } + // Test-only + def cacheQuery(query: Dataset[_]): Unit = { + cacheQuery(query, tableName = None, storageLevel = MEMORY_AND_DISK) + } + /** * Caches the data produced by the logical representation of the given [[Dataset]]. - * Unlike `RDD.cache()`, the default storage level is set to be `MEMORY_AND_DISK` because - * recomputing the in-memory columnar representation of the underlying table is expensive. */ def cacheQuery( query: Dataset[_], - tableName: Option[String] = None, - storageLevel: StorageLevel = MEMORY_AND_DISK): Unit = { - cacheQuery(query.sparkSession, query.queryExecution.normalized, tableName, storageLevel) + tableName: Option[String], + storageLevel: StorageLevel): Unit = { + cacheQueryInternal(query.sparkSession, query.queryExecution.normalized, tableName, storageLevel) } /** - * Caches the data produced by the given [[LogicalPlan]]. - * Unlike `RDD.cache()`, the default storage level is set to be `MEMORY_AND_DISK` because - * recomputing the in-memory columnar representation of the underlying table is expensive. + * Caches the data produced by the given [[LogicalPlan]]. The given plan will be normalized + * before being used further. */ def cacheQuery( spark: SparkSession, planToCache: LogicalPlan, - tableName: Option[String]): Unit = { - cacheQuery(spark, planToCache, tableName, MEMORY_AND_DISK) + tableName: Option[String], + storageLevel: StorageLevel): Unit = { + val normalized = QueryExecution.normalize(spark, planToCache) + cacheQueryInternal(spark, normalized, tableName, storageLevel) } - /** - * Caches the data produced by the given [[LogicalPlan]]. - */ - def cacheQuery( + // The `planToCache` should have been normalized. 
+ private def cacheQueryInternal( spark: SparkSession, planToCache: LogicalPlan, tableName: Option[String], storageLevel: StorageLevel): Unit = { if (storageLevel == StorageLevel.NONE) { // Do nothing for StorageLevel.NONE since it will not actually cache any data. - } else if (lookupCachedData(planToCache).nonEmpty) { + } else if (lookupCachedDataInternal(planToCache).nonEmpty) { logWarning("Asked to cache already cached data.") } else { val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark) @@ -126,7 +131,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } this.synchronized { - if (lookupCachedData(planToCache).nonEmpty) { + if (lookupCachedDataInternal(planToCache).nonEmpty) { logWarning("Data has already been cached.") } else { val cd = CachedData(planToCache, inMemoryRelation) @@ -140,38 +145,64 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { /** * Un-cache the given plan or all the cache entries that refer to the given plan. - * @param query The [[Dataset]] to be un-cached. - * @param cascade If true, un-cache all the cache entries that refer to the given - * [[Dataset]]; otherwise un-cache the given [[Dataset]] only. + * + * @param query The [[Dataset]] to be un-cached. + * @param cascade If true, un-cache all the cache entries that refer to the given + * [[Dataset]]; otherwise un-cache the given [[Dataset]] only. + * @param blocking Whether to block until all blocks are deleted. */ + def uncacheQuery( + query: Dataset[_], + cascade: Boolean, + blocking: Boolean): Unit = { + uncacheQueryInternal(query.sparkSession, query.queryExecution.normalized, cascade, blocking) + } + + // An overload to provide default value for the `blocking` parameter. def uncacheQuery( query: Dataset[_], cascade: Boolean): Unit = { - uncacheQuery(query.sparkSession, query.queryExecution.normalized, cascade) + uncacheQuery(query, cascade, blocking = false) } /** * Un-cache the given plan or all the cache entries that refer to the given plan. - * @param spark The Spark session. - * @param plan The plan to be un-cached. - * @param cascade If true, un-cache all the cache entries that refer to the given - * plan; otherwise un-cache the given plan only. - * @param blocking Whether to block until all blocks are deleted. + * + * @param spark The Spark session. + * @param plan The plan to be un-cached. + * @param cascade If true, un-cache all the cache entries that refer to the given + * plan; otherwise un-cache the given plan only. + * @param blocking Whether to block until all blocks are deleted. */ def uncacheQuery( spark: SparkSession, plan: LogicalPlan, cascade: Boolean, - blocking: Boolean = false): Unit = { - uncacheQuery(spark, _.sameResult(plan), cascade, blocking) + blocking: Boolean): Unit = { + val normalized = QueryExecution.normalize(spark, plan) + uncacheQueryInternal(spark, normalized, cascade, blocking) + } + + // An overload to provide default value for the `blocking` parameter. + def uncacheQuery( + spark: SparkSession, + plan: LogicalPlan, + cascade: Boolean): Unit = { + uncacheQuery(spark, plan, cascade, blocking = false) + } + + // The `plan` should have been normalized. 
+ private def uncacheQueryInternal( + spark: SparkSession, + plan: LogicalPlan, + cascade: Boolean, + blocking: Boolean): Unit = { + uncacheByCondition(spark, _.sameResult(plan), cascade, blocking) } def uncacheTableOrView(spark: SparkSession, name: Seq[String], cascade: Boolean): Unit = { - uncacheQuery( - spark, - isMatchedTableOrView(_, name, spark.sessionState.conf), - cascade, - blocking = false) + uncacheByCondition( + spark, isMatchedTableOrView(_, name, spark.sessionState.conf), cascade, blocking = false) } private def isMatchedTableOrView(plan: LogicalPlan, name: Seq[String], conf: SQLConf): Boolean = { @@ -180,28 +211,24 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } plan match { - case SubqueryAlias(ident, LogicalRelation(_, _, Some(catalogTable), _)) => - val v1Ident = catalogTable.identifier - isSameName(ident.qualifier :+ ident.name) && isSameName(v1Ident.nameParts) + case LogicalRelation(_, _, Some(catalogTable), _) => + isSameName(catalogTable.identifier.nameParts) - case SubqueryAlias(ident, DataSourceV2Relation(_, _, Some(catalog), Some(v2Ident), _)) => + case DataSourceV2Relation(_, _, Some(catalog), Some(v2Ident), _) => import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.IdentifierHelper - isSameName(ident.qualifier :+ ident.name) && - isSameName(v2Ident.toQualifiedNameParts(catalog)) + isSameName(v2Ident.toQualifiedNameParts(catalog)) - case SubqueryAlias(ident, View(catalogTable, _, _)) => - val v1Ident = catalogTable.identifier - isSameName(ident.qualifier :+ ident.name) && isSameName(v1Ident.nameParts) + case View(catalogTable, _, _) => + isSameName(catalogTable.identifier.nameParts) - case SubqueryAlias(ident, HiveTableRelation(catalogTable, _, _, _, _)) => - val v1Ident = catalogTable.identifier - isSameName(ident.qualifier :+ ident.name) && isSameName(v1Ident.nameParts) + case HiveTableRelation(catalogTable, _, _, _, _) => + isSameName(catalogTable.identifier.nameParts) case _ => false } } - def uncacheQuery( + private def uncacheByCondition( spark: SparkSession, isMatchedPlan: LogicalPlan => Boolean, cascade: Boolean, @@ -254,10 +281,12 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } /** - * Tries to re-cache all the cache entries that refer to the given plan. + * Tries to re-cache all the cache entries that refer to the given plan. The given plan will be + * normalized before being used further. 
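In short, every public CacheManager entry point now either takes a Dataset (already normalized) or normalizes the plan itself; a sketch of the call shapes from code inside org.apache.spark.sql, using an arbitrary DataFrame df:

val cacheManager = spark.sharedState.cacheManager

df.cache()                                                        // caches the normalized plan
cacheManager.lookupCachedData(df)                                 // Dataset overload, already normalized
cacheManager.lookupCachedData(spark, df.queryExecution.analyzed)  // plan overload, normalizes internally
cacheManager.uncacheQuery(df, cascade = false, blocking = true)   // overload with an explicit blocking flag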
*/ def recacheByPlan(spark: SparkSession, plan: LogicalPlan): Unit = { - recacheByCondition(spark, _.plan.exists(_.sameResult(plan))) + val normalized = QueryExecution.normalize(spark, plan) + recacheByCondition(spark, _.plan.exists(_.sameResult(normalized))) } /** @@ -280,7 +309,7 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } val recomputedPlan = cd.copy(cachedRepresentation = newCache) this.synchronized { - if (lookupCachedData(recomputedPlan.plan).nonEmpty) { + if (lookupCachedDataInternal(recomputedPlan.plan).nonEmpty) { logWarning("While recaching, data was already added to cache.") } else { cachedData = recomputedPlan +: cachedData @@ -291,13 +320,23 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { } } - /** Optionally returns cached data for the given [[Dataset]] */ + /** + * Optionally returns cached data for the given [[Dataset]] + */ def lookupCachedData(query: Dataset[_]): Option[CachedData] = { - lookupCachedData(query.queryExecution.normalized) + lookupCachedDataInternal(query.queryExecution.normalized) } - /** Optionally returns cached data for the given [[LogicalPlan]]. */ - def lookupCachedData(plan: LogicalPlan): Option[CachedData] = { + /** + * Optionally returns cached data for the given [[LogicalPlan]]. The given plan will be normalized + * before being used further. + */ + def lookupCachedData(session: SparkSession, plan: LogicalPlan): Option[CachedData] = { + val normalized = QueryExecution.normalize(session, plan) + lookupCachedDataInternal(normalized) + } + + private def lookupCachedDataInternal(plan: LogicalPlan): Option[CachedData] = { val result = cachedData.find(cd => plan.sameResult(cd.plan)) if (result.isDefined) { CacheManager.logCacheOperation(log"Dataframe cache hit for input plan:" + @@ -307,13 +346,16 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { result } - /** Replaces segments of the given logical plan with cached versions where possible. */ - def useCachedData(plan: LogicalPlan): LogicalPlan = { + /** + * Replaces segments of the given logical plan with cached versions where possible. The input + * plan must be normalized. + */ + private[sql] def useCachedData(plan: LogicalPlan): LogicalPlan = { val newPlan = plan transformDown { case command: IgnoreCachedData => command case currentFragment => - lookupCachedData(currentFragment).map { cached => + lookupCachedDataInternal(currentFragment).map { cached => // After cache lookup, we should still keep the hints from the input plan. 
val hints = EliminateResolvedHint.extractHintsFromPlan(currentFragment)._2 val cachedPlan = cached.cachedRepresentation.withOutput(currentFragment.output) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index f583bb665de14..2ebbb9664f67a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -22,6 +22,8 @@ import java.util.concurrent.TimeUnit._ import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.Path +import org.apache.spark.internal.LogKeys.{COUNT, MAX_SPLIT_BYTES, OPEN_COST_IN_BYTES} +import org.apache.spark.internal.MDC import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.{FileSourceOptions, InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.BucketSpec @@ -713,7 +715,7 @@ case class FileSourceScanExec( bucketSpec: BucketSpec, readFile: (PartitionedFile) => Iterator[InternalRow], selectedPartitions: ScanFileListing): RDD[InternalRow] = { - logInfo(s"Planning with ${bucketSpec.numBuckets} buckets") + logInfo(log"Planning with ${MDC(COUNT, bucketSpec.numBuckets)} buckets") val partitionArray = selectedPartitions.toPartitionArray val filesGroupedToBuckets = partitionArray.groupBy { f => BucketingUtils @@ -731,7 +733,7 @@ case class FileSourceScanExec( } val filePartitions = optionalNumCoalescedBuckets.map { numCoalescedBuckets => - logInfo(s"Coalescing to ${numCoalescedBuckets} buckets") + logInfo(log"Coalescing to ${MDC(COUNT, numCoalescedBuckets)} buckets") val coalescedBuckets = prunedFilesGroupedToBuckets.groupBy(_._1 % numCoalescedBuckets) Seq.tabulate(numCoalescedBuckets) { bucketId => val partitionedFiles = coalescedBuckets.get(bucketId).map { @@ -764,8 +766,9 @@ case class FileSourceScanExec( val openCostInBytes = relation.sparkSession.sessionState.conf.filesOpenCostInBytes val maxSplitBytes = FilePartition.maxSplitBytes(relation.sparkSession, selectedPartitions) - logInfo(s"Planning scan with bin packing, max size: $maxSplitBytes bytes, " + - s"open cost is considered as scanning $openCostInBytes bytes.") + logInfo(log"Planning scan with bin packing, max size: ${MDC(MAX_SPLIT_BYTES, maxSplitBytes)} " + + log"bytes, open cost is considered as scanning ${MDC(OPEN_COST_IN_BYTES, openCostInBytes)} " + + log"bytes.") // Filter files with bucket pruning if possible val bucketingEnabled = relation.sparkSession.sessionState.conf.bucketingEnabled diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/EmptyRelationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/EmptyRelationExec.scala new file mode 100644 index 0000000000000..085c0b22524c9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/EmptyRelationExec.scala @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.adaptive.LogicalQueryStage +import org.apache.spark.sql.vectorized.ColumnarBatch + +/** + * A leaf node wrapper for propagated empty relation, which preserves the eliminated logical plan. + * The logical plan might be partially executed, i.e., containing LogicalQueryStage. + */ +case class EmptyRelationExec(@transient logical: LogicalPlan) extends LeafExecNode + with InputRDDCodegen { + private val rdd = sparkContext.emptyRDD[InternalRow] + + // Here we cannot use a def, because logical won't be serialized to executors while this method + // will be called on executors. + override val output: Seq[Attribute] = logical.output + + override protected def doExecute(): RDD[InternalRow] = rdd + + override def executeCollect(): Array[InternalRow] = Array.empty + + override def executeTake(limit: Int): Array[InternalRow] = Array.empty + + override def executeTail(limit: Int): Array[InternalRow] = Array.empty + + protected override def doExecuteColumnar(): RDD[ColumnarBatch] = sparkContext.emptyRDD + + override def inputRDD: RDD[InternalRow] = rdd + + override protected val createUnsafeProjection: Boolean = false + + protected override def stringArgs: Iterator[Any] = Iterator(s"[plan_id=$id]") + + override def generateTreeString( + depth: Int, + lastChildren: java.util.ArrayList[Boolean], + append: String => Unit, + verbose: Boolean, + prefix: String = "", + addSuffix: Boolean = false, + maxFields: Int, + printNodeId: Boolean, + indent: Int = 0): Unit = { + super.generateTreeString(depth, + lastChildren, + append, + verbose, + prefix, + addSuffix, + maxFields, + printNodeId, + indent) + lastChildren.add(true) + logical.generateTreeString( + depth + 1, lastChildren, append, verbose, "", false, maxFields, printNodeId, indent) + lastChildren.remove(lastChildren.size() - 1) + } + + override def doCanonicalize(): SparkPlan = { + this.copy(logical = LocalRelation(logical.output).canonicalized) + } + + override protected[sql] def cleanupResources(): Unit = { + logical.foreach { + case LogicalQueryStage(_, physical) => + physical.cleanupResources() + case _ => + } + super.cleanupResources() + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 252a6290cbc7f..8c7ed7b88d45d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{LOGICAL_PLAN_COLUMNS, OPTIMIZED_PLAN_COLUMNS} +import org.apache.spark.rdd.RDD +import 
org.apache.spark.sql.{Dataset, Encoder, SparkSession} import org.apache.spark.sql.catalyst.InternalRow @@ -226,10 +227,11 @@ object LogicalRDD extends Logging { (Some(rewrittenStatistics), Some(rewrittenConstraints)) }.getOrElse { // can't rewrite stats and constraints, give up - logWarning("The output columns are expected to the same (for name and type) for output " + - "between logical plan and optimized plan, but they aren't. output in logical plan: " + - s"${logicalPlan.output.map(_.simpleString(10))} / output in optimized plan: " + - s"${optimizedPlan.output.map(_.simpleString(10))}") + logWarning(log"The output columns are expected to the same (for name and type) for output " + + log"between logical plan and optimized plan, but they aren't. output in logical plan: " + + log"${MDC(LOGICAL_PLAN_COLUMNS, logicalPlan.output.map(_.simpleString(10)))} " + + log"/ output in optimized plan: " + + log"${MDC(OPTIMIZED_PLAN_COLUMNS, optimizedPlan.output.map(_.simpleString(10)))}") (None, None) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala index 11f6ae0e47ee1..421a963453f0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExplainUtils.scala @@ -17,9 +17,7 @@ package org.apache.spark.sql.execution -import java.util.Collections.newSetFromMap import java.util.IdentityHashMap -import java.util.Set import scala.collection.mutable.{ArrayBuffer, BitSet} @@ -30,6 +28,8 @@ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveS import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec} object ExplainUtils extends AdaptiveSparkPlanHelper { + def localIdMap: ThreadLocal[java.util.Map[QueryPlan[_], Int]] = QueryPlan.localIdMap + /** * Given a input physical plan, performs the following tasks. * 1. Computes the whole stage codegen id for current operator and records it in the @@ -80,24 +80,26 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { * instances but cached plan is an exception. The `InMemoryRelation#innerChildren` use a shared * plan instance across multi-queries. Add lock for this method to avoid tag race condition. 
*/ - def processPlan[T <: QueryPlan[T]](plan: T, append: String => Unit): Unit = synchronized { + def processPlan[T <: QueryPlan[T]](plan: T, append: String => Unit): Unit = { + val prevIdMap = localIdMap.get() try { - // Initialize a reference-unique set of Operators to avoid accdiental overwrites and to allow - // intentional overwriting of IDs generated in previous AQE iteration - val operators = newSetFromMap[QueryPlan[_]](new IdentityHashMap()) + // Initialize a reference-unique id map to store generated ids, which also avoid accidental + // overwrites and to allow intentional overwriting of IDs generated in previous AQE iteration + val idMap = new IdentityHashMap[QueryPlan[_], Int]() + localIdMap.set(idMap) // Initialize an array of ReusedExchanges to help find Adaptively Optimized Out // Exchanges as part of SPARK-42753 val reusedExchanges = ArrayBuffer.empty[ReusedExchangeExec] var currentOperatorID = 0 - currentOperatorID = generateOperatorIDs(plan, currentOperatorID, operators, reusedExchanges, + currentOperatorID = generateOperatorIDs(plan, currentOperatorID, idMap, reusedExchanges, true) val subqueries = ArrayBuffer.empty[(SparkPlan, Expression, BaseSubqueryExec)] getSubqueries(plan, subqueries) currentOperatorID = subqueries.foldLeft(currentOperatorID) { - (curId, plan) => generateOperatorIDs(plan._3.child, curId, operators, reusedExchanges, + (curId, plan) => generateOperatorIDs(plan._3.child, curId, idMap, reusedExchanges, true) } @@ -105,9 +107,9 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { val optimizedOutExchanges = ArrayBuffer.empty[Exchange] reusedExchanges.foreach{ reused => val child = reused.child - if (!operators.contains(child)) { + if (!idMap.containsKey(child)) { optimizedOutExchanges.append(child) - currentOperatorID = generateOperatorIDs(child, currentOperatorID, operators, + currentOperatorID = generateOperatorIDs(child, currentOperatorID, idMap, reusedExchanges, false) } } @@ -144,7 +146,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { append("\n") } } finally { - removeTags(plan) + localIdMap.set(prevIdMap) } } @@ -159,13 +161,15 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { * @param plan Input query plan to process * @param startOperatorID The start value of operation id. The subsequent operations will be * assigned higher value. - * @param visited A unique set of operators visited by generateOperatorIds. The set is scoped - * at the callsite function processPlan. It serves two purpose: Firstly, it is - * used to avoid accidentally overwriting existing IDs that were generated in - * the same processPlan call. Secondly, it is used to allow for intentional ID - * overwriting as part of SPARK-42753 where an Adaptively Optimized Out Exchange - * and its subtree may contain IDs that were generated in a previous AQE - * iteration's processPlan call which would result in incorrect IDs. + * @param idMap A reference-unique map store operators visited by generateOperatorIds and its + * id. This Map is scoped at the callsite function processPlan. It serves three + * purpose: + * Firstly, it stores the QueryPlan - generated ID mapping. Secondly, it is used to + * avoid accidentally overwriting existing IDs that were generated in the same + * processPlan call. Thirdly, it is used to allow for intentional ID overwriting as + * part of SPARK-42753 where an Adaptively Optimized Out Exchange and its subtree + * may contain IDs that were generated in a previous AQE iteration's processPlan + * call which would result in incorrect IDs. 
* @param reusedExchanges A unique set of ReusedExchange nodes visited which will be used to * idenitfy adaptively optimized out exchanges in SPARK-42753. * @param addReusedExchanges Whether to add ReusedExchange nodes to reusedExchanges set. We set it @@ -177,7 +181,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { private def generateOperatorIDs( plan: QueryPlan[_], startOperatorID: Int, - visited: Set[QueryPlan[_]], + idMap: java.util.Map[QueryPlan[_], Int], reusedExchanges: ArrayBuffer[ReusedExchangeExec], addReusedExchanges: Boolean): Int = { var currentOperationID = startOperatorID @@ -186,36 +190,35 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { return currentOperationID } - def setOpId(plan: QueryPlan[_]): Unit = if (!visited.contains(plan)) { + def setOpId(plan: QueryPlan[_]): Unit = idMap.computeIfAbsent(plan, plan => { plan match { case r: ReusedExchangeExec if addReusedExchanges => reusedExchanges.append(r) case _ => } - visited.add(plan) currentOperationID += 1 - plan.setTagValue(QueryPlan.OP_ID_TAG, currentOperationID) - } + currentOperationID + }) plan.foreachUp { case _: WholeStageCodegenExec => case _: InputAdapter => case p: AdaptiveSparkPlanExec => - currentOperationID = generateOperatorIDs(p.executedPlan, currentOperationID, visited, + currentOperationID = generateOperatorIDs(p.executedPlan, currentOperationID, idMap, reusedExchanges, addReusedExchanges) if (!p.executedPlan.fastEquals(p.initialPlan)) { - currentOperationID = generateOperatorIDs(p.initialPlan, currentOperationID, visited, + currentOperationID = generateOperatorIDs(p.initialPlan, currentOperationID, idMap, reusedExchanges, addReusedExchanges) } setOpId(p) case p: QueryStageExec => - currentOperationID = generateOperatorIDs(p.plan, currentOperationID, visited, + currentOperationID = generateOperatorIDs(p.plan, currentOperationID, idMap, reusedExchanges, addReusedExchanges) setOpId(p) case other: QueryPlan[_] => setOpId(other) currentOperationID = other.innerChildren.foldLeft(currentOperationID) { - (curId, plan) => generateOperatorIDs(plan, curId, visited, reusedExchanges, + (curId, plan) => generateOperatorIDs(plan, curId, idMap, reusedExchanges, addReusedExchanges) } } @@ -241,7 +244,7 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { } def collectOperatorWithID(plan: QueryPlan[_]): Unit = { - plan.getTagValue(QueryPlan.OP_ID_TAG).foreach { id => + Option(ExplainUtils.localIdMap.get().get(plan)).foreach { id => if (collectedOperators.add(id)) operators += plan } } @@ -334,20 +337,6 @@ object ExplainUtils extends AdaptiveSparkPlanHelper { * `operationId` tag value. 
*/ def getOpId(plan: QueryPlan[_]): String = { - plan.getTagValue(QueryPlan.OP_ID_TAG).map(v => s"$v").getOrElse("unknown") - } - - def removeTags(plan: QueryPlan[_]): Unit = { - def remove(p: QueryPlan[_], children: Seq[QueryPlan[_]]): Unit = { - p.unsetTagValue(QueryPlan.OP_ID_TAG) - p.unsetTagValue(QueryPlan.CODEGEN_ID_TAG) - children.foreach(removeTags) - } - - plan foreach { - case p: AdaptiveSparkPlanExec => remove(p, Seq(p.executedPlan, p.initialPlan)) - case p: QueryStageExec => remove(p, Seq(p.plan)) - case plan: QueryPlan[_] => remove(plan, plan.innerChildren) - } + Option(ExplainUtils.localIdMap.get().get(plan)).map(v => s"$v").getOrElse("unknown") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala index 56289d73c071f..59810adc4b22e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExternalAppendOnlyUnsafeRowArray.scala @@ -22,7 +22,8 @@ import java.io.Closeable import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SparkEnv, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, MAX_NUM_ROWS_IN_MEMORY_BUFFER} import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.serializer.SerializerManager import org.apache.spark.sql.catalyst.expressions.UnsafeRow @@ -122,8 +123,9 @@ private[sql] class ExternalAppendOnlyUnsafeRowArray( inMemoryBuffer += unsafeRow.copy() } else { if (spillableArray == null) { - logInfo(s"Reached spill threshold of $numRowsInMemoryBufferThreshold rows, switching to " + - s"${classOf[UnsafeExternalSorter].getName}") + logInfo(log"Reached spill threshold of " + + log"${MDC(MAX_NUM_ROWS_IN_MEMORY_BUFFER, numRowsInMemoryBufferThreshold)} rows, " + + log"switching to ${MDC(CLASS_NAME, classOf[UnsafeExternalSorter].getName)}") // We will not sort the rows, so prefixComparator and recordComparator are null spillableArray = UnsafeExternalSorter.create( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala index c59fd77c4bb35..8df650ca39b7e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/HiveResult.scala @@ -17,11 +17,12 @@ package org.apache.spark.sql.execution -import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} -import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period} +import java.time._ import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.expressions.ToStringBase import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.IntervalStringStyles.HIVE_STYLE import org.apache.spark.sql.catalyst.util.IntervalUtils.{durationToMicros, periodToMonths, toDayTimeIntervalString, toYearMonthIntervalString} @@ -35,7 +36,7 @@ import org.apache.spark.util.ArrayImplicits._ /** * Runs a query returning the result in Hive compatible form. 
*/ -object HiveResult { +object HiveResult extends SQLConfHelper { case class TimeFormatters(date: DateFormatter, timestamp: TimestampFormatter) def getTimeFormatters: TimeFormatters = { @@ -45,6 +46,16 @@ object HiveResult { TimeFormatters(dateFormatter, timestampFormatter) } + type BinaryFormatter = Array[Byte] => String + + def getBinaryFormatter: BinaryFormatter = { + if (conf.getConf(SQLConf.BINARY_OUTPUT_STYLE).isEmpty) { + // Keep the legacy behavior for compatibility. + conf.setConf(SQLConf.BINARY_OUTPUT_STYLE, Some("UTF8")) + } + ToStringBase.getBinaryFormatter(_).toString + } + private def stripRootCommandResult(executedPlan: SparkPlan): SparkPlan = executedPlan match { case CommandResultExec(_, plan, _) => plan case other => other @@ -74,11 +85,12 @@ object HiveResult { executedPlan.executeCollect().map(_.getString(1)).toImmutableArraySeq case other => val timeFormatters = getTimeFormatters + val binaryFormatter = getBinaryFormatter val result: Seq[Seq[Any]] = other.executeCollectPublic().map(_.toSeq).toImmutableArraySeq // We need the types so we can output struct field names val types = executedPlan.output.map(_.dataType) // Reformat to match hive tab delimited output. - result.map(_.zip(types).map(e => toHiveString(e, false, timeFormatters))) + result.map(_.zip(types).map(e => toHiveString(e, false, timeFormatters, binaryFormatter))) .map(_.mkString("\t")) } @@ -95,7 +107,8 @@ object HiveResult { def toHiveString( a: (Any, DataType), nested: Boolean, - formatters: TimeFormatters): String = a match { + formatters: TimeFormatters, + binaryFormatter: BinaryFormatter): String = a match { case (null, _) => if (nested) "null" else "NULL" case (b, BooleanType) => b.toString case (d: Date, DateType) => formatters.date.format(d) @@ -103,21 +116,22 @@ object HiveResult { case (t: Timestamp, TimestampType) => formatters.timestamp.format(t) case (i: Instant, TimestampType) => formatters.timestamp.format(i) case (l: LocalDateTime, TimestampNTZType) => formatters.timestamp.format(l) - case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) + case (bin: Array[Byte], BinaryType) => binaryFormatter(bin) case (decimal: java.math.BigDecimal, DecimalType()) => decimal.toPlainString case (n, _: NumericType) => n.toString case (s: String, _: StringType) => if (nested) "\"" + s + "\"" else s case (interval: CalendarInterval, CalendarIntervalType) => interval.toString case (seq: scala.collection.Seq[_], ArrayType(typ, _)) => - seq.map(v => (v, typ)).map(e => toHiveString(e, true, formatters)).mkString("[", ",", "]") + seq.map(v => (v, typ)).map(e => toHiveString(e, true, formatters, binaryFormatter)) + .mkString("[", ",", "]") case (m: Map[_, _], MapType(kType, vType, _)) => m.map { case (key, value) => - toHiveString((key, kType), true, formatters) + ":" + - toHiveString((value, vType), true, formatters) + toHiveString((key, kType), true, formatters, binaryFormatter) + ":" + + toHiveString((value, vType), true, formatters, binaryFormatter) }.toSeq.sorted.mkString("{", ",", "}") case (struct: Row, StructType(fields)) => struct.toSeq.zip(fields).map { case (v, t) => - s""""${t.name}":${toHiveString((v, t.dataType), true, formatters)}""" + s""""${t.name}":${toHiveString((v, t.dataType), true, formatters, binaryFormatter)}""" }.mkString("{", ",", "}") case (period: Period, YearMonthIntervalType(startField, endField)) => toYearMonthIntervalString( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 6280e7dd100c7..357484ca19df2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -26,7 +26,8 @@ import scala.util.control.NonFatal import org.apache.hadoop.fs.Path import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.EXTENDED_EXPLAIN_GENERATOR import org.apache.spark.rdd.RDD import org.apache.spark.sql.{AnalysisException, ExtendedExplainGenerator, Row, SparkSession} import org.apache.spark.sql.catalyst.{InternalRow, QueryPlanningTracker} @@ -59,7 +60,8 @@ class QueryExecution( val sparkSession: SparkSession, val logical: LogicalPlan, val tracker: QueryPlanningTracker = new QueryPlanningTracker, - val mode: CommandExecutionMode.Value = CommandExecutionMode.ALL) extends Logging { + val mode: CommandExecutionMode.Value = CommandExecutionMode.ALL, + val shuffleCleanupMode: ShuffleCleanupMode = DoNotCleanup) extends Logging { val id: Long = QueryExecution.nextExecutionId @@ -132,19 +134,7 @@ class QueryExecution( // The plan that has been normalized by custom rules, so that it's more likely to hit cache. lazy val normalized: LogicalPlan = { - val normalizationRules = sparkSession.sessionState.planNormalizationRules - if (normalizationRules.isEmpty) { - commandExecuted - } else { - val planChangeLogger = new PlanChangeLogger[LogicalPlan]() - val normalized = normalizationRules.foldLeft(commandExecuted) { (p, rule) => - val result = rule.apply(p) - planChangeLogger.logRule(rule.ruleName, p, result) - result - } - planChangeLogger.logBatch("Plan Normalization", commandExecuted, normalized) - normalized - } + QueryExecution.normalize(sparkSession, commandExecuted, Some(tracker)) } lazy val withCachedData: LogicalPlan = sparkSession.withActive { @@ -385,7 +375,8 @@ class QueryExecution( append(s"\n== Extended Information (${extension.title}) ==\n") append(extension.generateExtendedInfo(plan)) } catch { - case NonFatal(e) => logWarning(s"Cannot use $extension to get extended information.", e) + case NonFatal(e) => logWarning(log"Cannot use " + + log"${MDC(EXTENDED_EXPLAIN_GENERATOR, extension)} to get extended information.", e) }) } } @@ -457,6 +448,22 @@ object CommandExecutionMode extends Enumeration { val SKIP, NON_ROOT, ALL = Value } +/** + * Modes for shuffle dependency cleanup. + * + * DoNotCleanup: Do not perform any cleanup. + * SkipMigration: Shuffle dependencies will not be migrated at node decommissions. + * RemoveShuffleFiles: Shuffle dependency files are removed at the end of SQL executions. 
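The three modes above form a small sealed hierarchy that SQLExecution later dispatches on, once per recorded shuffle id. A self-contained sketch of that dispatch shape, assuming nothing beyond plain Scala; the println hooks are hypothetical stand-ins for the real ShuffleManager/BlockManager calls:

```scala
// Illustrative sketch only: mirrors the sealed ShuffleCleanupMode hierarchy described above.
// The println calls are hypothetical stand-ins for the real unregister / skip-migration hooks.
object ShuffleCleanupSketch {
  sealed trait ShuffleCleanupMode
  case object DoNotCleanup extends ShuffleCleanupMode
  case object SkipMigration extends ShuffleCleanupMode
  case object RemoveShuffleFiles extends ShuffleCleanupMode

  def cleanup(mode: ShuffleCleanupMode, shuffleIds: Iterable[Int]): Unit = mode match {
    case DoNotCleanup       => // nothing to clean up
    case SkipMigration      => shuffleIds.foreach(id => println(s"skip migrating shuffle $id"))
    case RemoveShuffleFiles => shuffleIds.foreach(id => println(s"unregister shuffle $id"))
  }

  def main(args: Array[String]): Unit = cleanup(RemoveShuffleFiles, Seq(0, 1, 2))
}
```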
+ */ +sealed trait ShuffleCleanupMode + +case object DoNotCleanup extends ShuffleCleanupMode + +case object SkipMigration extends ShuffleCleanupMode + +case object RemoveShuffleFiles extends ShuffleCleanupMode + + object QueryExecution { private val _nextExecutionId = new AtomicLong(0) @@ -594,4 +601,27 @@ object QueryExecution { case e: Throwable => throw toInternalError(msg, e) } } + + def normalize( + session: SparkSession, + plan: LogicalPlan, + tracker: Option[QueryPlanningTracker] = None): LogicalPlan = { + val normalizationRules = session.sessionState.planNormalizationRules + if (normalizationRules.isEmpty) { + plan + } else { + val planChangeLogger = new PlanChangeLogger[LogicalPlan]() + val normalized = normalizationRules.foldLeft(plan) { (p, rule) => + val startTime = System.nanoTime() + val result = rule.apply(p) + val runTime = System.nanoTime() - startTime + val effective = !result.fastEquals(p) + tracker.foreach(_.recordRuleInvocation(rule.ruleName, runTime, effective)) + planChangeLogger.logRule(rule.ruleName, p, result) + result + } + planChangeLogger.logBatch("Plan Normalization", plan, normalized) + normalized + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala index 561deacfb72d9..f4be03c90be75 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala @@ -20,14 +20,16 @@ package org.apache.spark.sql.execution import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future => JFuture} import java.util.concurrent.atomic.AtomicLong +import scala.jdk.CollectionConverters._ import scala.util.control.NonFatal -import org.apache.spark.{ErrorMessageFormat, JobArtifactSet, SparkException, SparkThrowable, SparkThrowableHelper} +import org.apache.spark.{ErrorMessageFormat, JobArtifactSet, SparkEnv, SparkException, SparkThrowable, SparkThrowableHelper} import org.apache.spark.SparkContext.{SPARK_JOB_DESCRIPTION, SPARK_JOB_INTERRUPT_ON_CANCEL} import org.apache.spark.internal.Logging import org.apache.spark.internal.config.{SPARK_DRIVER_PREFIX, SPARK_EXECUTOR_PREFIX} import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.SQL_EVENT_TRUNCATE_LENGTH @@ -115,6 +117,7 @@ object SQLExecution extends Logging { withSQLConfPropagated(sparkSession) { var ex: Option[Throwable] = None + var isExecutedPlanAvailable = false val startTime = System.nanoTime() val startEvent = SparkListenerSQLExecutionStart( executionId = executionId, @@ -147,6 +150,7 @@ object SQLExecution extends Logging { } sc.listenerBus.post( startEvent.copy(physicalPlanDescription = planDesc, sparkPlanInfo = planInfo)) + isExecutedPlanAvailable = true f() } } catch { @@ -161,6 +165,24 @@ object SQLExecution extends Logging { case e => Utils.exceptionString(e) } + if (queryExecution.shuffleCleanupMode != DoNotCleanup + && isExecutedPlanAvailable) { + val shuffleIds = queryExecution.executedPlan match { + case ae: AdaptiveSparkPlanExec => + ae.context.shuffleIds.asScala.keys + case _ => + Iterable.empty + } + shuffleIds.foreach { shuffleId => + queryExecution.shuffleCleanupMode match { + 
case RemoveShuffleFiles => + SparkEnv.get.shuffleManager.unregisterShuffle(shuffleId) + case SkipMigration => + SparkEnv.get.blockManager.migratableResolver.addShuffleToSkip(shuffleId) + case _ => // this should not happen + } + } + } val event = SparkListenerSQLExecutionEnd( executionId, System.currentTimeMillis(), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 70a35ea911538..3382a1161ddba 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.ExperimentalMethods +import org.apache.spark.sql.catalyst.analysis.RewriteCollationJoin import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.optimizer._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -89,9 +90,11 @@ class SparkOptimizer( InferWindowGroupLimit, LimitPushDown, LimitPushDownThroughWindow, - EliminateLimits) :+ + EliminateLimits, + ConstantFolding) :+ Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*) :+ - Batch("Replace CTE with Repartition", Once, ReplaceCTERefWithRepartition) + Batch("Replace CTE with Repartition", Once, ReplaceCTERefWithRepartition) :+ + Batch("RewriteCollationJoin", Once, RewriteCollationJoin) override def nonExcludableRules: Seq[String] = super.nonExcludableRules :+ ExtractPythonUDFFromJoinCondition.ruleName :+ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala index 7c45b02ee8468..615c8746a3e52 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanInfo.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.execution import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, QueryStageExec} +import org.apache.spark.sql.execution.adaptive.LogicalQueryStage import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.execution.metric.SQLMetricInfo @@ -51,6 +53,19 @@ class SparkPlanInfo( private[execution] object SparkPlanInfo { + private def fromLogicalPlan(plan: LogicalPlan): SparkPlanInfo = { + val childrenInfo = plan match { + case LogicalQueryStage(_, physical) => Seq(fromSparkPlan(physical)) + case _ => (plan.children ++ plan.subqueries).map(fromLogicalPlan) + } + new SparkPlanInfo( + plan.nodeName, + plan.simpleString(SQLConf.get.maxToStringFields), + childrenInfo, + Map[String, String](), + Seq.empty) + } + def fromSparkPlan(plan: SparkPlan): SparkPlanInfo = { val children = plan match { case ReusedExchangeExec(_, child) => child :: Nil @@ -58,6 +73,7 @@ private[execution] object SparkPlanInfo { case a: AdaptiveSparkPlanExec => a.executedPlan :: Nil case stage: QueryStageExec => stage.plan :: Nil case inMemTab: InMemoryTableScanExec => inMemTab.relation.cachedPlan :: Nil + case EmptyRelationExec(logical) => (logical :: Nil) case _ => plan.children ++ plan.subqueries } val metrics = plan.metrics.toSeq.map { case (key, metric) => @@ -69,10 +85,17 @@ 
private[execution] object SparkPlanInfo { case fileScan: FileSourceScanLike => fileScan.metadata case _ => Map[String, String]() } + val childrenInfo = children.flatMap { + case child: SparkPlan => + Some(fromSparkPlan(child)) + case child: LogicalPlan => + Some(fromLogicalPlan(child)) + case _ => None + } new SparkPlanInfo( plan.nodeName, plan.simpleString(SQLConf.get.maxToStringFields), - children.map(fromSparkPlan), + childrenInfo, metadata, metrics) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 8192be2699933..055fec02d2aea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -27,7 +27,7 @@ import org.antlr.v4.runtime.tree.TerminalNode import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, PersistedView, UnresolvedFunctionName, UnresolvedIdentifier} +import org.apache.spark.sql.catalyst.analysis.{GlobalTempView, LocalTempView, PersistedView, SchemaEvolution, SchemaTypeEvolution, UnresolvedFunctionName, UnresolvedIdentifier, UnresolvedNamespace} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} import org.apache.spark.sql.catalyst.parser._ @@ -150,6 +150,9 @@ class SparkSqlAstBuilder extends AstBuilder { * }}} */ override def visitSetCollation(ctx: SetCollationContext): LogicalPlan = withOrigin(ctx) { + if (!SQLConf.get.collationEnabled) { + throw QueryCompilationErrors.collationNotEnabledError() + } val key = SQLConf.DEFAULT_COLLATION.key SetCommand(Some(key -> Some(ctx.identifier.getText.toUpperCase(Locale.ROOT)))) } @@ -340,7 +343,7 @@ class SparkSqlAstBuilder extends AstBuilder { visitCreateTableClauses(ctx.createTableClauses()) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( throw QueryParsingErrors.createTempTableNotSpecifyProviderError(ctx)) - val schema = Option(ctx.createOrReplaceTableColTypeList()).map(createSchema) + val schema = Option(ctx.colDefinitionList()).map(createSchema) logWarning(s"CREATE TEMPORARY TABLE ... USING ... is deprecated, please use " + "CREATE TEMPORARY VIEW ... USING ... 
instead") @@ -505,6 +508,7 @@ class SparkSqlAstBuilder extends AstBuilder { } checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) + checkDuplicateClauses(ctx.schemaBinding(), "WITH SCHEMA", ctx) checkDuplicateClauses(ctx.PARTITIONED, "PARTITIONED ON", ctx) checkDuplicateClauses(ctx.TBLPROPERTIES, "TBLPROPERTIES", ctx) @@ -524,6 +528,10 @@ class SparkSqlAstBuilder extends AstBuilder { operationNotAllowed("TBLPROPERTIES can't coexist with CREATE TEMPORARY VIEW", ctx) } + if (ctx.TEMPORARY != null && ctx.schemaBinding(0) != null) { + throw QueryParsingErrors.temporaryViewWithSchemaBindingMode(ctx) + } + val viewType = if (ctx.TEMPORARY == null) { PersistedView } else if (ctx.GLOBAL != null) { @@ -543,6 +551,13 @@ class SparkSqlAstBuilder extends AstBuilder { val originalText = source(ctx.query) assert(Option(originalText).isDefined, "'originalText' must be provided to create permanent view") + val schemaBinding = visitSchemaBinding(ctx.schemaBinding(0)) + val finalSchemaBinding = + if (schemaBinding == SchemaEvolution && userSpecifiedColumns.nonEmpty) { + SchemaTypeEvolution + } else { + schemaBinding + } CreateView( withIdentClause(ctx.identifierReference(), UnresolvedIdentifier(_)), userSpecifiedColumns, @@ -551,7 +566,8 @@ class SparkSqlAstBuilder extends AstBuilder { Some(originalText), qPlan, ctx.EXISTS != null, - ctx.REPLACE != null) + ctx.REPLACE != null, + finalSchemaBinding) } else { // Disallows 'CREATE TEMPORARY VIEW IF NOT EXISTS' to be consistent with // 'CREATE TEMPORARY TABLE' @@ -637,6 +653,173 @@ class SparkSqlAstBuilder extends AstBuilder { }) } + /** + * Create a [[CreateUserDefinedFunctionCommand]]. + * + * For example: + * {{{ + * CREATE [OR REPLACE] [TEMPORARY] FUNCTION [IF NOT EXISTS] [db_name.]function_name + * ([param_name param_type [COMMENT param_comment], ...]) + * RETURNS {ret_type | TABLE (ret_name ret_type [COMMENT ret_comment], ...])} + * [routine_characteristic] + * RETURN {expression | query }; + * + * routine_characteristic + * { LANGUAGE {SQL | IDENTIFIER} | + * [NOT] DETERMINISTIC | + * COMMENT function_comment | + * [CONTAINS SQL | READS SQL DATA] } + * }}} + */ + override def visitCreateUserDefinedFunction(ctx: CreateUserDefinedFunctionContext): LogicalPlan = + withOrigin(ctx) { + assert(ctx.expression != null || ctx.query != null) + + if (ctx.EXISTS != null && ctx.REPLACE != null) { + throw QueryParsingErrors.createFuncWithBothIfNotExistsAndReplaceError(ctx) + } + + val inputParamText = Option(ctx.parameters).map(source) + val returnTypeText: String = + if (ctx.RETURNS != null && + (Option(ctx.dataType).nonEmpty || Option(ctx.returnParams).nonEmpty)) { + source(Option(ctx.dataType).getOrElse(ctx.returnParams)) + } else { + "" + } + val exprText = Option(ctx.expression()).map(source) + val queryText = Option(ctx.query()).map(source) + + val (containsSQL, deterministic, comment, optionalLanguage) = + visitRoutineCharacteristics(ctx.routineCharacteristics()) + val language: RoutineLanguage = optionalLanguage.getOrElse(LanguageSQL) + val isTableFunc = ctx.TABLE() != null || returnTypeText.equalsIgnoreCase("table") + + withIdentClause(ctx.identifierReference(), functionIdentifier => { + if (ctx.TEMPORARY == null) { + // TODO: support creating persistent UDFs. 
+ operationNotAllowed(s"creating persistent SQL functions is not supported", ctx) + } else { + // Disallow to define a temporary function with `IF NOT EXISTS` + if (ctx.EXISTS != null) { + throw QueryParsingErrors.defineTempFuncWithIfNotExistsError(ctx) + } + + if (functionIdentifier.length > 2) { + throw QueryParsingErrors.unsupportedFunctionNameError(functionIdentifier, ctx) + } else if (functionIdentifier.length == 2) { + // Temporary function names should not contain database prefix like "database.function" + throw QueryParsingErrors.specifyingDBInCreateTempFuncError(functionIdentifier.head, ctx) + } + + CreateUserDefinedFunctionCommand( + functionIdentifier.asFunctionIdentifier, + inputParamText, + returnTypeText, + exprText, + queryText, + comment, + deterministic, + containsSQL, + language, + isTableFunc, + isTemp = true, + ctx.EXISTS != null, + ctx.REPLACE != null + ) + } + }) + } + + /** + * SQL function routine characteristics. + * Currently only deterministic clause and comment clause are used. + * + * routine language: [LANGUAGE SQL | IDENTIFIER] + * specific name: [SPECIFIC specific_name] + * routine data access: [NO SQL | CONTAINS SQL | READS SQL DATA | MODIFIES SQL DATA] + * routine null call: [RETURNS NULL ON NULL INPUT | CALLED ON NULL INPUT] + * routine determinism: [DETERMINISTIC | NOT DETERMINISTIC] + * comment: [COMMENT function_comment] + * rights: [SQL SECURITY INVOKER | SQL SECURITY DEFINER] + */ + override def visitRoutineCharacteristics(ctx: RoutineCharacteristicsContext) + : (Option[Boolean], Option[Boolean], Option[String], Option[RoutineLanguage]) = + withOrigin(ctx) { + checkDuplicateClauses(ctx.routineLanguage(), "LANGUAGE", ctx) + checkDuplicateClauses(ctx.specificName(), "SPECIFIC", ctx) + checkDuplicateClauses(ctx.sqlDataAccess(), "SQL DATA ACCESS", ctx) + checkDuplicateClauses(ctx.nullCall(), "NULL CALL", ctx) + checkDuplicateClauses(ctx.deterministic(), "DETERMINISTIC", ctx) + checkDuplicateClauses(ctx.commentSpec(), "COMMENT", ctx) + checkDuplicateClauses(ctx.rightsClause(), "SQL SECURITY RIGHTS", ctx) + + val language: Option[RoutineLanguage] = ctx + .routineLanguage() + .asScala + .headOption + .map(x => { + if (x.SQL() != null) { + LanguageSQL + } else { + val name: String = x.IDENTIFIER().getText() + operationNotAllowed(s"Unsupported language for user defined functions: $name", x) + } + }) + + val deterministic = ctx.deterministic().asScala.headOption.map(visitDeterminism) + val comment = visitCommentSpecList(ctx.commentSpec()) + + ctx.specificName().asScala.headOption.foreach(checkSpecificName) + ctx.nullCall().asScala.headOption.foreach(checkNullCall) + ctx.rightsClause().asScala.headOption.foreach(checkRightsClause) + val containsSQL: Option[Boolean] = + ctx.sqlDataAccess().asScala.headOption.map(visitDataAccess) + (containsSQL, deterministic, comment, language) + } + + /** + * Check if the function has a SPECIFIC name, + * which is a way to provide an alternative name for the function. + * This check applies for all user defined functions. + * Use functionName to specify the function that is currently checked. + */ + private def checkSpecificName(ctx: SpecificNameContext): Unit = + withOrigin(ctx) { + operationNotAllowed(s"SQL function with SPECIFIC name is not supported", ctx) + } + + private def checkNullCall(ctx: NullCallContext): Unit = withOrigin(ctx) { + if (ctx.RETURNS() != null) { + operationNotAllowed("SQL function with RETURNS NULL ON NULL INPUT is not supported", ctx) + } + } + + /** + * Check SQL function data access clause. 
Currently only READS SQL DATA and CONTAINS SQL + * are supported. Return true if the data access routine is CONTAINS SQL. + */ + private def visitDataAccess(ctx: SqlDataAccessContext): Boolean = withOrigin(ctx) { + if (ctx.NO() != null) { + operationNotAllowed("SQL function with NO SQL is not supported", ctx) + } + if (ctx.MODIFIES() != null) { + operationNotAllowed("SQL function with MODIFIES SQL DATA is not supported", ctx) + } + return ctx.READS() == null + } + + private def checkRightsClause(ctx: RightsClauseContext): Unit = withOrigin(ctx) { + if (ctx.INVOKER() != null) { + operationNotAllowed("SQL function with SQL SECURITY INVOKER is not supported", ctx) + } + } + + private def visitDeterminism(ctx: DeterministicContext): Boolean = withOrigin(ctx) { + blockBang(ctx.errorCapturingNot()) + ctx.errorCapturingNot() == null + } + /** * Create a DROP FUNCTION statement. * @@ -915,4 +1098,22 @@ class SparkSqlAstBuilder extends AstBuilder { (ctx.LOCAL != null, finalStorage, Some(DDLUtils.HIVE_PROVIDER)) } + + /** + * Create a [[UnsetNamespacePropertiesCommand]] command. + * + * Expected format: + * {{{ + * ALTER (DATABASE|SCHEMA|NAMESPACE) database + * UNSET (DBPROPERTIES | PROPERTIES) ('key1', 'key2'); + * }}} + */ + override def visitUnsetNamespaceProperties( + ctx: UnsetNamespacePropertiesContext): LogicalPlan = withOrigin(ctx) { + val properties = visitPropertyKeys(ctx.propertyList) + val cleanedProperties = cleanNamespaceProperties(properties.map(_ -> "").toMap, ctx).keys.toSeq + UnsetNamespacePropertiesCommand( + withIdentClause(ctx.identifierReference(), UnresolvedNamespace(_)), + cleanedProperties) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index d7ebf786168b8..ed7a6162cc9f4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -31,7 +31,6 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.streaming.{InternalOutputModes, StreamingRelationV2} import org.apache.spark.sql.catalyst.types.DataTypeUtils -import org.apache.spark.sql.catalyst.util.UnsafeRowUtils import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.aggregate.AggUtils import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} @@ -206,20 +205,6 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } } - private def hashJoinSupported - (leftKeys: Seq[Expression], rightKeys: Seq[Expression]): Boolean = { - val result = leftKeys.concat(rightKeys).forall(e => UnsafeRowUtils.isBinaryStable(e.dataType)) - if (!result) { - val keysNotSupportingHashJoin = leftKeys.concat(rightKeys).filterNot( - e => UnsafeRowUtils.isBinaryStable(e.dataType)) - logWarning("Hash based joins are not supported due to " + - "joining on keys that don't support binary equality. " + - "Keys not supporting hash joins: " + keysNotSupportingHashJoin - .map(e => e.toString + " due to DataType: " + e.dataType.typeName).mkString(", ")) - } - result - } - def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { // If it is an equi-join, we first look at the join hints w.r.t. 
the following order: @@ -246,8 +231,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { val hashJoinSupport = hashJoinSupported(leftKeys, rightKeys) def createBroadcastHashJoin(onlyLookingAtHint: Boolean) = { if (hashJoinSupport) { - val buildSide = getBroadcastBuildSide( - left, right, joinType, hint, onlyLookingAtHint, conf) + val buildSide = getBroadcastBuildSide(j, onlyLookingAtHint, conf) checkHintBuildSide(onlyLookingAtHint, buildSide, joinType, hint, true) buildSide.map { buildSide => @@ -267,8 +251,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { def createShuffleHashJoin(onlyLookingAtHint: Boolean) = { if (hashJoinSupport) { - val buildSide = getShuffleHashJoinBuildSide( - left, right, joinType, hint, onlyLookingAtHint, conf) + val buildSide = getShuffleHashJoinBuildSide(j, onlyLookingAtHint, conf) checkHintBuildSide(onlyLookingAtHint, buildSide, joinType, hint, false) buildSide.map { buildSide => @@ -440,6 +423,18 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { case EventTimeWatermark(columnName, delay, child) => EventTimeWatermarkExec(columnName, delay, planLater(child)) :: Nil + case UpdateEventTimeWatermarkColumn(columnName, delay, child) => + // we expect watermarkDelay to be resolved before physical planning. + if (delay.isEmpty) { + // This is a sanity check. We should not reach here as delay is updated during + // query plan resolution in [[ResolveUpdateEventTimeWatermarkColumn]] Analyzer rule. + throw SparkException.internalError( + "No watermark delay found in UpdateEventTimeWatermarkColumn logical node. " + + "You have hit a query analyzer bug. " + + "Please report your query to Spark user mailing list.") + } + UpdateEventTimeColumnExec(columnName, delay.get, None, planLater(child)) :: Nil + case PhysicalAggregation( namedGroupingExpressions, aggregateExpressions, rewrittenResultExpressions, child) => @@ -964,6 +959,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { execution.SampleExec(lb, ub, withReplacement, seed, planLater(child)) :: Nil case logical.LocalRelation(output, data, _) => LocalTableScanExec(output, data) :: Nil + case logical.EmptyRelation(l) => EmptyRelationExec(l) :: Nil case CommandResult(output, _, plan, data) => CommandResultExec(output, plan, data) :: Nil // We should match the combination of limit and offset first, to get the optimal physical // plan, instead of planning limit and offset separately. 
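The new `case logical.EmptyRelation(l) => EmptyRelationExec(l) :: Nil` arm above follows the usual strategy shape: pattern-match one logical operator and return its physical candidates. A toy, self-contained sketch of that shape, using hypothetical node types rather than Spark's classes:

```scala
// Toy sketch of the strategy pattern: logical node in, candidate physical nodes out.
// All types here are hypothetical analogues, not Spark's LogicalPlan/SparkPlan hierarchy.
object StrategySketch {
  sealed trait LogicalNode
  case class LocalRows(rows: Seq[String]) extends LogicalNode
  case class EmptyRel(original: LogicalNode) extends LogicalNode      // analogue of EmptyRelation

  sealed trait PhysicalNode
  case class LocalScanExec(rows: Seq[String]) extends PhysicalNode
  case class EmptyRelExec(original: LogicalNode) extends PhysicalNode // analogue of EmptyRelationExec

  def basicOperators(plan: LogicalNode): Seq[PhysicalNode] = plan match {
    case LocalRows(rows) => LocalScanExec(rows) :: Nil
    case e: EmptyRel     => EmptyRelExec(e.original) :: Nil // keep the eliminated plan for EXPLAIN output
  }

  def main(args: Array[String]): Unit =
    println(basicOperators(EmptyRel(LocalRows(Seq("a", "b")))))
}
```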
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index 5a0bf09a1713b..920f61574770d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -24,6 +24,8 @@ import scala.collection.mutable import scala.util.control.NonFatal import org.apache.spark.{broadcast, SparkException, SparkUnsupportedOperationException} +import org.apache.spark.internal.LogKeys.{CODEGEN_STAGE_ID, CONFIG, ERROR, HUGE_METHOD_LIMIT, MAX_METHOD_CODE_SIZE, TREE_NODE} +import org.apache.spark.internal.MDC import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ @@ -163,8 +165,10 @@ trait CodegenSupport extends SparkPlan { } } + @scala.annotation.nowarn("cat=deprecation") val inputVars = inputVarsCandidate match { - case stream: LazyList[ExprCode] => stream.force + case stream: Stream[ExprCode] => stream.force + case lazyList: LazyList[ExprCode] => lazyList.force case other => other } @@ -406,7 +410,7 @@ trait CodegenSupport extends SparkPlan { if (Utils.isTesting) { throw SparkException.internalError(errMsg) } else { - logWarning(s"[BUG] $errMsg Please open a JIRA ticket to report it.") + logWarning(log"[BUG] ${MDC(ERROR, errMsg)} Please open a JIRA ticket to report it.") } } if (parent.limitNotReachedChecks.isEmpty) { @@ -729,17 +733,21 @@ case class WholeStageCodegenExec(child: SparkPlan)(val codegenStageId: Int) } catch { case NonFatal(_) if !Utils.isTesting && conf.codegenFallback => // We should already saw the error message - logWarning(s"Whole-stage codegen disabled for plan (id=$codegenStageId):\n $treeString") + logWarning(log"Whole-stage codegen disabled for plan " + + log"(id=${MDC(CODEGEN_STAGE_ID, codegenStageId)}):\n " + + log"${MDC(TREE_NODE, treeString)}") return child.execute() } // Check if compiled code has a too large function if (compiledCodeStats.maxMethodCodeSize > conf.hugeMethodLimit) { - logInfo(s"Found too long generated codes and JIT optimization might not work: " + - s"the bytecode size (${compiledCodeStats.maxMethodCodeSize}) is above the limit " + - s"${conf.hugeMethodLimit}, and the whole-stage codegen was disabled " + - s"for this plan (id=$codegenStageId). To avoid this, you can raise the limit " + - s"`${SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.key}`:\n$treeString") + logInfo(log"Found too long generated codes and JIT optimization might not work: " + + log"the bytecode size (${MDC(MAX_METHOD_CODE_SIZE, compiledCodeStats.maxMethodCodeSize)})" + + log" is above the limit ${MDC(HUGE_METHOD_LIMIT, conf.hugeMethodLimit)}, " + + log"and the whole-stage codegen was disabled for this plan " + + log"(id=${MDC(CODEGEN_STAGE_ID, codegenStageId)}). To avoid this, you can raise the limit" + + log" `${MDC(CONFIG, SQLConf.WHOLESTAGE_HUGE_METHOD_LIMIT.key)}`:\n" + + log"${MDC(TREE_NODE, treeString)}") return child.execute() } @@ -947,6 +955,10 @@ case class CollapseCodegenStages( // Do not make LogicalTableScanExec the root of WholeStageCodegen // to support the fast driver-local collect/take paths. plan + case plan: EmptyRelationExec => + // Do not make EmptyRelationExec the root of WholeStageCodegen + // to support the fast driver-local collect/take paths. 
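When the logInfo above reports that compiled methods exceed the huge-method limit and whole-stage codegen gets disabled for a plan, the limit it names can be raised per session. A minimal sketch; the value is arbitrary and the query is only there to trigger codegen:

```scala
import org.apache.spark.sql.SparkSession

// Sketch: raise spark.sql.codegen.hugeMethodLimit for the current session, as suggested by the
// log message above when generated bytecode is too large for JIT-friendly compilation.
object HugeMethodLimitSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("codegen-limit").getOrCreate()
    spark.conf.set("spark.sql.codegen.hugeMethodLimit", 16000L) // illustrative value
    spark.range(10).selectExpr("id * 2 AS doubled").show()
    spark.stop()
  }
}
```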
+ plan case plan: CommandResultExec => // Do not make CommandResultExec the root of WholeStageCodegen // to support the fast driver-local collect/take paths. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala index 7bee641a00e73..014d23f2f4101 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEOptimizer.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.adaptive +import org.apache.spark.internal.LogKeys.{BATCH_NAME, RULE_NAME} +import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.analysis.UpdateAttributeNullability import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, EliminateLimits, OptimizeOneRowPlan} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, LogicalPlanIntegrity} @@ -52,7 +54,8 @@ class AQEOptimizer(conf: SQLConf, extendedRuntimeOptimizerRules: Seq[Rule[Logica val filteredRules = batch.rules.filter { rule => val exclude = excludedRules.contains(rule.ruleName) if (exclude) { - logInfo(s"Optimization rule '${rule.ruleName}' is excluded from the optimizer.") + logInfo(log"Optimization rule '${MDC(RULE_NAME, rule.ruleName)}' is excluded from " + + log"the optimizer.") } !exclude } @@ -61,8 +64,8 @@ class AQEOptimizer(conf: SQLConf, extendedRuntimeOptimizerRules: Seq[Rule[Logica } else if (filteredRules.nonEmpty) { Some(Batch(batch.name, batch.strategy, filteredRules: _*)) } else { - logInfo(s"Optimization batch '${batch.name}' is excluded from the optimizer " + - s"as all enclosed rules have been excluded.") + logInfo(log"Optimization batch '${MDC(BATCH_NAME, batch.name)}' is excluded from " + + log"the optimizer as all enclosed rules have been excluded.") None } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala index 7951a6f36b9bd..7b3e0cd549b85 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.optimizer.PropagateEmptyRelationBase import org.apache.spark.sql.catalyst.planning.ExtractSingleColumnNullAwareAntiJoin +import org.apache.spark.sql.catalyst.plans.logical.EmptyRelation import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.TreePattern.{LOCAL_RELATION, LOGICAL_QUERY_STAGE, TRUE_OR_FALSE_LITERAL} import org.apache.spark.sql.execution.aggregate.BaseAggregateExec @@ -39,6 +40,8 @@ object AQEPropagateEmptyRelation extends PropagateEmptyRelationBase { override protected def nonEmpty(plan: LogicalPlan): Boolean = super.nonEmpty(plan) || getEstimatedRowCount(plan).exists(_ > 0) + override protected def empty(plan: LogicalPlan): LogicalPlan = EmptyRelation(plan) + private def isRootRepartition(plan: LogicalPlan): Boolean = plan match { case l: LogicalQueryStage if l.getTagValue(ROOT_REPARTITION).isDefined => true case _ => false @@ -61,6 +64,8 @@ object AQEPropagateEmptyRelation extends PropagateEmptyRelationBase { None } + case _: EmptyRelation => Some(0) + case _ => None } @@ -82,6 +87,13 @@ object 
AQEPropagateEmptyRelation extends PropagateEmptyRelationBase { case _ => false } + // A broadcast query stage can't be executed without the join operator. + // TODO: we can return the original query plan before broadcast. + override protected def canExecuteWithoutJoin(plan: LogicalPlan): Boolean = plan match { + case LogicalQueryStage(_, _: BroadcastQueryStageExec) => false + case _ => true + } + override protected def applyInternal(p: LogicalPlan): LogicalPlan = p.transformUpWithPruning( // LOCAL_RELATION and TRUE_OR_FALSE_LITERAL pattern are matched at // `PropagateEmptyRelationBase.commonApplyFunc` diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveRuleContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveRuleContext.scala new file mode 100644 index 0000000000000..23817be71c89c --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveRuleContext.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import scala.collection.mutable + +import org.apache.spark.annotation.{DeveloperApi, Experimental} +import org.apache.spark.sql.catalyst.SQLConfHelper + +/** + * Provide the functionality to modify the next plan fragment configs in AQE rules. + * The configs will be cleanup before going to execute next plan fragment. + * To get instance, use: {{{ AdaptiveRuleContext.get() }}} + * + * @param isSubquery if the input query plan is subquery + * @param isFinalStage if the next stage is final stage + */ +@Experimental +@DeveloperApi +case class AdaptiveRuleContext(isSubquery: Boolean, isFinalStage: Boolean) { + + /** + * Set SQL configs for next plan fragment. The configs will affect all of rules in AQE, + * i.e., the runtime optimizer, planner, queryStagePreparationRules, queryStageOptimizerRules, + * columnarRules. + * This configs will be cleared before going to get the next plan fragment. 
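As a usage sketch of the context introduced above: a hypothetical AQE rule (not part of this change) that consults AdaptiveRuleContext.get() and pushes a SQL conf that should only apply to the next plan fragment of the final stage.

```scala
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.adaptive.AdaptiveRuleContext

// Hypothetical rule, for illustration only: the plan is returned untouched and a config is set
// for the next plan fragment. AdaptiveRuleContext.get() is defined only while running inside AQE.
class TuneFinalStageRule extends Rule[SparkPlan] {
  override def apply(plan: SparkPlan): SparkPlan = {
    AdaptiveRuleContext.get().foreach { ctx =>
      if (ctx.isFinalStage) {
        ctx.setConfig("spark.sql.adaptive.advisoryPartitionSizeInBytes", "128MB")
      }
    }
    plan
  }
}
```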
+ */ + private val nextPlanFragmentConf = new mutable.HashMap[String, String]() + + private[sql] def withFinalStage(isFinalStage: Boolean): AdaptiveRuleContext = { + if (this.isFinalStage == isFinalStage) { + this + } else { + val newRuleContext = copy(isFinalStage = isFinalStage) + newRuleContext.setConfigs(this.configs()) + newRuleContext + } + } + + def setConfig(key: String, value: String): Unit = { + nextPlanFragmentConf.put(key, value) + } + + def setConfigs(kvs: Map[String, String]): Unit = { + kvs.foreach(kv => nextPlanFragmentConf.put(kv._1, kv._2)) + } + + private[sql] def configs(): Map[String, String] = nextPlanFragmentConf.toMap + + private[sql] def clearConfigs(): Unit = nextPlanFragmentConf.clear() +} + +object AdaptiveRuleContext extends SQLConfHelper { + private val ruleContextThreadLocal = new ThreadLocal[AdaptiveRuleContext] + + /** + * If a rule is applied inside AQE then the returned value is always defined, else return None. + */ + def get(): Option[AdaptiveRuleContext] = Option(ruleContextThreadLocal.get()) + + private[sql] def withRuleContext[T](ruleContext: AdaptiveRuleContext)(block: => T): T = { + assert(ruleContext != null) + val origin = ruleContextThreadLocal.get() + ruleContextThreadLocal.set(ruleContext) + try { + val conf = ruleContext.configs() + withSQLConf(conf.toSeq: _*) { + block + } + } finally { + ruleContextThreadLocal.set(origin) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index a5e681535cb82..f21960aeedd64 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.adaptive import java.util -import java.util.concurrent.LinkedBlockingQueue +import java.util.concurrent.{ConcurrentHashMap, LinkedBlockingQueue} import scala.collection.concurrent.TrieMap import scala.collection.mutable @@ -28,6 +28,8 @@ import scala.util.control.NonFatal import org.apache.spark.SparkException import org.apache.spark.broadcast +import org.apache.spark.internal.{MDC, MessageWithContext} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow @@ -74,13 +76,32 @@ case class AdaptiveSparkPlanExec( @transient private val lock = new Object() - @transient private val logOnLevel: ( => String) => Unit = conf.adaptiveExecutionLogLevel match { - case "TRACE" => logTrace(_) - case "DEBUG" => logDebug(_) - case "INFO" => logInfo(_) - case "WARN" => logWarning(_) - case "ERROR" => logError(_) - case _ => logDebug(_) + @transient private val logOnLevel: ( => MessageWithContext) => Unit = + conf.adaptiveExecutionLogLevel match { + case "TRACE" => logTrace(_) + case "INFO" => logInfo(_) + case "WARN" => logWarning(_) + case "ERROR" => logError(_) + case _ => logDebug(_) + } + + @transient private var ruleContext = new AdaptiveRuleContext( + isSubquery = isSubquery, + isFinalStage = false) + + private def withRuleContext[T](f: => T): T = + AdaptiveRuleContext.withRuleContext(ruleContext) { f } + + private def applyPhysicalRulesWithRuleContext( + plan: => SparkPlan, + rules: Seq[Rule[SparkPlan]], + loggerAndBatchName: Option[(PlanChangeLogger[SparkPlan], String)] = None): SparkPlan = { + // Apply the last rules if exists 
before going to apply the next batch of rules, + // so that we can propagate the configs. + val newPlan = plan + withRuleContext { + applyPhysicalRules(newPlan, rules, loggerAndBatchName) + } } @transient private val planChangeLogger = new PlanChangeLogger[SparkPlan]() @@ -159,7 +180,9 @@ case class AdaptiveSparkPlanExec( collapseCodegenStagesRule ) - private def optimizeQueryStage(plan: SparkPlan, isFinalStage: Boolean): SparkPlan = { + private def optimizeQueryStage( + plan: SparkPlan, + isFinalStage: Boolean): SparkPlan = withRuleContext { val rules = if (isFinalStage && !conf.getConf(SQLConf.ADAPTIVE_EXECUTION_APPLY_FINAL_STAGE_SHUFFLE_OPTIMIZATIONS)) { queryStageOptimizerRules.filterNot(_.isInstanceOf[AQEShuffleReadRule]) @@ -195,7 +218,7 @@ case class AdaptiveSparkPlanExec( } private def applyQueryPostPlannerStrategyRules(plan: SparkPlan): SparkPlan = { - applyPhysicalRules( + applyPhysicalRulesWithRuleContext( plan, context.session.sessionState.adaptiveRulesHolder.queryPostPlannerStrategyRules, Some((planChangeLogger, "AQE Query Post Planner Strategy Rules")) @@ -203,7 +226,7 @@ case class AdaptiveSparkPlanExec( } @transient val initialPlan = context.session.withActive { - applyPhysicalRules( + applyPhysicalRulesWithRuleContext( applyQueryPostPlannerStrategyRules(inputPlan), queryStagePreparationRules, Some((planChangeLogger, "AQE Preparations"))) @@ -280,6 +303,7 @@ case class AdaptiveSparkPlanExec( val errors = new mutable.ArrayBuffer[Throwable]() var stagesToReplace = Seq.empty[QueryStageExec] while (!result.allChildStagesMaterialized) { + ruleContext.clearConfigs() currentPhysicalPlan = result.newPlan if (result.newStages.nonEmpty) { stagesToReplace = result.newStages ++ stagesToReplace @@ -302,6 +326,11 @@ case class AdaptiveSparkPlanExec( try { stage.materialize().onComplete { res => if (res.isSuccess) { + // record shuffle IDs for successful stages for cleanup + stage.plan.collect { + case s: ShuffleExchangeLike => + context.shuffleIds.put(s.shuffleId, true) + } events.offer(StageSuccess(stage, res.get)) } else { events.offer(StageFailure(stage, res.failed.get)) @@ -353,8 +382,9 @@ case class AdaptiveSparkPlanExec( val newCost = costEvaluator.evaluateCost(newPhysicalPlan) if (newCost < origCost || (newCost == origCost && currentPhysicalPlan != newPhysicalPlan)) { - logOnLevel("Plan changed:\n" + - sideBySide(currentPhysicalPlan.treeString, newPhysicalPlan.treeString).mkString("\n")) + lazy val plans = + sideBySide(currentPhysicalPlan.treeString, newPhysicalPlan.treeString).mkString("\n") + logOnLevel(log"Plan changed:\n${MDC(QUERY_PLAN, plans)}") cleanUpTempTags(newPhysicalPlan) currentPhysicalPlan = newPhysicalPlan currentLogicalPlan = newLogicalPlan @@ -365,11 +395,13 @@ case class AdaptiveSparkPlanExec( result = createQueryStages(currentPhysicalPlan) } + ruleContext = ruleContext.withFinalStage(isFinalStage = true) // Run the final plan when there's no more unfinished stages. 
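The "Plan changed:" and "Final plan:" messages produced by logOnLevel above are emitted at the level chosen by the adaptive log-level conf. A small sketch that surfaces them at INFO with default log4j settings; the query is arbitrary, just enough to run through AQE:

```scala
import org.apache.spark.sql.SparkSession

// Sketch: make the AQE plan-change messages visible at INFO instead of the default DEBUG.
object AqeLogLevelSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("aqe-log-level").getOrCreate()
    spark.conf.set("spark.sql.adaptive.enabled", "true")
    spark.conf.set("spark.sql.adaptive.logLevel", "INFO")
    spark.range(0, 10000).selectExpr("id % 10 AS bucket").groupBy("bucket").count().collect()
    spark.stop()
  }
}
```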
- currentPhysicalPlan = applyPhysicalRules( + currentPhysicalPlan = applyPhysicalRulesWithRuleContext( optimizeQueryStage(result.newPlan, isFinalStage = true), postStageCreationRules(supportsColumnar), Some((planChangeLogger, "AQE Post Stage Creation"))) + ruleContext.clearConfigs() _isFinalPlan = true executionId.foreach(onUpdatePlan(_, Seq(currentPhysicalPlan))) currentPhysicalPlan @@ -384,7 +416,7 @@ case class AdaptiveSparkPlanExec( if (shouldUpdatePlan && currentPhysicalPlan.exists(_.subqueries.nonEmpty)) { getExecutionId.foreach(onUpdatePlan(_, Seq.empty)) } - logOnLevel(s"Final plan:\n$currentPhysicalPlan") + logOnLevel(log"Final plan:\n${MDC(QUERY_PLAN, currentPhysicalPlan)}") } override def executeCollect(): Array[InternalRow] = { @@ -587,7 +619,7 @@ case class AdaptiveSparkPlanExec( val queryStage = plan match { case e: Exchange => val optimized = e.withNewChildren(Seq(optimizeQueryStage(e.child, isFinalStage = false))) - val newPlan = applyPhysicalRules( + val newPlan = applyPhysicalRulesWithRuleContext( optimized, postStageCreationRules(outputsColumnar = plan.supportsColumnar), Some((planChangeLogger, "AQE Post Stage Creation"))) @@ -714,9 +746,11 @@ case class AdaptiveSparkPlanExec( private def reOptimize(logicalPlan: LogicalPlan): Option[(SparkPlan, LogicalPlan)] = { try { logicalPlan.invalidateStatsCache() - val optimized = optimizer.execute(logicalPlan) - val sparkPlan = context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() - val newPlan = applyPhysicalRules( + val optimized = withRuleContext { optimizer.execute(logicalPlan) } + val sparkPlan = withRuleContext { + context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() + } + val newPlan = applyPhysicalRulesWithRuleContext( applyQueryPostPlannerStrategyRules(sparkPlan), preprocessingRules ++ queryStagePreparationRules, Some((planChangeLogger, "AQE Replanning"))) @@ -737,7 +771,8 @@ case class AdaptiveSparkPlanExec( Some((finalPlan, optimized)) } catch { case e: InvalidAQEPlanException[_] => - logOnLevel(s"Re-optimize - ${e.getMessage()}:\n${e.plan}") + logOnLevel(log"Re-optimize - ${MDC(ERROR, e.getMessage())}:\n" + + log"${MDC(QUERY_PLAN, e.plan)}") None } } @@ -795,7 +830,8 @@ case class AdaptiveSparkPlanExec( s.cancel() } catch { case NonFatal(t) => - logError(s"Exception in cancelling query stage: ${s.treeString}", t) + logError(log"Exception in cancelling query stage: " + + log"${MDC(QUERY_PLAN, s.treeString)}", t) } case _ => } @@ -869,6 +905,8 @@ case class AdaptiveExecutionContext(session: SparkSession, qe: QueryExecution) { */ val stageCache: TrieMap[SparkPlan, ExchangeQueryStageExec] = new TrieMap[SparkPlan, ExchangeQueryStageExec]() + + val shuffleIds: ConcurrentHashMap[Int, Boolean] = new ConcurrentHashMap[Int, Boolean]() } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala index 50f2b7c81453e..8517911d70262 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InsertAdaptiveSparkPlan.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.adaptive import scala.collection.mutable +import org.apache.spark.internal.LogKeys.{CONFIG, SUB_QUERY} +import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.expressions import 
org.apache.spark.sql.catalyst.expressions.{DynamicPruningSubquery, ListQuery, SubqueryExpression} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -67,8 +69,8 @@ case class InsertAdaptiveSparkPlan( AdaptiveSparkPlanExec(newPlan, adaptiveExecutionContext, preprocessingRules, isSubquery) } catch { case SubqueryAdaptiveNotSupportedException(subquery) => - logWarning(s"${SQLConf.ADAPTIVE_EXECUTION_ENABLED.key} is enabled " + - s"but is not supported for sub-query: $subquery.") + logWarning(log"${MDC(CONFIG, SQLConf.ADAPTIVE_EXECUTION_ENABLED.key)} is enabled " + + log"but is not supported for sub-query: ${MDC(SUB_QUERY, subquery)}.") plan } } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala index 8ce2452cc141d..506f52fd9072e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, RepartitionOperation, Statistics} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, RepartitionOperation, Statistics} +import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.trees.TreePattern.{LOGICAL_QUERY_STAGE, REPARTITION_OPERATION, TreePattern} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.aggregate.BaseAggregateExec @@ -35,8 +36,8 @@ import org.apache.spark.sql.execution.aggregate.BaseAggregateExec // TODO we can potentially include only [[QueryStageExec]] in this class if we make the aggregation // planning aware of partitioning. 
case class LogicalQueryStage( - logicalPlan: LogicalPlan, - physicalPlan: SparkPlan) extends LeafNode { + override val logicalPlan: LogicalPlan, + override val physicalPlan: SparkPlan) extends logical.LogicalQueryStage { override def output: Seq[Attribute] = logicalPlan.output override val isStreaming: Boolean = logicalPlan.isStreaming @@ -71,4 +72,14 @@ case class LogicalQueryStage( } override def maxRows: Option[Long] = stats.rowCount.map(_.min(Long.MaxValue).toLong) + + override def isMaterialized: Boolean = physicalPlan.exists { + case s: QueryStageExec => s.isMaterialized + case _ => false + } + + override def isDirectStage: Boolean = physicalPlan match { + case _: QueryStageExec => true + case _ => false + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala index 7db9271aee0c4..71e138e6152b5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala @@ -21,7 +21,7 @@ import java.util.concurrent.atomic.AtomicReference import scala.concurrent.Future -import org.apache.spark.{FutureAction, MapOutputStatistics, SparkException} +import org.apache.spark.{MapOutputStatistics, SparkException} import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -51,13 +51,18 @@ abstract class QueryStageExec extends LeafExecNode { */ val plan: SparkPlan + /** + * Name of this query stage which is unique in the entire query plan. + */ + val name: String = s"${this.getClass.getSimpleName}-$id" + /** * Materialize this query stage, to prepare for the execution, like submitting map stages, * broadcasting data, etc. The caller side can use the returned [[Future]] to wait until this * stage is ready. */ final def materialize(): Future[Any] = { - logDebug(s"Materialize query stage ${this.getClass.getSimpleName}: $id") + logDebug(s"Materialize query stage: $name") doMaterialize() } @@ -151,7 +156,12 @@ abstract class ExchangeQueryStageExec extends QueryStageExec { /** * Cancel the stage materialization if in progress; otherwise do nothing. */ - def cancel(): Unit + final def cancel(): Unit = { + logDebug(s"Cancel query stage: $name") + doCancel() + } + + protected def doCancel(): Unit /** * The canonicalized plan before applying query stage optimizer rules. @@ -184,9 +194,7 @@ case class ShuffleQueryStageExec( def advisoryPartitionSize: Option[Long] = shuffle.advisoryPartitionSize - @transient private lazy val shuffleFuture = shuffle.submitShuffleJob - - override protected def doMaterialize(): Future[Any] = shuffleFuture + override protected def doMaterialize(): Future[Any] = shuffle.submitShuffleJob override def newReuseInstance( newStageId: Int, newOutput: Seq[Attribute]): ExchangeQueryStageExec = { @@ -198,18 +206,14 @@ case class ShuffleQueryStageExec( reuse } - override def cancel(): Unit = shuffleFuture match { - case action: FutureAction[MapOutputStatistics] if !action.isCompleted => - action.cancel() - case _ => - } + override protected def doCancel(): Unit = shuffle.cancelShuffleJob /** * Returns the Option[MapOutputStatistics]. If the shuffle map stage has no partition, * this method returns None, as there is no map statistics. 
   */
  def mapStats: Option[MapOutputStatistics] = {
-    assert(resultOption.get().isDefined, s"${getClass.getSimpleName} should already be ready")
+    assert(resultOption.get().isDefined, s"$name should already be ready")
     val stats = resultOption.get().get.asInstanceOf[MapOutputStatistics]
     Option(stats)
   }
@@ -236,9 +240,7 @@ case class BroadcastQueryStageExec(
       throw SparkException.internalError(s"wrong plan for broadcast stage:\n ${plan.treeString}")
   }
-  override protected def doMaterialize(): Future[Any] = {
-    broadcast.submitBroadcastJob
-  }
+  override protected def doMaterialize(): Future[Any] = broadcast.submitBroadcastJob
   override def newReuseInstance(
       newStageId: Int, newOutput: Seq[Attribute]): ExchangeQueryStageExec = {
@@ -250,12 +252,7 @@ case class BroadcastQueryStageExec(
     reuse
   }
-  override def cancel(): Unit = {
-    if (!broadcast.relationFuture.isDone) {
-      sparkContext.cancelJobsWithTag(broadcast.jobTag)
-      broadcast.relationFuture.cancel(true)
-    }
-  }
+  override protected def doCancel(): Unit = broadcast.cancelBroadcastJob()
   override def getRuntimeStatistics: Statistics = broadcast.runtimeStatistics
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala
index 9370b3d8d1d74..bb7d904402ded 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.adaptive
 import scala.collection.mutable.ArrayBuffer
 import org.apache.spark.{MapOutputStatistics, MapOutputTrackerMaster, SparkEnv}
-import org.apache.spark.internal.Logging
+import org.apache.spark.internal.{Logging, LogKeys, MDC}
 import org.apache.spark.sql.execution.{CoalescedPartitionSpec, PartialReducerPartitionSpec, ShufflePartitionSpec}
 object ShufflePartitionsUtil extends Logging {
@@ -61,8 +61,10 @@ object ShufflePartitionsUtil extends Logging {
     val targetSize = maxTargetSize.min(advisoryTargetSize).max(minPartitionSize)
     val shuffleIds = mapOutputStatistics.flatMap(_.map(_.shuffleId)).mkString(", ")
-    logInfo(s"For shuffle($shuffleIds), advisory target size: $advisoryTargetSize, " +
-      s"actual target size $targetSize, minimum partition size: $minPartitionSize")
+    logInfo(log"For shuffle(${MDC(LogKeys.SHUFFLE_ID, shuffleIds)}), advisory target size: " +
+      log"${MDC(LogKeys.ADVISORY_TARGET_SIZE, advisoryTargetSize)}, actual target size " +
+      log"${MDC(LogKeys.TARGET_SIZE, targetSize)}, minimum partition size: " +
+      log"${MDC(LogKeys.PARTITION_SIZE, minPartitionSize)}")
     // If `inputPartitionSpecs` are all empty, it means skew join optimization is not applied.
if (inputPartitionSpecs.forall(_.isEmpty)) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala index 9523bf1a1c023..e2d8ac8988043 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggregateCodegenSupport.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.execution.aggregate import org.apache.spark.SparkException +import org.apache.spark.internal.LogKeys.MAX_JVM_METHOD_PARAMS_LENGTH +import org.apache.spark.internal.MDC import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, Expression, ExpressionEquals, UnsafeRow} @@ -340,11 +342,11 @@ trait AggregateCodegenSupport } Some(splitCodes) } else { - val errMsg = "Failed to split aggregate code into small functions because the parameter " + - "length of at least one split function went over the JVM limit: " + - CodeGenerator.MAX_JVM_METHOD_PARAMS_LENGTH + val errMsg = log"Failed to split aggregate code into small functions because the " + + log"parameter length of at least one split function went over the JVM limit: " + + log"${MDC(MAX_JVM_METHOD_PARAMS_LENGTH, CodeGenerator.MAX_JVM_METHOD_PARAMS_LENGTH)}" if (Utils.isTesting) { - throw SparkException.internalError(errMsg) + throw SparkException.internalError(errMsg.message) } else { logInfo(errMsg) None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index bdf17607d77c5..8f2b7ca5cba25 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -22,6 +22,8 @@ import java.util.concurrent.TimeUnit._ import scala.collection.mutable import org.apache.spark.TaskContext +import org.apache.spark.internal.LogKeys.CONFIG +import org.apache.spark.internal.MDC import org.apache.spark.memory.SparkOutOfMemoryError import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -410,8 +412,8 @@ case class HashAggregateExec( private def enableTwoLevelHashMap(): Unit = { if (!checkIfFastHashMapSupported()) { if (!Utils.isTesting) { - logInfo(s"${SQLConf.ENABLE_TWOLEVEL_AGG_MAP.key} is set to true, but" - + " current version of codegened fast hashmap does not support this aggregate.") + logInfo(log"${MDC(CONFIG, SQLConf.ENABLE_TWOLEVEL_AGG_MAP.key)} is set to true, but" + + log" current version of codegened fast hashmap does not support this aggregate.") } } else { isFastHashMapEnabled = true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala index 57b8fd8570f2b..a4a6dc8e4ab01 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectAggregationIterator.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.aggregate import org.apache.spark.{SparkEnv, SparkException, TaskContext} -import org.apache.spark.internal.{config, Logging} +import 
org.apache.spark.internal.{config, Logging, MDC} +import org.apache.spark.internal.LogKeys.{CONFIG, HASH_MAP_SIZE, OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ @@ -174,10 +175,12 @@ class ObjectAggregationIterator( // The hash map gets too large, makes a sorted spill and clear the map. if (hashMap.size >= fallbackCountThreshold && inputRows.hasNext) { logInfo( - s"Aggregation hash map size ${hashMap.size} reaches threshold " + - s"capacity ($fallbackCountThreshold entries), spilling and falling back to sort" + - " based aggregation. You may change the threshold by adjust option " + - SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key + log"Aggregation hash map size ${MDC(HASH_MAP_SIZE, hashMap.size)} reaches threshold " + + log"capacity " + + log"(${MDC(OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD, fallbackCountThreshold)}" + + log" entries), spilling and falling back to sort based aggregation. You may change " + + log"the threshold by adjust option " + + log"${MDC(CONFIG, SQLConf.OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD.key)}" ) // Falls back to sort-based aggregation diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala index 4a922dcb062e5..9652a48e5270e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnAccessor.scala @@ -100,8 +100,8 @@ private[columnar] class FloatColumnAccessor(buffer: ByteBuffer) private[columnar] class DoubleColumnAccessor(buffer: ByteBuffer) extends NativeColumnAccessor(buffer, DOUBLE) -private[columnar] class StringColumnAccessor(buffer: ByteBuffer) - extends NativeColumnAccessor(buffer, STRING) +private[columnar] class StringColumnAccessor(buffer: ByteBuffer, dataType: StringType) + extends NativeColumnAccessor(buffer, STRING(dataType)) private[columnar] class BinaryColumnAccessor(buffer: ByteBuffer) extends BasicColumnAccessor[Array[Byte]](buffer, BINARY) @@ -147,7 +147,7 @@ private[sql] object ColumnAccessor { new LongColumnAccessor(buf) case FloatType => new FloatColumnAccessor(buf) case DoubleType => new DoubleColumnAccessor(buf) - case StringType => new StringColumnAccessor(buf) + case s: StringType => new StringColumnAccessor(buf, s) case BinaryType => new BinaryColumnAccessor(buf) case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => new CompactDecimalColumnAccessor(buf, dt) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala index 367547155beef..9fafdb7948416 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnBuilder.scala @@ -122,7 +122,8 @@ private[columnar] class DoubleColumnBuilder extends NativeColumnBuilder(new DoubleColumnStats, DOUBLE) private[columnar] -class StringColumnBuilder extends NativeColumnBuilder(new StringColumnStats, STRING) +class StringColumnBuilder(dataType: StringType) + extends NativeColumnBuilder(new StringColumnStats(dataType), STRING(dataType)) private[columnar] class BinaryColumnBuilder extends ComplexColumnBuilder(new BinaryColumnStats, BINARY) @@ -185,7 +186,7 
@@ private[columnar] object ColumnBuilder { new LongColumnBuilder case FloatType => new FloatColumnBuilder case DoubleType => new DoubleColumnBuilder - case StringType => new StringColumnBuilder + case s: StringType => new StringColumnBuilder(s) case BinaryType => new BinaryColumnBuilder case CalendarIntervalType => new IntervalColumnBuilder case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala index 18ef84262aad3..45f489cb13c2a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnStats.scala @@ -255,14 +255,16 @@ private[columnar] final class DoubleColumnStats extends ColumnStats { Array[Any](lower, upper, nullCount, count, sizeInBytes) } -private[columnar] final class StringColumnStats extends ColumnStats { +private[columnar] final class StringColumnStats(collationId: Int) extends ColumnStats { + def this(dt: StringType) = this(dt.collationId) + protected var upper: UTF8String = null protected var lower: UTF8String = null override def gatherStats(row: InternalRow, ordinal: Int): Unit = { if (!row.isNullAt(ordinal)) { val value = row.getUTF8String(ordinal) - val size = STRING.actualSize(row, ordinal) + val size = STRING(collationId).actualSize(row, ordinal) gatherValueStats(value, size) } else { gatherNullStats() @@ -270,8 +272,8 @@ private[columnar] final class StringColumnStats extends ColumnStats { } def gatherValueStats(value: UTF8String, size: Int): Unit = { - if (upper == null || value.binaryCompare(upper) > 0) upper = value.clone() - if (lower == null || value.binaryCompare(lower) < 0) lower = value.clone() + if (upper == null || value.semanticCompare(upper, collationId) > 0) upper = value.clone() + if (lower == null || value.semanticCompare(lower, collationId) < 0) lower = value.clone() sizeInBytes += size count += 1 } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala index ee1f9b4133026..b8e63294f3cdc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala @@ -491,8 +491,8 @@ private[columnar] trait DirectCopyColumnType[JvmType] extends ColumnType[JvmType } } -private[columnar] object STRING - extends NativeColumnType(PhysicalStringType(StringType.collationId), 8) +private[columnar] case class STRING(collationId: Int) + extends NativeColumnType(PhysicalStringType(collationId), 8) with DirectCopyColumnType[UTF8String] { override def actualSize(row: InternalRow, ordinal: Int): Int = { @@ -532,6 +532,12 @@ private[columnar] object STRING override def clone(v: UTF8String): UTF8String = v.clone() } +private[columnar] object STRING { + def apply(dt: StringType): STRING = { + STRING(dt.collationId) + } +} + private[columnar] case class COMPACT_DECIMAL(precision: Int, scale: Int) extends NativeColumnType(PhysicalDecimalType(precision, scale), 8) { @@ -821,7 +827,7 @@ private[columnar] object ColumnType { case LongType | TimestampType | TimestampNTZType | _: DayTimeIntervalType => LONG case FloatType => FLOAT case DoubleType => DOUBLE - case StringType => STRING + case s: StringType => STRING(s) case BinaryType => BINARY case i: 
CalendarIntervalType => CALENDAR_INTERVAL case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => COMPACT_DECIMAL(dt) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index 5eadc7d47c92e..75416b8789142 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -86,7 +86,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera classOf[LongColumnAccessor].getName case FloatType => classOf[FloatColumnAccessor].getName case DoubleType => classOf[DoubleColumnAccessor].getName - case StringType => classOf[StringColumnAccessor].getName + case _: StringType => classOf[StringColumnAccessor].getName case BinaryType => classOf[BinaryColumnAccessor].getName case CalendarIntervalType => classOf[IntervalColumnAccessor].getName case dt: DecimalType if dt.precision <= Decimal.MAX_LONG_DIGITS => @@ -101,7 +101,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera val createCode = dt match { case t if CodeGenerator.isPrimitiveType(dt) => s"$accessorName = new $accessorCls(ByteBuffer.wrap(buffers[$index]).order(nativeOrder));" - case NullType | StringType | BinaryType | CalendarIntervalType => + case NullType | BinaryType | CalendarIntervalType => s"$accessorName = new $accessorCls(ByteBuffer.wrap(buffers[$index]).order(nativeOrder));" case other => s"""$accessorName = new $accessorCls(ByteBuffer.wrap(buffers[$index]).order(nativeOrder), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala index 46044f6919d17..86d76856e12bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/compression/compressionSchemes.scala @@ -176,7 +176,7 @@ private[columnar] case object RunLengthEncoding extends CompressionScheme { } override def supports(columnType: ColumnType[_]): Boolean = columnType match { - case INT | LONG | SHORT | BYTE | STRING | BOOLEAN => true + case INT | LONG | SHORT | BYTE | _: STRING | BOOLEAN => true case _ => false } @@ -373,7 +373,7 @@ private[columnar] case object DictionaryEncoding extends CompressionScheme { } override def supports(columnType: ColumnType[_]): Boolean = columnType match { - case INT | LONG | STRING => true + case INT | LONG | _: STRING => true case _ => false } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala index 299f41eb55e17..65a7a0ebbd916 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeColumnCommand.scala @@ -42,7 +42,7 @@ case class AnalyzeColumnCommand( val sessionState = sparkSession.sessionState tableIdent.database match { - case Some(db) if db == sparkSession.sharedState.globalTempViewManager.database => + case Some(db) if db == sparkSession.sharedState.globalTempDB => val plan = 
        sessionState.catalog.getGlobalTempView(tableIdent.identifier).getOrElse {
           throw QueryCompilationErrors.noSuchTableError(db, tableIdent.identifier)
         }
@@ -61,8 +61,8 @@ case class AnalyzeColumnCommand(
   private def analyzeColumnInCachedData(plan: LogicalPlan, sparkSession: SparkSession): Boolean = {
     val cacheManager = sparkSession.sharedState.cacheManager
-    val planToLookup = sparkSession.sessionState.executePlan(plan).analyzed
-    cacheManager.lookupCachedData(planToLookup).map { cachedData =>
+    val df = Dataset.ofRows(sparkSession, plan)
+    cacheManager.lookupCachedData(df).map { cachedData =>
       val columnsToAnalyze = getColumnsToAnalyze(
         tableIdent, cachedData.cachedRepresentation, columnNames, allColumns)
       cacheManager.analyzeColumnCacheQuery(sparkSession, cachedData, columnsToAnalyze)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTablesCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTablesCommand.scala
index c9b22a7d1b258..1650af74bc242 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTablesCommand.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/AnalyzeTablesCommand.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.command
 import scala.util.control.NonFatal
+import org.apache.spark.internal.LogKeys.{DATABASE_NAME, ERROR, TABLE_NAME}
+import org.apache.spark.internal.MDC
 import org.apache.spark.sql.{Row, SparkSession}
@@ -37,8 +39,8 @@ case class AnalyzeTablesCommand(
         CommandUtils.analyzeTable(sparkSession, tbl, noScan)
       } catch {
         case NonFatal(e) =>
-          logWarning(s"Failed to analyze table ${tbl.table} in the " +
-            s"database $db because of ${e.toString}", e)
+          logWarning(log"Failed to analyze table ${MDC(TABLE_NAME, tbl.table)} in the " +
+            log"database ${MDC(DATABASE_NAME, db)} because of ${MDC(ERROR, e.toString)}", e)
       }
     }
     Seq.empty[Row]
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
index eccf16ecea13f..7acd1cb0852b9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala
@@ -24,7 +24,8 @@ import scala.util.control.NonFatal
 import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter}
-import org.apache.spark.internal.Logging
+import org.apache.spark.internal.{Logging, MDC}
+import org.apache.spark.internal.LogKeys.{COUNT, DATABASE_NAME, ERROR, TABLE_NAME, TIME}
 import org.apache.spark.sql.{Column, SparkSession}
 import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
 import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, UnresolvedAttribute}
@@ -91,11 +92,12 @@ object CommandUtils extends Logging {
     } else {
       // Calculate table size as a sum of the visible partitions.
See SPARK-21079 val partitions = sessionState.catalog.listPartitions(catalogTable.identifier) - logInfo(s"Starting to calculate sizes for ${partitions.length} partitions.") + logInfo(log"Starting to calculate sizes for ${MDC(COUNT, partitions.length)} " + + log"partitions.") calculatePartitionStats(spark, catalogTable, partitions, partitionRowCount) } - logInfo(s"It took ${(System.nanoTime() - startTime) / (1000 * 1000)} ms to calculate" + - s" the total size for table ${catalogTable.identifier}.") + logInfo(log"It took ${MDC(TIME, (System.nanoTime() - startTime) / (1000 * 1000))} ms to " + + log"calculate the total size for table ${MDC(TABLE_NAME, catalogTable.identifier)}.") (totalSize, newPartitions) } @@ -154,9 +156,9 @@ object CommandUtils extends Logging { getPathSize(fs, fs.getFileStatus(path)) } catch { case NonFatal(e) => - logWarning( - s"Failed to get the size of table ${identifier.table} in the " + - s"database ${identifier.database} because of ${e.toString}", e) + logWarning(log"Failed to get the size of table ${MDC(TABLE_NAME, identifier.table)} " + + log"in the database ${MDC(DATABASE_NAME, identifier.database)} because of " + + log"${MDC(ERROR, e.toString)}", e) 0L } }.getOrElse(0L) @@ -238,7 +240,7 @@ object CommandUtils extends Logging { // Analyzes a catalog view if the view is cached val table = sparkSession.table(tableIdent.quotedString) val cacheManager = sparkSession.sharedState.cacheManager - if (cacheManager.lookupCachedData(table.logicalPlan).isDefined) { + if (cacheManager.lookupCachedData(table).isDefined) { if (!noScan) { // To collect table stats, materializes an underlying columnar RDD table.count() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala new file mode 100644 index 0000000000000..d2aaa93fcca06 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateSQLFunctionCommand.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.catalog.SQLFunction + +/** + * The DDL command that creates a SQL function. 
+ * For example:
+ * {{{
+ *    CREATE [OR REPLACE] [TEMPORARY] FUNCTION [IF NOT EXISTS] [db_name.]function_name
+ *    ([param_name param_type [COMMENT param_comment], ...])
+ *    RETURNS {ret_type | TABLE (ret_name ret_type [COMMENT ret_comment], ...)}
+ *    [function_properties] function_body;
+ *
+ *    function_properties:
+ *      [NOT] DETERMINISTIC | COMMENT function_comment | [ CONTAINS SQL | READS SQL DATA ]
+ *
+ *    function_body:
+ *      RETURN {expression | TABLE ( query )}
+ * }}}
+ */
+case class CreateSQLFunctionCommand(
+    name: FunctionIdentifier,
+    inputParamText: Option[String],
+    returnTypeText: String,
+    exprText: Option[String],
+    queryText: Option[String],
+    comment: Option[String],
+    isDeterministic: Option[Boolean],
+    containsSQL: Option[Boolean],
+    isTableFunc: Boolean,
+    isTemp: Boolean,
+    ignoreIfExists: Boolean,
+    replace: Boolean)
+  extends CreateUserDefinedFunctionCommand {
+
+  override def run(sparkSession: SparkSession): Seq[Row] = {
+    import SQLFunction._
+
+    val parser = sparkSession.sessionState.sqlParser
+
+    val inputParam = inputParamText.map(parser.parseTableSchema)
+    val returnType = parseReturnTypeText(returnTypeText, isTableFunc, parser)
+
+    val function = SQLFunction(
+      name,
+      inputParam,
+      returnType.getOrElse(if (isTableFunc) Right(null) else Left(null)),
+      exprText,
+      queryText,
+      comment,
+      isDeterministic,
+      containsSQL,
+      isTableFunc,
+      Map.empty)
+
+    // TODO: Implement the rest of the method.
+
+    Seq.empty
+  }
+}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateUserDefinedFunctionCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateUserDefinedFunctionCommand.scala
new file mode 100644
index 0000000000000..bebb0f5cf6c38
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CreateUserDefinedFunctionCommand.scala
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.command
+
+import org.apache.spark.sql.catalyst.FunctionIdentifier
+import org.apache.spark.sql.catalyst.catalog.{LanguageSQL, RoutineLanguage, UserDefinedFunctionErrors}
+import org.apache.spark.sql.catalyst.plans.logical.IgnoreCachedData
+
+/**
+ * The base class for CreateUserDefinedFunctionCommand
+ */
+abstract class CreateUserDefinedFunctionCommand
+  extends LeafRunnableCommand with IgnoreCachedData
+
+
+object CreateUserDefinedFunctionCommand {
+
+  /**
+   * This factory method serves as a central place to verify required inputs and
+   * returns the CREATE command for the parsed user defined function.
+ */ + // scalastyle:off argcount + def apply( + name: FunctionIdentifier, + inputParamText: Option[String], + returnTypeText: String, + exprText: Option[String], + queryText: Option[String], + comment: Option[String], + isDeterministic: Option[Boolean], + containsSQL: Option[Boolean], + language: RoutineLanguage, + isTableFunc: Boolean, + isTemp: Boolean, + ignoreIfExists: Boolean, + replace: Boolean + ): CreateUserDefinedFunctionCommand = { + // scalastyle:on argcount + + assert(language != null) + + language match { + case LanguageSQL => + CreateSQLFunctionCommand( + name, + inputParamText, + returnTypeText, + exprText, + queryText, + comment, + isDeterministic, + containsSQL, + isTableFunc, + isTemp, + ignoreIfExists, + replace) + + case other => + throw UserDefinedFunctionErrors.unsupportedUserDefinedFunction(other) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala index 67d38b28c83ea..7c690c8ccc08d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/InsertIntoDataSourceDirCommand.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.command +import org.apache.spark.internal.LogKeys._ +import org.apache.spark.internal.MDC import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.plans.logical.{CTEInChildren, CTERelationDef, LogicalPlan, WithCTE} @@ -70,7 +72,7 @@ case class InsertIntoDataSourceDirCommand( sparkSession.sessionState.executePlan(dataSource.planForWriting(saveMode, query)).toRdd } catch { case ex: AnalysisException => - logError(s"Failed to write to directory " + storage.locationUri.toString, ex) + logError(log"Failed to write to directory ${MDC(URI, storage.locationUri.toString)}", ex) throw ex } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala index 672417f1adbf0..4e513fc3e8c1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/SetCommand.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution.command -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CONFIG, CONFIG2, KEY, VALUE} import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.parser.ParseException @@ -51,8 +52,9 @@ case class SetCommand(kv: Option[(String, Option[String])]) case Some((SQLConf.Deprecated.MAPRED_REDUCE_TASKS, Some(value))) => val runFunc = (sparkSession: SparkSession) => { logWarning( - s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " + - s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS.key} instead.") + log"Property ${MDC(CONFIG, SQLConf.Deprecated.MAPRED_REDUCE_TASKS)} is deprecated, " + + log"automatically converted to ${MDC(CONFIG2, SQLConf.SHUFFLE_PARTITIONS.key)} " + + log"instead.") if (value.toInt < 1) { val msg = s"Setting negative ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} for automatically " + @@ -68,8 +70,9 @@ case class SetCommand(kv: Option[(String, 
Option[String])]) case Some((SQLConf.Replaced.MAPREDUCE_JOB_REDUCES, Some(value))) => val runFunc = (sparkSession: SparkSession) => { logWarning( - s"Property ${SQLConf.Replaced.MAPREDUCE_JOB_REDUCES} is Hadoop's property, " + - s"automatically converted to ${SQLConf.SHUFFLE_PARTITIONS.key} instead.") + log"Property ${MDC(CONFIG, SQLConf.Replaced.MAPREDUCE_JOB_REDUCES)} is Hadoop's " + + log"property, automatically converted to " + + log"${MDC(CONFIG2, SQLConf.SHUFFLE_PARTITIONS.key)} instead.") if (value.toInt < 1) { val msg = s"Setting negative ${SQLConf.Replaced.MAPREDUCE_JOB_REDUCES} for automatically " + @@ -111,11 +114,12 @@ case class SetCommand(kv: Option[(String, Option[String])]) } if (sparkSession.conf.get(CATALOG_IMPLEMENTATION.key).equals("hive") && key.startsWith("hive.")) { - logWarning(s"'SET $key=$value' might not work, since Spark doesn't support changing " + - "the Hive config dynamically. Please pass the Hive-specific config by adding the " + - s"prefix spark.hadoop (e.g. spark.hadoop.$key) when starting a Spark application. " + - "For details, see the link: https://spark.apache.org/docs/latest/configuration.html#" + - "dynamically-loading-spark-properties.") + logWarning(log"'SET ${MDC(KEY, key)}=${MDC(VALUE, value)}' might not work, since Spark " + + log"doesn't support changing the Hive config dynamically. Please pass the " + + log"Hive-specific config by adding the prefix spark.hadoop " + + log"(e.g. spark.hadoop.${MDC(KEY, key)}) when starting a Spark application. For " + + log"details, see the link: https://spark.apache.org/docs/latest/configuration.html#" + + log"dynamically-loading-spark-properties.") } sparkSession.conf.set(key, value) Seq(Row(key, value)) @@ -155,8 +159,8 @@ case class SetCommand(kv: Option[(String, Option[String])]) case Some((SQLConf.Deprecated.MAPRED_REDUCE_TASKS, None)) => val runFunc = (sparkSession: SparkSession) => { logWarning( - s"Property ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} is deprecated, " + - s"showing ${SQLConf.SHUFFLE_PARTITIONS.key} instead.") + log"Property ${MDC(CONFIG, SQLConf.Deprecated.MAPRED_REDUCE_TASKS)} is deprecated, " + + log"showing ${MDC(CONFIG2, SQLConf.SHUFFLE_PARTITIONS.key)} instead.") Seq(Row( SQLConf.SHUFFLE_PARTITIONS.key, sparkSession.sessionState.conf.defaultNumShufflePartitions.toString)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/UnsetNamespacePropertiesCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/UnsetNamespacePropertiesCommand.scala new file mode 100644 index 0000000000000..243b51b09e3bc --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/UnsetNamespacePropertiesCommand.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.analysis.ResolvedNamespace +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.CatalogHelper +import org.apache.spark.sql.connector.catalog.NamespaceChange + +/** + * A command that unsets database/schema/namespace properties. + * + * The syntax of this command is: + * {{{ + * ALTER (DATABASE|SCHEMA|NAMESPACE) ... + * UNSET (DBPROPERTIES|PROPERTIES) ('key1', 'key2', ...); + * }}} + */ +case class UnsetNamespacePropertiesCommand( + ident: LogicalPlan, + propKeys: Seq[String]) extends UnaryRunnableCommand { + + override def run(sparkSession: SparkSession): Seq[Row] = { + val ResolvedNamespace(catalog, ns, _) = child + val changes = propKeys.map { + NamespaceChange.removeProperty + } + // If the property does not exist, the change should succeed. + catalog.asNamespaceCatalog.alterNamespace(ns.toArray, changes: _*) + + Seq.empty + } + + override def child: LogicalPlan = ident + + override protected def withNewChildInternal( + newChild: LogicalPlan): UnsetNamespacePropertiesCommand = + copy(ident = newChild) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index a1e9c4229b194..ea2736b2c1266 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -25,8 +25,8 @@ import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} -import org.apache.spark.sql.catalyst.trees.LeafLike +import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan, SupervisingCommand} +import org.apache.spark.sql.catalyst.trees.{LeafLike, UnaryLike} import org.apache.spark.sql.connector.ExternalCommandRunner import org.apache.spark.sql.execution.{CommandExecutionMode, ExplainMode, LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.execution.metric.SQLMetric @@ -51,6 +51,7 @@ trait RunnableCommand extends Command { } trait LeafRunnableCommand extends RunnableCommand with LeafLike[LogicalPlan] +trait UnaryRunnableCommand extends RunnableCommand with UnaryLike[LogicalPlan] /** * A physical operator that executes the run method of a `RunnableCommand` and @@ -157,7 +158,7 @@ case class DataWritingCommandExec(cmd: DataWritingCommand, child: SparkPlan) case class ExplainCommand( logicalPlan: LogicalPlan, mode: ExplainMode) - extends LeafRunnableCommand { + extends RunnableCommand with SupervisingCommand { override val output: Seq[Attribute] = Seq(AttributeReference("plan", StringType, nullable = true)()) @@ -171,6 +172,9 @@ case class ExplainCommand( ("Error occurred during query planning: \n" + cause.getMessage).split("\n") .map(Row(_)).toImmutableArraySeq } + + def withTransformedSupervisedPlan(transformer: LogicalPlan => LogicalPlan): LogicalPlan = + copy(logicalPlan = transformer(logicalPlan)) } /** An explain command for users to see how a streaming batch is executed. 
*/ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala index 1283bd8809082..539d8346a5cad 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/createDataSourceTables.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.command import java.net.URI +import org.apache.spark.internal.LogKeys._ +import org.apache.spark.internal.MDC import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.plans.logical.{CTEInChildren, CTERelationDef, LogicalPlan, WithCTE} @@ -230,7 +232,8 @@ case class CreateDataSourceTableAsSelectCommand( dataSource.writeAndRead(mode, query, outputColumnNames) } catch { case ex: AnalysisException => - logError(s"Failed to write to table ${table.identifier.unquotedString}", ex) + logError(log"Failed to write to table " + + log"${MDC(TABLE_NAME, table.identifier.unquotedString)}", ex) throw ex } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index a5e48784ada1a..6f402188910e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.mapred.{FileInputFormat, JobConf} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.RDD_PARALLEL_LISTING_THRESHOLD import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier @@ -356,8 +356,8 @@ case class AlterTableUnsetPropertiesCommand( /** - * A command to change the column for a table, only support changing the comment of a non-partition - * column for now. + * A command to change the column for a table, only support changing the comment or collation of + * the data type or nested types (recursively) of a non-partition column for now. * * The syntax of using this command in SQL is: * {{{ @@ -387,32 +387,45 @@ case class AlterTableChangeColumnCommand( } // Find the origin column from dataSchema by column name. val originColumn = findColumnByName(table.dataSchema, columnName, resolver) - // Throw an AnalysisException if the column name/dataType is changed. - if (!columnEqual(originColumn, newColumn, resolver)) { + val validType = canEvolveType(originColumn, newColumn) + // Throw an AnalysisException on attempt to change collation of bucket column. + if (validType && originColumn.dataType != newColumn.dataType) { + val isBucketColumn = table.bucketSpec match { + case Some(bucketSpec) => bucketSpec.bucketColumnNames.exists(resolver(columnName, _)) + case _ => false + } + if (isBucketColumn) { + throw QueryCompilationErrors.cannotAlterCollationBucketColumn( + table.qualifiedName, columnName) + } + } + // Throw an AnalysisException if the column name is changed or we cannot evolve the data type. + // Only changes in collation of column data type or its nested types (recursively) are allowed. 
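The relaxed check above is aimed at collation-only changes to a column's (possibly nested) string type. As a rough sketch of the intended user-facing DDL, under the assumption of a collation-enabled build (table and column names are hypothetical, and the exact ALTER COLUMN form can vary by catalog):

import org.apache.spark.sql.SparkSession

object AlterCollationSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").getOrCreate()

    spark.sql("CREATE TABLE t (c STRING COLLATE UTF8_BINARY) USING parquet")

    // Intended to be accepted: only the collation of the string type changes,
    // while the column name and the underlying type stay the same.
    spark.sql("ALTER TABLE t ALTER COLUMN c TYPE STRING COLLATE UTF8_LCASE")

    // Still rejected by the command: a genuine type change, not a collation change.
    // spark.sql("ALTER TABLE t ALTER COLUMN c TYPE INT")

    spark.stop()
  }
}

Note also the guard above that refuses a collation change on a bucket column, since bucketing hashes the column values and a different collation would change which values compare equal.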
+ if (!validType || !namesEqual(originColumn, newColumn, resolver)) { throw QueryCompilationErrors.alterTableChangeColumnNotSupportedForColumnTypeError( toSQLId(table.identifier.nameParts), originColumn, newColumn, this.origin) } val newDataSchema = table.dataSchema.fields.map { field => if (field.name == originColumn.name) { - // Create a new column from the origin column with the new comment. - val withNewComment: StructField = - addComment(field, newColumn.getComment()) + // Create a new column from the origin column with the new type and new comment. + val withNewTypeAndComment: StructField = + addComment(withNewType(field, newColumn.dataType), newColumn.getComment()) // Create a new column from the origin column with the new current default value. if (newColumn.getCurrentDefaultValue().isDefined) { if (newColumn.getCurrentDefaultValue().get.nonEmpty) { val result: StructField = - addCurrentDefaultValue(withNewComment, newColumn.getCurrentDefaultValue()) + addCurrentDefaultValue(withNewTypeAndComment, newColumn.getCurrentDefaultValue()) // Check that the proposed default value parses and analyzes correctly, and that the // type of the resulting expression is equivalent or coercible to the destination column // type. ResolveDefaultColumns.analyze(result, "ALTER TABLE ALTER COLUMN") result } else { - withNewComment.clearCurrentDefaultValue() + withNewTypeAndComment.clearCurrentDefaultValue() } } else { - withNewComment + withNewTypeAndComment } } else { field @@ -432,6 +445,10 @@ case class AlterTableChangeColumnCommand( }.getOrElse(throw QueryCompilationErrors.cannotFindColumnError(name, schema.fieldNames)) } + // Change the dataType of the column. + private def withNewType(column: StructField, dataType: DataType): StructField = + column.copy(dataType = dataType) + // Add the comment to a column, if comment is empty, return the original column. private def addComment(column: StructField, comment: Option[String]): StructField = comment.map(column.withComment).getOrElse(column) @@ -442,10 +459,17 @@ case class AlterTableChangeColumnCommand( value.map(column.withCurrentDefaultValue).getOrElse(column) // Compare a [[StructField]] to another, return true if they have the same column - // name(by resolver) and dataType. - private def columnEqual( + // name(by resolver). + private def namesEqual( field: StructField, other: StructField, resolver: Resolver): Boolean = { - resolver(field.name, other.name) && field.dataType == other.dataType + resolver(field.name, other.name) + } + + // Compare dataType of [[StructField]] to another, return true if it is valid to evolve the type + // when altering column. Only changes in collation of data type or its nested types (recursively) + // are allowed. 
+ private def canEvolveType(from: StructField, to: StructField): Boolean = { + DataType.equalsIgnoreCompatibleCollation(from.dataType, to.dataType) } } @@ -696,7 +720,7 @@ case class RepairTableCommand( } val root = new Path(table.location) - logInfo(s"Recover all the partitions in $root") + logInfo(log"Recover all the partitions in ${MDC(LogKeys.PATH, root)}") val hadoopConf = spark.sessionState.newHadoopConf() val fs = root.getFileSystem(hadoopConf) @@ -716,14 +740,16 @@ case class RepairTableCommand( evalPool.shutdown() } val total = partitionSpecsAndLocs.length - logInfo(s"Found $total partitions in $root") + logInfo(log"Found ${MDC(LogKeys.NUM_PARTITIONS, total)} partitions " + + log"in ${MDC(LogKeys.PATH, root)}") val partitionStats = if (spark.sessionState.conf.gatherFastStats) { gatherPartitionStats(spark, partitionSpecsAndLocs, fs, pathFilter, threshold) } else { Map.empty[Path, PartitionStatistics] } - logInfo(s"Finished to gather the fast stats for all $total partitions.") + logInfo(log"Finished to gather the fast stats for all " + + log"${MDC(LogKeys.NUM_PARTITIONS, total)} partitions.") addPartitions(spark, table, partitionSpecsAndLocs, partitionStats) total @@ -736,12 +762,14 @@ case class RepairTableCommand( spark.catalog.refreshTable(tableIdentWithDB) } catch { case NonFatal(e) => - logError(s"Cannot refresh the table '$tableIdentWithDB'. A query of the table " + - "might return wrong result if the table was cached. To avoid such issue, you should " + - "uncache the table manually via the UNCACHE TABLE command after table recovering will " + - "complete fully.", e) + logError(log"Cannot refresh the table '${MDC(LogKeys.TABLE_NAME, tableIdentWithDB)}'. " + + log"A query of the table might return wrong result if the table was cached. " + + log"To avoid such issue, you should uncache the table manually via the UNCACHE TABLE " + + log"command after table recovering will complete fully.", e) } - logInfo(s"Recovered all partitions: added ($addedAmount), dropped ($droppedAmount).") + logInfo(log"Recovered all partitions: " + + log"added (${MDC(LogKeys.NUM_ADDED_PARTITIONS, addedAmount)}), " + + log"dropped (${MDC(LogKeys.NUM_DROPPED_PARTITIONS, droppedAmount)}).") Seq.empty[Row] } @@ -782,12 +810,13 @@ case class RepairTableCommand( scanPartitions(spark, fs, filter, st.getPath, spec ++ Map(partitionNames.head -> value), partitionNames.drop(1), threshold, resolver, evalTaskSupport) } else { - logWarning( - s"expected partition column ${partitionNames.head}, but got ${ps(0)}, ignoring it") + logWarning(log"expected partition column " + + log"${MDC(LogKeys.EXPECTED_PARTITION_COLUMN, partitionNames.head)}," + + log" but got ${MDC(LogKeys.ACTUAL_PARTITION_COLUMN, ps(0))}, ignoring it") Seq.empty } } else { - logWarning(s"ignore ${new Path(path, name)}") + logWarning(log"ignore ${MDC(LogKeys.PATH, new Path(path, name))}") Seq.empty } } @@ -811,7 +840,8 @@ case class RepairTableCommand( Math.min(spark.sparkContext.defaultParallelism, 10000)) // gather the fast stats for all the partitions otherwise Hive metastore will list all the // files for all the new partitions in sequential way, which is super slow. 
- logInfo(s"Gather the fast stats in parallel using $numParallelism tasks.") + logInfo(log"Gather the fast stats in parallel using ${MDC(LogKeys.COUNT, numParallelism)} " + + log"tasks.") spark.sparkContext.parallelize(locations, numParallelism) .mapPartitions { locationsEachPartition => val pathFilter = getPathFilter(serializableConfiguration.value) @@ -1028,7 +1058,8 @@ object DDLUtils extends Logging { DataSource.lookupDataSource(provider, SQLConf.get).getConstructor().newInstance() } catch { case e: Throwable => - logError(s"Failed to find data source: $provider when check data column names.", e) + logError(log"Failed to find data source: ${MDC(LogKeys.DATA_SOURCE, provider)} " + + log"when check data column names.", e) return } source match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 1a97b965da2bd..ee0074dfe61b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -760,7 +760,7 @@ case class DescribeTableCommand( * 7. Common table expressions (CTEs) */ case class DescribeQueryCommand(queryText: String, plan: LogicalPlan) - extends DescribeCommandBase with CTEInChildren { + extends DescribeCommandBase with SupervisingCommand with CTEInChildren { override val output = DescribeCommandSchema.describeTableAttributes() @@ -776,6 +776,9 @@ case class DescribeQueryCommand(queryText: String, plan: LogicalPlan) override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = { copy(plan = WithCTE(plan, cteDefs)) } + + def withTransformedSupervisedPlan(transformer: LogicalPlan => LogicalPlan): LogicalPlan = + copy(plan = transformer(plan)) } /** @@ -1110,6 +1113,7 @@ trait ShowCreateTableCommandBase extends SQLConfHelper { showViewDataColumns(metadata, builder) showTableComment(metadata, builder) showViewProperties(metadata, builder) + showViewSchemaBinding(metadata, builder) showViewText(metadata, builder) } @@ -1139,6 +1143,12 @@ trait ShowCreateTableCommandBase extends SQLConfHelper { } } + private def showViewSchemaBinding(metadata: CatalogTable, builder: StringBuilder): Unit = { + if (SQLConf.get.viewSchemaBindingEnabled) { + builder ++= s"WITH SCHEMA ${metadata.viewSchemaMode.toString}\n" + } + } + private def showViewText(metadata: CatalogTable, builder: StringBuilder): Unit = { builder ++= metadata.viewText.mkString("AS ", "", "\n") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index d71d0d43683cb..e1061a46db7b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -22,10 +22,11 @@ import scala.collection.mutable import org.json4s.JsonAST.{JArray, JString} import org.json4s.jackson.JsonMethods._ +import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{SQLConfHelper, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, GlobalTempView, LocalTempView, ViewType} +import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, GlobalTempView, LocalTempView, SchemaEvolution, SchemaUnsupported, ViewSchemaMode, ViewType} import 
org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, TemporaryViewRelation} import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, SubqueryExpression, VariableReference} import org.apache.spark.sql.catalyst.plans.logical.{AnalysisOnlyCommand, CTEInChildren, CTERelationDef, LogicalPlan, Project, View, WithCTE} @@ -56,6 +57,7 @@ import org.apache.spark.util.ArrayImplicits._ * @param replace if true, and if the view already exists, updates it; if false, and if the view * already exists, throws analysis exception. * @param viewType the expected view type to be created with this command. + * @param viewSchemaMode the tolerance of the view towards schema changes * @param isAnalyzed whether this command is analyzed or not. */ case class CreateViewCommand( @@ -68,6 +70,7 @@ case class CreateViewCommand( allowExisting: Boolean, replace: Boolean, viewType: ViewType, + viewSchemaMode: ViewSchemaMode = SchemaUnsupported, isAnalyzed: Boolean = false, referredTempFunctions: Seq[String] = Seq.empty) extends RunnableCommand with AnalysisOnlyCommand with CTEInChildren { @@ -106,6 +109,10 @@ case class CreateViewCommand( throw QueryCompilationErrors.cannotCreateViewTooManyColumnsError( name, userSpecifiedColumns.map(_._1), analyzedPlan) } + if (viewSchemaMode == SchemaEvolution) { + throw SparkException.internalError( + "View with user column list has viewSchemaMode EVOLUTION") + } } val catalog = sparkSession.sessionState.catalog @@ -203,7 +210,7 @@ case class CreateViewCommand( val aliasedSchema = CharVarcharUtils.getRawSchema( aliasPlan(session, analyzedPlan).schema, session.sessionState.conf) val newProperties = generateViewProperties( - properties, session, analyzedPlan, aliasedSchema.fieldNames) + properties, session, analyzedPlan.schema.fieldNames, aliasedSchema.fieldNames, viewSchemaMode) CatalogTable( identifier = name, @@ -301,7 +308,11 @@ case class AlterViewAsCommand( CommandUtils.uncacheTableOrView(session, viewIdent) val newProperties = generateViewProperties( - viewMeta.properties, session, analyzedPlan, analyzedPlan.schema.fieldNames) + viewMeta.properties, + session, + analyzedPlan.schema.fieldNames, // The query output column names + analyzedPlan.schema.fieldNames, // Will match the view schema names + viewMeta.viewSchemaMode) val newSchema = CharVarcharUtils.getRawSchema(analyzedPlan.schema) val updatedViewMeta = viewMeta.copy( @@ -318,6 +329,50 @@ case class AlterViewAsCommand( } } +/** + * Alter a view with given schema binding. If the view name contains database prefix, this command + * will alter a permanent view matching the given name, or throw an exception if view not exist. + * Else, this command will try to alter a temporary view first, if view not exist, try permanent + * view next, if still not exist, throw an exception. + * + * @param name the name of this view. + * @param viewSchemaMode The new schema binding mode. 
+ */ +case class AlterViewSchemaBindingCommand(name: TableIdentifier, viewSchemaMode: ViewSchemaMode) + extends LeafRunnableCommand { + + import ViewHelper._ + + override def run(session: SparkSession): Seq[Row] = { + val isTemporary = session.sessionState.catalog.isTempView(name) + if (isTemporary) { + throw QueryCompilationErrors.cannotAlterTempViewWithSchemaBindingError() + } + alterPermanentView(session, viewSchemaMode) + Seq.empty[Row] + } + + private def alterPermanentView(session: SparkSession, viewSchemaMode: ViewSchemaMode): Unit = { + val viewMeta = session.sessionState.catalog.getTableMetadata(name) + + val viewIdent = viewMeta.identifier + + logDebug(s"Try to uncache ${viewIdent.quotedString} before replacing.") + CommandUtils.uncacheTableOrView(session, viewIdent) + + val newProperties = generateViewProperties( + viewMeta.properties, + session, + viewMeta.viewQueryColumnNames.toArray, + viewMeta.schema.fieldNames, + viewSchemaMode) + + val updatedViewMeta = viewMeta.copy(properties = newProperties) + + session.sessionState.catalog.alterTable(updatedViewMeta) + } +} + /** * A command for users to get views in the given database. * If a databaseName is not given, the current database will be used. @@ -360,6 +415,7 @@ object ViewHelper extends SQLConfHelper with Logging { "spark.sql.hive.convertMetastoreParquet", "spark.sql.hive.convertMetastoreOrc", "spark.sql.hive.convertInsertingPartitionedTable", + "spark.sql.hive.convertInsertingUnpartitionedTable", "spark.sql.hive.convertMetastoreCtas", SQLConf.ADDITIONAL_REMOTE_REPOSITORIES.key) @@ -436,6 +492,20 @@ object ViewHelper extends SQLConfHelper with Logging { } } + /** + * Convert the viewSchemaMode to `properties`. + * If the mode is UNSUPPORTED, do not store anything, for backward compatibility. + */ + private def viewSchemaModeToProps(viewSchemaMode: ViewSchemaMode): Map[String, String] = { + if (viewSchemaMode == SchemaUnsupported) { + Map.empty + } else { + val props = new mutable.HashMap[String, String] + props.put(VIEW_SCHEMA_MODE, viewSchemaMode.toString) + props.toMap + } + } + + /** + * Convert the temporary object names to `properties`.
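A companion sketch of what AlterViewSchemaBindingCommand does at the SQL level, again assuming an active SparkSession named spark and the assumed WITH SCHEMA keyword spelling; only permanent views are accepted, mirroring the isTempView check above:

  spark.sql("ALTER VIEW v WITH SCHEMA BINDING")      // permanent view: mode stored in view properties
  spark.sql("CREATE TEMP VIEW tv AS SELECT 1 AS a")
  // spark.sql("ALTER VIEW tv WITH SCHEMA BINDING")  // would fail: temp views reject schema binding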
*/ @@ -485,13 +555,12 @@ object ViewHelper extends SQLConfHelper with Logging { def generateViewProperties( properties: Map[String, String], session: SparkSession, - analyzedPlan: LogicalPlan, + queryOutput: Array[String], fieldNames: Array[String], + viewSchemaMode: ViewSchemaMode, tempViewNames: Seq[Seq[String]] = Seq.empty, tempFunctionNames: Seq[String] = Seq.empty, tempVariableNames: Seq[Seq[String]] = Seq.empty): Map[String, String] = { - // for createViewCommand queryOutput may be different from fieldNames - val queryOutput = analyzedPlan.schema.fieldNames val conf = session.sessionState.conf @@ -506,7 +575,8 @@ object ViewHelper extends SQLConfHelper with Logging { manager.currentCatalog.name, manager.currentNamespace.toImmutableArraySeq) ++ sqlConfigsToProps(conf) ++ generateQueryColumnNames(queryOutput.toImmutableArraySeq) ++ - referredTempNamesToProps(tempViewNames, tempFunctionNames, tempVariableNames) + referredTempNamesToProps(tempViewNames, tempFunctionNames, tempVariableNames) ++ + viewSchemaModeToProps(viewSchemaMode) } /** @@ -718,8 +788,8 @@ object ViewHelper extends SQLConfHelper with Logging { // TBLPROPERTIES is not allowed for temporary view, so we don't use it for // generating temporary view properties val newProperties = generateViewProperties( - Map.empty, session, analyzedPlan, viewSchema.fieldNames, tempViews, - tempFunctions, tempVariables) + Map.empty, session, analyzedPlan.schema.fieldNames, viewSchema.fieldNames, SchemaUnsupported, + tempViews, tempFunctions, tempVariables) CatalogTable( identifier = viewName, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala index b5bf337a5a2e6..1b7b0d702ab98 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/ApplyCharTypePadding.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{BINARY_COMPARISON, IN} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{CharType, Metadata, StringType} import org.apache.spark.unsafe.types.UTF8String @@ -66,9 +67,10 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { r.copy(dataCols = cleanedDataCols, partitionCols = cleanedPartCols) }) } - paddingForStringComparison(newPlan) + paddingForStringComparison(newPlan, padCharCol = false) } else { - paddingForStringComparison(plan) + paddingForStringComparison( + plan, padCharCol = !conf.getConf(SQLConf.LEGACY_NO_CHAR_PADDING_IN_PREDICATE)) } } @@ -90,7 +92,7 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { } } - private def paddingForStringComparison(plan: LogicalPlan): LogicalPlan = { + private def paddingForStringComparison(plan: LogicalPlan, padCharCol: Boolean): LogicalPlan = { plan.resolveOperatorsUpWithPruning(_.containsAnyPattern(BINARY_COMPARISON, IN)) { case operator => operator.transformExpressionsUpWithPruning( _.containsAnyPattern(BINARY_COMPARISON, IN)) { @@ -99,12 +101,12 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { // String literal is treated as char type when it's compared to a char type column. // We should pad the shorter one to the longer length. 
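As a worked illustration of the padding rule described in the comment above, here is a sketch assuming an active SparkSession named spark and default char-padding settings:

  spark.sql("CREATE TABLE chars(c CHAR(5)) USING parquet")
  spark.sql("INSERT INTO chars VALUES ('ab')")   // stored as 'ab   ' (padded to length 5)
  // The literal 'ab' is shorter than CHAR(5), so it is right-padded before the comparison;
  // with the new padCharCol flag the column side can also be padded when read-side padding is off.
  spark.sql("SELECT * FROM chars WHERE c = 'ab'").show()   // returns the row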
case b @ BinaryComparison(e @ AttrOrOuterRef(attr), lit) if lit.foldable => - padAttrLitCmp(e, attr.metadata, lit).map { newChildren => + padAttrLitCmp(e, attr.metadata, padCharCol, lit).map { newChildren => b.withNewChildren(newChildren) }.getOrElse(b) case b @ BinaryComparison(lit, e @ AttrOrOuterRef(attr)) if lit.foldable => - padAttrLitCmp(e, attr.metadata, lit).map { newChildren => + padAttrLitCmp(e, attr.metadata, padCharCol, lit).map { newChildren => b.withNewChildren(newChildren.reverse) }.getOrElse(b) @@ -117,9 +119,10 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { val literalCharLengths = literalChars.map(_.numChars()) val targetLen = (length +: literalCharLengths).max Some(i.copy( - value = addPadding(e, length, targetLen), + value = addPadding(e, length, targetLen, alwaysPad = padCharCol), list = list.zip(literalCharLengths).map { - case (lit, charLength) => addPadding(lit, charLength, targetLen) + case (lit, charLength) => + addPadding(lit, charLength, targetLen, alwaysPad = false) } ++ nulls.map(Literal.create(_, StringType)))) case _ => None }.getOrElse(i) @@ -162,6 +165,7 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { private def padAttrLitCmp( expr: Expression, metadata: Metadata, + padCharCol: Boolean, lit: Expression): Option[Seq[Expression]] = { if (expr.dataType == StringType) { CharVarcharUtils.getRawType(metadata).flatMap { @@ -174,7 +178,14 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { if (length < stringLitLen) { Some(Seq(StringRPad(expr, Literal(stringLitLen)), lit)) } else if (length > stringLitLen) { - Some(Seq(expr, StringRPad(lit, Literal(length)))) + val paddedExpr = if (padCharCol) { + StringRPad(expr, Literal(length)) + } else { + expr + } + Some(Seq(paddedExpr, StringRPad(lit, Literal(length)))) + } else if (padCharCol) { + Some(Seq(StringRPad(expr, Literal(length)), lit)) } else { None } @@ -186,7 +197,15 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { } } - private def addPadding(expr: Expression, charLength: Int, targetLength: Int): Expression = { - if (targetLength > charLength) StringRPad(expr, Literal(targetLength)) else expr + private def addPadding( + expr: Expression, + charLength: Int, + targetLength: Int, + alwaysPad: Boolean): Expression = { + if (targetLength > charLength) { + StringRPad(expr, Literal(targetLength)) + } else if (alwaysPad) { + StringRPad(expr, Literal(charLength)) + } else expr } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala index 8a9fbd15e2e81..1858a84213598 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala @@ -26,7 +26,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{SparkContext, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{ACTUAL_NUM_FILES, EXPECTED_NUM_FILES} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.datasources.BasicWriteJobStatsTracker._ @@ -119,9 +120,9 @@ class BasicWriteTaskStatsTracker( } catch { case e: NumberFormatException => // warn but don't dump the whole stack - 
logInfo(s"Failed to parse" + - s" ${BasicWriteJobStatsTracker.FILE_LENGTH_XATTR}:$e;" + - s" bytes written may be under-reported"); + logInfo(log"Failed to parse " + + log"${MDC(LogKeys.FILE_LENGTH_XATTR, BasicWriteJobStatsTracker.FILE_LENGTH_XATTR)}:" + + log"${MDC(LogKeys.ERROR, e)}; bytes written may be under-reported"); case e: UnsupportedOperationException => // this is not unusual; ignore logDebug(s"XAttr not supported on path $path", e); @@ -166,9 +167,9 @@ class BasicWriteTaskStatsTracker( } if (numSubmittedFiles != numFiles) { - logWarning(s"Expected $numSubmittedFiles files, but only saw $numFiles. " + - "This could be due to the output format not writing empty files, " + - "or files being not immediately visible in the filesystem.") + logWarning(log"Expected ${MDC(EXPECTED_NUM_FILES, numSubmittedFiles)} files, but only saw " + + log"${MDC(ACTUAL_NUM_FILES, numFiles)}. This could be due to the output format not " + + log"writing empty files, or files being not immediately visible in the filesystem.") } taskCommitTimeMetric.foreach(_ += taskCommitTime) BasicWriteTaskStats(partitions.toSeq, numFiles, numBytes, numRows) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 4c2d6a4cdf5ef..d88b5ee8877d7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -27,7 +27,8 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, DATA_SOURCE, DATA_SOURCES, PATHS} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat, CatalogTable, CatalogUtils} @@ -695,8 +696,9 @@ object DataSource extends Logging { throw QueryCompilationErrors .foundMultipleXMLDataSourceError(provider1, sourceNames, externalSource.getName) } else if (internalSources.size == 1) { - logWarning(s"Multiple sources found for $provider1 (${sourceNames.mkString(", ")}), " + - s"defaulting to the internal datasource (${internalSources.head.getClass.getName}).") + logWarning(log"Multiple sources found for ${MDC(DATA_SOURCE, provider1)} " + + log"(${MDC(DATA_SOURCES, sourceNames.mkString(", "))}), defaulting to the " + + log"internal datasource (${MDC(CLASS_NAME, internalSources.head.getClass.getName)}).") internalSources.head.getClass } else { throw QueryCompilationErrors.findMultipleDataSourceError(provider1, sourceNames) @@ -784,7 +786,7 @@ object DataSource extends Logging { globResult }.flatten } catch { - case e: SparkException => throw e.getCause + case e: SparkException => throw ThreadUtils.wrapCallerStacktrace(e.getCause) } if (checkFilesExist) { @@ -796,7 +798,7 @@ object DataSource extends Logging { } } } catch { - case e: SparkException => throw e.getCause + case e: SparkException => throw ThreadUtils.wrapCallerStacktrace(e.getCause) } } @@ -807,7 +809,7 @@ object DataSource extends Logging { } if (filteredIn.isEmpty) { logWarning( - s"All paths were ignored:\n ${filteredOut.mkString("\n ")}") + log"All paths were ignored:\n ${MDC(PATHS, filteredOut.mkString("\n "))}") } else { logDebug( s"Some paths were ignored:\n 
${filteredOut.mkString("\n ")}") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala index 2f4555effce3a..93fc6cf367cfc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceManager.scala @@ -21,7 +21,8 @@ import java.util.Locale import java.util.concurrent.ConcurrentHashMap import org.apache.spark.api.python.PythonUtils -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.DATA_SOURCE import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.python.UserDefinedPythonDataSource import org.apache.spark.util.Utils @@ -53,7 +54,8 @@ class DataSourceManager extends Logging { } val previousValue = runtimeDataSourceBuilders.put(normalizedName, source) if (previousValue != null) { - logWarning(f"The data source $name replaced a previously registered data source.") + logWarning(log"The data source ${MDC(DATA_SOURCE, name)} replaced a previously " + + log"registered data source.") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 845d969df0885..5d2310c130703 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -24,7 +24,8 @@ import scala.collection.mutable import org.apache.hadoop.fs.Path -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.PREDICATES import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, QualifiedTableName, SQLConfHelper} @@ -53,7 +54,7 @@ import org.apache.spark.sql.execution.streaming.StreamingRelation import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.{PartitioningUtils => CatalystPartitioningUtils} -import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils} import org.apache.spark.unsafe.types.UTF8String /** @@ -494,47 +495,81 @@ object DataSourceStrategy val partitionSet = AttributeSet(partitionColumns) val predicates = ExpressionSet(normalizedFilters .flatMap(extractPredicatesWithinOutputSet(_, partitionSet))) - logInfo(s"Pruning directories with: ${predicates.mkString(",")}") + logInfo(log"Pruning directories with: ${MDC(PREDICATES, predicates.mkString(","))}") predicates } } + /** + * Creates a collation aware filter if the input data type is string with non-default collation + */ + private def collationAwareFilter(filter: sources.Filter, dataType: DataType): Filter = { + if (!SchemaUtils.hasNonUTF8BinaryCollation(dataType)) { + return filter + } + + filter match { + case sources.EqualTo(attribute, value) => + CollatedEqualTo(attribute, value, dataType) + case sources.EqualNullSafe(attribute, value) => + CollatedEqualNullSafe(attribute, value, dataType) + case sources.GreaterThan(attribute, value) => + CollatedGreaterThan(attribute, value, dataType) + case 
sources.GreaterThanOrEqual(attribute, value) => + CollatedGreaterThanOrEqual(attribute, value, dataType) + case sources.LessThan(attribute, value) => + CollatedLessThan(attribute, value, dataType) + case sources.LessThanOrEqual(attribute, value) => + CollatedLessThanOrEqual(attribute, value, dataType) + case sources.In(attribute, values) => + CollatedIn(attribute, values, dataType) + case sources.StringStartsWith(attribute, value) => + CollatedStringStartsWith(attribute, value, dataType) + case sources.StringEndsWith(attribute, value) => + CollatedStringEndsWith(attribute, value, dataType) + case sources.StringContains(attribute, value) => + CollatedStringContains(attribute, value, dataType) + case other => + other + } + } + private def translateLeafNodeFilter( predicate: Expression, pushableColumn: PushableColumnBase): Option[Filter] = predicate match { - case expressions.EqualTo(pushableColumn(name), Literal(v, t)) => - Some(sources.EqualTo(name, convertToScala(v, t))) - case expressions.EqualTo(Literal(v, t), pushableColumn(name)) => - Some(sources.EqualTo(name, convertToScala(v, t))) - - case expressions.EqualNullSafe(pushableColumn(name), Literal(v, t)) => - Some(sources.EqualNullSafe(name, convertToScala(v, t))) - case expressions.EqualNullSafe(Literal(v, t), pushableColumn(name)) => - Some(sources.EqualNullSafe(name, convertToScala(v, t))) - - case expressions.GreaterThan(pushableColumn(name), Literal(v, t)) => - Some(sources.GreaterThan(name, convertToScala(v, t))) - case expressions.GreaterThan(Literal(v, t), pushableColumn(name)) => - Some(sources.LessThan(name, convertToScala(v, t))) - - case expressions.LessThan(pushableColumn(name), Literal(v, t)) => - Some(sources.LessThan(name, convertToScala(v, t))) - case expressions.LessThan(Literal(v, t), pushableColumn(name)) => - Some(sources.GreaterThan(name, convertToScala(v, t))) - - case expressions.GreaterThanOrEqual(pushableColumn(name), Literal(v, t)) => - Some(sources.GreaterThanOrEqual(name, convertToScala(v, t))) - case expressions.GreaterThanOrEqual(Literal(v, t), pushableColumn(name)) => - Some(sources.LessThanOrEqual(name, convertToScala(v, t))) - - case expressions.LessThanOrEqual(pushableColumn(name), Literal(v, t)) => - Some(sources.LessThanOrEqual(name, convertToScala(v, t))) - case expressions.LessThanOrEqual(Literal(v, t), pushableColumn(name)) => - Some(sources.GreaterThanOrEqual(name, convertToScala(v, t))) + case expressions.EqualTo(e @ pushableColumn(name), Literal(v, t)) => + Some(collationAwareFilter(sources.EqualTo(name, convertToScala(v, t)), e.dataType)) + case expressions.EqualTo(Literal(v, t), e @ pushableColumn(name)) => + Some(collationAwareFilter(sources.EqualTo(name, convertToScala(v, t)), e.dataType)) + + case expressions.EqualNullSafe(e @ pushableColumn(name), Literal(v, t)) => + Some(collationAwareFilter(sources.EqualNullSafe(name, convertToScala(v, t)), e.dataType)) + case expressions.EqualNullSafe(Literal(v, t), e @ pushableColumn(name)) => + Some(collationAwareFilter(sources.EqualNullSafe(name, convertToScala(v, t)), e.dataType)) + + case expressions.GreaterThan(e @ pushableColumn(name), Literal(v, t)) => + Some(collationAwareFilter(sources.GreaterThan(name, convertToScala(v, t)), e.dataType)) + case expressions.GreaterThan(Literal(v, t), e @ pushableColumn(name)) => + Some(collationAwareFilter(sources.LessThan(name, convertToScala(v, t)), e.dataType)) + + case expressions.LessThan(e @ pushableColumn(name), Literal(v, t)) => + Some(collationAwareFilter(sources.LessThan(name, convertToScala(v, 
t)), e.dataType)) + case expressions.LessThan(Literal(v, t), e @ pushableColumn(name)) => + Some(collationAwareFilter(sources.GreaterThan(name, convertToScala(v, t)), e.dataType)) + + case expressions.GreaterThanOrEqual(e @ pushableColumn(name), Literal(v, t)) => + Some(collationAwareFilter(sources.GreaterThanOrEqual(name, convertToScala(v, t)), e.dataType)) + case expressions.GreaterThanOrEqual(Literal(v, t), e @ pushableColumn(name)) => + Some(collationAwareFilter(sources.LessThanOrEqual(name, convertToScala(v, t)), e.dataType)) + + case expressions.LessThanOrEqual(e @ pushableColumn(name), Literal(v, t)) => + Some(collationAwareFilter(sources.LessThanOrEqual(name, convertToScala(v, t)), e.dataType)) + case expressions.LessThanOrEqual(Literal(v, t), e @ pushableColumn(name)) => + Some(collationAwareFilter(sources.GreaterThanOrEqual(name, convertToScala(v, t)), e.dataType)) case expressions.InSet(e @ pushableColumn(name), set) => val toScala = CatalystTypeConverters.createToScalaConverter(e.dataType) - Some(sources.In(name, set.toArray.map(toScala))) + Some(collationAwareFilter(sources.In(name, set.toArray.map(toScala)), e.dataType)) // Because we only convert In to InSet in Optimizer when there are more than certain // items. So it is possible we still get an In expression here that needs to be pushed @@ -542,20 +577,20 @@ object DataSourceStrategy case expressions.In(e @ pushableColumn(name), list) if list.forall(_.isInstanceOf[Literal]) => val hSet = list.map(_.eval(EmptyRow)) val toScala = CatalystTypeConverters.createToScalaConverter(e.dataType) - Some(sources.In(name, hSet.toArray.map(toScala))) + Some(collationAwareFilter(sources.In(name, hSet.toArray.map(toScala)), e.dataType)) case expressions.IsNull(pushableColumn(name)) => Some(sources.IsNull(name)) case expressions.IsNotNull(pushableColumn(name)) => Some(sources.IsNotNull(name)) - case expressions.StartsWith(pushableColumn(name), Literal(v: UTF8String, StringType)) => - Some(sources.StringStartsWith(name, v.toString)) + case expressions.StartsWith(e @ pushableColumn(name), Literal(v: UTF8String, StringType)) => + Some(collationAwareFilter(sources.StringStartsWith(name, v.toString), e.dataType)) - case expressions.EndsWith(pushableColumn(name), Literal(v: UTF8String, StringType)) => - Some(sources.StringEndsWith(name, v.toString)) + case expressions.EndsWith(e @ pushableColumn(name), Literal(v: UTF8String, StringType)) => + Some(collationAwareFilter(sources.StringEndsWith(name, v.toString), e.dataType)) - case expressions.Contains(pushableColumn(name), Literal(v: UTF8String, StringType)) => - Some(sources.StringContains(name, v.toString)) + case expressions.Contains(e @ pushableColumn(name), Literal(v: UTF8String, StringType)) => + Some(collationAwareFilter(sources.StringContains(name, v.toString), e.dataType)) case expressions.Literal(true, BooleanType) => Some(sources.AlwaysTrue) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala index 0db5de7243404..c80dc83079675 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceUtils.scala @@ -26,9 +26,9 @@ import org.json4s.{Formats, NoTypeHints} import org.json4s.jackson.Serialization import org.apache.spark.{SparkException, SparkUpgradeException} -import org.apache.spark.sql.{SPARK_LEGACY_DATETIME_METADATA_KEY, 
SPARK_LEGACY_INT96_METADATA_KEY, SPARK_TIMEZONE_METADATA_KEY, SPARK_VERSION_METADATA_KEY} +import org.apache.spark.sql.{sources, SPARK_LEGACY_DATETIME_METADATA_KEY, SPARK_LEGACY_INT96_METADATA_KEY, SPARK_TIMEZONE_METADATA_KEY, SPARK_VERSION_METADATA_KEY} import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, ExpressionSet, GetStructField, PredicateHelper} +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, ExpressionSet, PredicateHelper} import org.apache.spark.sql.catalyst.util.RebaseDateTime import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} @@ -36,7 +36,7 @@ import org.apache.spark.sql.execution.datasources.parquet.ParquetOptions import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types._ -import org.apache.spark.sql.util.{CaseInsensitiveStringMap, SchemaUtils} +import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.util.Utils @@ -280,22 +280,15 @@ object DataSourceUtils extends PredicateHelper { (ExpressionSet(partitionFilters ++ extraPartitionFilter).toSeq, dataFilters) } - /** - * Determines whether a filter should be pushed down to the data source or not. - * - * @param expression The filter expression to be evaluated. - * @param isCollationPushDownSupported Whether the data source supports collation push down. - * @return A boolean indicating whether the filter should be pushed down or not. - */ - def shouldPushFilter(expression: Expression, isCollationPushDownSupported: Boolean): Boolean = { - if (!expression.deterministic) return false - - isCollationPushDownSupported || !expression.exists { - case childExpression @ (_: Attribute | _: GetStructField) => - // don't push down filters for types with non-binary sortable collation - // as it could lead to incorrect results - SchemaUtils.hasNonBinarySortableCollatedString(childExpression.dataType) - + def containsFiltersWithCollation(filter: sources.Filter): Boolean = { + filter match { + case sources.And(left, right) => + containsFiltersWithCollation(left) || containsFiltersWithCollation(right) + case sources.Or(left, right) => + containsFiltersWithCollation(left) || containsFiltersWithCollation(right) + case sources.Not(child) => + containsFiltersWithCollation(child) + case _: sources.CollatedFilter => true case _ => false } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala index 0785b0cbe9e23..36c59950fe209 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala @@ -223,12 +223,6 @@ trait FileFormat { */ def fileConstantMetadataExtractors: Map[String, PartitionedFile => Any] = FileFormat.BASE_METADATA_EXTRACTORS - - /** - * Returns whether the file format supports filter push down - * for non utf8 binary collated columns. 
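To motivate the collation-aware filter wrappers introduced above, here is a sketch of why a plain byte-wise EqualTo cannot simply be pushed down for a case-insensitive column; an active SparkSession named spark is assumed, and the COLLATE syntax follows recent Spark SQL:

  spark.sql("CREATE TABLE names(name STRING COLLATE UTF8_LCASE) USING parquet")
  spark.sql("INSERT INTO names VALUES ('Alice'), ('ALICE')")
  // Under UTF8_LCASE both rows equal 'alice'. A binary EqualTo evaluated by the source could
  // drop them, so the filter is translated to a CollatedFilter that sources can detect, for
  // example via DataSourceUtils.containsFiltersWithCollation, and then handle or skip.
  spark.sql("SELECT count(*) FROM names WHERE name = 'alice'").show()   // expected count: 2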
- */ - def supportsCollationPushDown: Boolean = false } object FileFormat { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala index 1dbb6ce26f693..7d071124b0b30 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatDataWriter.scala @@ -22,7 +22,8 @@ import org.apache.hadoop.fs.{FileAlreadyExistsException, Path} import org.apache.hadoop.mapreduce.TaskAttemptContext import org.apache.spark.TaskOutputFileAlreadyExistException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CONFIG, NUM_CONCURRENT_WRITER} import org.apache.spark.internal.io.{FileCommitProtocol, FileNameSpec} import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage import org.apache.spark.shuffle.FetchFailedException @@ -558,9 +559,11 @@ class DynamicPartitionDataConcurrentWriter( new WriterStatus(currentWriter, recordsInFile, fileCounter)) if (concurrentWriters.size >= concurrentOutputWriterSpec.maxWriters && !sorted) { // Fall back to sort-based sequential writer mode. - logInfo(s"Number of concurrent writers ${concurrentWriters.size} reaches the threshold. " + - "Fall back from concurrent writers to sort-based sequential writer. You may change " + - s"threshold with configuration ${SQLConf.MAX_CONCURRENT_OUTPUT_FILE_WRITERS.key}") + logInfo(log"Number of concurrent writers " + + log"${MDC(NUM_CONCURRENT_WRITER, concurrentWriters.size)} reaches the threshold. " + + log"Fall back from concurrent writers to sort-based sequential writer. You may change " + + log"threshold with configuration " + + log"${MDC(CONFIG, SQLConf.MAX_CONCURRENT_OUTPUT_FILE_WRITERS.key)}") sorted = true } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala index 3bfa3413f6796..91749ddd794fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormatWriter.scala @@ -26,7 +26,8 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.spark._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.io.{FileCommitProtocol, SparkHadoopWriterUtils} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow @@ -269,19 +270,20 @@ object FileFormatWriter extends Logging { val ret = f val commitMsgs = ret.map(_.commitMsg) - logInfo(s"Start to commit write Job ${description.uuid}.") + logInfo(log"Start to commit write Job ${MDC(LogKeys.UUID, description.uuid)}.") val (_, duration) = Utils .timeTakenMs { committer.commitJob(job, commitMsgs.toImmutableArraySeq) } - logInfo(s"Write Job ${description.uuid} committed. Elapsed time: $duration ms.") + logInfo(log"Write Job ${MDC(LogKeys.UUID, description.uuid)} committed. 
" + + log"Elapsed time: ${MDC(LogKeys.ELAPSED_TIME, duration)} ms.") processStats( description.statsTrackers, ret.map(_.summary.stats).toImmutableArraySeq, duration) - logInfo(s"Finished processing stats for write job ${description.uuid}.") + logInfo(log"Finished processing stats for write job ${MDC(LogKeys.UUID, description.uuid)}.") // return a set of all the partition paths that were updated during this job ret.map(_.summary.updatedPartitions).reduceOption(_ ++ _).getOrElse(Set.empty) } catch { case cause: Throwable => - logError(s"Aborting job ${description.uuid}.", cause) + logError(log"Aborting job ${MDC(WRITE_JOB_UUID, description.uuid)}.", cause) committer.abortJob(job) throw cause } @@ -404,7 +406,7 @@ object FileFormatWriter extends Logging { })(catchBlock = { // If there is an error, abort the task dataWriter.abort() - logError(s"Job $jobId aborted.") + logError(log"Job ${MDC(JOB_ID, jobId)} aborted.") }, finallyBlock = { dataWriter.close() }) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileIndexOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileIndexOptions.scala index 1c352e3748f21..5a300dae4daab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileIndexOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileIndexOptions.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils object FileIndexOptions extends DataSourceOptions { val IGNORE_MISSING_FILES = newOption(FileSourceOptions.IGNORE_MISSING_FILES) + val IGNORE_INVALID_PARTITION_PATHS = newOption("ignoreInvalidPartitionPaths") val TIME_ZONE = newOption(DateTimeUtils.TIMEZONE_OPTION) val RECURSIVE_FILE_LOOKUP = newOption("recursiveFileLookup") val BASE_PATH_PARAM = newOption("basePath") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala index 836f0b0698793..8a47a28de845c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FilePartition.scala @@ -21,7 +21,8 @@ import scala.collection.mutable.ArrayBuffer import scala.math.BigDecimal.RoundingMode import org.apache.spark.Partition -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CONFIG, DESIRED_NUM_PARTITIONS, MAX_NUM_PARTITIONS, NUM_PARTITIONS} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.read.InputPartition import org.apache.spark.sql.execution.ScanFileListing @@ -98,9 +99,11 @@ object FilePartition extends Logging { val desiredSplitBytes = (totalSizeInBytes / BigDecimal(maxPartNum.get)).setScale(0, RoundingMode.UP).longValue val desiredPartitions = getFilePartitions(partitionedFiles, desiredSplitBytes, openCostBytes) - logWarning(s"The number of partitions is ${partitions.size}, which exceeds the maximum " + - s"number configured: ${maxPartNum.get}. Spark rescales it to ${desiredPartitions.size} " + - s"by ignoring the configuration of ${SQLConf.FILES_MAX_PARTITION_BYTES.key}.") + logWarning(log"The number of partitions is ${MDC(NUM_PARTITIONS, partitions.size)}, " + + log"which exceeds the maximum number configured: " + + log"${MDC(MAX_NUM_PARTITIONS, maxPartNum.get)}. 
Spark rescales it to " + + log"${MDC(DESIRED_NUM_PARTITIONS, desiredPartitions.size)} by ignoring the " + + log"configuration of ${MDC(CONFIG, SQLConf.FILES_MAX_PARTITION_BYTES.key)}.") desiredPartitions } else { partitions diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 825b8154f6815..9bcdbadf7c5c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -24,6 +24,8 @@ import org.apache.hadoop.fs.Path import org.apache.spark.{Partition => RDDPartition, TaskContext} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.internal.LogKeys.{CURRENT_FILE, PATH} +import org.apache.spark.internal.MDC import org.apache.spark.paths.SparkPath import org.apache.spark.rdd.{InputFileBlockHolder, RDD} import org.apache.spark.sql.SparkSession @@ -231,7 +233,7 @@ class FileScanRDD( if (files.hasNext) { currentFile = files.next() updateMetadataRow() - logInfo(s"Reading File $currentFile") + logInfo(log"Reading File ${MDC(CURRENT_FILE, currentFile)}") // Sets InputFileBlockHolder for the file block's information InputFileBlockHolder .set(currentFile.urlEncodedPath, currentFile.start, currentFile.length) @@ -259,14 +261,14 @@ class FileScanRDD( } } catch { case e: FileNotFoundException if ignoreMissingFiles => - logWarning(s"Skipped missing file: $currentFile", e) + logWarning(log"Skipped missing file: ${MDC(PATH, currentFile)}", e) finished = true null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => - logWarning( - s"Skipped the rest of the content in the corrupted file: $currentFile", e) + logWarning(log"Skipped the rest of the content in the corrupted file: " + + log"${MDC(PATH, currentFile)}", e) finished = true null } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index f2dcbe26104f7..27019ab047ff2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -21,7 +21,8 @@ import java.util.Locale import scala.collection.mutable -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{NUM_PRUNED, POST_SCAN_FILTERS, PUSHED_FILTERS, TOTAL} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions @@ -137,9 +138,8 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { val numBucketsSelected = matchedBuckets.cardinality() - logInfo { - s"Pruned ${numBuckets - numBucketsSelected} out of $numBuckets buckets." 
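The logging change in this hunk follows the same structured-logging pattern applied throughout the patch. A minimal sketch of the pattern in isolation, using Spark's internal Logging API and LogKeys entries that already appear in this diff:

  import org.apache.spark.internal.{Logging, LogKeys, MDC}

  class PruningReporter extends Logging {
    def report(pruned: Int, total: Int): Unit = {
      // MDC attaches a stable key to each interpolated value so logs can be parsed downstream.
      logInfo(log"Pruned ${MDC(LogKeys.NUM_PRUNED, pruned)} out of ${MDC(LogKeys.TOTAL, total)} buckets.")
    }
  }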
- } + logInfo(log"Pruned ${MDC(NUM_PRUNED, numBuckets - numBucketsSelected)} " + + log"out of ${MDC(TOTAL, numBuckets)} buckets.") // None means all the buckets need to be scanned if (numBucketsSelected == numBuckets) { @@ -160,11 +160,8 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { // - filters that need to be evaluated again after the scan val filterSet = ExpressionSet(filters) - val filtersToPush = filters.filter(f => - DataSourceUtils.shouldPushFilter(f, fsRelation.fileFormat.supportsCollationPushDown)) - val normalizedFilters = DataSourceStrategy.normalizeExprs( - filtersToPush, l.output) + filters.filter(_.deterministic), l.output) val partitionColumns = l.resolve( @@ -206,19 +203,18 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { DataSourceUtils.supportNestedPredicatePushdown(fsRelation) val pushedFilters = dataFilters .flatMap(DataSourceStrategy.translateFilter(_, supportNestedPredicatePushdown)) - logInfo(s"Pushed Filters: ${pushedFilters.mkString(",")}") + logInfo(log"Pushed Filters: ${MDC(PUSHED_FILTERS, pushedFilters.mkString(","))}") // Predicates with both partition keys and attributes need to be evaluated after the scan. val afterScanFilters = filterSet -- partitionKeyFilters.filter(_.references.nonEmpty) - logInfo(s"Post-Scan Filters: ${afterScanFilters.mkString(",")}") + logInfo(log"Post-Scan Filters: ${MDC(POST_SCAN_FILTERS, afterScanFilters.mkString(","))}") val filterAttributes = AttributeSet(afterScanFilters ++ stayUpFilters) val requiredExpressions: Seq[NamedExpression] = filterAttributes.toSeq ++ projects val requiredAttributes = AttributeSet(requiredExpressions) - val readDataColumns = dataColumns + val readDataColumns = dataColumnsWithoutPartitionCols .filter(requiredAttributes.contains) - .filterNot(partitionColumns.contains) // Metadata attributes are part of a column of type struct up to this point. Here we extract // this column from the schema and specify a matcher for that. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileStatusCache.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileStatusCache.scala index 80002ecdaf8da..2bb8476a9f0e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileStatusCache.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileStatusCache.scala @@ -25,7 +25,8 @@ import scala.jdk.CollectionConverters._ import com.google.common.cache._ import org.apache.hadoop.fs.{FileStatus, Path} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CACHED_TABLE_PARTITION_METADATA_SIZE, MAX_TABLE_PARTITION_METADATA_SIZE} import org.apache.spark.sql.SparkSession import org.apache.spark.util.SizeEstimator @@ -111,8 +112,8 @@ private class SharedInMemoryCache(maxSizeInBytes: Long, cacheTTL: Long) extends override def weigh(key: (ClientId, Path), value: Array[FileStatus]): Int = { val estimate = (SizeEstimator.estimate(key) + SizeEstimator.estimate(value)) / weightScale if (estimate > Int.MaxValue) { - logWarning(s"Cached table partition metadata size is too big. Approximating to " + - s"${Int.MaxValue.toLong * weightScale}.") + logWarning(log"Cached table partition metadata size is too big. 
Approximating to " + + log"${MDC(CACHED_TABLE_PARTITION_METADATA_SIZE, Int.MaxValue.toLong * weightScale)}.") Int.MaxValue } else { estimate.toInt @@ -126,9 +127,10 @@ private class SharedInMemoryCache(maxSizeInBytes: Long, cacheTTL: Long) extends if (removed.getCause == RemovalCause.SIZE && warnedAboutEviction.compareAndSet(false, true)) { logWarning( - "Evicting cached table partition metadata from memory due to size constraints " + - "(spark.sql.hive.filesourcePartitionFileCacheSize = " - + maxSizeInBytes + " bytes). This may impact query planning performance.") + log"Evicting cached table partition metadata from memory due to size constraints " + + log"(spark.sql.hive.filesourcePartitionFileCacheSize = " + + log"${MDC(MAX_TABLE_PARTITION_METADATA_SIZE, maxSizeInBytes)} bytes). " + + log"This may impact query planning performance.") } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala index 0f66aa816d96c..3b8a20c7cf741 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala @@ -24,7 +24,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.mapred.{FileInputFormat, JobConf} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{COUNT, ELAPSED_TIME} import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.FileSourceOptions @@ -136,8 +137,8 @@ class InMemoryFileIndex( fileStatusCache.putLeafFiles(path, leafFiles.toArray) output ++= leafFiles } - logInfo(s"It took ${(System.nanoTime() - startTime) / (1000 * 1000)} ms to list leaf files" + - s" for ${paths.length} paths.") + logInfo(log"It took ${MDC(ELAPSED_TIME, (System.nanoTime() - startTime) / (1000 * 1000))} ms" + + log" to list leaf files for ${MDC(COUNT, paths.length)} paths.") output } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala index 3efe614bcef92..07be3f89872cc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningAwareFileIndex.scala @@ -24,7 +24,8 @@ import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{COUNT, PERCENT, TOTAL} import org.apache.spark.paths.SparkPath import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} @@ -69,6 +70,13 @@ abstract class PartitioningAwareFileIndex( caseInsensitiveMap.getOrElse(FileIndexOptions.RECURSIVE_FILE_LOOKUP, "false").toBoolean } + protected lazy val ignoreInvalidPartitionPaths: Boolean = { + caseInsensitiveMap + .get(FileIndexOptions.IGNORE_INVALID_PARTITION_PATHS) + .map(_.toBoolean) + .getOrElse(sparkSession.sessionState.conf.ignoreInvalidPartitionPaths) + } + override def listFiles( partitionFilters: Seq[Expression], dataFilters: Seq[Expression]): 
Seq[PartitionDirectory] = { def isNonEmptyFile(f: FileStatus): Boolean = { @@ -161,7 +169,8 @@ abstract class PartitioningAwareFileIndex( userSpecifiedSchema = userSpecifiedSchema, caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis, validatePartitionColumns = sparkSession.sessionState.conf.validatePartitionColumns, - timeZoneId = timeZoneId) + timeZoneId = timeZoneId, + ignoreInvalidPartitionPaths = ignoreInvalidPartitionPaths) } } @@ -190,8 +199,8 @@ abstract class PartitioningAwareFileIndex( val total = partitions.length val selectedSize = selected.length val percentPruned = (1 - selectedSize.toDouble / total.toDouble) * 100 - s"Selected $selectedSize partitions out of $total, " + - s"pruned ${if (total == 0) "0" else s"$percentPruned%"} partitions." + log"Selected ${MDC(COUNT, selectedSize)} partitions out of ${MDC(TOTAL, total)}, " + + log"pruned ${MDC(PERCENT, if (total == 0) "0" else percentPruned)} partitions." } selected diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 56cba0e0561d1..3b2d601b81fb5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -106,9 +106,10 @@ object PartitioningUtils extends SQLConfHelper { userSpecifiedSchema: Option[StructType], caseSensitive: Boolean, validatePartitionColumns: Boolean, - timeZoneId: String): PartitionSpec = { + timeZoneId: String, + ignoreInvalidPartitionPaths: Boolean): PartitionSpec = { parsePartitions(paths, typeInference, basePaths, userSpecifiedSchema, caseSensitive, - validatePartitionColumns, DateTimeUtils.getZoneId(timeZoneId)) + validatePartitionColumns, DateTimeUtils.getZoneId(timeZoneId), ignoreInvalidPartitionPaths) } private[datasources] def parsePartitions( @@ -118,7 +119,8 @@ object PartitioningUtils extends SQLConfHelper { userSpecifiedSchema: Option[StructType], caseSensitive: Boolean, validatePartitionColumns: Boolean, - zoneId: ZoneId): PartitionSpec = { + zoneId: ZoneId, + ignoreInvalidPartitionPaths: Boolean): PartitionSpec = { val userSpecifiedDataTypes = if (userSpecifiedSchema.isDefined) { val nameToDataType = userSpecifiedSchema.get.fields.map(f => f.name -> f.dataType).toMap if (!caseSensitive) { @@ -171,7 +173,7 @@ object PartitioningUtils extends SQLConfHelper { // TODO: Selective case sensitivity. val discoveredBasePaths = optDiscoveredBasePaths.flatten.map(_.toString.toLowerCase()) assert( - discoveredBasePaths.distinct.size == 1, + ignoreInvalidPartitionPaths || discoveredBasePaths.distinct.size == 1, "Conflicting directory structures detected. 
Suspicious paths:\b" + discoveredBasePaths.distinct.mkString("\n\t", "\n\t", "\n\n") + "If provided paths are partition directories, please set " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala index b0431d1df3987..1dffea4e1bc87 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PruneFileSourcePartitions.scala @@ -63,8 +63,7 @@ private[sql] object PruneFileSourcePartitions extends Rule[LogicalPlan] { _)) if filters.nonEmpty && fsRelation.partitionSchema.nonEmpty => val normalizedFilters = DataSourceStrategy.normalizeExprs( - filters.filter(f => !SubqueryExpression.hasSubquery(f) && - DataSourceUtils.shouldPushFilter(f, fsRelation.fileFormat.supportsCollationPushDown)), + filters.filter(f => f.deterministic && !SubqueryExpression.hasSubquery(f)), logicalRelation.output) val (partitionKeyFilters, _) = DataSourceUtils .getPartitionFiltersAndDataFilters(partitionSchema, normalizedFilters) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala index 144be2316f091..03e988eb0bd2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SQLHadoopMapReduceCommitProtocol.scala @@ -21,7 +21,8 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{OutputCommitter, TaskAttemptContext} import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.CLASS_NAME import org.apache.spark.internal.io.HadoopMapReduceCommitProtocol import org.apache.spark.sql.internal.SQLConf @@ -44,7 +45,8 @@ class SQLHadoopMapReduceCommitProtocol( configuration.getClass(SQLConf.OUTPUT_COMMITTER_CLASS.key, null, classOf[OutputCommitter]) if (clazz != null) { - logInfo(s"Using user defined output committer class ${clazz.getCanonicalName}") + logInfo(log"Using user defined output committer class " + + log"${MDC(CLASS_NAME, clazz.getCanonicalName)}") // Every output format based on org.apache.hadoop.mapreduce.lib.output.OutputFormat // has an associated output committer. 
To override this output committer, @@ -64,7 +66,8 @@ class SQLHadoopMapReduceCommitProtocol( committer = ctor.newInstance() } } - logInfo(s"Using output committer class ${committer.getClass.getCanonicalName}") + logInfo(log"Using output committer class " + + log"${MDC(CLASS_NAME, committer.getClass.getCanonicalName)}") committer } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/V1Writes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/V1Writes.scala index d7a8d7aec0b7b..1d6c2a6f81124 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/V1Writes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/V1Writes.scala @@ -213,9 +213,9 @@ object V1WritesUtils { } } - def getWriteFilesOpt(child: SparkPlan): Option[WriteFilesExec] = { + def getWriteFilesOpt(child: SparkPlan): Option[WriteFilesExecBase] = { child.collectFirst { - case w: WriteFilesExec => w + case w: WriteFilesExecBase => w } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala index a4fd57e7dffad..c6c34b7fcea3f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala @@ -58,6 +58,14 @@ case class WriteFiles( copy(child = newChild) } +trait WriteFilesExecBase extends UnaryExecNode { + override def output: Seq[Attribute] = Seq.empty + + override protected def doExecute(): RDD[InternalRow] = { + throw SparkException.internalError(s"$nodeName does not support doExecute") + } +} + /** * Responsible for writing files. */ @@ -67,9 +75,7 @@ case class WriteFilesExec( partitionColumns: Seq[Attribute], bucketSpec: Option[BucketSpec], options: Map[String, String], - staticPartitions: TablePartitionSpec) extends UnaryExecNode { - override def output: Seq[Attribute] = Seq.empty - + staticPartitions: TablePartitionSpec) extends WriteFilesExecBase { override protected def doExecuteWrite( writeFilesSpec: WriteFilesSpec): RDD[WriterCommitMessage] = { val rdd = child.execute() @@ -105,10 +111,6 @@ case class WriteFilesExec( } } - override protected def doExecute(): RDD[InternalRow] = { - throw SparkException.internalError(s"$nodeName does not support doExecute") - } - override protected def stringArgs: Iterator[Any] = Iterator(child) override protected def withNewChildInternal(newChild: SparkPlan): WriteFilesExec = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala index cf7c536bdaecb..a8730c20dbcb5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.execution.datasources.csv import java.io.{FileNotFoundException, IOException} import java.nio.charset.{Charset, StandardCharsets} +import scala.util.control.NonFatal + import com.univocity.parsers.csv.CsvParser import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, Path} @@ -28,11 +30,14 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.spark.TaskContext import org.apache.spark.input.{PortableDataStream, StreamInputFormat} -import 
org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.PATH +import org.apache.spark.paths.SparkPath import org.apache.spark.rdd.{BinaryFileRDD, RDD} import org.apache.spark.sql.{Dataset, Encoders, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.csv.{CSVHeaderChecker, CSVInferSchema, CSVOptions, UnivocityParser} +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.SQLExecution import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.text.TextFileFormat @@ -202,13 +207,16 @@ object MultiLineCSVDataSource extends CSVDataSource with Logging { encoding = parsedOptions.charset) } catch { case e: FileNotFoundException if ignoreMissingFiles => - logWarning(s"Skipped missing file: ${lines.getPath()}", e) + logWarning(log"Skipped missing file: ${MDC(PATH, lines.getPath())}", e) Array.empty[Array[String]] case e: FileNotFoundException if !ignoreMissingFiles => throw e case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => - logWarning( - s"Skipped the rest of the content in the corrupted file: ${lines.getPath()}", e) + logWarning(log"Skipped the rest of the content in the corrupted file: " + + log"${MDC(PATH, lines.getPath())}", e) Array.empty[Array[String]] + case NonFatal(e) => + val path = SparkPath.fromPathString(lines.getPath()) + throw QueryExecutionErrors.cannotReadFilesError(e, path.urlEncoded) } }.take(1).headOption match { case Some(firstRow) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index 43db0c6eef114..481cc80fe5225 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -25,7 +25,7 @@ import org.apache.commons.io.FilenameUtils import org.apache.spark.SparkFiles import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap -import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.TimestampNTZType import org.apache.spark.util.Utils @@ -52,7 +52,14 @@ class JDBCOptions( */ val asProperties: Properties = { val properties = new Properties() - parameters.originalMap.foreach { case (k, v) => properties.setProperty(k, v) } + parameters.originalMap.foreach { case (k, v) => + // If an option value is `null`, throw a user-friendly error. Keys here cannot be null, as + // scala's implementation of Maps prohibits null keys. 
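A brief sketch of the user-facing effect of the null-value check that follows; the connection details are placeholders and an active SparkSession named spark is assumed:

  val opts = new java.util.HashMap[String, String]()
  opts.put("url", "jdbc:postgresql://localhost:5432/db")   // placeholder URL
  opts.put("dbtable", "t")
  opts.put("sessionInitStatement", null)   // previously surfaced as an NPE from java.util.Properties
  // With this change the read fails early with QueryCompilationErrors.nullDataSourceOption("sessionInitStatement").
  spark.read.format("jdbc").options(opts).load()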
+ if (v == null) { + throw QueryCompilationErrors.nullDataSourceOption(k) + } + properties.setProperty(k, v) + } properties } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 8c430e231e399..1b71dc9221f78 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -23,7 +23,8 @@ import scala.util.Using import scala.util.control.NonFatal import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.SQL_TEXT import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.expressions.filter.Predicate @@ -266,7 +267,7 @@ class JDBCRDD( options.sessionInitStatement match { case Some(sql) => val statement = conn.prepareStatement(sql) - logInfo(s"Executing sessionInitStatement: $sql") + logInfo(log"Executing sessionInitStatement: ${MDC(SQL_TEXT, sql)}") try { statement.setQueryTimeout(options.queryTimeout) statement.execute() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala index 4f19d3df40b3c..2c4158dfe1533 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala @@ -21,7 +21,8 @@ import scala.collection.mutable.ArrayBuffer import scala.math.BigDecimal.RoundingMode import org.apache.spark.Partition -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLAUSES, LOWER_BOUND, NEW_VALUE, NUM_PARTITIONS, OLD_VALUE, UPPER_BOUND} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession, SQLContext} import org.apache.spark.sql.catalyst.analysis._ @@ -114,12 +115,12 @@ private[sql] object JDBCRelation extends Logging { (upperBound - lowerBound) < 0) { partitioning.numPartitions } else { - logWarning("The number of partitions is reduced because the specified number of " + - "partitions is less than the difference between upper bound and lower bound. " + - s"Updated number of partitions: ${upperBound - lowerBound}; Input number of " + - s"partitions: ${partitioning.numPartitions}; " + - s"Lower bound: ${boundValueToString(lowerBound)}; " + - s"Upper bound: ${boundValueToString(upperBound)}.") + logWarning(log"The number of partitions is reduced because the specified number of " + + log"partitions is less than the difference between upper bound and lower bound. 
" + + log"Updated number of partitions: ${MDC(NEW_VALUE, upperBound - lowerBound)}; " + + log"Input number of partitions: ${MDC(OLD_VALUE, partitioning.numPartitions)}; " + + log"Lower bound: ${MDC(LOWER_BOUND, boundValueToString(lowerBound))}; " + + log"Upper bound: ${MDC(UPPER_BOUND, boundValueToString(upperBound))}.") upperBound - lowerBound } @@ -163,8 +164,9 @@ private[sql] object JDBCRelation extends Logging { i = i + 1 } val partitions = ans.toArray - logInfo(s"Number of partitions: $numPartitions, WHERE clauses of these partitions: " + - partitions.map(_.asInstanceOf[JDBCPartition].whereClause).mkString(", ")) + val clauses = partitions.map(_.asInstanceOf[JDBCPartition].whereClause).mkString(", ") + logInfo(log"Number of partitions: ${MDC(NUM_PARTITIONS, numPartitions)}, " + + log"WHERE clauses of these partitions: ${MDC(CLAUSES, clauses)}") partitions } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 53b0b8b5d29de..f7d2d61eab653 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -19,10 +19,9 @@ package org.apache.spark.sql.execution.datasources.jdbc import java.math.{BigDecimal => JBigDecimal} import java.nio.charset.StandardCharsets -import java.sql.{Connection, Date, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException, Timestamp} +import java.sql.{Connection, Date, JDBCType, PreparedStatement, ResultSet, ResultSetMetaData, SQLException, Time, Timestamp} import java.time.{Instant, LocalDate} import java.util -import java.util.concurrent.TimeUnit import scala.annotation.tailrec import scala.collection.mutable.ArrayBuffer @@ -32,7 +31,8 @@ import scala.util.control.NonFatal import org.apache.spark.{SparkThrowable, SparkUnsupportedOperationException, TaskContext} import org.apache.spark.executor.InputMetrics -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{DEFAULT_ISOLATION_LEVEL, ISOLATION_LEVEL} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} import org.apache.spark.sql.catalyst.analysis.{DecimalPrecision, Resolver} @@ -40,6 +40,7 @@ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.SpecificInternalRow import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils, GenericArrayData} +import org.apache.spark.sql.catalyst.util.DateTimeConstants.MICROS_PER_MILLIS import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.connector.catalog.{Identifier, TableChange} import org.apache.spark.sql.connector.catalog.index.{SupportsIndex, TableIndex} @@ -486,15 +487,8 @@ object JdbcUtils extends Logging with SQLConfHelper { // It stores the number of milliseconds after midnight, 00:00:00.000000 case TimestampType if metadata.contains("logical_time_type") => (rs: ResultSet, row: InternalRow, pos: Int) => { - val rawTime = rs.getTime(pos + 1) - if (rawTime != null) { - val localTimeMicro = TimeUnit.NANOSECONDS.toMicros( - rawTime.toLocalTime().toNanoOfDay()) - val utcTimeMicro = toUTCTime(localTimeMicro, conf.sessionLocalTimeZone) - row.setLong(pos, utcTimeMicro) - } else 
{ - row.update(pos, null) - } + row.update(pos, nullSafeConvert[Time]( + rs.getTime(pos + 1), t => Math.multiplyExact(t.getTime, MICROS_PER_MILLIS))) } case TimestampType => @@ -506,6 +500,14 @@ object JdbcUtils extends Logging with SQLConfHelper { row.update(pos, null) } + case TimestampNTZType if metadata.contains("logical_time_type") => + (rs: ResultSet, row: InternalRow, pos: Int) => + val micros = nullSafeConvert[Time](rs.getTime(pos + 1), t => { + val time = dialect.convertJavaTimestampToTimestampNTZ(new Timestamp(t.getTime)) + localDateTimeToMicros(time) + }) + row.update(pos, micros) + case TimestampNTZType => (rs: ResultSet, row: InternalRow, pos: Int) => val t = rs.getTimestamp(pos + 1) @@ -771,11 +773,13 @@ object JdbcUtils extends Logging with SQLConfHelper { // Finally update to actually requested level if possible finalIsolationLevel = isolationLevel } else { - logWarning(s"Requested isolation level $isolationLevel is not supported; " + - s"falling back to default isolation level $defaultIsolation") + logWarning(log"Requested isolation level ${MDC(ISOLATION_LEVEL, isolationLevel)} " + + log"is not supported; falling back to default isolation level " + + log"${MDC(DEFAULT_ISOLATION_LEVEL, defaultIsolation)}") } } else { - logWarning(s"Requested isolation level $isolationLevel, but transactions are unsupported") + logWarning(log"Requested isolation level ${MDC(ISOLATION_LEVEL, isolationLevel)}, " + + log"but transactions are unsupported") } } catch { case NonFatal(e) => logWarning("Exception while detecting transaction support", e) @@ -875,16 +879,15 @@ object JdbcUtils extends Logging with SQLConfHelper { * Compute the schema string for this RDD. */ def schemaString( + dialect: JdbcDialect, schema: StructType, caseSensitive: Boolean, - url: String, createTableColumnTypes: Option[String] = None): String = { val sb = new StringBuilder() - val dialect = JdbcDialects.get(url) val userSpecifiedColTypesMap = createTableColumnTypes - .map(parseUserSpecifiedCreateTableColumnTypes(schema, caseSensitive, _)) + .map(parseUserSpecifiedCreateTableColumnTypes(dialect, schema, caseSensitive, _)) .getOrElse(Map.empty[String, String]) - schema.fields.foreach { field => + schema.foreach { field => val name = dialect.quoteIdentifier(field.name) val typ = userSpecifiedColTypesMap .getOrElse(field.name, getJdbcType(field.dataType, dialect).databaseTypeDefinition) @@ -900,6 +903,7 @@ object JdbcUtils extends Logging with SQLConfHelper { * use in-place of the default data type. 
*/ private def parseUserSpecifiedCreateTableColumnTypes( + dialect: JdbcDialect, schema: StructType, caseSensitive: Boolean, createTableColumnTypes: String): Map[String, String] = { @@ -916,7 +920,9 @@ object JdbcUtils extends Logging with SQLConfHelper { } } - val userSchemaMap = userSchema.fields.map(f => f.name -> f.dataType.catalogString).toMap + val userSchemaMap = userSchema + .map(f => f.name -> getJdbcType(f.dataType, dialect).databaseTypeDefinition) + .toMap if (caseSensitive) userSchemaMap else CaseInsensitiveMap(userSchemaMap) } @@ -985,7 +991,7 @@ object JdbcUtils extends Logging with SQLConfHelper { val statement = conn.createStatement val dialect = JdbcDialects.get(options.url) val strSchema = schemaString( - schema, caseSensitive, options.url, options.createTableColumnTypes) + dialect, schema, caseSensitive, options.createTableColumnTypes) try { statement.setQueryTimeout(options.queryTimeout) dialect.createTable(statement, tableName, strSchema, options) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala index 7fb6e98fb0468..6174c017f6047 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonFileFormat.scala @@ -55,8 +55,10 @@ class JsonFileFormat extends TextBasedFileFormat with DataSourceRegister { options, sparkSession.sessionState.conf.sessionLocalTimeZone, sparkSession.sessionState.conf.columnNameOfCorruptRecord) - JsonDataSource(parsedOptions).inferSchema( - sparkSession, files, parsedOptions) + parsedOptions.singleVariantColumn match { + case Some(columnName) => Some(StructType(Array(StructField(columnName, VariantType)))) + case None => JsonDataSource(parsedOptions).inferSchema(sparkSession, files, parsedOptions) + } } override def prepareWrite( @@ -134,7 +136,7 @@ class JsonFileFormat extends TextBasedFileFormat with DataSourceRegister { override def equals(other: Any): Boolean = other.isInstanceOf[JsonFileFormat] override def supportDataType(dataType: DataType): Boolean = dataType match { - case _: VariantType => false + case _: VariantType => true case _: AtomicType => true diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonOutputWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonOutputWriter.scala index 55602ce2ed9b4..5727c502a7097 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonOutputWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JsonOutputWriter.scala @@ -21,7 +21,8 @@ import java.nio.charset.{Charset, StandardCharsets} import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.TaskAttemptContext -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{ENCODING, PATH} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.json.{JacksonGenerator, JSONOptions, JSONOptionsInRead} import org.apache.spark.sql.execution.datasources.{CodecStreams, OutputWriter} @@ -40,8 +41,9 @@ class JsonOutputWriter( } if (JSONOptionsInRead.denyList.contains(encoding)) { - logWarning(s"The JSON file ($path) was written in the encoding ${encoding.displayName()}" + - " which can be read back by Spark only if multiLine is enabled.") 
+ logWarning(log"The JSON file (${MDC(PATH, path)}) was written in the encoding " + + log"${MDC(ENCODING, encoding.displayName())} which can be read back by Spark only " + + log"if multiLine is enabled.") } private val writer = CodecStreams.createOutputStreamWriter(context, new Path(path), encoding) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala index 24943b37d0590..50c28c783b4cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -31,7 +31,8 @@ import org.apache.orc.{BooleanColumnStatistics, ColumnStatistics, DateColumnStat import org.apache.spark.{SPARK_VERSION_SHORT, SparkException} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.sql.{SPARK_VERSION_METADATA_KEY, SparkSession} import org.apache.spark.sql.catalyst.{FileSourceOptions, InternalRow} import org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution @@ -87,7 +88,7 @@ object OrcUtils extends Logging { } catch { case e: org.apache.orc.FileFormatException => if (ignoreCorruptFiles) { - logWarning(s"Skipped the footer in the corrupted file: $file", e) + logWarning(log"Skipped the footer in the corrupted file: ${MDC(PATH, file)}", e) None } else { throw QueryExecutionErrors.cannotReadFooterForFileError(file, e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index df367766501d4..e5fbf8be1f0c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -32,7 +32,8 @@ import org.apache.parquet.format.converter.ParquetMetadataConverter.SKIP_ROW_GRO import org.apache.parquet.hadoop._ import org.apache.spark.TaskContext -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{PATH, SCHEMA} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ @@ -409,8 +410,8 @@ object ParquetFileFormat extends Logging { } .recover { case cause: Throwable => logWarning( - s"""Failed to parse serialized Spark schema in Parquet key-value metadata: - |\t$serializedSchema + log"""Failed to parse serialized Spark schema in Parquet key-value metadata: + |\t${MDC(SCHEMA, serializedSchema)} """.stripMargin, cause) } @@ -450,7 +451,7 @@ object ParquetFileFormat extends Logging { conf, currentFile, SKIP_ROW_GROUPS))) } catch { case e: RuntimeException => if (ignoreCorruptFiles) { - logWarning(s"Skipped the footer in the corrupted file: $currentFile", e) + logWarning(log"Skipped the footer in the corrupted file: ${MDC(PATH, currentFile)}", e) None } else { throw QueryExecutionErrors.cannotReadFooterForFileError(currentFile.getPath, e) @@ -526,8 +527,8 @@ object ParquetFileFormat extends Logging { }.recoverWith { case cause: Throwable => logWarning( - "Failed to parse and ignored serialized Spark schema in " + - s"Parquet key-value 
metadata:\n\t$schemaString", cause) + log"Failed to parse and ignored serialized Spark schema in " + + log"Parquet key-value metadata:\n\t${MDC(SCHEMA, schemaString)}", cause) Failure(cause) }.toOption } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala index 5020bf7333dea..3e111252bc6fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala @@ -33,7 +33,8 @@ import org.apache.parquet.schema.{PrimitiveType, Types} import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName import org.apache.spark.{SparkException, SparkUnsupportedOperationException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, CONFIG} import org.apache.spark.sql.Row import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow @@ -433,10 +434,11 @@ object ParquetUtils extends Logging { classOf[OutputCommitter]) if (conf.get(SQLConf.PARQUET_OUTPUT_COMMITTER_CLASS.key) == null) { - logInfo("Using default output committer for Parquet: " + - classOf[ParquetOutputCommitter].getCanonicalName) + logInfo(log"Using default output committer for Parquet: " + + log"${MDC(CLASS_NAME, classOf[ParquetOutputCommitter].getCanonicalName)}") } else { - logInfo("Using user defined output committer for Parquet: " + committerClass.getCanonicalName) + logInfo(log"Using user defined output committer for Parquet: " + + log"${MDC(CLASS_NAME, committerClass.getCanonicalName)}") } conf.setClass( @@ -485,9 +487,9 @@ object ParquetUtils extends Logging { if (ParquetOutputFormat.getJobSummaryLevel(conf) != JobSummaryLevel.NONE && !classOf[ParquetOutputCommitter].isAssignableFrom(committerClass)) { // output summary is requested, but the class is not a Parquet Committer - logWarning(s"Committer $committerClass is not a ParquetOutputCommitter and cannot" + - s" create job summaries. " + - s"Set Parquet option ${ParquetOutputFormat.JOB_SUMMARY_LEVEL} to NONE.") + logWarning(log"Committer ${MDC(CLASS_NAME, committerClass)} is not a " + + log"ParquetOutputCommitter and cannot create job summaries. 
Set Parquet option " + + log"${MDC(CONFIG, ParquetOutputFormat.JOB_SUMMARY_LEVEL)} to NONE.") } new OutputWriterFactory { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index fbb2ecb70d395..2f39a1962d2c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -30,11 +30,12 @@ import org.apache.spark.sql.catalyst.util.TypeUtils._ import org.apache.spark.sql.connector.expressions.{FieldReference, RewritableTransform} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.DDLUtils +import org.apache.spark.sql.execution.command.ViewHelper.generateViewProperties import org.apache.spark.sql.execution.datasources.{CreateTable => CreateTableV1} import org.apache.spark.sql.execution.datasources.v2.FileDataSourceV2 import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.InsertableRelation -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{StructField, StructType} import org.apache.spark.sql.util.PartitioningUtils.normalizePartitionSpec import org.apache.spark.sql.util.SchemaUtils import org.apache.spark.util.ArrayImplicits._ @@ -620,3 +621,65 @@ object CollationCheck extends (LogicalPlan => Unit) { private def isCollationExpression(expression: Expression): Boolean = expression.isInstanceOf[Collation] || expression.isInstanceOf[Collate] } + + +/** + * This rule checks for references to views WITH SCHEMA [TYPE] EVOLUTION and synchronizes the + * catalog if evolution was detected. + * It does so by walking the resolved plan looking for View operators for persisted views. 
+ */ +object ViewSyncSchemaToMetaStore extends (LogicalPlan => Unit) { + def apply(plan: LogicalPlan): Unit = { + plan.foreach { + case View(metaData, false, viewQuery) + if (metaData.viewSchemaMode == SchemaTypeEvolution || + metaData.viewSchemaMode == SchemaEvolution) => + val viewSchemaMode = metaData.viewSchemaMode + val viewFields = metaData.schema.fields + val viewQueryFields = viewQuery.schema.fields + val session = SparkSession.getActiveSession.get + val redoSignature = + viewSchemaMode == SchemaEvolution && viewFields.length != viewQueryFields.length + val fieldNames = viewQuery.schema.fieldNames + + val redo = redoSignature || viewFields.zipWithIndex.exists { case (field, index) => + val planField = viewQueryFields(index) + (field.dataType != planField.dataType || + field.nullable != planField.nullable || + (viewSchemaMode == SchemaEvolution && ( + field.getComment() != planField.getComment() || + field.name != planField.name))) + } + + if (redo) { + val newProperties = if (viewSchemaMode == SchemaEvolution) { + generateViewProperties( + metaData.properties, + session, + fieldNames, + fieldNames, + metaData.viewSchemaMode) + } else { + metaData.properties + } + val newSchema = if (viewSchemaMode == SchemaTypeEvolution) { + val newFields = viewQuery.schema.map { + case StructField(name, dataType, nullable, _) => + StructField(name, dataType, nullable, + viewFields.find(_.name == name).get.metadata) + } + StructType(newFields) + } else { + viewQuery.schema + } + SchemaUtils.checkColumnNameDuplication(fieldNames.toImmutableArraySeq, + session.sessionState.conf.resolver) + val updatedViewMeta = metaData.copy( + properties = newProperties, + schema = newSchema) + session.sessionState.catalog.alterTable(updatedViewMeta) + } + case _ => // OK + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala index caa4e3ed386b3..5de51e55816e7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextFileFormat.scala @@ -49,6 +49,12 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister { } } + private def verifyReadSchema(schema: StructType): Unit = { + if (schema.size > 1) { + throw QueryCompilationErrors.textDataSourceWithMultiColumnsError(schema) + } + } + override def isSplitable( sparkSession: SparkSession, options: Map[String, String], @@ -98,9 +104,7 @@ class TextFileFormat extends TextBasedFileFormat with DataSourceRegister { filters: Seq[Filter], options: Map[String, String], hadoopConf: Configuration): PartitionedFile => Iterator[InternalRow] = { - assert( - requiredSchema.length <= 1, - "Text data source only produces a single data column named \"value\".") + verifyReadSchema(requiredSchema) val textOptions = new TextOptions(options) val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala index 28241fb0a67ae..56c44a1256815 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CacheTableExec.scala @@ -19,9 +19,11 @@ 
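A note on the logging changes that recur throughout the hunks above and below: plain s-interpolated messages are rewritten with the log interpolator, and each dynamic value is wrapped in MDC with a declared LogKey so structured appenders can emit it as a named field while the rendered message text stays the same. A hedged illustration of the pattern, assuming only the internal Logging, MDC and LogKeys API that these hunks already import (ExampleComponent and warnMissing are illustrative names):

    import org.apache.spark.internal.{Logging, MDC}
    import org.apache.spark.internal.LogKeys.TABLE_NAME

    class ExampleComponent extends Logging {
      def warnMissing(table: String): Unit = {
        // Before: logWarning(s"Table $table not found.")
        // After: the value is tagged with a LogKey, so it can be surfaced as a
        // separate field in structured log output.
        logWarning(log"Table ${MDC(TABLE_NAME, table)} not found.")
      }
    }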
package org.apache.spark.sql.execution.datasources.v2 import java.util.Locale -import org.apache.spark.sql.{DataFrame, Dataset} +import org.apache.spark.internal.LogKeys.OPTIONS +import org.apache.spark.internal.MDC +import org.apache.spark.sql.Dataset import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.LocalTempView +import org.apache.spark.sql.catalyst.analysis.{LocalTempView, UnresolvedRelation} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap @@ -32,7 +34,6 @@ import org.apache.spark.storage.StorageLevel trait BaseCacheTableExec extends LeafV2CommandExec { def relationName: String def planToCache: LogicalPlan - def dataFrameForCachedPlan: DataFrame def isLazy: Boolean def options: Map[String, String] @@ -44,18 +45,15 @@ trait BaseCacheTableExec extends LeafV2CommandExec { val withoutStorageLevel = options .filter { case (k, _) => k.toLowerCase(Locale.ROOT) != storageLevelKey } if (withoutStorageLevel.nonEmpty) { - logWarning(s"Invalid options: ${withoutStorageLevel.mkString(", ")}") + logWarning(log"Invalid options: ${MDC(OPTIONS, withoutStorageLevel.mkString(", "))}") } - session.sharedState.cacheManager.cacheQuery( - session, - planToCache, - Some(relationName), - storageLevel) + val df = Dataset.ofRows(session, planToCache) + session.sharedState.cacheManager.cacheQuery(df, Some(relationName), storageLevel) if (!isLazy) { // Performs eager caching. - dataFrameForCachedPlan.count() + df.count() } Seq.empty @@ -72,10 +70,6 @@ case class CacheTableExec( override lazy val relationName: String = multipartIdentifier.quoted override lazy val planToCache: LogicalPlan = relation - - override lazy val dataFrameForCachedPlan: DataFrame = { - Dataset.ofRows(session, planToCache) - } } case class CacheTableAsSelectExec( @@ -87,7 +81,10 @@ case class CacheTableAsSelectExec( referredTempFunctions: Seq[String]) extends BaseCacheTableExec { override lazy val relationName: String = tempViewName - override lazy val planToCache: LogicalPlan = { + override def planToCache: LogicalPlan = UnresolvedRelation(Seq(tempViewName)) + + override def run(): Seq[InternalRow] = { + // CACHE TABLE AS TABLE creates a temp view and caches the temp view. 
CreateViewCommand( name = TableIdentifier(tempViewName), userSpecifiedColumns = Nil, @@ -101,12 +98,7 @@ case class CacheTableAsSelectExec( isAnalyzed = true, referredTempFunctions = referredTempFunctions ).run(session) - - dataFrameForCachedPlan.logicalPlan - } - - override lazy val dataFrameForCachedPlan: DataFrame = { - session.table(tempViewName) + super.run() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateIndexExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateIndexExec.scala index 63c8dc6517b9e..60d44101da3b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateIndexExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateIndexExec.scala @@ -21,6 +21,8 @@ import java.util import scala.jdk.CollectionConverters._ +import org.apache.spark.internal.LogKeys.{INDEX_NAME, TABLE_NAME} +import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.IndexAlreadyExistsException import org.apache.spark.sql.catalyst.expressions.Attribute @@ -55,7 +57,8 @@ case class CreateIndexExec( indexName, columns.map(_._1).toArray, colProperties, propertiesWithIndexType.asJava) } catch { case _: IndexAlreadyExistsException if ignoreIfExists => - logWarning(s"Index $indexName already exists in table ${table.name}. Ignoring.") + logWarning(log"Index ${MDC(INDEX_NAME, indexName)} already exists in " + + log"table ${MDC(TABLE_NAME, table.name)}. Ignoring.") } Seq.empty } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala index cb51b7f75f33a..76ba53ef99a00 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateNamespaceExec.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.jdk.CollectionConverters.MapHasAsJava +import org.apache.spark.internal.LogKeys.NAMESPACE +import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NamespaceAlreadyExistsException import org.apache.spark.sql.catalyst.expressions.Attribute @@ -47,7 +49,8 @@ case class CreateNamespaceExec( catalog.createNamespace(ns, (properties ++ ownership).asJava) } catch { case _: NamespaceAlreadyExistsException if ifNotExists => - logWarning(s"Namespace ${namespace.quoted} was created concurrently. Ignoring.") + logWarning(log"Namespace ${MDC(NAMESPACE, namespace.quoted)} was created concurrently. 
" + + log"Ignoring.") } } else if (!ifNotExists) { throw QueryCompilationErrors.namespaceAlreadyExistsError(ns) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateTableExec.scala index 5f3ed7a5bc76c..f55fbafe11ddb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/CreateTableExec.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.jdk.CollectionConverters._ +import org.apache.spark.internal.LogKeys.TABLE_NAME +import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.expressions.Attribute @@ -44,7 +46,8 @@ case class CreateTableExec( catalog.createTable(identifier, columns, partitioning.toArray, tableProperties.asJava) } catch { case _: TableAlreadyExistsException if ignoreIfExists => - logWarning(s"Table ${identifier.quoted} was created concurrently. Ignoring.") + logWarning( + log"Table ${MDC(TABLE_NAME, identifier.quoted)} was created concurrently. Ignoring.") } } else if (!ignoreIfExists) { throw QueryCompilationErrors.tableAlreadyExistsError(identifier) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 828d737f93fa9..7a668b75c3c73 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -22,7 +22,8 @@ import scala.collection.mutable import org.apache.commons.lang3.StringUtils import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.EXPR import org.apache.spark.sql.{SparkSession, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedIdentifier, ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} import org.apache.spark.sql.catalyst.catalog.CatalogUtils @@ -82,7 +83,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat // given table, the cache's storage level is returned. 
private def invalidateTableCache(r: ResolvedTable)(): Option[StorageLevel] = { val v2Relation = DataSourceV2Relation.create(r.table, Some(r.catalog), Some(r.identifier)) - val cache = session.sharedState.cacheManager.lookupCachedData(v2Relation) + val cache = session.sharedState.cacheManager.lookupCachedData(session, v2Relation) session.sharedState.cacheManager.uncacheQuery(session, v2Relation, cascade = true) if (cache.isDefined) { val cacheLevel = cache.get.cachedRepresentation.cacheBuilder.storageLevel @@ -650,7 +651,7 @@ private[sql] object DataSourceV2Strategy extends Logging { Some(new Predicate("IN", FieldReference(name) +: literals)) case other => - logWarning(s"Can't translate $other to source filter, unsupported expression") + logWarning(log"Can't translate ${MDC(EXPR, other)} to source filter, unsupported expression") None } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIndexExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIndexExec.scala index 085f961193771..4fe6c3cd4a0e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIndexExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DropIndexExec.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.internal.LogKeys.INDEX_NAME +import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchIndexException import org.apache.spark.sql.catalyst.expressions.Attribute @@ -34,7 +36,7 @@ case class DropIndexExec( table.dropIndex(indexName) } catch { case _: NoSuchIndexException if ignoreIfNotExists => - logWarning(s"Index $indexName does not exist. Ignoring.") + logWarning(log"Index ${MDC(INDEX_NAME, indexName)} does not exist. Ignoring.") } Seq.empty } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileBatchWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileBatchWrite.scala index 2f443a0bb1fad..b9f058b55ed02 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileBatchWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileBatchWrite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.hadoop.mapreduce.Job -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql.connector.write.{BatchWrite, DataWriterFactory, PhysicalWriteInfo, WriterCommitMessage} import org.apache.spark.sql.execution.datasources.{WriteJobDescription, WriteTaskResult} @@ -33,14 +33,15 @@ class FileBatchWrite( extends BatchWrite with Logging { override def commit(messages: Array[WriterCommitMessage]): Unit = { val results = messages.map(_.asInstanceOf[WriteTaskResult]) - logInfo(s"Start to commit write Job ${description.uuid}.") + logInfo(log"Start to commit write Job ${MDC(LogKeys.UUID, description.uuid)}.") val (_, duration) = Utils .timeTakenMs { committer.commitJob(job, results.map(_.commitMsg).toImmutableArraySeq) } - logInfo(s"Write Job ${description.uuid} committed. Elapsed time: $duration ms.") + logInfo(log"Write Job ${MDC(LogKeys.UUID, description.uuid)} committed. 
" + + log"Elapsed time: ${MDC(LogKeys.ELAPSED_TIME, duration)} ms.") processStats( description.statsTrackers, results.map(_.summary.stats).toImmutableArraySeq, duration) - logInfo(s"Finished processing stats for write job ${description.uuid}.") + logInfo(log"Finished processing stats for write job ${MDC(LogKeys.UUID, description.uuid)}.") } override def useCommitCoordinator(): Boolean = false diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala index c7783c4e9b29b..2679f14144569 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FilePartitionReader.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.datasources.v2 import java.io.{FileNotFoundException, IOException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CURRENT_FILE, PARTITIONED_FILE_READER} import org.apache.spark.rdd.InputFileBlockHolder import org.apache.spark.sql.catalyst.FileSourceOptions import org.apache.spark.sql.connector.read.PartitionReader @@ -38,7 +39,7 @@ class FilePartitionReader[T]( if (currentReader == null) { if (files.hasNext) { val file = files.next() - logInfo(s"Reading file $file") + logInfo(log"Reading file ${MDC(CURRENT_FILE, file)}") // Sets InputFileBlockHolder for the file block's information InputFileBlockHolder.set(file.urlEncodedPath, file.start, file.length) try { @@ -64,8 +65,8 @@ class FilePartitionReader[T]( currentReader != null && currentReader.next() } catch { case e @ (_: RuntimeException | _: IOException) if ignoreCorruptFiles => - logWarning( - s"Skipped the rest of the content in the corrupted file: $currentReader", e) + logWarning(log"Skipped the rest of the content in the corrupted file: " + + log"${MDC(PARTITIONED_FILE_READER, currentReader)}", e) false case e: Throwable => throw FileDataSourceV2.attachFilePath(currentReader.file.urlEncodedPath, e) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala index 61d61ee7af250..d890107277d6c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScan.scala @@ -21,7 +21,8 @@ import java.util.{Locale, OptionalLong} import org.apache.commons.lang3.StringUtils import org.apache.hadoop.fs.Path -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{PATH, REASON} import org.apache.spark.internal.config.IO_WARNING_LARGEFILETHRESHOLD import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{AttributeSet, Expression, ExpressionSet} @@ -164,8 +165,8 @@ trait FileScan extends Scan val path = splitFiles(0).toPath if (!isSplitable(path) && splitFiles(0).length > sparkSession.sparkContext.getConf.get(IO_WARNING_LARGEFILETHRESHOLD)) { - logWarning(s"Loading one large unsplittable file ${path.toString} with only one " + - s"partition, the reason is: ${getFileUnSplittableReason(path)}") + logWarning(log"Loading one large unsplittable file ${MDC(PATH, path.toString)} with only " + + log"one partition, the reason is: 
${MDC(REASON, getFileUnSplittableReason(path))}") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala index 7cd2779f86f95..447a36fe622c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala @@ -70,10 +70,9 @@ abstract class FileScanBuilder( } override def pushFilters(filters: Seq[Expression]): Seq[Expression] = { - val (filtersToPush, filtersToRemain) = filters.partition( - f => DataSourceUtils.shouldPushFilter(f, supportsCollationPushDown)) + val (deterministicFilters, nonDeterminsticFilters) = filters.partition(_.deterministic) val (partitionFilters, dataFilters) = - DataSourceUtils.getPartitionFiltersAndDataFilters(partitionSchema, filtersToPush) + DataSourceUtils.getPartitionFiltersAndDataFilters(partitionSchema, deterministicFilters) this.partitionFilters = partitionFilters this.dataFilters = dataFilters val translatedFilters = mutable.ArrayBuffer.empty[sources.Filter] @@ -84,7 +83,7 @@ abstract class FileScanBuilder( } } pushedDataFilters = pushDataFilters(translatedFilters.toArray) - dataFilters ++ filtersToRemain + dataFilters ++ nonDeterminsticFilters } override def pushedFilters: Array[Predicate] = pushedDataFilters.map(_.toV2) @@ -96,12 +95,6 @@ abstract class FileScanBuilder( */ protected def pushDataFilters(dataFilters: Array[Filter]): Array[Filter] = Array.empty[Filter] - /** - * Returns whether the file scan builder supports filter pushdown - * for non utf8 binary collated columns. - */ - protected def supportsCollationPushDown: Boolean = false - private def createRequiredNameSet(): Set[String] = requiredSchema.fields.map(PartitioningUtils.getColName(_, isCaseSensitive)).toSet diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala index 4b1a099d3bac9..f18424b4bcb86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactory.scala @@ -38,7 +38,7 @@ case class FileWriterFactory ( @transient private lazy val jobId = SparkHadoopWriterUtils.createJobID(jobTrackerID, 0) override def createWriter(partitionId: Int, realTaskId: Long): DataWriter[InternalRow] = { - val taskAttemptContext = createTaskAttemptContext(partitionId) + val taskAttemptContext = createTaskAttemptContext(partitionId, realTaskId.toInt & Int.MaxValue) committer.setupTask(taskAttemptContext) if (description.partitionColumns.isEmpty) { new SingleDirectoryDataWriter(description, taskAttemptContext, committer) @@ -47,9 +47,11 @@ case class FileWriterFactory ( } } - private def createTaskAttemptContext(partitionId: Int): TaskAttemptContextImpl = { + private def createTaskAttemptContext( + partitionId: Int, + realTaskId: Int): TaskAttemptContextImpl = { val taskId = new TaskID(jobId, TaskType.MAP, partitionId) - val taskAttemptId = new TaskAttemptID(taskId, 0) + val taskAttemptId = new TaskAttemptID(taskId, realTaskId) // Set up the configuration object val hadoopConf = description.serializableHadoopConf.value hadoopConf.set("mapreduce.job.id", jobId.toString) diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupBasedRowLevelOperationScanPlanning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupBasedRowLevelOperationScanPlanning.scala index 87f70eb696b66..8b8cdc06d398b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupBasedRowLevelOperationScanPlanning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupBasedRowLevelOperationScanPlanning.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, AttributeSet, Expression, ExpressionSet, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral import org.apache.spark.sql.catalyst.planning.{GroupBasedRowLevelOperation, PhysicalOperation} @@ -62,14 +63,16 @@ object GroupBasedRowLevelOperationScanPlanning extends Rule[LogicalPlan] with Pr val (scan, output) = PushDownUtils.pruneColumns(scanBuilder, relation, relation.output, Nil) + // scalastyle:off line.size.limit logInfo( - s""" - |Pushing operators to ${relation.name} - |Pushed filters: $pushedFiltersStr - |Filters evaluated on data source side: ${evaluatedFilters.mkString(", ")} - |Filters evaluated on Spark side: ${postScanFilters.mkString(", ")} - |Output: ${output.mkString(", ")} - """.stripMargin) + log""" + |Pushing operators to ${MDC(LogKeys.RELATION_NAME, relation.name)} + |Pushed filters: ${MDC(LogKeys.PUSHED_FILTERS, pushedFiltersStr)} + |Filters evaluated on data source side: ${MDC(LogKeys.EVALUATED_FILTERS, evaluatedFilters.mkString(", "))} + |Filters evaluated on Spark side: ${MDC(LogKeys.POST_SCAN_FILTERS, postScanFilters.mkString(", "))}} + |Output: ${MDC(LogKeys.RELATION_OUTPUT, output.mkString(", "))} + """.stripMargin) + // scalastyle:on line.size.limit rd transformDown { // simplify the join condition in MERGE operations by discarding already evaluated filters diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala index cb7c3efdbe482..cfab28bbd15ed 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioningAndOrdering.scala @@ -16,7 +16,8 @@ */ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.CLASS_NAME import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -58,8 +59,9 @@ object V2ScanPartitioningAndOrdering extends Rule[LogicalPlan] with SQLConfHelpe } case _: UnknownPartitioning => None case p => - logWarning(s"Spark ignores the partitioning ${p.getClass.getSimpleName}." + - " Please use KeyGroupedPartitioning for better performance") + logWarning( + log"Spark ignores the partitioning ${MDC(CLASS_NAME, p.getClass.getSimpleName)}. 
" + + log"Please use KeyGroupedPartitioning for better performance") None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index 8c262cf56e8b5..2b6fcd9d547f1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable +import org.apache.spark.internal.LogKeys.{AGGREGATE_FUNCTIONS, GROUP_BY_EXPRS, POST_SCAN_FILTERS, PUSHED_FILTERS, RELATION_NAME, RELATION_OUTPUT} +import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.expressions.{aggregate, Alias, And, Attribute, AttributeMap, AttributeReference, AttributeSet, Cast, Expression, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SortOrder, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.optimizer.CollapseProject @@ -86,11 +88,11 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { val postScanFilters = postScanFiltersWithoutSubquery ++ normalizedFiltersWithSubquery logInfo( - s""" - |Pushing operators to ${sHolder.relation.name} - |Pushed Filters: $pushedFiltersStr - |Post-Scan Filters: ${postScanFilters.mkString(",")} - """.stripMargin) + log""" + |Pushing operators to ${MDC(RELATION_NAME, sHolder.relation.name)} + |Pushed Filters: ${MDC(PUSHED_FILTERS, pushedFiltersStr)} + |Post-Scan Filters: ${MDC(POST_SCAN_FILTERS, postScanFilters.mkString(","))} + """.stripMargin) val filterCondition = postScanFilters.reduceLeftOption(And) filterCondition.map(Filter(_, sHolder)).getOrElse(sHolder) @@ -214,13 +216,13 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { holder.pushedAggOutputMap = AttributeMap(groupOutputMap ++ aggOutputMap) holder.output = newOutput logInfo( - s""" - |Pushing operators to ${holder.relation.name} - |Pushed Aggregate Functions: - | ${translatedAgg.aggregateExpressions().mkString(", ")} - |Pushed Group by: - | ${translatedAgg.groupByExpressions.mkString(", ")} - """.stripMargin) + log""" + |Pushing operators to ${MDC(RELATION_NAME, holder.relation.name)} + |Pushed Aggregate Functions: + | ${MDC(AGGREGATE_FUNCTIONS, translatedAgg.aggregateExpressions().mkString(", "))} + |Pushed Group by: + | ${MDC(GROUP_BY_EXPRS, translatedAgg.groupByExpressions.mkString(", "))} + """.stripMargin) if (canCompletePushDown) { val projectExpressions = finalResultExprs.map { expr => @@ -361,9 +363,9 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { sHolder.builder, sHolder.relation, normalizedProjects, normalizedFilters) logInfo( - s""" - |Output: ${output.mkString(", ")} - """.stripMargin) + log""" + |Output: ${MDC(RELATION_OUTPUT, output.mkString(", "))} + """.stripMargin) val wrappedScan = getWrappedScan(scan, sHolder) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index c65c15fb0ef28..5632595de7cf8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.jdk.CollectionConverters._ import org.apache.spark.{SparkEnv, SparkException, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.NoSuchTableException @@ -376,8 +376,9 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode { val messages = new Array[WriterCommitMessage](rdd.partitions.length) val totalNumRowsAccumulator = new LongAccumulator() - logInfo(s"Start processing data source write support: $batchWrite. " + - s"The input RDD has ${messages.length} partitions.") + logInfo(log"Start processing data source write support: " + + log"${MDC(LogKeys.BATCH_WRITE, batchWrite)}. The input RDD has " + + log"${MDC(LogKeys.COUNT, messages.length)}} partitions.") // Avoid object not serializable issue. val writeMetrics: Map[String, SQLMetric] = customMetrics @@ -396,22 +397,24 @@ trait V2TableWriteExec extends V2CommandExec with UnaryExecNode { } ) - logInfo(s"Data source write support $batchWrite is committing.") + logInfo(log"Data source write support ${MDC(LogKeys.BATCH_WRITE, batchWrite)} is committing.") batchWrite.commit(messages) - logInfo(s"Data source write support $batchWrite committed.") + logInfo(log"Data source write support ${MDC(LogKeys.BATCH_WRITE, batchWrite)} committed.") commitProgress = Some(StreamWriterCommitProgress(totalNumRowsAccumulator.value)) } catch { case cause: Throwable => - logError(s"Data source write support $batchWrite is aborting.") + logError( + log"Data source write support ${MDC(LogKeys.BATCH_WRITE, batchWrite)} is aborting.") try { batchWrite.abort(messages) } catch { case t: Throwable => - logError(s"Data source write support $batchWrite failed to abort.") + logError(log"Data source write support ${MDC(LogKeys.BATCH_WRITE, batchWrite)} " + + log"failed to abort.") cause.addSuppressed(t) throw QueryExecutionErrors.writingJobFailedError(cause) } - logError(s"Data source write support $batchWrite aborted.") + logError(log"Data source write support ${MDC(LogKeys.BATCH_WRITE, batchWrite)} aborted.") throw cause } @@ -449,34 +452,45 @@ trait WritingSparkTask[W <: DataWriter[InternalRow]] extends Logging with Serial val coordinator = SparkEnv.get.outputCommitCoordinator val commitAuthorized = coordinator.canCommit(stageId, stageAttempt, partId, attemptId) if (commitAuthorized) { - logInfo(s"Commit authorized for partition $partId (task $taskId, attempt $attemptId, " + - s"stage $stageId.$stageAttempt)") + logInfo(log"Commit authorized for partition ${MDC(LogKeys.PARTITION_ID, partId)} " + + log"(task ${MDC(LogKeys.TASK_ID, taskId)}, " + + log"attempt ${MDC(LogKeys.TASK_ATTEMPT_ID, attemptId)}, " + + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}." 
+ + log"${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)})") + dataWriter.commit() } else { val commitDeniedException = QueryExecutionErrors.commitDeniedError( partId, taskId, attemptId, stageId, stageAttempt) - logInfo(commitDeniedException.getMessage) + logInfo(log"${MDC(LogKeys.ERROR, commitDeniedException.getMessage)}") // throwing CommitDeniedException will trigger the catch block for abort throw commitDeniedException } } else { - logInfo(s"Writer for partition ${context.partitionId()} is committing.") + logInfo(log"Writer for partition ${MDC(LogKeys.PARTITION_ID, context.partitionId())} " + + log"is committing.") dataWriter.commit() } - logInfo(s"Committed partition $partId (task $taskId, attempt $attemptId, " + - s"stage $stageId.$stageAttempt)") + logInfo(log"Committed partition ${MDC(LogKeys.PARTITION_ID, partId)} " + + log"(task ${MDC(LogKeys.TASK_ID, taskId)}, " + + log"attempt ${MDC(LogKeys.TASK_ATTEMPT_ID, attemptId)}, " + + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}.${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)})") DataWritingSparkTaskResult(iterWithMetrics.count, msg) })(catchBlock = { // If there is an error, abort this writer - logError(s"Aborting commit for partition $partId (task $taskId, attempt $attemptId, " + - s"stage $stageId.$stageAttempt)") + logError(log"Aborting commit for partition ${MDC(LogKeys.PARTITION_ID, partId)} " + + log"(task ${MDC(LogKeys.TASK_ID, taskId)}, " + + log"attempt ${MDC(LogKeys.TASK_ATTEMPT_ID, attemptId)}, " + + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}.${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)})") dataWriter.abort() - logError(s"Aborted commit for partition $partId (task $taskId, attempt $attemptId, " + - s"stage $stageId.$stageAttempt)") + logError(log"Aborted commit for partition ${MDC(LogKeys.PARTITION_ID, partId)} " + + log"(task ${MDC(LogKeys.TASK_ID, taskId)}, " + + log"attempt ${MDC(LogKeys.TASK_ATTEMPT_ID, attemptId)}, " + + log"stage ${MDC(LogKeys.STAGE_ID, stageId)}.${MDC(LogKeys.STAGE_ATTEMPT, stageAttempt)})") }, finallyBlock = { dataWriter.close() }) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala index dc834893db210..230f30fb1d069 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala @@ -89,7 +89,7 @@ case class JDBCScanBuilder( override def pushAggregation(aggregation: Aggregation): Boolean = { if (!jdbcOptions.pushDownAggregate) return false - val compiledAggs = aggregation.aggregateExpressions.flatMap(dialect.compileAggregate) + val compiledAggs = aggregation.aggregateExpressions.flatMap(dialect.compileExpression) if (compiledAggs.length != aggregation.aggregateExpressions.length) return false val compiledGroupBys = aggregation.groupByExpressions.flatMap(dialect.compileExpression) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala index dbd8ee5981daa..e7a3fe0f8aa7b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalog.scala @@ -131,13 +131,16 @@ class JDBCTableCatalog extends TableCatalog 
checkNamespace(ident.namespace()) val optionsWithTableName = new JDBCOptions( options.parameters + (JDBCOptions.JDBC_TABLE_NAME -> getTableName(ident))) - try { + JdbcUtils.classifyException( + errorClass = "FAILED_JDBC.LOAD_TABLE", + messageParameters = Map( + "url" -> options.getRedactUrl(), + "tableName" -> toSQLId(ident)), + dialect, + description = s"Failed to load table: $ident" + ) { val schema = JDBCRDD.resolveTable(optionsWithTableName) JDBCTable(ident, schema, optionsWithTableName) - } catch { - case e: SQLException => - logWarning("Failed to load table", e) - throw QueryCompilationErrors.noSuchTableError(ident) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonMicroBatchStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonMicroBatchStream.scala index 71e6c29bc299b..0fc1df4cd1e9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonMicroBatchStream.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonMicroBatchStream.scala @@ -16,12 +16,15 @@ */ package org.apache.spark.sql.execution.datasources.v2.python +import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory} -import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset} +import org.apache.spark.sql.connector.read.streaming.{AcceptsLatestSeenOffset, MicroBatchStream, Offset} +import org.apache.spark.sql.execution.datasources.v2.python.PythonMicroBatchStream.nextStreamId import org.apache.spark.sql.execution.python.PythonStreamingSourceRunner import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap +import org.apache.spark.storage.{PythonStreamBlockId, StorageLevel} case class PythonStreamingSourceOffset(json: String) extends Offset @@ -30,11 +33,22 @@ class PythonMicroBatchStream( shortName: String, outputSchema: StructType, options: CaseInsensitiveStringMap - ) extends MicroBatchStream with Logging { + ) + extends MicroBatchStream + with Logging + with AcceptsLatestSeenOffset { private def createDataSourceFunc = ds.source.createPythonFunction( ds.getOrCreateDataSourceInPython(shortName, options, Some(outputSchema)).dataSource) + private val streamId = nextStreamId + private var nextBlockId = 0L + + // planInputPartitions() may be called multiple times for the current microbatch. + // Cache the result of planInputPartitions() because it may involve sending data + // from Python to the JVM.
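The comment above motivates the cachedInputPartition field declared just below: planning can run more than once for the same microbatch, and for the simple (non-partitioned) reader each call would otherwise re-transfer the prefetched rows from Python. A simplified sketch of that memoization by (start, end) offset pair (OffsetRangeCache and getOrCompute are illustrative names, not Spark's):

    // Memoize the last planning result keyed by its (start, end) offset pair so that
    // repeated calls within one microbatch reuse it instead of recomputing it.
    class OffsetRangeCache[T] {
      private var cached: Option[(String, String, T)] = None

      def getOrCompute(start: String, end: String)(compute: => T): T = cached match {
        case Some((s, e, value)) if s == start && e == end => value
        case _ =>
          val value = compute
          cached = Some((start, end, value))
          value
      }
    }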
+ private var cachedInputPartition: Option[(String, String, PythonStreamingInputPartition)] = None + private val runner: PythonStreamingSourceRunner = new PythonStreamingSourceRunner(createDataSourceFunc, outputSchema) runner.init() @@ -44,9 +58,35 @@ class PythonMicroBatchStream( override def latestOffset(): Offset = PythonStreamingSourceOffset(runner.latestOffset()) override def planInputPartitions(start: Offset, end: Offset): Array[InputPartition] = { - runner.partitions(start.asInstanceOf[PythonStreamingSourceOffset].json, - end.asInstanceOf[PythonStreamingSourceOffset].json) - .zipWithIndex.map(p => PythonInputPartition(p._2, p._1)) + val startOffsetJson = start.asInstanceOf[PythonStreamingSourceOffset].json + val endOffsetJson = end.asInstanceOf[PythonStreamingSourceOffset].json + + if (cachedInputPartition.exists(p => p._1 == startOffsetJson && p._2 == endOffsetJson)) { + return Array(cachedInputPartition.get._3) + } + + val (partitions, rows) = runner.partitions(startOffsetJson, endOffsetJson) + if (rows.isDefined) { + // Only SimpleStreamReader without partitioning prefetch data. + assert(partitions.length == 1) + nextBlockId = nextBlockId + 1 + val blockId = PythonStreamBlockId(streamId, nextBlockId) + SparkEnv.get.blockManager.putIterator( + blockId, rows.get, StorageLevel.MEMORY_AND_DISK_SER, true) + val partition = PythonStreamingInputPartition(0, partitions.head, Some(blockId)) + cachedInputPartition.foreach(_._3.dropCache()) + cachedInputPartition = Some((startOffsetJson, endOffsetJson, partition)) + Array(partition) + } else { + partitions.zipWithIndex + .map(p => PythonStreamingInputPartition(p._2, p._1, None)) + } + } + + override def setLatestSeenOffset(offset: Offset): Unit = { + // Call planPartition on python with an empty offset range to initialize the start offset + // for the prefetching of simple reader. 
+ runner.partitions(offset.json(), offset.json()) } private lazy val readInfo: PythonDataSourceReadInfo = { @@ -57,7 +97,7 @@ class PythonMicroBatchStream( } override def createReaderFactory(): PartitionReaderFactory = { - new PythonPartitionReaderFactory( + new PythonStreamingPartitionReaderFactory( ds.source, readInfo.func, outputSchema, None) } @@ -66,9 +106,18 @@ class PythonMicroBatchStream( } override def stop(): Unit = { + cachedInputPartition.foreach(_._3.dropCache()) runner.stop() } override def deserializeOffset(json: String): Offset = PythonStreamingSourceOffset(json) } +object PythonMicroBatchStream { + private var currentId = 0 + def nextStreamId: Int = synchronized { + currentId = currentId + 1 + currentId + } +} + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonScan.scala index 8fefc8b144a1f..8ebb91c01fc5c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonScan.scala @@ -41,6 +41,9 @@ class PythonScan( override def supportedCustomMetrics(): Array[CustomMetric] = ds.source.createPythonMetrics() + + override def columnarSupportMode(): Scan.ColumnarSupportMode = + Scan.ColumnarSupportMode.UNSUPPORTED } class PythonBatch( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingPartitionReaderFactory.scala new file mode 100644 index 0000000000000..7d80cc2728102 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingPartitionReaderFactory.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package org.apache.spark.sql.execution.datasources.v2.python + +import org.apache.spark.SparkEnv +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.connector.metric.CustomTaskMetric +import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} +import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.types.StructType +import org.apache.spark.storage.PythonStreamBlockId + + +case class PythonStreamingInputPartition( + index: Int, + pickedPartition: Array[Byte], + blockId: Option[PythonStreamBlockId]) extends InputPartition { + def dropCache(): Unit = { + blockId.foreach(SparkEnv.get.blockManager.master.removeBlock(_)) + } +} + +class PythonStreamingPartitionReaderFactory( + source: UserDefinedPythonDataSource, + pickledReadFunc: Array[Byte], + outputSchema: StructType, + jobArtifactUUID: Option[String]) + extends PartitionReaderFactory with Logging { + + override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { + val part = partition.asInstanceOf[PythonStreamingInputPartition] + + // Maybe read from cached block prefetched by SimpleStreamReader + lazy val cachedBlock = if (part.blockId.isDefined) { + val block = SparkEnv.get.blockManager.get[InternalRow](part.blockId.get) + .map(_.data.asInstanceOf[Iterator[InternalRow]]) + if (block.isEmpty) { + logWarning(log"Prefetched block ${MDC(LogKeys.BLOCK_ID, part.blockId)} " + + log"for Python data source not found.") + } + block + } else None + + new PartitionReader[InternalRow] { + + private[this] val metrics: Map[String, SQLMetric] = PythonCustomMetric.pythonMetrics + + private val outputIter = if (cachedBlock.isEmpty) { + // Evaluate the python read UDF if the partition is not cached as block. 
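Editorial note: the reader defined below prefers rows prefetched into block storage and only re-runs the Python read function when that block is gone. A small sketch of just that decision, with stand-in names (CachedOrRecompute, evaluate) and a plain println in place of structured logging:

// Prefer prefetched rows; fall back to evaluating the Python read function only
// when the cached block is no longer available.
object CachedOrRecompute {
  def rowsFor[Row](
      partitionIndex: Int,
      cachedBlock: Option[Iterator[Row]],   // rows prefetched by the driver, if still present
      evaluate: () => Iterator[Row]): Iterator[Row] =
    cachedBlock.getOrElse {
      println(s"prefetched block for partition $partitionIndex not found; recomputing")
      evaluate()
    }
}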
+ val evaluatorFactory = source.createMapInBatchEvaluatorFactory( + pickledReadFunc, + "read_from_data_source", + UserDefinedPythonDataSource.readInputSchema, + outputSchema, + metrics, + jobArtifactUUID) + + evaluatorFactory.createEvaluator().eval( + part.index, Iterator.single(InternalRow(part.pickedPartition))) + } else cachedBlock.get + + override def next(): Boolean = outputIter.hasNext + + override def get(): InternalRow = outputIter.next() + + override def close(): Unit = {} + + override def currentMetricsValues(): Array[CustomTaskMetric] = { + source.createPythonTaskMetrics(metrics.map { case (k, v) => k -> v.value }) + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingSinkCommitRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingSinkCommitRunner.scala index a444fdfff7d96..b04ebe92910ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingSinkCommitRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingSinkCommitRunner.scala @@ -17,17 +17,14 @@ package org.apache.spark.sql.execution.datasources.v2.python -import java.io.{BufferedInputStream, BufferedOutputStream, DataInputStream, DataOutputStream} +import java.io.{DataInputStream, DataOutputStream} -import scala.jdk.CollectionConverters._ +import net.razorvine.pickle.Pickler -import org.apache.spark.SparkEnv -import org.apache.spark.api.python.{PythonFunction, PythonWorker, PythonWorkerFactory, PythonWorkerUtils, SpecialLengths} -import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.BUFFER_SIZE -import org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT +import org.apache.spark.api.python.{PythonFunction, PythonWorkerUtils, SpecialLengths} import org.apache.spark.sql.connector.write.WriterCommitMessage import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.execution.python.PythonPlannerRunner import org.apache.spark.sql.types.StructType /** @@ -38,78 +35,22 @@ import org.apache.spark.sql.types.StructType * from the socket, then commit or abort a microbatch. */ class PythonStreamingSinkCommitRunner( - func: PythonFunction, + dataSourceCls: PythonFunction, schema: StructType, - overwrite: Boolean) extends Logging { - val workerModule: String = "pyspark.sql.worker.python_streaming_sink_runner" - - private val conf = SparkEnv.get.conf - protected val bufferSize: Int = conf.get(BUFFER_SIZE) - protected val authSocketTimeout = conf.get(PYTHON_AUTH_SOCKET_TIMEOUT) - - private val envVars: java.util.Map[String, String] = func.envVars - private val pythonExec: String = func.pythonExec - private var pythonWorker: Option[PythonWorker] = None - private var pythonWorkerFactory: Option[PythonWorkerFactory] = None - protected val pythonVer: String = func.pythonVer - - private var dataOut: DataOutputStream = null - private var dataIn: DataInputStream = null - - /** - * Initializes the Python worker for running the streaming sink committer. 
- */ - def init(): Unit = { - logInfo(s"Initializing Python runner pythonExec: $pythonExec") - val env = SparkEnv.get - - val localdir = env.blockManager.diskBlockManager.localDirs.map(f => f.getPath()).mkString(",") - envVars.put("SPARK_LOCAL_DIRS", localdir) - - envVars.put("SPARK_AUTH_SOCKET_TIMEOUT", authSocketTimeout.toString) - envVars.put("SPARK_BUFFER_SIZE", bufferSize.toString) - - val workerFactory = - new PythonWorkerFactory(pythonExec, workerModule, envVars.asScala.toMap, false) - val (worker: PythonWorker, _) = workerFactory.createSimpleWorker(blockingMode = true) - pythonWorker = Some(worker) - pythonWorkerFactory = Some(workerFactory) - - val stream = new BufferedOutputStream( - pythonWorker.get.channel.socket().getOutputStream, bufferSize) - dataOut = new DataOutputStream(stream) - - PythonWorkerUtils.writePythonVersion(pythonVer, dataOut) - - val pythonIncludes = func.pythonIncludes.asScala.toSet - PythonWorkerUtils.writeSparkFiles(Some("streaming_job"), pythonIncludes, dataOut) - - // Send the user function to python process - PythonWorkerUtils.writePythonFunction(func, dataOut) - + messages: Array[WriterCommitMessage], + batchId: Long, + overwrite: Boolean, + abort: Boolean) extends PythonPlannerRunner[Unit](dataSourceCls) { + override val workerModule: String = "pyspark.sql.worker.python_streaming_sink_runner" + + override protected def writeToPython(dataOut: DataOutputStream, pickler: Pickler): Unit = { + // Send the user function to python process. + PythonWorkerUtils.writePythonFunction(dataSourceCls, dataOut) + // Send the output schema. PythonWorkerUtils.writeUTF(schema.json, dataOut) - dataOut.writeBoolean(overwrite) - dataOut.flush() - - dataIn = new DataInputStream( - new BufferedInputStream(pythonWorker.get.channel.socket().getInputStream, bufferSize)) - - val initStatus = dataIn.readInt() - if (initStatus == SpecialLengths.PYTHON_EXCEPTION_THROWN) { - val msg = PythonWorkerUtils.readUTF(dataIn) - throw QueryExecutionErrors.pythonStreamingDataSourceRuntimeError( - action = "initialize streaming sink", msg) - } - } - - init() - - def commitOrAbort( - messages: Array[WriterCommitMessage], - batchId: Long, - abort: Boolean): Unit = { + // Send the commit messages. dataOut.writeInt(messages.length) messages.foreach { message => // Commit messages can be null if there are task failures. @@ -121,10 +62,14 @@ class PythonStreamingSinkCommitRunner( } } dataOut.writeLong(batchId) + // Send whether to invoke `abort` instead of `commit`. dataOut.writeBoolean(abort) - dataOut.flush() - val status = dataIn.readInt() - if (status == SpecialLengths.PYTHON_EXCEPTION_THROWN) { + } + + override protected def receiveFromPython(dataIn: DataInputStream): Unit = { + // Receive any exceptions thrown in the Python worker. 
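Editorial note: after the refactor above, the commit runner only has to describe what it writes to the Python worker and how it interprets the single status integer it reads back; the worker lifecycle now lives in PythonPlannerRunner. A toy, self-contained sketch of that request/status shape over plain Java data streams (the constants and class names are stand-ins, not the real protocol):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

// Write a batch id plus an abort flag, then read a single status int back and
// raise if the worker reported an exception.
object CommitProtocolSketch {
  private val ExceptionThrown = -1   // stand-in for SpecialLengths.PYTHON_EXCEPTION_THROWN
  private val Ok = 0

  def writeRequest(out: DataOutputStream, batchId: Long, abort: Boolean): Unit = {
    out.writeLong(batchId)
    out.writeBoolean(abort)
    out.flush()
  }

  def readResponse(in: DataInputStream, abort: Boolean): Unit = {
    val code = in.readInt()
    if (code == ExceptionThrown) {
      val action = if (abort) "abort" else "commit"
      throw new RuntimeException(s"streaming sink $action failed in the Python worker")
    }
  }

  def main(args: Array[String]): Unit = {
    // Simulate a worker that replies Ok.
    val request = new ByteArrayOutputStream()
    writeRequest(new DataOutputStream(request), batchId = 7L, abort = false)

    val reply = new ByteArrayOutputStream()
    new DataOutputStream(reply).writeInt(Ok)
    readResponse(new DataInputStream(new ByteArrayInputStream(reply.toByteArray)), abort = false)
    println("commit acknowledged")
  }
}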
+ val code = dataIn.readInt() + if (code == SpecialLengths.PYTHON_EXCEPTION_THROWN) { val msg = PythonWorkerUtils.readUTF(dataIn) val action = if (abort) "abort" else "commit" throw QueryExecutionErrors.pythonStreamingDataSourceRuntimeError(action, msg) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingWrite.scala index 483fd5a4e0a1e..4c149437a3009 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/python/PythonStreamingWrite.scala @@ -42,9 +42,6 @@ class PythonStreamingWrite( ds.getOrCreateDataSourceInPython(shortName, info.options(), Some(info.schema())).dataSource ) - private lazy val pythonStreamingSinkCommitRunner = - new PythonStreamingSinkCommitRunner(createDataSourceFunc, info.schema(), isTruncate) - override def createStreamingWriterFactory( physicalInfo: PhysicalWriteInfo): StreamingDataWriterFactory = { val writeInfo = ds.source.createWriteInfoInPython( @@ -60,11 +57,23 @@ class PythonStreamingWrite( } override def commit(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { - pythonStreamingSinkCommitRunner.commitOrAbort(messages, epochId, false) + new PythonStreamingSinkCommitRunner( + createDataSourceFunc, + info.schema(), + messages, + batchId = epochId, + overwrite = isTruncate, + abort = false).runInPython() } override def abort(epochId: Long, messages: Array[WriterCommitMessage]): Unit = { - pythonStreamingSinkCommitRunner.commitOrAbort(messages, epochId, true) + new PythonStreamingSinkCommitRunner( + createDataSourceFunc, + info.schema(), + messages, + batchId = epochId, + overwrite = isTruncate, + abort = true).runInPython() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala index 1a8f444042c23..e2724cb59754d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSource.scala @@ -116,12 +116,16 @@ case class StateSourceOptions( batchId: Long, operatorId: Int, storeName: String, - joinSide: JoinSideValues) { + joinSide: JoinSideValues, + snapshotStartBatchId: Option[Long], + snapshotPartitionId: Option[Int]) { def stateCheckpointLocation: Path = new Path(resolvedCpLocation, DIR_NAME_STATE) override def toString: String = { s"StateSourceOptions(checkpointLocation=$resolvedCpLocation, batchId=$batchId, " + - s"operatorId=$operatorId, storeName=$storeName, joinSide=$joinSide)" + s"operatorId=$operatorId, storeName=$storeName, joinSide=$joinSide, " + + s"snapshotStartBatchId=${snapshotStartBatchId.getOrElse("None")}, " + + s"snapshotPartitionId=${snapshotPartitionId.getOrElse("None")})" } } @@ -131,6 +135,8 @@ object StateSourceOptions extends DataSourceOptions { val OPERATOR_ID = newOption("operatorId") val STORE_NAME = newOption("storeName") val JOIN_SIDE = newOption("joinSide") + val SNAPSHOT_START_BATCH_ID = newOption("snapshotStartBatchId") + val SNAPSHOT_PARTITION_ID = newOption("snapshotPartitionId") object JoinSideValues extends Enumeration { type JoinSideValues = Value @@ -190,7 +196,30 @@ object StateSourceOptions extends DataSourceOptions { throw 
StateDataSourceErrors.conflictOptions(Seq(JOIN_SIDE, STORE_NAME)) } - StateSourceOptions(resolvedCpLocation, batchId, operatorId, storeName, joinSide) + val snapshotStartBatchId = Option(options.get(SNAPSHOT_START_BATCH_ID)).map(_.toLong) + if (snapshotStartBatchId.exists(_ < 0)) { + throw StateDataSourceErrors.invalidOptionValueIsNegative(SNAPSHOT_START_BATCH_ID) + } else if (snapshotStartBatchId.exists(_ > batchId)) { + throw StateDataSourceErrors.invalidOptionValue( + SNAPSHOT_START_BATCH_ID, s"value should be less than or equal to $batchId") + } + + val snapshotPartitionId = Option(options.get(SNAPSHOT_PARTITION_ID)).map(_.toInt) + if (snapshotPartitionId.exists(_ < 0)) { + throw StateDataSourceErrors.invalidOptionValueIsNegative(SNAPSHOT_PARTITION_ID) + } + + // both snapshotPartitionId and snapshotStartBatchId are required at the same time, because + // each partition may have different checkpoint status + if (snapshotPartitionId.isDefined && snapshotStartBatchId.isEmpty) { + throw StateDataSourceErrors.requiredOptionUnspecified(SNAPSHOT_START_BATCH_ID) + } else if (snapshotPartitionId.isEmpty && snapshotStartBatchId.isDefined) { + throw StateDataSourceErrors.requiredOptionUnspecified(SNAPSHOT_PARTITION_ID) + } + + StateSourceOptions( + resolvedCpLocation, batchId, operatorId, storeName, + joinSide, snapshotStartBatchId, snapshotPartitionId) } private def resolvedCheckpointLocation( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StatePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StatePartitionReader.scala index bbfe3a3f373ec..f09a2763031e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StatePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StatePartitionReader.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeRow} import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} import org.apache.spark.sql.execution.datasources.v2.state.metadata.StateMetadataPartitionReader import org.apache.spark.sql.execution.datasources.v2.state.utils.SchemaUtil -import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, PrefixKeyScanStateEncoderSpec, ReadStateStore, StateStoreConf, StateStoreId, StateStoreProvider, StateStoreProviderId} +import org.apache.spark.sql.execution.streaming.state._ import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration @@ -93,7 +93,19 @@ class StatePartitionReader( } private lazy val store: ReadStateStore = { - provider.getReadStore(partition.sourceOptions.batchId + 1) + partition.sourceOptions.snapshotStartBatchId match { + case None => provider.getReadStore(partition.sourceOptions.batchId + 1) + + case Some(snapshotStartBatchId) => + if (!provider.isInstanceOf[SupportsFineGrainedReplay]) { + throw StateStoreErrors.stateStoreProviderDoesNotSupportFineGrainedReplay( + provider.getClass.toString) + } + provider.asInstanceOf[SupportsFineGrainedReplay] + .replayReadStateFromSnapshot( + snapshotStartBatchId + 1, + partition.sourceOptions.batchId + 1) + } } private lazy val iter: Iterator[InternalRow] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateScanBuilder.scala index 
0d69bf708e94f..ffcbcd0872e10 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateScanBuilder.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.read.{Batch, InputPartition, PartitionReaderFactory, Scan, ScanBuilder} import org.apache.spark.sql.execution.datasources.v2.state.StateSourceOptions.JoinSideValues import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper.{LeftSide, RightSide} -import org.apache.spark.sql.execution.streaming.state.StateStoreConf +import org.apache.spark.sql.execution.streaming.state.{StateStoreConf, StateStoreErrors} import org.apache.spark.sql.types.StructType import org.apache.spark.util.SerializableConfiguration @@ -81,9 +81,20 @@ class StateScan( assert((tail - head + 1) == partitionNums.length, s"No continuous partitions in state: ${partitionNums.mkString("Array(", ", ", ")")}") - partitionNums.map { - pn => new StateStoreInputPartition(pn, queryId, sourceOptions) - }.toArray + sourceOptions.snapshotPartitionId match { + case None => partitionNums.map { pn => + new StateStoreInputPartition(pn, queryId, sourceOptions) + }.toArray + + case Some(snapshotPartitionId) => + if (partitionNums.contains(snapshotPartitionId)) { + Array(new StateStoreInputPartition(snapshotPartitionId, queryId, sourceOptions)) + } else { + throw StateStoreErrors.stateStoreSnapshotPartitionNotFound( + snapshotPartitionId, sourceOptions.operatorId, + sourceOptions.stateCheckpointLocation.toString) + } + } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateTable.scala index 824968e709baf..dbd39f519e500 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateTable.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StateTable.scala @@ -49,16 +49,21 @@ class StateTable( } override def name(): String = { - val desc = s"StateTable " + + var desc = s"StateTable " + s"[stateCkptLocation=${sourceOptions.stateCheckpointLocation}]" + s"[batchId=${sourceOptions.batchId}][operatorId=${sourceOptions.operatorId}]" + s"[storeName=${sourceOptions.storeName}]" if (sourceOptions.joinSide != JoinSideValues.none) { - desc + s"[joinSide=${sourceOptions.joinSide}]" - } else { - desc + desc += s"[joinSide=${sourceOptions.joinSide}]" + } + if (sourceOptions.snapshotStartBatchId.isDefined) { + desc += s"[snapshotStartBatchId=${sourceOptions.snapshotStartBatchId}]" + } + if (sourceOptions.snapshotPartitionId.isDefined) { + desc += s"[snapshotPartitionId=${sourceOptions.snapshotPartitionId}]" } + desc } override def capabilities(): util.Set[TableCapability] = CAPABILITY diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StreamStreamJoinStatePartitionReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StreamStreamJoinStatePartitionReader.scala index e5a5dddefef5b..91f42db46dfb0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StreamStreamJoinStatePartitionReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/state/StreamStreamJoinStatePartitionReader.scala @@ -116,7 +116,8 @@ class StreamStreamJoinStatePartitionReader( partitionId = 
partition.partition, formatVersion, skippedNullValueCount = None, - useStateStoreCoordinator = false + useStateStoreCoordinator = false, + snapshotStartVersion = partition.sourceOptions.snapshotStartBatchId.map(_ + 1) ) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala index 761d88b5431fa..d19a8adb6ba26 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/text/TextScan.scala @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.connector.read.PartitionReaderFactory +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex import org.apache.spark.sql.execution.datasources.text.TextOptions import org.apache.spark.sql.execution.datasources.v2.TextBasedFileScan @@ -44,6 +45,12 @@ case class TextScan( private val optionsAsScala = options.asScala.toMap private lazy val textOptions: TextOptions = new TextOptions(optionsAsScala) + private def verifyReadSchema(schema: StructType): Unit = { + if (schema.size > 1) { + throw QueryCompilationErrors.textDataSourceWithMultiColumnsError(schema) + } + } + override def isSplitable(path: Path): Boolean = { super.isSplitable(path) && !textOptions.wholeText } @@ -58,9 +65,7 @@ case class TextScan( } override def createReaderFactory(): PartitionReaderFactory = { - assert( - readDataSchema.length <= 1, - "Text data source only produces a single data column named \"value\".") + verifyReadSchema(readDataSchema) val hadoopConf = { val caseSensitiveMap = options.asCaseSensitiveMap.asScala.toMap // Hadoop Configurations are case sensitive. 
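Editorial note, recapping the snapshotStartBatchId / snapshotPartitionId handling added to StateSourceOptions a few hunks above: the two options are only meaningful as a pair, must be non-negative, and the snapshot start batch cannot be newer than the batch being read. A standalone sketch of those checks using plain require instead of StateDataSourceErrors:

// Validation rules for the snapshot-based state source options.
object SnapshotOptionCheck {
  def validate(
      batchId: Long,
      snapshotStartBatchId: Option[Long],
      snapshotPartitionId: Option[Int]): Unit = {
    snapshotStartBatchId.foreach { start =>
      require(start >= 0, "snapshotStartBatchId must not be negative")
      require(start <= batchId, s"snapshotStartBatchId must be <= $batchId")
    }
    snapshotPartitionId.foreach { pid =>
      require(pid >= 0, "snapshotPartitionId must not be negative")
    }
    // Each partition may have a different checkpoint layout, so the options
    // are only meaningful when specified together.
    require(
      snapshotStartBatchId.isDefined == snapshotPartitionId.isDefined,
      "snapshotStartBatchId and snapshotPartitionId must be specified together")
  }
}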
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala index 87082299615c3..c03c0ba11de57 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XSDToSchema.scala @@ -47,7 +47,7 @@ object XSDToSchema extends Logging{ def read(xsdPath: Path): StructType = { val in = ValidatorUtil.openSchemaFile(xsdPath) val xmlSchemaCollection = new XmlSchemaCollection() - xmlSchemaCollection.setBaseUri(xsdPath.getParent.toString) + xmlSchemaCollection.setBaseUri(xsdPath.toString) val xmlSchema = xmlSchemaCollection.read(new InputStreamReader(in)) getStructType(xmlSchema) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlInputFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlInputFormat.scala index 4359ac02f5f58..6169cec6f8210 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlInputFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/xml/XmlInputFormat.scala @@ -20,7 +20,7 @@ import java.io.{InputStream, InputStreamReader, IOException, Reader} import java.nio.ByteBuffer import java.nio.charset.Charset -import org.apache.commons.io.input.CountingInputStream +import org.apache.commons.io.input.BoundedInputStream import org.apache.hadoop.fs.Seekable import org.apache.hadoop.io.{LongWritable, Text} import org.apache.hadoop.io.compress._ @@ -67,7 +67,7 @@ private[xml] class XmlRecordReader extends RecordReader[LongWritable, Text] { private var end: Long = _ private var reader: Reader = _ private var filePosition: Seekable = _ - private var countingIn: CountingInputStream = _ + private var countingIn: BoundedInputStream = _ private var readerLeftoverCharFn: () => Boolean = _ private var readerByteBuffer: ByteBuffer = _ private var decompressor: Decompressor = _ @@ -117,7 +117,9 @@ private[xml] class XmlRecordReader extends RecordReader[LongWritable, Text] { } } else { fsin.seek(start) - countingIn = new CountingInputStream(fsin) + countingIn = BoundedInputStream.builder() + .setInputStream(fsin) + .get() in = countingIn // don't use filePosition in this case. 
We have to count bytes read manually } @@ -156,7 +158,7 @@ private[xml] class XmlRecordReader extends RecordReader[LongWritable, Text] { if (filePosition != null) { filePosition.getPos } else { - start + countingIn.getByteCount - + start + countingIn.getCount - readerByteBuffer.remaining() - (if (readerLeftoverCharFn()) 1 else 0) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala index 866a62a3a0776..7844f470b0ef0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/BroadcastExchangeExec.scala @@ -25,6 +25,8 @@ import scala.concurrent.duration.NANOSECONDS import scala.util.control.NonFatal import org.apache.spark.{broadcast, SparkException} +import org.apache.spark.internal.LogKeys._ +import org.apache.spark.internal.MDC import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeRow @@ -65,11 +67,22 @@ trait BroadcastExchangeLike extends Exchange { * It also does the preparations work, such as waiting for the subqueries. */ final def submitBroadcastJob: scala.concurrent.Future[broadcast.Broadcast[Any]] = executeQuery { + materializationStarted.set(true) completionFuture } protected def completionFuture: scala.concurrent.Future[broadcast.Broadcast[Any]] + /** + * Cancels broadcast job. + */ + final def cancelBroadcastJob(): Unit = { + if (isMaterializationStarted() && !this.relationFuture.isDone) { + sparkContext.cancelJobsWithTag(this.jobTag) + this.relationFuture.cancel(true) + } + } + /** * Returns the runtime statistics after broadcast materialization. 
*/ @@ -212,7 +225,7 @@ case class BroadcastExchangeExec( relationFuture.get(timeout, TimeUnit.SECONDS).asInstanceOf[broadcast.Broadcast[T]] } catch { case ex: TimeoutException => - logError(s"Could not execute broadcast in $timeout secs.", ex) + logError(log"Could not execute broadcast in ${MDC(TIMEOUT, timeout)} secs.", ex) if (!relationFuture.isDone) { sparkContext.cancelJobsWithTag(jobTag) relationFuture.cancel(true) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index a0f74ef6c3d02..67d879bdd8bf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.exchange import scala.collection.mutable import scala.collection.mutable.ArrayBuffer +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans._ @@ -407,11 +408,13 @@ case class EnsureRequirements( val leftPartValues = leftSpec.partitioning.partitionValues val rightPartValues = rightSpec.partitioning.partitionValues + val numLeftPartValues = MDC(LogKeys.NUM_LEFT_PARTITION_VALUES, leftPartValues.size) + val numRightPartValues = MDC(LogKeys.NUM_RIGHT_PARTITION_VALUES, rightPartValues.size) logInfo( - s""" - |Left side # of partitions: ${leftPartValues.size} - |Right side # of partitions: ${rightPartValues.size} - |""".stripMargin) + log""" + |Left side # of partitions: $numLeftPartValues + |Right side # of partitions: $numRightPartValues + |""".stripMargin) // As partition keys are compatible, we can pick either left or right as partition // expressions @@ -421,7 +424,8 @@ case class EnsureRequirements( .mergePartitions(leftSpec.partitioning, rightSpec.partitioning, partitionExprs) .map(v => (v, 1)) - logInfo(s"After merging, there are ${mergedPartValues.size} partitions") + logInfo(log"After merging, there are " + + log"${MDC(LogKeys.NUM_PARTITIONS, mergedPartValues.size)} partitions") var replicateLeftSide = false var replicateRightSide = false @@ -445,8 +449,8 @@ case class EnsureRequirements( val canReplicateRight = canReplicateRightSide(joinType) if (!canReplicateLeft && !canReplicateRight) { - logInfo("Skipping partially clustered distribution as it cannot be applied for " + - s"join type '$joinType'") + logInfo(log"Skipping partially clustered distribution as it cannot be applied for " + + log"join type '${MDC(LogKeys.JOIN_TYPE, joinType)}'") } else { val leftLink = left.logicalLink val rightLink = right.logicalLink @@ -455,12 +459,16 @@ case class EnsureRequirements( leftLink.isDefined && rightLink.isDefined && leftLink.get.stats.sizeInBytes > 1 && rightLink.get.stats.sizeInBytes > 1) { + val leftLinkStatsSizeInBytes = MDC(LogKeys.LEFT_LOGICAL_PLAN_STATS_SIZE_IN_BYTES, + leftLink.get.stats.sizeInBytes) + val rightLinkStatsSizeInBytes = MDC(LogKeys.RIGHT_LOGICAL_PLAN_STATS_SIZE_IN_BYTES, + rightLink.get.stats.sizeInBytes) logInfo( - s""" + log""" |Using plan statistics to determine which side of join to fully |cluster partition values: - |Left side size (in bytes): ${leftLink.get.stats.sizeInBytes} - |Right side size (in bytes): ${rightLink.get.stats.sizeInBytes} + |Left side size (in bytes): $leftLinkStatsSizeInBytes + |Right side size (in bytes): 
$rightLinkStatsSizeInBytes |""".stripMargin) leftLink.get.stats.sizeInBytes < rightLink.get.stats.sizeInBytes } else { @@ -477,12 +485,14 @@ case class EnsureRequirements( // of partitions can be applied. For instance, replication should not be allowed for // the left-hand side of a right outer join. if (replicateLeftSide && !canReplicateLeft) { - logInfo("Left-hand side is picked but cannot be applied to join type " + - s"'$joinType'. Skipping partially clustered distribution.") + logInfo(log"Left-hand side is picked but cannot be applied to join type " + + log"'${MDC(LogKeys.JOIN_TYPE, joinType)}'. Skipping partially clustered " + + log"distribution.") replicateLeftSide = false } else if (replicateRightSide && !canReplicateRight) { - logInfo("Right-hand side is picked but cannot be applied to join type " + - s"'$joinType'. Skipping partially clustered distribution.") + logInfo(log"Right-hand side is picked but cannot be applied to join type " + + log"'${MDC(LogKeys.JOIN_TYPE, joinType)}'. Skipping partially clustered " + + log"distribution.") replicateRightSide = false } else { // In partially clustered distribution, we should use un-grouped partition values @@ -499,8 +509,8 @@ case class EnsureRequirements( InternalRowComparableWrapper(partVal, partitionExprs), numParts)) } - logInfo("After applying partially clustered distribution, there are " + - s"${mergedPartValues.map(_._2).sum} partitions.") + logInfo(log"After applying partially clustered distribution, there are " + + log"${MDC(LogKeys.NUM_PARTITIONS, mergedPartValues.map(_._2).sum)} partitions.") applyPartialClustering = true } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala index c02beea4f879c..154070a954f3a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.exchange +import java.util.concurrent.atomic.AtomicBoolean + import org.apache.spark.broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -34,6 +36,17 @@ import org.apache.spark.sql.vectorized.ColumnarBatch * "Volcano -- An Extensible and Parallel Query Evaluation System" by Goetz Graefe. */ abstract class Exchange extends UnaryExecNode { + /** + * This flag aims to detect if the stage materialization is started. This helps + * to avoid unnecessary AQE stage materialization when the stage is canceled. + */ + protected val materializationStarted = new AtomicBoolean() + + /** + * Exposes status if the materialization is started + */ + def isMaterializationStarted(): Boolean = materializationStarted.get() + override def output: Seq[Attribute] = child.output final override val nodePatterns: Seq[TreePattern] = Seq(EXCHANGE) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index 69705afbb7c71..70c08edfd8678 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -47,6 +47,15 @@ import org.apache.spark.util.random.XORShiftRandom */ trait ShuffleExchangeLike extends Exchange { + /** + * The asynchronous job that materializes the shuffle. 
It also does the preparations work, + * such as waiting for the subqueries. + */ + @transient private lazy val shuffleFuture: Future[MapOutputStatistics] = executeQuery { + materializationStarted.set(true) + mapOutputStatisticsFuture + } + /** * Returns the number of mappers of this shuffle. */ @@ -68,15 +77,25 @@ trait ShuffleExchangeLike extends Exchange { def shuffleOrigin: ShuffleOrigin /** - * The asynchronous job that materializes the shuffle. It also does the preparations work, - * such as waiting for the subqueries. + * Submits the shuffle job. */ - final def submitShuffleJob: Future[MapOutputStatistics] = executeQuery { - mapOutputStatisticsFuture - } + final def submitShuffleJob: Future[MapOutputStatistics] = shuffleFuture protected def mapOutputStatisticsFuture: Future[MapOutputStatistics] + /** + * Cancels the shuffle job. + */ + final def cancelShuffleJob: Unit = { + if (isMaterializationStarted()) { + shuffleFuture match { + case action: FutureAction[MapOutputStatistics] if !action.isCompleted => + action.cancel() + case _ => + } + } + } + /** * Returns the shuffle RDD with specified partition specs. */ @@ -86,6 +105,11 @@ trait ShuffleExchangeLike extends Exchange { * Returns the runtime statistics after shuffle materialization. */ def runtimeStatistics: Statistics + + /** + * The shuffle ID. + */ + def shuffleId: Int } // Describes where the shuffle operator comes from. @@ -166,6 +190,8 @@ case class ShuffleExchangeExec( Statistics(dataSize, Some(rowCount)) } + override def shuffleId: Int = shuffleDependency.shuffleId + /** * A [[ShuffleDependency]] that will partition rows of its child based on * the partitioning scheme defined in `newPartitioning`. Those partitions of diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 8ace74c517943..c0fb1c37b2102 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution +import org.apache.spark.TaskContext import org.apache.spark.rdd.{ParallelCollectionRDD, RDD} import org.apache.spark.serializer.Serializer import org.apache.spark.sql.catalyst.InternalRow @@ -26,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.metric.{SQLShuffleReadMetricsReporter, SQLShuffleWriteMetricsReporter} -import org.apache.spark.util.ArrayImplicits._ +import org.apache.spark.sql.execution.python.HybridRowQueue import org.apache.spark.util.collection.Utils /** @@ -68,13 +69,13 @@ case class CollectLimitExec(limit: Int = -1, child: SparkPlan, offset: Int = 0) override lazy val metrics = readMetrics ++ writeMetrics protected override def doExecute(): RDD[InternalRow] = { val childRDD = child.execute() - if (childRDD.getNumPartitions == 0) { + if (childRDD.getNumPartitions == 0 || limit == 0) { new ParallelCollectionRDD(sparkContext, Seq.empty[InternalRow], 1, Map.empty) } else { val singlePartitionRDD = if (childRDD.getNumPartitions == 1) { childRDD } else { - val locallyLimited = if (limit >= 0) { + val locallyLimited = if (limit > 0) { childRDD.mapPartitionsInternal(_.take(limit)) } else { childRDD @@ -118,18 +119,57 @@ case class CollectLimitExec(limit: Int = -1, child: SparkPlan, offset: Int = 0) * logical plan, which happens when the 
user is collecting results back to the driver. */ case class CollectTailExec(limit: Int, child: SparkPlan) extends LimitExec { + assert(limit >= 0) + override def output: Seq[Attribute] = child.output override def outputPartitioning: Partitioning = SinglePartition override def executeCollect(): Array[InternalRow] = child.executeTail(limit) + private val serializer: Serializer = new UnsafeRowSerializer(child.output.size) + private lazy val writeMetrics = + SQLShuffleWriteMetricsReporter.createShuffleWriteMetrics(sparkContext) + private lazy val readMetrics = + SQLShuffleReadMetricsReporter.createShuffleReadMetrics(sparkContext) + override lazy val metrics = readMetrics ++ writeMetrics protected override def doExecute(): RDD[InternalRow] = { - // This is a bit hacky way to avoid a shuffle and scanning all data when it performs - // at `Dataset.tail`. - // Since this execution plan and `execute` are currently called only when - // `Dataset.tail` is invoked, the jobs are always executed when they are supposed to be. - - // If we use this execution plan separately like `Dataset.limit` without an actual - // job launch, we might just have to mimic the implementation of `CollectLimitExec`. - sparkContext.parallelize(executeCollect().toImmutableArraySeq, numSlices = 1) + val childRDD = child.execute() + if (childRDD.getNumPartitions == 0 || limit == 0) { + new ParallelCollectionRDD(sparkContext, Seq.empty[InternalRow], 1, Map.empty) + } else { + val singlePartitionRDD = if (childRDD.getNumPartitions == 1) { + childRDD + } else { + val locallyLimited = childRDD.mapPartitionsInternal(takeRight) + new ShuffledRowRDD( + ShuffleExchangeExec.prepareShuffleDependency( + locallyLimited, + child.output, + SinglePartition, + serializer, + writeMetrics), + readMetrics) + } + singlePartitionRDD.mapPartitionsInternal(takeRight) + } + } + + private def takeRight(iter: Iterator[InternalRow]): Iterator[InternalRow] = { + if (iter.isEmpty) { + Iterator.empty[InternalRow] + } else { + val context = TaskContext.get() + val queue = HybridRowQueue.apply(context.taskMemoryManager(), output.size) + context.addTaskCompletionListener[Unit](_ => queue.close()) + var count = 0 + while (iter.hasNext) { + queue.add(iter.next().copy().asInstanceOf[UnsafeRow]) + if (count < limit) { + count += 1 + } else { + queue.remove() + } + } + Iterator.range(0, count).map(_ => queue.remove()) + } } override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = @@ -355,7 +395,8 @@ case class TakeOrderedAndProjectExec( val orderByString = truncatedString(sortOrder, "[", ",", "]", maxFields) val outputString = truncatedString(output, "[", ",", "]", maxFields) - s"TakeOrderedAndProject(limit=$limit, orderBy=$orderByString, output=$outputString)" + val offsetStr = if (offset > 0) s" offset=$offset," else "" + s"TakeOrderedAndProject(limit=$limit,$offsetStr orderBy=$orderByString, output=$outputString)" } override def stringArgs: Iterator[Any] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ApplyInPandasWithStatePythonRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ApplyInPandasWithStatePythonRunner.scala index 8eeb919d0bafd..ae982f2f87f2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ApplyInPandasWithStatePythonRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ApplyInPandasWithStatePythonRunner.scala @@ -27,6 +27,8 @@ import org.json4s._ import org.json4s.jackson.JsonMethods._ import 
org.apache.spark.api.python._ +import org.apache.spark.internal.LogKeys.CONFIG +import org.apache.spark.internal.MDC import org.apache.spark.sql.Row import org.apache.spark.sql.api.python.PythonSQLUtils import org.apache.spark.sql.catalyst.InternalRow @@ -88,9 +90,9 @@ class ApplyInPandasWithStatePythonRunner( override val bufferSize: Int = { val configuredSize = sqlConf.pandasUDFBufferSize if (configuredSize < 4) { - logWarning("Pandas execution requires more than 4 bytes. Please configure bigger value " + - s"for the configuration '${SQLConf.PANDAS_UDF_BUFFER_SIZE.key}'. " + - "Force using the value '4'.") + logWarning(log"Pandas execution requires more than 4 bytes. Please configure bigger value " + + log"for the configuration '${MDC(CONFIG, SQLConf.PANDAS_UDF_BUFFER_SIZE.key)}'. " + + log"Force using the value '4'.") 4 } else { configuredSize diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AttachDistributedSequenceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AttachDistributedSequenceExec.scala index e353bf5a51e9a..a8a6fa97c52a9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AttachDistributedSequenceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AttachDistributedSequenceExec.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.python +import org.apache.spark.internal.LogKeys.{RDD_ID, SPARK_PLAN_ID} +import org.apache.spark.internal.MDC import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ @@ -108,7 +110,8 @@ case class AttachDistributedSequenceExec( override protected[sql] def cleanupResources(): Unit = { try { if (cached != null && cached.getStorageLevel != StorageLevel.NONE) { - logWarning(s"clean up cached RDD(${cached.id}) in AttachDistributedSequenceExec($id)") + logWarning(log"clean up cached RDD(${MDC(RDD_ID, cached.id)}) in " + + log"AttachDistributedSequenceExec(${MDC(SPARK_PLAN_ID, id)})") cached.unpersist(blocking = false) } } finally { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala index d69213552136d..fca277dae5d55 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvaluatePython.scala @@ -24,7 +24,7 @@ import scala.jdk.CollectionConverters._ import net.razorvine.pickle.{IObjectPickler, Opcodes, Pickler} -import org.apache.spark.SparkException +import org.apache.spark.SparkIllegalArgumentException import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -183,10 +183,11 @@ object EvaluatePython { case c if c.getClass.isArray => val array = c.asInstanceOf[Array[_]] if (array.length != fields.length) { - throw SparkException.internalError( - s"Input row doesn't have expected number of values required by the schema. " + - s"${fields.length} fields are required while ${array.length} values are provided." 
- ) + throw new SparkIllegalArgumentException( + errorClass = "STRUCT_ARRAY_LENGTH_MISMATCH", + messageParameters = Map( + "expected" -> fields.length.toString, + "actual" -> array.length.toString)) } val row = new GenericInternalRow(fields.length) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowOutput.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowOutput.scala index 90922d89ad10b..e7d4aa9f04607 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowOutput.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonArrowOutput.scala @@ -49,7 +49,7 @@ private[python] trait PythonArrowOutput[OUT <: AnyRef] { self: BasePythonRunner[ startTime: Long, env: SparkEnv, worker: PythonWorker, - pid: Option[Long], + pid: Option[Int], releasedOrClosed: AtomicBoolean, context: TaskContext): Iterator[OUT] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonStreamingSourceRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonStreamingSourceRunner.scala index dd9c5a25e8a74..33612b6947f27 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonStreamingSourceRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonStreamingSourceRunner.scala @@ -23,13 +23,20 @@ import java.io.{BufferedInputStream, BufferedOutputStream, DataInputStream, Data import scala.collection.mutable.ArrayBuffer import scala.jdk.CollectionConverters._ +import org.apache.arrow.vector.ipc.ArrowStreamReader + import org.apache.spark.SparkEnv import org.apache.spark.api.python.{PythonFunction, PythonWorker, PythonWorkerFactory, PythonWorkerUtils, SpecialLengths} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.PYTHON_EXEC import org.apache.spark.internal.config.BUFFER_SIZE import org.apache.spark.internal.config.Python.PYTHON_AUTH_SOCKET_TIMEOUT +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.ArrowUtils +import org.apache.spark.sql.vectorized.{ArrowColumnVector, ColumnarBatch, ColumnVector} object PythonStreamingSourceRunner { // When the python process for python_streaming_source_runner receives one of the @@ -38,6 +45,11 @@ object PythonStreamingSourceRunner { val LATEST_OFFSET_FUNC_ID = 885 val PARTITIONS_FUNC_ID = 886 val COMMIT_FUNC_ID = 887 + // Status code for JVM to decide how to receive prefetched record batches + // for simple stream reader. + val PREFETCHED_RECORDS_NOT_FOUND = 0 + val NON_EMPTY_PYARROW_RECORD_BATCHES = 1 + val EMPTY_PYARROW_RECORD_BATCHES = 2 } /** @@ -71,7 +83,7 @@ class PythonStreamingSourceRunner( * Initializes the Python worker for running the streaming source. 
*/ def init(): Unit = { - logInfo(s"Initializing Python runner pythonExec: $pythonExec") + logInfo(log"Initializing Python runner pythonExec: ${MDC(PYTHON_EXEC, pythonExec)}") val env = SparkEnv.get val localdir = env.blockManager.diskBlockManager.localDirs.map(f => f.getPath()).mkString(",") @@ -101,6 +113,8 @@ class PythonStreamingSourceRunner( // Send output schema PythonWorkerUtils.writeUTF(outputSchema.json, dataOut) + dataOut.writeInt(SQLConf.get.arrowMaxRecordsPerBatch) + dataOut.flush() dataIn = new DataInputStream( @@ -147,7 +161,8 @@ class PythonStreamingSourceRunner( /** * Invokes partitions(start, end) function of the stream reader and receive the return value. */ - def partitions(start: String, end: String): Array[Array[Byte]] = { + def partitions(start: String, end: String): (Array[Array[Byte]], Option[Iterator[InternalRow]]) = + { dataOut.writeInt(PARTITIONS_FUNC_ID) PythonWorkerUtils.writeUTF(start, dataOut) PythonWorkerUtils.writeUTF(end, dataOut) @@ -164,7 +179,20 @@ class PythonStreamingSourceRunner( val pickledPartition: Array[Byte] = PythonWorkerUtils.readBytes(dataIn) pickledPartitions.append(pickledPartition) } - pickledPartitions.toArray + val prefetchedRecordsStatus = dataIn.readInt() + val iter: Option[Iterator[InternalRow]] = prefetchedRecordsStatus match { + case NON_EMPTY_PYARROW_RECORD_BATCHES => Some(readArrowRecordBatches()) + case PREFETCHED_RECORDS_NOT_FOUND => None + case EMPTY_PYARROW_RECORD_BATCHES => Some(Iterator.empty) + case SpecialLengths.PYTHON_EXCEPTION_THROWN => + val msg = PythonWorkerUtils.readUTF(dataIn) + throw QueryExecutionErrors.pythonStreamingDataSourceRuntimeError( + action = "planPartitions", msg) + case _ => + throw QueryExecutionErrors.pythonStreamingDataSourceRuntimeError( + action = "planPartitions", s"unknown status code $prefetchedRecordsStatus") + } + (pickledPartitions.toArray, iter) } /** @@ -186,7 +214,8 @@ class PythonStreamingSourceRunner( * Stop the python worker process and invoke stop() on stream reader. */ def stop(): Unit = { - logInfo(s"Stopping streaming runner for module: $workerModule.") + logInfo(log"Stopping streaming runner for module: " + + log"${MDC(LogKeys.MODULE_NAME, workerModule)}.") try { pythonWorkerFactory.foreach { factory => pythonWorker.foreach { worker => @@ -199,4 +228,30 @@ class PythonStreamingSourceRunner( logError("Exception when trying to kill worker", e) } } + + private val allocator = ArrowUtils.rootAllocator.newChildAllocator( + s"stream reader for $pythonExec", 0, Long.MaxValue) + + def readArrowRecordBatches(): Iterator[InternalRow] = { + assert(dataIn.readInt() == SpecialLengths.START_ARROW_STREAM) + val reader = new ArrowStreamReader(dataIn, allocator) + val root = reader.getVectorSchemaRoot() + // When input is empty schema can't be read. + val schema = ArrowUtils.fromArrowSchema(root.getSchema()) + assert(schema == outputSchema) + + val vectors = root.getFieldVectors().asScala.map { vector => + new ArrowColumnVector(vector) + }.toArray[ColumnVector] + val rows = ArrayBuffer[InternalRow]() + while (reader.loadNextBatch()) { + val batch = new ColumnarBatch(vectors) + batch.setNumRows(root.getRowCount) + // Need to copy the row because the ColumnarBatch row iterator use + // the same underlying Internal row. 
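Editorial note: the copy() mentioned in the comment above is load-bearing, because the columnar batch's row iterator reuses one mutable row, so buffering the references without copying would alias every collected element to the last row read. A tiny standalone demonstration of that hazard (MutableRow is illustrative, not a Spark class):

// Demonstrates why rows must be copied before buffering when the producing
// iterator reuses a single mutable object for every element.
final class MutableRow(var value: Int) {
  def copy(): MutableRow = new MutableRow(value)
}

object RowReuseDemo {
  private def reusedRows(n: Int): Iterator[MutableRow] = {
    val shared = new MutableRow(0)
    Iterator.range(0, n).map { i => shared.value = i; shared }
  }

  def main(args: Array[String]): Unit = {
    val aliased = reusedRows(3).toArray.map(_.value)                // Array(2, 2, 2)
    val copied = reusedRows(3).map(_.copy()).toArray.map(_.value)   // Array(0, 1, 2)
    println(s"without copy: ${aliased.mkString(",")}; with copy: ${copied.mkString(",")}")
  }
}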
+ rows.appendAll(batch.rowIterator().asScala.map(_.copy())) + } + reader.close(false) + rows.iterator + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala index bbe9fbfc748db..87ff5a0ec4333 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/PythonUDFRunner.scala @@ -80,7 +80,7 @@ abstract class BasePythonUDFRunner( startTime: Long, env: SparkEnv, worker: PythonWorker, - pid: Option[Long], + pid: Option[Int], releasedOrClosed: AtomicBoolean, context: TaskContext): Iterator[Array[Byte]] = { new ReaderIterator( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala index 5e0c5ff92fdab..ce30a54c8d4e8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/RowQueue.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.unsafe.Platform import org.apache.spark.unsafe.memory.MemoryBlock +import org.apache.spark.util.Utils /** * A RowQueue is an FIFO queue for UnsafeRow. @@ -288,8 +289,12 @@ private[python] case class HybridRowQueue( } } -private[python] object HybridRowQueue { +private[sql] object HybridRowQueue { def apply(taskMemoryMgr: TaskMemoryManager, file: File, fields: Int): HybridRowQueue = { HybridRowQueue(taskMemoryMgr, file, fields, SparkEnv.get.serializerManager) } + + def apply(taskMemoryMgr: TaskMemoryManager, fields: Int): HybridRowQueue = { + apply(taskMemoryMgr, new File(Utils.getLocalDir(SparkEnv.get.conf)), fields) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala index 819fd1bd297f8..45ecf87009505 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/r/ArrowRRunner.scala @@ -30,6 +30,7 @@ import org.apache.spark.TaskContext import org.apache.spark.api.r._ import org.apache.spark.api.r.SpecialLengths import org.apache.spark.broadcast.Broadcast +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.execution.arrow.ArrowWriter import org.apache.spark.sql.types.StructType @@ -138,6 +139,10 @@ class ArrowRRunner( private var batchLoaded = true + private def format(v: Double): String = { + "%.3f".format(v) + } + protected override def read(): ColumnarBatch = try { if (reader != null && batchLoaded) { batchLoaded = reader.loadNextBatch() @@ -161,17 +166,14 @@ class ArrowRRunner( val input = dataStream.readDouble val compute = dataStream.readDouble val output = dataStream.readDouble - logInfo( - ("Times: boot = %.3f s, init = %.3f s, broadcast = %.3f s, " + - "read-input = %.3f s, compute = %.3f s, write-output = %.3f s, " + - "total = %.3f s").format( - boot, - init, - broadcast, - input, - compute, - output, - boot + init + broadcast + input + compute + output)) + logInfo(log"Times: boot = ${MDC(LogKeys.BOOT, format(boot))} s, " + + log"init = ${MDC(LogKeys.INIT, format(init))} s, " + + log"broadcast = ${MDC(LogKeys.BROADCAST, 
format(broadcast))} s, " + + log"read-input = ${MDC(LogKeys.INPUT, format(input))} s, " + + log"compute = ${MDC(LogKeys.COMPUTE, format(compute))} s, " + + log"write-output = ${MDC(LogKeys.OUTPUT, format(output))} s, " + + log"total = ${MDC(LogKeys.TOTAL, + format(boot + init + broadcast + input + compute + output))} s") read() case length if length > 0 => // Likewise, there looks no way to send each batch in streaming format via socket diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala index 686e0bb868865..6db01624fd26b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncCommitLog.scala @@ -22,6 +22,7 @@ import java.util.concurrent.{CompletableFuture, ConcurrentLinkedDeque, ThreadPoo import scala.jdk.CollectionConverters._ +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.errors.QueryExecutionErrors @@ -125,7 +126,8 @@ class AsyncCommitLog(sparkSession: SparkSession, path: String, executorService: } } catch { case e: Throwable => - logError(s"Encountered error while writing batch ${batchId} to commit log", e) + logError(log"Encountered error while writing batch " + + log"${MDC(LogKeys.BATCH_ID, batchId)} to commit log", e) future.completeExceptionally(e) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala index a89a9132e03e0..54a8855b77cdb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncOffsetSeqLog.scala @@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicLong import scala.jdk.CollectionConverters._ +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.util.{Clock, SystemClock} @@ -159,7 +160,8 @@ class AsyncOffsetSeqLog( } } catch { case e: Throwable => - logError(s"Encountered error while writing batch ${batchId} to offset log", e) + logError(log"Encountered error while writing batch " + + log"${MDC(LogKeys.BATCH_ID, batchId)} to offset log", e) future.completeExceptionally(e) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncProgressTrackingMicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncProgressTrackingMicroBatchExecution.scala index ec24ec0fd335c..4a7cb5b71a77f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncProgressTrackingMicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AsyncProgressTrackingMicroBatchExecution.scala @@ -20,6 +20,8 @@ package org.apache.spark.sql.execution.streaming import java.util.concurrent._ import java.util.concurrent.atomic.AtomicLong +import org.apache.spark.internal.LogKeys.{BATCH_ID, PRETTY_ID_STRING} +import org.apache.spark.internal.MDC import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.streaming.WriteToStream import org.apache.spark.sql.errors.QueryExecutionErrors @@ -156,8 +158,8 @@ class AsyncProgressTrackingMicroBatchExecution( } }) 
.exceptionally((th: Throwable) => { - logError(s"Encountered error while performing" + - s" async offset write for batch ${execCtx.batchId}", th) + logError(log"Encountered error while performing async offset write for batch " + + log"${MDC(BATCH_ID, execCtx.batchId)}", th) errorNotifier.markError(th) return }) @@ -188,8 +190,8 @@ class AsyncProgressTrackingMicroBatchExecution( commitLog .addAsync(execCtx.batchId, CommitMetadata(watermarkTracker.currentWatermark)) .exceptionally((th: Throwable) => { - logError(s"Got exception during async write to commit log" + - s" for batch ${execCtx.batchId}", th) + logError(log"Got exception during async write to commit log for batch " + + log"${MDC(BATCH_ID, execCtx.batchId)}", th) errorNotifier.markError(th) return }) @@ -221,7 +223,8 @@ class AsyncProgressTrackingMicroBatchExecution( super.cleanup() ThreadUtils.shutdown(asyncWritesExecutorService) - logInfo(s"Async progress tracking executor pool for query ${prettyIdString} has been shutdown") + logInfo(log"Async progress tracking executor pool for query " + + log"${MDC(PRETTY_ID_STRING, prettyIdString)} has been shutdown") } // used for testing diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AvailableNowDataStreamWrapper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AvailableNowDataStreamWrapper.scala index 18dd2eba083ad..f42250c3c702d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AvailableNowDataStreamWrapper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/AvailableNowDataStreamWrapper.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution.streaming -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{DELEGATE, READ_LIMIT} import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, ReadLimit, SparkDataStream, SupportsAdmissionControl, SupportsTriggerAvailableNow} import org.apache.spark.sql.connector.read.streaming @@ -29,10 +30,10 @@ class AvailableNowDataStreamWrapper(val delegate: SparkDataStream) extends SparkDataStream with SupportsTriggerAvailableNow with Logging { // See SPARK-45178 for more details. - logWarning("Activating the wrapper implementation of Trigger.AvailableNow for source " + - s"[$delegate]. Note that this might introduce possibility of deduplication, dataloss, " + - "correctness issue. Enable the config with extreme care. We strongly recommend to contact " + - "the data source developer to support Trigger.AvailableNow.") + logWarning(log"Activating the wrapper implementation of Trigger.AvailableNow for source " + + log"[${MDC(DELEGATE, delegate)}]. Note that this might introduce possibility of " + + log"deduplication, dataloss, correctness issue. Enable the config with extreme care. 
We " + + log"strongly recommend to contact the data source developer to support Trigger.AvailableNow.") private var fetchedOffset: streaming.Offset = _ @@ -71,8 +72,8 @@ class AvailableNowDataStreamWrapper(val delegate: SparkDataStream) case s: SupportsAdmissionControl => val limit = s.getDefaultReadLimit if (limit != ReadLimit.allAvailable()) { - logWarning(s"The read limit $limit is ignored because source $delegate does not " + - "support running Trigger.AvailableNow queries.") + logWarning(log"The read limit ${MDC(READ_LIMIT, limit)} is ignored because source " + + log"${MDC(DELEGATE, delegate)} does not support running Trigger.AvailableNow queries.") } ReadLimit.allAvailable() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala index 34c5dee0997b9..982cc13c40868 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala @@ -26,7 +26,8 @@ import org.apache.hadoop.fs._ import org.apache.hadoop.fs.local.{LocalFs, RawLocalFs} import org.apache.hadoop.fs.permission.FsPermission -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{FINAL_PATH, PATH, TEMP_PATH} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.streaming.CheckpointFileManager.RenameHelperMethods import org.apache.spark.sql.internal.SQLConf @@ -143,7 +144,8 @@ object CheckpointFileManager extends Logging { this(fm, path, generateTempPath(path), overwrite) } - logInfo(s"Writing atomically to $finalPath using temp file $tempPath") + logInfo(log"Writing atomically to ${MDC(FINAL_PATH, finalPath)} using temp file " + + log"${MDC(TEMP_PATH, tempPath)}") @volatile private var terminated = false override def close(): Unit = synchronized { @@ -154,8 +156,8 @@ object CheckpointFileManager extends Logging { fm.renameTempFile(tempPath, finalPath, overwriteIfPossible) } catch { case fe: FileAlreadyExistsException => - logWarning( - s"Failed to rename temp file $tempPath to $finalPath because file exists", fe) + logWarning(log"Failed to rename temp file ${MDC(TEMP_PATH, tempPath)} to " + + log"${MDC(PATH, finalPath)} because file exists", fe) if (!overwriteIfPossible) throw fe } @@ -165,7 +167,8 @@ object CheckpointFileManager extends Logging { s"But $finalPath does not exist.") } - logInfo(s"Renamed temp file $tempPath to $finalPath") + logInfo(log"Renamed temp file ${MDC(TEMP_PATH, tempPath)} to " + + log"${MDC(FINAL_PATH, finalPath)}") } finally { terminated = true } @@ -178,13 +181,13 @@ object CheckpointFileManager extends Logging { underlyingStream.close() } catch { case NonFatal(e) => - logWarning(s"Error cancelling write to $finalPath, " + - s"continuing to delete temp path $tempPath", e) + logWarning(log"Error cancelling write to ${MDC(PATH, finalPath)}, continuing to " + + log"delete temp path ${MDC(TEMP_PATH, tempPath)}", e) } fm.delete(tempPath) } catch { case NonFatal(e) => - logWarning(s"Error deleting temp file $tempPath", e) + logWarning(log"Error deleting temp file ${MDC(TEMP_PATH, tempPath)}", e) } finally { terminated = true } @@ -210,10 +213,10 @@ object CheckpointFileManager extends Logging { } catch { case e: UnsupportedFileSystemException => logWarning( - "Could not use FileContext API for managing 
Structured Streaming checkpoint files at " + - s"$path. Using FileSystem API instead for managing log files. If the implementation " + - s"of FileSystem.rename() is not atomic, then the correctness and fault-tolerance of" + - s"your Structured Streaming is not guaranteed.") + log"Could not use FileContext API for managing Structured Streaming checkpoint files " + + log"at ${MDC(PATH, path)}. Using FileSystem API instead for managing log files. If " + + log"the implementation of FileSystem.rename() is not atomic, then the correctness " + + log"and fault-tolerance of your Structured Streaming is not guaranteed.") new FileSystemBasedCheckpointFileManager(path, hadoopConf) } } @@ -274,7 +277,8 @@ class FileSystemBasedCheckpointFileManager(path: Path, hadoopConf: Configuration throw QueryExecutionErrors.renameSrcPathNotFoundError(srcPath) } else { val e = QueryExecutionErrors.failedRenameTempFileError(srcPath, dstPath) - logWarning(e.getMessage) + logWarning(log"Failed to rename temp file ${MDC(TEMP_PATH, srcPath)} to " + + log"${MDC(PATH, dstPath)} as FileSystem.rename returned false.") throw e } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ColumnFamilySchemaUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ColumnFamilySchemaUtils.scala new file mode 100644 index 0000000000000..68f3fa434389e --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ColumnFamilySchemaUtils.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.execution.streaming + +import org.apache.spark.sql.Encoder +import org.apache.spark.sql.execution.streaming.TransformWithStateKeyValueRowSchema.{COMPOSITE_KEY_ROW_SCHEMA, KEY_ROW_SCHEMA, VALUE_ROW_SCHEMA, VALUE_ROW_SCHEMA_WITH_TTL} +import org.apache.spark.sql.execution.streaming.state.{ColumnFamilySchema, ColumnFamilySchemaV1, NoPrefixKeyStateEncoderSpec, PrefixKeyScanStateEncoderSpec} + +trait ColumnFamilySchemaUtils { + def getValueStateSchema[T](stateName: String, hasTtl: Boolean): ColumnFamilySchema + + def getListStateSchema[T](stateName: String, hasTtl: Boolean): ColumnFamilySchema + + def getMapStateSchema[K, V]( + stateName: String, + userKeyEnc: Encoder[K], + hasTtl: Boolean): ColumnFamilySchema +} + +object ColumnFamilySchemaUtilsV1 extends ColumnFamilySchemaUtils { + + def getValueStateSchema[T](stateName: String, hasTtl: Boolean): ColumnFamilySchemaV1 = { + ColumnFamilySchemaV1( + stateName, + KEY_ROW_SCHEMA, + if (hasTtl) { + VALUE_ROW_SCHEMA_WITH_TTL + } else { + VALUE_ROW_SCHEMA + }, + NoPrefixKeyStateEncoderSpec(KEY_ROW_SCHEMA)) + } + + def getListStateSchema[T](stateName: String, hasTtl: Boolean): ColumnFamilySchemaV1 = { + ColumnFamilySchemaV1( + stateName, + KEY_ROW_SCHEMA, + if (hasTtl) { + VALUE_ROW_SCHEMA_WITH_TTL + } else { + VALUE_ROW_SCHEMA + }, + NoPrefixKeyStateEncoderSpec(KEY_ROW_SCHEMA)) + } + + def getMapStateSchema[K, V]( + stateName: String, + userKeyEnc: Encoder[K], + hasTtl: Boolean): ColumnFamilySchemaV1 = { + ColumnFamilySchemaV1( + stateName, + COMPOSITE_KEY_ROW_SCHEMA, + if (hasTtl) { + VALUE_ROW_SCHEMA_WITH_TTL + } else { + VALUE_ROW_SCHEMA + }, + PrefixKeyScanStateEncoderSpec(COMPOSITE_KEY_ROW_SCHEMA, 1), + Some(userKeyEnc.schema)) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala index 8d38bba1f2a63..d6770452e71f3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CompactibleFileStreamLog.scala @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path import org.json4s.{Formats, NoTypeHints} import org.json4s.jackson.Serialization +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.util.Utils @@ -103,8 +104,9 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : ClassTag]( defaultCompactInterval, compactibleBatchIds(0).toInt) } assert(interval > 0, s"intervalValue = $interval not positive value.") - logInfo(s"Set the compact interval to $interval " + - s"[defaultCompactInterval: $defaultCompactInterval]") + logInfo(log"Set the compact interval to ${MDC(LogKeys.COMPACT_INTERVAL, interval)} " + + log"[defaultCompactInterval: " + + log"${MDC(LogKeys.DEFAULT_COMPACT_INTERVAL, defaultCompactInterval)}]") interval } @@ -240,7 +242,8 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : ClassTag]( } if (elapsedMs >= COMPACT_LATENCY_WARN_THRESHOLD_MS) { - logWarning(s"Compacting took $elapsedMs ms for compact batch $batchId") + logWarning(log"Compacting took ${MDC(LogKeys.ELAPSED_TIME, elapsedMs)} ms for " + + log"compact batch ${MDC(LogKeys.BATCH_ID, batchId)}") } else { logDebug(s"Compacting took $elapsedMs ms for compact batch $batchId") } @@ -306,8 +309,9 @@ abstract class CompactibleFileStreamLog[T <: AnyRef : 
ClassTag]( assert(isCompactionBatch(minCompactionBatchId, compactInterval), s"$minCompactionBatchId is not a compaction batch") - logInfo(s"Current compact batch id = $currentBatchId " + - s"min compaction batch id to delete = $minCompactionBatchId") + logInfo(log"Current compact batch id = ${MDC(LogKeys.CURRENT_BATCH_ID, currentBatchId)} " + + log"min compaction batch id to delete = " + + log"${MDC(LogKeys.MIN_COMPACTION_BATCH_ID, minCompactionBatchId)}") val expiredTime = System.currentTimeMillis() - fileCleanupDelayMs fileManager.list(metadataPath, (path: Path) => { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala index 7e094fee32547..54041abdc9ab4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/EventTimeWatermarkExec.scala @@ -19,11 +19,14 @@ package org.apache.spark.sql.execution.streaming import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, UnsafeProjection} +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, Predicate, SortOrder, UnsafeProjection} +import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReference import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark +import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark.updateEventTimeColumn +import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToMillis +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} -import org.apache.spark.sql.types.MetadataBuilder import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.AccumulatorV2 @@ -107,25 +110,72 @@ case class EventTimeWatermarkExec( } // Update the metadata on the eventTime column to include the desired delay. - override val output: Seq[Attribute] = child.output.map { a => - if (a semanticEquals eventTime) { - val updatedMetadata = new MetadataBuilder() - .withMetadata(a.metadata) - .putLong(EventTimeWatermark.delayKey, delayMs) - .build() - a.withMetadata(updatedMetadata) - } else if (a.metadata.contains(EventTimeWatermark.delayKey)) { - // Remove existing watermark - val updatedMetadata = new MetadataBuilder() - .withMetadata(a.metadata) - .remove(EventTimeWatermark.delayKey) - .build() - a.withMetadata(updatedMetadata) - } else { - a - } + override val output: Seq[Attribute] = { + val delayMs = EventTimeWatermark.getDelayMs(delay) + updateEventTimeColumn(child.output, delayMs, eventTime) } override protected def withNewChildInternal(newChild: SparkPlan): EventTimeWatermarkExec = copy(child = newChild) } + +/** + * Updates the event time column to [[eventTime]] in the child output. + * Any watermark calculations performed after this node will use the + * updated eventTimeColumn. + * + * This node also ensures that output emitted by the child node adheres + * to watermark. If the child node emits rows which are older than global + * watermark, the node will throw an query execution error and fail the user + * query. 
+ */ +case class UpdateEventTimeColumnExec( + eventTime: Attribute, + delay: CalendarInterval, + eventTimeWatermarkForLateEvents: Option[Long], + child: SparkPlan) extends UnaryExecNode { + + override protected def doExecute(): RDD[InternalRow] = { + child.execute().mapPartitions[InternalRow] { dataIterator => + val watermarkExpression = WatermarkSupport.watermarkExpression( + Some(eventTime), eventTimeWatermarkForLateEvents) + + if (watermarkExpression.isEmpty) { + // watermark should always be defined in this node. + throw QueryExecutionErrors.cannotGetEventTimeWatermarkError() + } + + val predicate = Predicate.create(watermarkExpression.get, child.output) + new Iterator[InternalRow] { + override def hasNext: Boolean = dataIterator.hasNext + + override def next(): InternalRow = { + val row = dataIterator.next() + if (predicate.eval(row)) { + // child node emitted a row which is older than current watermark + // this is not allowed + val boundEventTimeExpression = bindReference[Expression](eventTime, child.output) + val eventTimeProjection = UnsafeProjection.create(boundEventTimeExpression) + val rowEventTime = eventTimeProjection(row) + throw QueryExecutionErrors.emittedRowsAreOlderThanWatermark( + eventTimeWatermarkForLateEvents.get, rowEventTime.getLong(0)) + } + row + } + } + } + } + + override def outputOrdering: Seq[SortOrder] = child.outputOrdering + + override def outputPartitioning: Partitioning = child.outputPartitioning + + // Update the metadata on the eventTime column to include the desired delay. + override val output: Seq[Attribute] = { + val delayMs = EventTimeWatermark.getDelayMs(delay) + updateEventTimeColumn(child.output, delayMs, eventTime) + } + + override protected def withNewChildInternal(newChild: SparkPlan): UpdateEventTimeColumnExec = + copy(child = newChild) +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala index 07c1ccc432cdb..b259f9dbcdcb2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamOptions.scala @@ -125,6 +125,30 @@ class FileStreamOptions(parameters: CaseInsensitiveMap[String]) extends Logging matchedMode } + /** + * maximum number of files to cache to be processed in subsequent batches + */ + val maxCachedFiles: Int = parameters.get("maxCachedFiles").map { str => + Try(str.toInt).filter(_ >= 0).getOrElse { + throw new IllegalArgumentException( + s"Invalid value '$str' for option 'maxCachedFiles', must be an integer greater than or " + + "equal to 0") + } + }.getOrElse(10000) + + /** + * ratio of cached input to max files/bytes to allow for listing from input source when + * there are fewer cached files/bytes than could be available to be read + */ + val discardCachedInputRatio: Float = parameters.get("discardCachedInputRatio").map { str => + Try(str.toFloat).filter(x => 0 <= x && x <= 1).getOrElse { + throw new IllegalArgumentException( + s"Invalid value '$str' for option 'discardCachedInputRatio', must be a positive float " + + "between 0 and 1" + ) + } + }.getOrElse(0.2f) + private def withBooleanParameter(name: String, default: Boolean) = { parameters.get(name).map { str => try { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala index 
ea8db3c99de92..638da08d0fd9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSink.scala @@ -23,7 +23,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkException -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{BATCH_ID, ERROR, PATH} import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.expressions._ @@ -60,8 +61,8 @@ object FileStreamSink extends Logging { } catch { case e: SparkException => throw e case NonFatal(e) => - logWarning(s"Assume no metadata directory. Error while looking for " + - s"metadata directory in the path: $singlePath.", e) + logWarning(log"Assume no metadata directory. Error while looking for " + + log"metadata directory in the path: ${MDC(PATH, singlePath)}.", e) false } case _ => false @@ -84,7 +85,7 @@ object FileStreamSink extends Logging { } catch { case NonFatal(e) => // We may not have access to this directory. Don't fail the query if that happens. - logWarning(e.getMessage, e) + logWarning(log"${MDC(ERROR, e.getMessage)}", e) false } if (legacyMetadataPathExists) { @@ -145,7 +146,7 @@ class FileStreamSink( override def addBatch(batchId: Long, data: DataFrame): Unit = { if (batchId <= fileLog.getLatestBatchId().getOrElse(-1L)) { - logInfo(s"Skipping already committed batch $batchId") + logInfo(log"Skipping already committed batch ${MDC(BATCH_ID, batchId)}") } else { val committer = FileCommitProtocol.instantiate( className = sparkSession.sessionState.conf.streamingFileCommitProtocolClass, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala index d8aa31be47972..556438811c44d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSinkLog.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.streaming import org.apache.hadoop.fs.FileStatus +import org.apache.spark.internal.LogKeys._ +import org.apache.spark.internal.MDC import org.apache.spark.paths.SparkPath import org.apache.spark.sql.SparkSession import org.apache.spark.sql.internal.SQLConf @@ -101,7 +103,7 @@ class FileStreamSinkLog( val retentionMs: Long = _retentionMs match { case Some(retention) => - logInfo(s"Retention is set to $retention ms") + logInfo(log"Retention is set to ${MDC(TIME_UNITS, retention)} ms") retention case _ => Long.MaxValue diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index eacbd0447d16f..4a9b2d11b7e0f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, FileSystem, GlobFilter, Path} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import 
org.apache.spark.paths.SparkPath import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap @@ -114,6 +114,11 @@ class FileStreamSource( "the same and causes data lost.") } + + private val maxCachedFiles = sourceOptions.maxCachedFiles + + private val discardCachedInputRatio = sourceOptions.discardCachedInputRatio + /** A mapping from a file that we have processed to some timestamp it was last modified. */ // Visible for testing and debugging in production. val seenFiles = new SeenFilesMap(maxFileAgeMs, fileNameOnly) @@ -125,8 +130,9 @@ class FileStreamSource( } seenFiles.purge() - logInfo(s"maxFilesPerBatch = $maxFilesPerBatch, " + - s"maxBytesPerBatch = $maxBytesPerBatch, maxFileAgeMs = $maxFileAgeMs") + logInfo(log"maxFilesPerBatch = ${MDC(LogKeys.NUM_FILES, maxFilesPerBatch)}, " + + log"maxBytesPerBatch = ${MDC(LogKeys.NUM_BYTES, maxBytesPerBatch)}, " + + log"maxFileAgeMs = ${MDC(LogKeys.TIME_UNITS, maxFileAgeMs)}") private var unreadFiles: Seq[NewFileEntry] = _ @@ -178,12 +184,14 @@ class FileStreamSource( } } + val shouldCache = !sourceOptions.latestFirst && allFilesForTriggerAvailableNow == null + // Obey user's setting to limit the number of files in this batch trigger. val (batchFiles, unselectedFiles) = limit match { - case files: ReadMaxFiles if !sourceOptions.latestFirst => + case files: ReadMaxFiles if shouldCache => // we can cache and reuse remaining fetched list of files in further batches val (bFiles, usFiles) = newFiles.splitAt(files.maxFiles()) - if (usFiles.size < files.maxFiles() * DISCARD_UNSEEN_INPUT_RATIO) { + if (usFiles.size < files.maxFiles() * discardCachedInputRatio) { // Discard unselected files if the number of files are smaller than threshold. // This is to avoid the case when the next batch would have too few files to read // whereas there're new files available. @@ -194,14 +202,14 @@ class FileStreamSource( } case files: ReadMaxFiles => - // implies "sourceOptions.latestFirst = true" which we want to refresh the list per batch + // don't use the cache, just take files for the next batch (newFiles.take(files.maxFiles()), null) - case files: ReadMaxBytes if !sourceOptions.latestFirst => + case files: ReadMaxBytes if shouldCache => // we can cache and reuse remaining fetched list of files in further batches val (FilesSplit(bFiles, _), FilesSplit(usFiles, rSize)) = takeFilesUntilMax(newFiles, files.maxBytes()) - if (rSize.toDouble < (files.maxBytes() * DISCARD_UNSEEN_INPUT_RATIO)) { + if (rSize.toDouble < (files.maxBytes() * discardCachedInputRatio)) { // Discard unselected files if the total size of files is smaller than threshold. // This is to avoid the case when the next batch would have too small of a size of // files to read whereas there're new files available. 
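// Editor's sketch (not part of the patch): how the two new file-source options introduced
// above could be set on a streaming query. The option names and their defaults
// (maxCachedFiles = 10000, discardCachedInputRatio = 0.2) come from the FileStreamOptions
// hunk earlier in this diff; the format, schema and input path below are hypothetical.
import org.apache.spark.sql.SparkSession

object FileSourceCachingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("file-source-caching-sketch").getOrCreate()
    val input = spark.readStream
      .format("json")
      .schema("id LONG, payload STRING")
      .option("maxFilesPerTrigger", 100)       // pre-existing per-batch limit
      .option("maxCachedFiles", 5000)          // cap on listed-but-unread files kept for later batches
      .option("discardCachedInputRatio", 0.3)  // re-list instead of draining a too-small cache
      .load("/data/incoming")                  // hypothetical path
    input.writeStream.format("console").start().awaitTermination()
  }
}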
@@ -212,16 +220,16 @@ class FileStreamSource( } case files: ReadMaxBytes => + // don't use the cache, just take files for the next batch val (FilesSplit(bFiles, _), _) = takeFilesUntilMax(newFiles, files.maxBytes()) - // implies "sourceOptions.latestFirst = true" which we want to refresh the list per batch (bFiles, null) case _: ReadAllAvailable => (newFiles, null) } if (unselectedFiles != null && unselectedFiles.nonEmpty) { - logTrace(s"Taking first $MAX_CACHED_UNSEEN_FILES unread files.") - unreadFiles = unselectedFiles.take(MAX_CACHED_UNSEEN_FILES) + logTrace(s"Taking first $maxCachedFiles unread files.") + unreadFiles = unselectedFiles.take(maxCachedFiles) logTrace(s"${unreadFiles.size} unread files are available for further batches.") } else { unreadFiles = null @@ -250,7 +258,8 @@ class FileStreamSource( FileEntry(path = p.urlEncoded, timestamp = timestamp, batchId = metadataLogCurrentOffset) }.toArray if (metadataLog.add(metadataLogCurrentOffset, fileEntries)) { - logInfo(s"Log offset set to $metadataLogCurrentOffset with ${batchFiles.size} new files") + logInfo(log"Log offset set to ${MDC(LogKeys.LOG_OFFSET, metadataLogCurrentOffset)} " + + log"with ${MDC(LogKeys.NUM_FILES, batchFiles.size)} new files") } else { throw new IllegalStateException("Concurrent update to the log. Multiple streaming jobs " + s"detected for $metadataLogCurrentOffset") @@ -290,7 +299,9 @@ class FileStreamSource( assert(startOffset <= endOffset) val files = metadataLog.get(Some(startOffset + 1), Some(endOffset)).flatMap(_._2) - logInfo(s"Processing ${files.length} files from ${startOffset + 1}:$endOffset") + logInfo(log"Processing ${MDC(LogKeys.NUM_FILES, files.length)} files from " + + log"${MDC(LogKeys.FILE_START_OFFSET, startOffset + 1)}:" + + log"${MDC(LogKeys.FILE_END_OFFSET, endOffset)}") logTrace(s"Files are:\n\t" + files.mkString("\n\t")) val newDataSource = DataSource( @@ -380,7 +391,8 @@ class FileStreamSource( val listingTimeMs = NANOSECONDS.toMillis(endTime - startTime) if (listingTimeMs > 2000) { // Output a warning when listing files uses more than 2 seconds. - logWarning(s"Listed ${files.size} file(s) in $listingTimeMs ms") + logWarning(log"Listed ${MDC(LogKeys.NUM_FILES, files.size)} file(s) in " + + log"${MDC(LogKeys.ELAPSED_TIME, listingTimeMs)} ms") } else { logTrace(s"Listed ${files.size} file(s) in $listingTimeMs ms") } @@ -421,9 +433,6 @@ object FileStreamSource { /** Timestamp for file modification time, in ms since January 1, 1970 UTC. 
*/ type Timestamp = Long - val DISCARD_UNSEEN_INPUT_RATIO = 0.2 - val MAX_CACHED_UNSEEN_FILES = 10000 - case class FileEntry( path: String, // uri-encoded path string timestamp: Timestamp, @@ -628,11 +637,13 @@ object FileStreamSource { logDebug(s"Archiving completed file $curPath to $newPath") if (!fileSystem.rename(curPath, newPath)) { - logWarning(s"Fail to move $curPath to $newPath / skip moving file.") + logWarning(log"Fail to move ${MDC(LogKeys.CURRENT_PATH, curPath)} to " + + log"${MDC(LogKeys.NEW_PATH, newPath)} / skip moving file.") } } catch { case NonFatal(e) => - logWarning(s"Fail to move $curPath to $newPath / skip moving file.", e) + logWarning(log"Fail to move ${MDC(LogKeys.CURRENT_PATH, curPath)} to " + + log"${MDC(LogKeys.NEW_PATH, newPath)} / skip moving file.", e) } } } @@ -646,12 +657,14 @@ object FileStreamSource { logDebug(s"Removing completed file $curPath") if (!fileSystem.delete(curPath, false)) { - logWarning(s"Failed to remove $curPath / skip removing file.") + logWarning( + log"Failed to remove ${MDC(LogKeys.CURRENT_PATH, curPath)} / skip removing file.") } } catch { case NonFatal(e) => // Log to error but swallow exception to avoid process being stopped - logWarning(s"Fail to remove $curPath / skip removing file.", e) + logWarning( + log"Fail to remove ${MDC(LogKeys.CURRENT_PATH, curPath)} / skip removing file.", e) } } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala index 01b16b63fa278..3969320aa1a8b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FlatMapGroupsWithStateExec.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.execution.streaming import java.util.concurrent.TimeUnit.NANOSECONDS +import org.apache.hadoop.conf.Configuration + import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -187,6 +189,14 @@ trait FlatMapGroupsWithStateExecBase }) } + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, + batchId: Long, + stateSchemaVersion: Int): Array[String] = { + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + groupingAttributes.toStructType, stateManager.stateSchema, session.sqlContext.sessionState) + } + override protected def doExecute(): RDD[InternalRow] = { stateManager // force lazy init at driver metrics // force lazy init at driver diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index b3eedbf93f040..251cc16acdf43 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -29,7 +29,8 @@ import org.apache.hadoop.fs._ import org.json4s.{Formats, NoTypeHints} import org.json4s.jackson.Serialization -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf @@ -265,7 +266,7 @@ class HDFSMetadataLog[T <: AnyRef 
: ClassTag](sparkSession: SparkSession, path: override def getLatest(): Option[(Long, T)] = { listBatches.sorted.lastOption.map { batchId => - logInfo(s"Getting latest batch $batchId") + logInfo(log"Getting latest batch ${MDC(BATCH_ID, batchId)}") (batchId, getExistingBatch(batchId)) } } @@ -335,7 +336,7 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: batchCache.synchronized { batchCache.keySet.asScala.toArray } - logInfo("BatchIds found from listing: " + batchIds.sorted.mkString(", ")) + logInfo(log"BatchIds found from listing: ${MDC(BATCH_ID, batchIds.sorted.mkString(", "))}") if (batchIds.isEmpty) { Array.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala index cfccfff3a1382..2759af639390b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/IncrementalExecution.scala @@ -22,7 +22,8 @@ import java.util.concurrent.atomic.AtomicInteger import org.apache.hadoop.fs.Path -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{BATCH_TIMESTAMP, ERROR} import org.apache.spark.sql.{SparkSession, Strategy} import org.apache.spark.sql.catalyst.QueryPlanningTracker import org.apache.spark.sql.catalyst.expressions.{CurrentBatchTimestamp, ExpressionWithRandomSeed} @@ -30,7 +31,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.errors.QueryExecutionErrors -import org.apache.spark.sql.execution.{LocalLimitExec, QueryExecution, SparkPlan, SparkPlanner, UnaryExecNode} +import org.apache.spark.sql.execution.{LocalLimitExec, QueryExecution, SerializeFromObjectExec, SparkPlan, SparkPlanner, UnaryExecNode} import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, MergingSessionsExec, ObjectHashAggregateExec, SortAggregateExec, UpdatingSessionsExec} import org.apache.spark.sql.execution.datasources.v2.state.metadata.StateMetadataPartitionReader import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike @@ -84,6 +85,12 @@ class IncrementalExecution( .map(SQLConf.SHUFFLE_PARTITIONS.valueConverter) .getOrElse(sparkSession.sessionState.conf.numShufflePartitions) + /** + * This value dictates which schema format version the state schema should be written in + * for all operators other than TransformWithState. + */ + private val STATE_SCHEMA_DEFAULT_VERSION: Int = 2 + /** * See [SPARK-18339] * Walk the optimized logical plan and replace CurrentBatchTimestamp @@ -101,7 +108,7 @@ class IncrementalExecution( tracker).transformAllExpressionsWithPruning( _.containsAnyPattern(CURRENT_LIKE, EXPRESSION_WITH_RANDOM_SEED)) { case ts @ CurrentBatchTimestamp(timestamp, _, _) => - logInfo(s"Current batch timestamp = $timestamp") + logInfo(log"Current batch timestamp = ${MDC(BATCH_TIMESTAMP, timestamp)}") ts.toLiteral case e: ExpressionWithRandomSeed => e.withNewSeed(Utils.random.nextLong()) } @@ -186,14 +193,31 @@ class IncrementalExecution( } } - object WriteStatefulOperatorMetadataRule extends SparkPlanPartialRule { + // Planning rule used to record the state schema for the first run and validate state schema + // changes across query runs. 
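// Editor's sketch (not part of the patch): the structured-logging pattern that the
// logInfo/logWarning rewrites throughout these hunks follow. The log"..." interpolator is
// provided via the Logging trait, and MDC tags each interpolated value with a LogKey so
// downstream log pipelines can index it. LogKeys.BATCH_ID and LogKeys.ELAPSED_TIME are
// reused here because they already appear in the hunks above; the class and method names
// are hypothetical.
package org.apache.spark.sql.execution.streaming.sketch

import org.apache.spark.internal.{Logging, LogKeys, MDC}

class CommitLoggingSketch extends Logging {
  def reportCommit(batchId: Long, elapsedMs: Long): Unit = {
    // Each MDC(...) pair becomes a key/value field in the structured log output, while the
    // rendered message still reads like the old string-interpolated form.
    logInfo(log"Committed batch ${MDC(LogKeys.BATCH_ID, batchId)} in " +
      log"${MDC(LogKeys.ELAPSED_TIME, elapsedMs)} ms")
  }
}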
+ object StateSchemaAndOperatorMetadataRule extends SparkPlanPartialRule { override val rule: PartialFunction[SparkPlan, SparkPlan] = { - case stateStoreWriter: StateStoreWriter if isFirstBatch => - val metadata = stateStoreWriter.operatorStateMetadata() - val metadataWriter = new OperatorStateMetadataWriter(new Path( - checkpointLocation, stateStoreWriter.getStateInfo.operatorId.toString), hadoopConf) - metadataWriter.write(metadata) - stateStoreWriter + // In the case of TransformWithStateExec, we want to collect this StateSchema + // filepath, and write this path out in the OperatorStateMetadata file + case statefulOp: StatefulOperator if isFirstBatch => + val stateSchemaVersion = statefulOp match { + case _: TransformWithStateExec => sparkSession.sessionState.conf. + getConf(SQLConf.STREAMING_TRANSFORM_WITH_STATE_OP_STATE_SCHEMA_VERSION) + case _ => STATE_SCHEMA_DEFAULT_VERSION + } + val stateSchemaPaths = statefulOp. + validateAndMaybeEvolveStateSchema(hadoopConf, currentBatchId, stateSchemaVersion) + // write out the state schema paths to the metadata file + statefulOp match { + case stateStoreWriter: StateStoreWriter => + val metadata = stateStoreWriter.operatorStateMetadata() + // TODO: [SPARK-48849] Populate metadata with stateSchemaPaths if metadata version is v2 + val metadataWriter = new OperatorStateMetadataWriter(new Path( + checkpointLocation, stateStoreWriter.getStateInfo.operatorId.toString), hadoopConf) + metadataWriter.write(metadata) + case _ => + } + statefulOp } } @@ -346,6 +370,28 @@ class IncrementalExecution( eventTimeWatermarkForEviction = inputWatermarkForEviction(m.stateInfo.get) ) + // UpdateEventTimeColumnExec is used to tag the eventTime column, and validate + // emitted rows adhere to watermark in the output of transformWithState. + // Hence, this node shares the same watermark value as TransformWithStateExec. + // However, given that UpdateEventTimeColumnExec does not store any state, it + // does not have any StateInfo. We simply use the StateInfo of transformWithStateExec + // to propagate watermark to both UpdateEventTimeColumnExec and transformWithStateExec. + case UpdateEventTimeColumnExec(eventTime, delay, None, + SerializeFromObjectExec(serializer, + t: TransformWithStateExec)) if t.stateInfo.isDefined => + + val stateInfo = t.stateInfo.get + val iwLateEvents = inputWatermarkForLateEvents(stateInfo) + val iwEviction = inputWatermarkForEviction(stateInfo) + + UpdateEventTimeColumnExec(eventTime, delay, iwLateEvents, + SerializeFromObjectExec(serializer, + t.copy( + eventTimeWatermarkForLateEvents = iwLateEvents, + eventTimeWatermarkForEviction = iwEviction) + )) + + case t: TransformWithStateExec if t.stateInfo.isDefined => t.copy( eventTimeWatermarkForLateEvents = inputWatermarkForLateEvents(t.stateInfo.get), @@ -419,9 +465,9 @@ class IncrementalExecution( } catch { case e: Exception => // no need to throw fatal error, returns empty map - logWarning("Error reading metadata path for stateful operator. " + - s"This may due to no prior committed batch, or previously run on lower versions:" + - s" ${e.getMessage}") + logWarning(log"Error reading metadata path for stateful operator. 
This may due to " + + log"no prior committed batch, or previously run on lower versions: " + + log"${MDC(ERROR, e.getMessage)}") } } ret @@ -448,9 +494,11 @@ class IncrementalExecution( if (isFirstBatch && currentBatchId != 0) { checkOperatorValidWithMetadata(planWithStateOpId) } - // The rule doesn't change the plan but cause the side effect that metadata is written - // in the checkpoint directory of stateful operator. - planWithStateOpId transform WriteStatefulOperatorMetadataRule.rule + + // The rule below doesn't change the plan but can cause the side effect that + // metadata/schema is written in the checkpoint directory of stateful operator. + planWithStateOpId transform StateSchemaAndOperatorMetadataRule.rule + simulateWatermarkPropagation(planWithStateOpId) planWithStateOpId transform WatermarkPropagationRule.rule } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImplWithTTL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImplWithTTL.scala index 32bc21cea6ed4..dc72f8bcd5600 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImplWithTTL.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ListStateImplWithTTL.scala @@ -137,6 +137,7 @@ class ListStateImplWithTTL[S]( /** Remove this state. */ override def clear(): Unit = { store.remove(stateTypesEncoder.encodeGroupingKey(), stateName) + clearTTLState() } private def validateNewState(newState: Array[S]): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala index 46ce33687890d..b382642eb6bf6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ManifestFileCommitProtocol.scala @@ -25,7 +25,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{BATCH_ID, PATH} import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage import org.apache.spark.sql.errors.QueryExecutionErrors @@ -74,7 +75,7 @@ class ManifestFileCommitProtocol(jobId: String, path: String) pendingCommitFiles.clear() if (fileLog.add(batchId, fileStatuses)) { - logInfo(s"Committed batch $batchId") + logInfo(log"Committed batch ${MDC(BATCH_ID, batchId)}") } else { throw new IllegalStateException(s"Race while writing batch $batchId") } @@ -95,7 +96,8 @@ class ManifestFileCommitProtocol(jobId: String, path: String) } } catch { case e: IOException => - logWarning(s"Fail to remove temporary file $path, continue removing next.", e) + logWarning(log"Fail to remove temporary file ${MDC(PATH, path)}, " + + log"continue removing next.", e) } } pendingCommitFiles.clear() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MapStateImplWithTTL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MapStateImplWithTTL.scala new file mode 100644 index 0000000000000..2ab06f36dd5f7 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MapStateImplWithTTL.scala @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.streaming + +import org.apache.spark.internal.Logging +import org.apache.spark.sql.Encoder +import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder +import org.apache.spark.sql.execution.streaming.TransformWithStateKeyValueRowSchema.{COMPOSITE_KEY_ROW_SCHEMA, VALUE_ROW_SCHEMA_WITH_TTL} +import org.apache.spark.sql.execution.streaming.state.{PrefixKeyScanStateEncoderSpec, StateStore, StateStoreErrors} +import org.apache.spark.sql.streaming.{MapState, TTLConfig} +import org.apache.spark.util.NextIterator + +/** + * Class that provides a concrete implementation for map state associated with state + * variables (with ttl expiration support) used in the streaming transformWithState operator. + * @param store - reference to the StateStore instance to be used for storing state + * @param stateName - name of the state variable + * @param keyExprEnc - Spark SQL encoder for key + * @param userKeyEnc - Spark SQL encoder for the map key + * @param valEncoder - SQL encoder for state variable + * @param ttlConfig - the ttl configuration (time to live duration etc.) + * @param batchTimestampMs - current batch processing timestamp. + * @tparam K - type of key for map state variable + * @tparam V - type of value for map state variable + * @return - instance of MapState of type [K,V] that can be used to store state persistently + */ +class MapStateImplWithTTL[K, V]( + store: StateStore, + stateName: String, + keyExprEnc: ExpressionEncoder[Any], + userKeyEnc: Encoder[K], + valEncoder: Encoder[V], + ttlConfig: TTLConfig, + batchTimestampMs: Long) extends CompositeKeyTTLStateImpl(stateName, store, batchTimestampMs) + with MapState[K, V] with Logging { + + private val keySerializer = keyExprEnc.createSerializer() + private val stateTypesEncoder = new CompositeKeyStateEncoder( + keySerializer, userKeyEnc, valEncoder, COMPOSITE_KEY_ROW_SCHEMA, stateName, hasTtl = true) + + private val ttlExpirationMs = + StateTTL.calculateExpirationTimeForDuration(ttlConfig.ttlDuration, batchTimestampMs) + + initialize() + + private def initialize(): Unit = { + store.createColFamilyIfAbsent(stateName, COMPOSITE_KEY_ROW_SCHEMA, VALUE_ROW_SCHEMA_WITH_TTL, + PrefixKeyScanStateEncoderSpec(COMPOSITE_KEY_ROW_SCHEMA, 1)) + } + + /** Whether state exists or not. 
*/ + override def exists(): Boolean = { + iterator().nonEmpty + } + + /** Get the state value if it exists */ + override def getValue(key: K): V = { + StateStoreErrors.requireNonNullStateValue(key, stateName) + val encodedCompositeKey = stateTypesEncoder.encodeCompositeKey(key) + val retRow = store.get(encodedCompositeKey, stateName) + + if (retRow != null) { + if (!stateTypesEncoder.isExpired(retRow, batchTimestampMs)) { + stateTypesEncoder.decodeValue(retRow) + } else { + null.asInstanceOf[V] + } + } else { + null.asInstanceOf[V] + } + } + + /** Check if the user key is contained in the map */ + override def containsKey(key: K): Boolean = { + StateStoreErrors.requireNonNullStateValue(key, stateName) + getValue(key) != null + } + + /** Update value for given user key */ + override def updateValue(key: K, value: V): Unit = { + StateStoreErrors.requireNonNullStateValue(key, stateName) + StateStoreErrors.requireNonNullStateValue(value, stateName) + + val serializedGroupingKey = stateTypesEncoder.serializeGroupingKey() + val serializedUserKey = stateTypesEncoder.serializeUserKey(key) + + val encodedValue = stateTypesEncoder.encodeValue(value, ttlExpirationMs) + val encodedCompositeKey = stateTypesEncoder.encodeCompositeKey( + serializedGroupingKey, serializedUserKey) + store.put(encodedCompositeKey, encodedValue, stateName) + + upsertTTLForStateKey(ttlExpirationMs, serializedGroupingKey, serializedUserKey) + } + + /** Get the map associated with grouping key */ + override def iterator(): Iterator[(K, V)] = { + val encodedGroupingKey = stateTypesEncoder.encodeGroupingKey() + val unsafeRowPairIterator = store.prefixScan(encodedGroupingKey, stateName) + new NextIterator[(K, V)] { + override protected def getNext(): (K, V) = { + val iter = unsafeRowPairIterator.dropWhile { rowPair => + stateTypesEncoder.isExpired(rowPair.value, batchTimestampMs) + } + if (iter.hasNext) { + val currentRowPair = iter.next() + val key = stateTypesEncoder.decodeCompositeKey(currentRowPair.key) + val value = stateTypesEncoder.decodeValue(currentRowPair.value) + (key, value) + } else { + finished = true + null.asInstanceOf[(K, V)] + } + } + + override protected def close(): Unit = {} + } + } + + /** Get the list of keys present in map associated with grouping key */ + override def keys(): Iterator[K] = { + iterator().map(_._1) + } + + /** Get the list of values present in map associated with grouping key */ + override def values(): Iterator[V] = { + iterator().map(_._2) + } + + /** Remove user key from map state */ + override def removeKey(key: K): Unit = { + StateStoreErrors.requireNonNullStateValue(key, stateName) + val compositeKey = stateTypesEncoder.encodeCompositeKey(key) + store.remove(compositeKey, stateName) + } + + /** Remove this state. */ + override def clear(): Unit = { + keys().foreach { itr => + removeKey(itr) + } + clearTTLState() + } + + /** + * Clears the user state associated with this grouping key + * if it has expired. This function is called by Spark to perform + * cleanup at the end of transformWithState processing. + * + * Spark uses a secondary index to determine if the user state for + * this grouping key has expired. However, its possible that the user + * has updated the TTL and secondary index is out of date. Implementations + * must validate that the user State has actually expired before cleanup based + * on their own State data. + * + * @param groupingKey grouping key for which cleanup should be performed. + * @param userKey user key for which cleanup should be performed. 
+ */ + override def clearIfExpired(groupingKey: Array[Byte], userKey: Array[Byte]): Long = { + val encodedCompositeKey = stateTypesEncoder.encodeCompositeKey(groupingKey, userKey) + val retRow = store.get(encodedCompositeKey, stateName) + var numRemovedElements = 0L + if (retRow != null) { + if (stateTypesEncoder.isExpired(retRow, batchTimestampMs)) { + store.remove(encodedCompositeKey, stateName) + numRemovedElements += 1 + } + } + numRemovedElements + } + + /* + * Internal methods to probe state for testing. The below methods exist for unit tests + * to read the state ttl values, and ensure that values are persisted correctly in + * the underlying state store. + */ + + /** + * Retrieves the value from State even if its expired. This method is used + * in tests to read the state store value, and ensure if its cleaned up at the + * end of the micro-batch. + */ + private[sql] def getWithoutEnforcingTTL(userKey: K): Option[V] = { + val encodedCompositeKey = stateTypesEncoder.encodeCompositeKey(userKey) + val retRow = store.get(encodedCompositeKey, stateName) + + if (retRow != null) { + val resState = stateTypesEncoder.decodeValue(retRow) + Some(resState) + } else { + None + } + } + + /** + * Read the ttl value associated with the grouping and user key. + */ + private[sql] def getTTLValue(userKey: K): Option[(V, Long)] = { + val encodedCompositeKey = stateTypesEncoder.encodeCompositeKey(userKey) + val retRow = store.get(encodedCompositeKey, stateName) + + // if the returned row is not null, we want to return the value associated with the + // ttlExpiration + Option(retRow).flatMap { row => + val ttlExpiration = stateTypesEncoder.decodeTtlExpirationMs(row) + ttlExpiration.map(expiration => (stateTypesEncoder.decodeValue(row), expiration)) + } + } + + /** + * Get all ttl values stored in ttl state for current implicit + * grouping key. 
+ */ + private[sql] def getKeyValuesInTTLState(): Iterator[(K, Long)] = { + val ttlIterator = ttlIndexIterator() + val implicitGroupingKey = stateTypesEncoder.serializeGroupingKey() + var nextValue: Option[(K, Long)] = None + + new Iterator[(K, Long)] { + override def hasNext: Boolean = { + while (nextValue.isEmpty && ttlIterator.hasNext) { + val nextTtlValue = ttlIterator.next() + val groupingKey = nextTtlValue.groupingKey + if (groupingKey sameElements implicitGroupingKey) { + val userKey = stateTypesEncoder.decodeUserKeyFromTTLRow(nextTtlValue) + nextValue = Some(userKey, nextTtlValue.expirationMs) + } + } + nextValue.isDefined + } + + override def next(): (K, Long) = { + val result = nextValue.get + nextValue = None + result + } + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLogFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLogFileIndex.scala index 6eaccfb6d9347..45bb69a9c056b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLogFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MetadataLogFileIndex.scala @@ -21,6 +21,8 @@ import scala.collection.mutable import org.apache.hadoop.fs.{FileStatus, Path} +import org.apache.spark.internal.LogKeys._ +import org.apache.spark.internal.MDC import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.types.StructType @@ -47,7 +49,7 @@ class MetadataLogFileIndex( metadataDir } - logInfo(s"Reading streaming file log from $metadataDirectory") + logInfo(log"Reading streaming file log from ${MDC(METADATA_DIRECTORY, metadataDirectory)}") private val metadataLog = new FileStreamSinkLog(FileStreamSinkLog.VERSION, sparkSession, metadataDirectory.toString) private val allFilesFromLog = metadataLog.allFiles().map(_.toFileStatus).filterNot(_.isDirectory) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index ae5a033538abf..f636413f7c518 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.streaming import scala.collection.mutable.{Map => MutableMap} import scala.collection.mutable +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, CurrentBatchTimestamp, CurrentDate, CurrentTimestamp, FileSourceMetadataAttribute, LocalTimestamp} @@ -101,17 +102,17 @@ class MicroBatchExecution( // See SPARK-45178 for more details. if (sparkSession.sessionState.conf.getConf( SQLConf.STREAMING_TRIGGER_AVAILABLE_NOW_WRAPPER_ENABLED)) { - logInfo("Configured to use the wrapper of Trigger.AvailableNow for query " + - s"$prettyIdString.") + logInfo(log"Configured to use the wrapper of Trigger.AvailableNow for query " + + log"${MDC(LogKeys.PRETTY_ID_STRING, prettyIdString)}.") MultiBatchExecutor() } else { val supportsTriggerAvailableNow = sources.distinct.forall { src => val supports = src.isInstanceOf[SupportsTriggerAvailableNow] if (!supports) { - logWarning(s"source [$src] does not support Trigger.AvailableNow. 
Falling back to " + - "single batch execution. Note that this may not guarantee processing new data if " + - "there is an uncommitted batch. Please consult with data source developer to " + - "support Trigger.AvailableNow.") + logWarning(log"source [${MDC(LogKeys.SPARK_DATA_STREAM, src)}] does not support " + + log"Trigger.AvailableNow. Falling back to single batch execution. Note that this " + + log"may not guarantee processing new data if there is an uncommitted batch. " + + log"Please consult with data source developer to support Trigger.AvailableNow.") } supports @@ -156,7 +157,9 @@ class MicroBatchExecution( val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" val source = dataSourceV1.createSource(metadataPath) nextSourceId += 1 - logInfo(s"Using Source [$source] from DataSourceV1 named '$sourceName' [$dataSourceV1]") + logInfo(log"Using Source [${MDC(LogKeys.STREAMING_SOURCE, source)}] " + + log"from DataSourceV1 named '${MDC(LogKeys.STREAMING_DATA_SOURCE_NAME, sourceName)}' " + + log"[${MDC(LogKeys.STREAMING_DATA_SOURCE_DESCRIPTION, dataSourceV1)}]") StreamingExecutionRelation(source, output, dataSourceV1.catalogTable)(sparkSession) }) @@ -169,7 +172,9 @@ class MicroBatchExecution( // Materialize source to avoid creating it in every batch val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" nextSourceId += 1 - logInfo(s"Reading table [$table] from DataSourceV2 named '$srcName' $dsStr") + logInfo(log"Reading table [${MDC(LogKeys.STREAMING_TABLE, table)}] " + + log"from DataSourceV2 named '${MDC(LogKeys.STREAMING_DATA_SOURCE_NAME, srcName)}' " + + log"${MDC(LogKeys.STREAMING_DATA_SOURCE_DESCRIPTION, dsStr)}") // TODO: operator pushdown. val scan = table.newScanBuilder(options).build() val stream = scan.toMicroBatchStream(metadataPath) @@ -187,7 +192,9 @@ class MicroBatchExecution( val source = v1.get.asInstanceOf[StreamingRelation].dataSource.createSource(metadataPath) nextSourceId += 1 - logInfo(s"Using Source [$source] from DataSourceV2 named '$srcName' $dsStr") + logInfo(log"Using Source [${MDC(LogKeys.STREAMING_SOURCE, source)}] from " + + log"DataSourceV2 named '${MDC(LogKeys.STREAMING_DATA_SOURCE_NAME, srcName)}' " + + log"${MDC(LogKeys.STREAMING_DATA_SOURCE_DESCRIPTION, dsStr)}") // We don't have a catalog table but may have a table identifier. Given this is about // v1 fallback path, we just give up and set the catalog table as None. 
StreamingExecutionRelation(source, output, None)(sparkSession) @@ -211,8 +218,8 @@ class MicroBatchExecution( case s: SupportsAdmissionControl => val limit = s.getDefaultReadLimit if (limit != ReadLimit.allAvailable()) { - logWarning( - s"The read limit $limit for $s is ignored when Trigger.Once is used.") + logWarning(log"The read limit ${MDC(LogKeys.READ_LIMIT, limit)} for " + + log"${MDC(LogKeys.SPARK_DATA_STREAM, s)} is ignored when Trigger.Once is used.") } s -> ReadLimit.allAvailable() case s => @@ -278,7 +285,7 @@ class MicroBatchExecution( // microBatchThread may spawn new jobs, so we need to cancel again to prevent a leak sparkSession.sparkContext.cancelJobGroup(runId.toString) } - logInfo(s"Query $prettyIdString was stopped") + logInfo(log"Query ${MDC(LogKeys.PRETTY_ID_STRING, prettyIdString)} was stopped") } private val watermarkPropagator = WatermarkPropagator(sparkSession.sessionState.conf) @@ -288,7 +295,8 @@ class MicroBatchExecution( // shutdown and cleanup required for async log purge mechanism asyncLogPurgeShutdown() - logInfo(s"Async log purge executor pool for query ${prettyIdString} has been shutdown") + logInfo(log"Async log purge executor pool for query " + + log"${MDC(LogKeys.PRETTY_ID_STRING, prettyIdString)} has been shutdown") } private def initializeExecution( @@ -304,7 +312,7 @@ class MicroBatchExecution( setLatestExecutionContext(execCtx) populateStartOffsets(execCtx, sparkSessionForStream) - logInfo(s"Stream started from ${execCtx.startOffsets}") + logInfo(log"Stream started from ${MDC(LogKeys.STREAMING_OFFSETS_START, execCtx.startOffsets)}") execCtx } /** @@ -411,12 +419,12 @@ class MicroBatchExecution( def validateOffsetLogAndGetPrevOffset(latestBatchId: Long): Option[OffsetSeq] = { if (latestBatchId != 0) { Some(offsetLog.get(latestBatchId - 1).getOrElse { - logError(s"The offset log for batch ${latestBatchId - 1} doesn't exist, " + - s"which is required to restart the query from the latest batch $latestBatchId " + - "from the offset log. Please ensure there are two subsequent offset logs " + - "available for the latest batch via manually deleting the offset file(s). " + - "Please also ensure the latest batch for commit log is equal or one batch " + - "earlier than the latest batch for offset log.") + logError(log"The offset log for batch ${MDC(LogKeys.BATCH_ID, latestBatchId - 1)} " + + log"doesn't exist, which is required to restart the query from the latest batch " + + log"${MDC(LogKeys.LATEST_BATCH_ID, latestBatchId)} from the offset log. Please ensure " + + log"there are two subsequent offset logs available for the latest batch via manually " + + log"deleting the offset file(s). Please also ensure the latest batch for commit log is " + + log"equal or one batch earlier than the latest batch for offset log.") throw new IllegalStateException(s"batch ${latestBatchId - 1} doesn't exist") }) } else { @@ -510,16 +518,17 @@ class MicroBatchExecution( // here, so we do nothing here. 
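// Editor's sketch (not part of the patch): the user-facing trigger behind the
// Trigger.AvailableNow handling and warnings in the MicroBatchExecution hunks above.
// Sources that do not implement SupportsTriggerAvailableNow fall back to single-batch
// execution (or to the wrapper, when the wrapper conf mentioned earlier is enabled).
// The "rate" source and console sink are placeholders for illustration only.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger

object AvailableNowSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("available-now-sketch").getOrCreate()
    val query = spark.readStream
      .format("rate")
      .load()
      .writeStream
      .format("console")
      .trigger(Trigger.AvailableNow())   // process all currently available data, then stop
      .start()
    query.awaitTermination()
  }
}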
} } else if (latestCommittedBatchId < latestBatchId - 1) { - logWarning(s"Batch completion log latest batch id is " + - s"${latestCommittedBatchId}, which is not trailing " + - s"batchid $latestBatchId by one") + logWarning(log"Batch completion log latest batch id is " + + log"${MDC(LogKeys.LATEST_COMMITTED_BATCH_ID, latestCommittedBatchId)}, which is " + + log"not trailing batchid ${MDC(LogKeys.LATEST_BATCH_ID, latestBatchId)} by one") } case None => logInfo("no commit log present") } // initialize committed offsets to start offsets of the most recent committed batch committedOffsets = execCtx.startOffsets - logInfo(s"Resuming at batch ${execCtx.batchId} with committed offsets " + - s"${execCtx.startOffsets} and available offsets ${execCtx.endOffsets}") + logInfo(log"Resuming at batch ${MDC(LogKeys.BATCH_ID, execCtx.batchId)} with committed " + + log"offsets ${MDC(LogKeys.STREAMING_OFFSETS_START, execCtx.startOffsets)} and " + + log"available offsets ${MDC(LogKeys.STREAMING_OFFSETS_END, execCtx.endOffsets)}") case None => // We are starting this stream for the first time. logInfo(s"Starting new streaming query.") execCtx.batchId = 0 @@ -748,8 +757,8 @@ class MicroBatchExecution( } } else if (catalogTable.exists(_ ne newRelation.catalogTable.get)) { // Output a warning if `catalogTable` is provided by the source rather than engine - logWarning( - s"Source $source should not produce the information of catalog table by its own.") + logWarning(log"Source ${MDC(LogKeys.SPARK_DATA_STREAM, source)} should not " + + log"produce the information of catalog table by its own.") } newRelation } @@ -873,8 +882,8 @@ class MicroBatchExecution( throw QueryExecutionErrors.concurrentStreamLogUpdate(execCtx.batchId) } - logInfo(s"Committed offsets for batch ${execCtx.batchId}. " + - s"Metadata ${execCtx.offsetSeqMetadata.toString}") + logInfo(log"Committed offsets for batch ${MDC(LogKeys.BATCH_ID, execCtx.batchId)}. 
" + + log"Metadata ${MDC(LogKeys.OFFSET_SEQUENCE_METADATA, execCtx.offsetSeqMetadata.toString)}") } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala index 006d6221e55aa..f0be33ad9a9d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/OffsetSeq.scala @@ -20,12 +20,13 @@ package org.apache.spark.sql.execution.streaming import org.json4s.{Formats, NoTypeHints} import org.json4s.jackson.Serialization -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CONFIG, DEFAULT_VALUE, NEW_VALUE, OLD_VALUE, TIP} import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.RuntimeConfig import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, SparkDataStream} import org.apache.spark.sql.execution.streaming.state.{FlatMapGroupsWithStateExecHelper, StreamingAggregationStateManager, SymmetricHashJoinStateManager} -import org.apache.spark.sql.internal.SQLConf.{FLATMAPGROUPSWITHSTATE_STATE_FORMAT_VERSION, _} +import org.apache.spark.sql.internal.SQLConf._ /** @@ -143,8 +144,9 @@ object OffsetSeqMetadata extends Logging { // Config value exists in the metadata, update the session config with this value val optionalValueInSession = sessionConf.getOption(confKey) if (optionalValueInSession.isDefined && optionalValueInSession.get != valueInMetadata) { - logWarning(s"Updating the value of conf '$confKey' in current session from " + - s"'${optionalValueInSession.get}' to '$valueInMetadata'.") + logWarning(log"Updating the value of conf '${MDC(CONFIG, confKey)}' in current " + + log"session from '${MDC(OLD_VALUE, optionalValueInSession.get)}' " + + log"to '${MDC(NEW_VALUE, valueInMetadata)}'.") } sessionConf.set(confKey, valueInMetadata) @@ -156,14 +158,15 @@ object OffsetSeqMetadata extends Logging { case Some(defaultValue) => sessionConf.set(confKey, defaultValue) - logWarning(s"Conf '$confKey' was not found in the offset log, " + - s"using default value '$defaultValue'") + logWarning(log"Conf '${MDC(CONFIG, confKey)}' was not found in the offset log, " + + log"using default value '${MDC(DEFAULT_VALUE, defaultValue)}'") case None => val valueStr = sessionConf.getOption(confKey).map { v => s" Using existing session conf value '$v'." }.getOrElse { " No value set in session conf." } - logWarning(s"Conf '$confKey' was not found in the offset log. $valueStr") + logWarning(log"Conf '${MDC(CONFIG, confKey)}' was not found in the offset log. 
" + + log"${MDC(TIP, valueStr)}") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala index 0d32eed9b6bdb..c440ec451b724 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala @@ -25,7 +25,7 @@ import java.util.{Optional, UUID} import scala.collection.mutable import scala.jdk.CollectionConverters._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.optimizer.InlineCTE import org.apache.spark.sql.catalyst.plans.logical.{EventTimeWatermark, LogicalPlan, WithCTE} @@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.{MicroBatchScanExec, StreamingDataSourceV2ScanRelation, StreamWriterCommitProgress} import org.apache.spark.sql.streaming._ import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryIdleEvent, QueryProgressEvent} -import org.apache.spark.util.Clock +import org.apache.spark.util.{Clock, Utils} /** * Responsible for continually reporting statistics about the amount of data processed as well @@ -81,7 +81,8 @@ class ProgressReporter( addNewProgress(newProgress) postEvent(new QueryProgressEvent(newProgress)) - logInfo(s"Streaming query made progress: $newProgress") + logInfo( + log"Streaming query made progress: ${MDC(LogKeys.STREAMING_QUERY_PROGRESS, newProgress)}") } private def addNewProgress(newProgress: StreamingQueryProgress): Unit = { @@ -103,8 +104,8 @@ class ProgressReporter( addNewProgress(newProgress) if (lastNoExecutionProgressEventTime > Long.MinValue) { postEvent(new QueryIdleEvent(id, runId, formatTimestamp(currentTriggerStartTimestamp))) - logInfo(s"Streaming query has been idle and waiting for new data more than " + - s"${noDataProgressEventInterval} ms.") + logInfo(log"Streaming query has been idle and waiting for new data more than " + + log"${MDC(LogKeys.TIME_UNITS, noDataProgressEventInterval)} ms.") } lastNoExecutionProgressEventTime = now @@ -333,33 +334,44 @@ abstract class ProgressContext( inputTimeSec: Double, processingTimeSec: Double): Seq[SourceProgress] = { sources.distinct.map { source => - val numRecords = execStats.flatMap(_.inputRows.get(source)).getOrElse(0L) - val sourceMetrics = source match { - case withMetrics: ReportsSourceMetrics => - withMetrics.metrics(Optional.ofNullable(latestStreamProgress.get(source).orNull)) - case _ => Map[String, String]().asJava + val (result, duration) = Utils.timeTakenMs { + val numRecords = execStats.flatMap(_.inputRows.get(source)).getOrElse(0L) + val sourceMetrics = source match { + case withMetrics: ReportsSourceMetrics => + withMetrics.metrics(Optional.ofNullable(latestStreamProgress.get(source).orNull)) + case _ => Map[String, String]().asJava + } + new SourceProgress( + description = source.toString, + startOffset = currentTriggerStartOffsets.get(source).orNull, + endOffset = currentTriggerEndOffsets.get(source).orNull, + latestOffset = currentTriggerLatestOffsets.get(source).orNull, + numInputRows = numRecords, + inputRowsPerSecond = numRecords / inputTimeSec, + processedRowsPerSecond = numRecords / processingTimeSec, + metrics = sourceMetrics + ) } - new SourceProgress( - description = source.toString, - startOffset = 
currentTriggerStartOffsets.get(source).orNull, - endOffset = currentTriggerEndOffsets.get(source).orNull, - latestOffset = currentTriggerLatestOffsets.get(source).orNull, - numInputRows = numRecords, - inputRowsPerSecond = numRecords / inputTimeSec, - processedRowsPerSecond = numRecords / processingTimeSec, - metrics = sourceMetrics - ) + logInfo(log"Extracting source progress metrics for source=" + + log"${MDC(LogKeys.SOURCE, source.toString)} " + + log"took duration_ms=${MDC(LogKeys.DURATION, duration)}") + result } } private def extractSinkProgress(execStats: Option[ExecutionStats]): SinkProgress = { - val sinkOutput = execStats.flatMap(_.outputRows) - val sinkMetrics = sink match { - case withMetrics: ReportsSinkMetrics => withMetrics.metrics() - case _ => Map[String, String]().asJava - } + val (result, duration) = Utils.timeTakenMs { + val sinkOutput = execStats.flatMap(_.outputRows) + val sinkMetrics = sink match { + case withMetrics: ReportsSinkMetrics => withMetrics.metrics() + case _ => Map[String, String]().asJava + } - SinkProgress(sink.toString, sinkOutput, sinkMetrics) + SinkProgress(sink.toString, sinkOutput, sinkMetrics) + } + logInfo(log"Extracting sink progress metrics for sink=${MDC(LogKeys.SINK, sink.toString)} " + + log"took duration_ms=${MDC(LogKeys.DURATION, duration)}") + result } /** @@ -382,9 +394,10 @@ abstract class ProgressContext( val finishTriggerDurationMillis = triggerClock.getTimeMillis() - triggerEndTimestamp val thresholdForLoggingMillis = 60 * 1000 if (finishTriggerDurationMillis > math.max(thresholdForLoggingMillis, processingTimeMills)) { - logWarning("Query progress update takes longer than batch processing time. Progress " + - s"update takes $finishTriggerDurationMillis milliseconds. Batch processing takes " + - s"$processingTimeMills milliseconds") + logWarning(log"Query progress update takes longer than batch processing time. Progress " + + log"update takes ${MDC(LogKeys.FINISH_TRIGGER_DURATION, finishTriggerDurationMillis)} " + + log"milliseconds. 
Batch processing takes " + + log"${MDC(LogKeys.PROCESSING_TIME, processingTimeMills)} milliseconds") } } @@ -485,11 +498,10 @@ abstract class ProgressContext( if (!metricWarningLogged) { def toString[T](seq: Seq[T]): String = s"(size = ${seq.size}), ${seq.mkString(", ")}" - logWarning( - "Could not report metrics as number leaves in trigger logical plan did not match that" + - s" of the execution plan:\n" + - s"logical plan leaves: ${toString(allLogicalPlanLeaves)}\n" + - s"execution plan leaves: ${toString(allExecPlanLeaves)}\n") + logWarning(log"Could not report metrics as number leaves in trigger logical plan did " + + log"not match that of the execution plan:\nlogical plan leaves: " + + log"${MDC(LogKeys.LOGICAL_PLAN_LEAVES, toString(allLogicalPlanLeaves))}\nexecution " + + log"plan leaves: ${MDC(LogKeys.EXECUTION_PLAN_LEAVES, toString(allExecPlanLeaves))}\n") metricWarningLogged = true } Map.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ResolveWriteToStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ResolveWriteToStream.scala index 35bb7db6a6e13..0d4ab9d147b8d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ResolveWriteToStream.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ResolveWriteToStream.scala @@ -23,6 +23,8 @@ import scala.util.control.NonFatal import org.apache.hadoop.fs.Path +import org.apache.spark.internal.LogKeys.{CHECKPOINT_LOCATION, CHECKPOINT_ROOT, CONFIG, PATH} +import org.apache.spark.internal.MDC import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -42,8 +44,8 @@ object ResolveWriteToStream extends Rule[LogicalPlan] with SQLConfHelper { val (resolvedCheckpointLocation, deleteCheckpointOnStop) = resolveCheckpointLocation(s) if (conf.adaptiveExecutionEnabled) { - logWarning(s"${SQLConf.ADAPTIVE_EXECUTION_ENABLED.key} " + - "is not supported in streaming DataFrames/Datasets and will be disabled.") + logWarning(log"${MDC(CONFIG, SQLConf.ADAPTIVE_EXECUTION_ENABLED.key)} " + + log"is not supported in streaming DataFrames/Datasets and will be disabled.") } if (conf.isUnsupportedOperationCheckEnabled) { @@ -77,10 +79,11 @@ object ResolveWriteToStream extends Rule[LogicalPlan] with SQLConfHelper { if (s.useTempCheckpointLocation) { deleteCheckpointOnStop = true val tempDir = Utils.createTempDir(namePrefix = "temporary").getCanonicalPath - logWarning("Temporary checkpoint location created which is deleted normally when" + - s" the query didn't fail: $tempDir. If it's required to delete it under any" + - s" circumstances, please set ${SQLConf.FORCE_DELETE_TEMP_CHECKPOINT_LOCATION.key} to" + - s" true. Important to know deleting temp checkpoint folder is best effort.") + logWarning(log"Temporary checkpoint location created which is deleted normally when" + + log" the query didn't fail: ${MDC(PATH, tempDir)}. If it's required to delete " + + log"it under any circumstances, please set " + + log"${MDC(CONFIG, SQLConf.FORCE_DELETE_TEMP_CHECKPOINT_LOCATION.key)} to" + + log" true. Important to know deleting temp checkpoint folder is best effort.") // SPARK-42676 - Write temp checkpoints for streaming queries to local filesystem // even if default FS is set differently. // This is a band-aid fix. 
Ideally we should convert `tempDir` to URIs, but there @@ -131,7 +134,8 @@ object ResolveWriteToStream extends Rule[LogicalPlan] with SQLConfHelper { val checkpointDir = fileManager.createCheckpointDirectory() checkpointDir.toString } - logInfo(s"Checkpoint root $checkpointLocation resolved to $resolvedCheckpointRoot.") + logInfo(log"Checkpoint root ${MDC(CHECKPOINT_LOCATION, checkpointLocation)} " + + log"resolved to ${MDC(CHECKPOINT_ROOT, resolvedCheckpointRoot)}.") (resolvedCheckpointRoot, deleteCheckpointOnStop) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateTypesEncoderUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateTypesEncoderUtils.scala index 56b0731e0db47..ed881b49ec1e9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateTypesEncoderUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StateTypesEncoderUtils.scala @@ -27,6 +27,9 @@ import org.apache.spark.sql.types.{BinaryType, LongType, StructType} object TransformWithStateKeyValueRowSchema { val KEY_ROW_SCHEMA: StructType = new StructType().add("key", BinaryType) + val COMPOSITE_KEY_ROW_SCHEMA: StructType = new StructType() + .add("key", BinaryType) + .add("userKey", BinaryType) val VALUE_ROW_SCHEMA: StructType = new StructType() .add("value", BinaryType) val VALUE_ROW_SCHEMA_WITH_TTL: StructType = new StructType() @@ -192,6 +195,28 @@ class CompositeKeyStateEncoder[GK, K, V]( compositeKeyRow } + def decodeUserKeyFromTTLRow(row: CompositeKeyTTLRow): K = { + val bytes = row.userKey + reusedKeyRow.pointTo(bytes, bytes.length) + val userKey = userKeyRowToObjDeserializer.apply(reusedKeyRow) + userKey + } + + /** + * Grouping key and user key are encoded as a row of `schemaForCompositeKeyRow` schema. + * Grouping key will be encoded in `RocksDBStateEncoder` as the prefix column. + */ + def encodeCompositeKey( + groupingKeyByteArr: Array[Byte], + userKeyByteArr: Array[Byte]): UnsafeRow = { + val compositeKeyRow = compositeKeyProjection(InternalRow(groupingKeyByteArr, userKeyByteArr)) + compositeKeyRow + } + + def serializeUserKey(userKey: K): Array[Byte] = { + userKeySerializer.apply(userKey).asInstanceOf[UnsafeRow].getBytes + } + /** * The input row is of composite Key schema. * Only user key is returned though grouping key also exist in the row. 
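The encoder additions above pack the grouping key and the user key into a single two-column BinaryType row, so the grouping key can serve as the prefix column when the row is stored in RocksDB. A rough, self-contained sketch of that encoding; the object name is invented, but the schema mirrors COMPOSITE_KEY_ROW_SCHEMA from this patch:

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow}
import org.apache.spark.sql.types.{BinaryType, StructType}

// Sketch of the composite-key layout: column 0 holds the serialized grouping
// key (usable as a store prefix), column 1 holds the serialized user key.
object CompositeKeyEncodingSketch {
  private val schema: StructType = new StructType()
    .add("key", BinaryType)
    .add("userKey", BinaryType)
  private val projection = UnsafeProjection.create(schema)

  def encode(groupingKey: Array[Byte], userKey: Array[Byte]): UnsafeRow =
    projection(InternalRow(groupingKey, userKey))
}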
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala index 885df96a206a0..277e1516425d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImpl.scala @@ -19,14 +19,16 @@ package org.apache.spark.sql.execution.streaming import java.util import java.util.UUID +import scala.collection.mutable + import org.apache.spark.TaskContext import org.apache.spark.internal.Logging import org.apache.spark.sql.Encoder import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.metric.SQLMetric +import org.apache.spark.sql.execution.streaming.StatefulProcessorHandleState.PRE_INIT import org.apache.spark.sql.execution.streaming.state._ -import org.apache.spark.sql.streaming.{ListState, MapState, QueryInfo, StatefulProcessorHandle, TimeMode, TTLConfig, ValueState} +import org.apache.spark.sql.streaming.{ListState, MapState, QueryInfo, TimeMode, TTLConfig, ValueState} import org.apache.spark.util.Utils /** @@ -48,7 +50,7 @@ object ImplicitGroupingKeyTracker { */ object StatefulProcessorHandleState extends Enumeration { type StatefulProcessorHandleState = Value - val CREATED, INITIALIZED, DATA_PROCESSED, TIMER_PROCESSED, CLOSED = Value + val CREATED, PRE_INIT, INITIALIZED, DATA_PROCESSED, TIMER_PROCESSED, CLOSED = Value } class QueryInfoImpl( @@ -85,7 +87,7 @@ class StatefulProcessorHandleImpl( isStreaming: Boolean = true, batchTimestampMs: Option[Long] = None, metrics: Map[String, SQLMetric] = Map.empty) - extends StatefulProcessorHandle with Logging { + extends StatefulProcessorHandleImplBase(timeMode) with Logging { import StatefulProcessorHandleState._ /** @@ -96,6 +98,8 @@ class StatefulProcessorHandleImpl( private val BATCH_QUERY_ID = "00000000-0000-0000-0000-000000000000" + currState = CREATED + private def buildQueryInfo(): QueryInfo = { val taskCtxOpt = Option(TaskContext.get()) val (queryId, batchId) = if (!isStreaming) { @@ -113,22 +117,14 @@ class StatefulProcessorHandleImpl( private lazy val currQueryInfo: QueryInfo = buildQueryInfo() - private var currState: StatefulProcessorHandleState = CREATED - private def incrementMetric(metricName: String): Unit = { metrics.get(metricName).foreach(_.add(1)) } - def setHandleState(newState: StatefulProcessorHandleState): Unit = { - currState = newState - } - - def getHandleState: StatefulProcessorHandleState = currState - override def getValueState[T]( stateName: String, valEncoder: Encoder[T]): ValueState[T] = { - verifyStateVarOperations("get_value_state") + verifyStateVarOperations("get_value_state", CREATED) incrementMetric("numValueStateVars") val resultState = new ValueStateImpl[T](store, stateName, keyEncoder, valEncoder) resultState @@ -138,7 +134,7 @@ class StatefulProcessorHandleImpl( stateName: String, valEncoder: Encoder[T], ttlConfig: TTLConfig): ValueState[T] = { - verifyStateVarOperations("get_value_state") + verifyStateVarOperations("get_value_state", CREATED) validateTTLConfig(ttlConfig, stateName) assert(batchTimestampMs.isDefined) @@ -153,25 +149,6 @@ class StatefulProcessorHandleImpl( private lazy val timerState = new TimerStateImpl(store, timeMode, keyEncoder) - private def verifyStateVarOperations(operationType: String): Unit = { - 
if (currState != CREATED) { - throw StateStoreErrors.cannotPerformOperationWithInvalidHandleState(operationType, - currState.toString) - } - } - - private def verifyTimerOperations(operationType: String): Unit = { - if (timeMode == NoTime) { - throw StateStoreErrors.cannotPerformOperationWithInvalidTimeMode(operationType, - timeMode.toString) - } - - if (currState < INITIALIZED || currState >= TIMER_PROCESSED) { - throw StateStoreErrors.cannotPerformOperationWithInvalidHandleState(operationType, - currState.toString) - } - } - /** * Function to register a timer for the given expiryTimestampMs * @param expiryTimestampMs - timestamp in milliseconds for the timer to expire @@ -232,14 +209,14 @@ class StatefulProcessorHandleImpl( * @param stateName - name of the state variable */ override def deleteIfExists(stateName: String): Unit = { - verifyStateVarOperations("delete_if_exists") + verifyStateVarOperations("delete_if_exists", CREATED) if (store.removeColFamilyIfExists(stateName)) { incrementMetric("numDeletedStateVars") } } override def getListState[T](stateName: String, valEncoder: Encoder[T]): ListState[T] = { - verifyStateVarOperations("get_list_state") + verifyStateVarOperations("get_list_state", CREATED) incrementMetric("numListStateVars") val resultState = new ListStateImpl[T](store, stateName, keyEncoder, valEncoder) resultState @@ -265,7 +242,7 @@ class StatefulProcessorHandleImpl( valEncoder: Encoder[T], ttlConfig: TTLConfig): ListState[T] = { - verifyStateVarOperations("get_list_state") + verifyStateVarOperations("get_list_state", CREATED) validateTTLConfig(ttlConfig, stateName) assert(batchTimestampMs.isDefined) @@ -281,12 +258,29 @@ class StatefulProcessorHandleImpl( stateName: String, userKeyEnc: Encoder[K], valEncoder: Encoder[V]): MapState[K, V] = { - verifyStateVarOperations("get_map_state") + verifyStateVarOperations("get_map_state", CREATED) incrementMetric("numMapStateVars") val resultState = new MapStateImpl[K, V](store, stateName, keyEncoder, userKeyEnc, valEncoder) resultState } + override def getMapState[K, V]( + stateName: String, + userKeyEnc: Encoder[K], + valEncoder: Encoder[V], + ttlConfig: TTLConfig): MapState[K, V] = { + verifyStateVarOperations("get_map_state", CREATED) + validateTTLConfig(ttlConfig, stateName) + + assert(batchTimestampMs.isDefined) + val mapStateWithTTL = new MapStateImplWithTTL[K, V](store, stateName, keyEncoder, userKeyEnc, + valEncoder, ttlConfig, batchTimestampMs.get) + incrementMetric("numMapStateWithTTLVars") + ttlStates.add(mapStateWithTTL) + + mapStateWithTTL + } + private def validateTTLConfig(ttlConfig: TTLConfig, stateName: String): Unit = { val ttlDuration = ttlConfig.ttlDuration if (timeMode != TimeMode.ProcessingTime()) { @@ -296,3 +290,111 @@ class StatefulProcessorHandleImpl( } } } + +/** + * This DriverStatefulProcessorHandleImpl is used within TransformWithExec + * on the driver side to collect the columnFamilySchemas before any processing is + * actually done. We need this class because we can only collect the schemas after + * the StatefulProcessor is initialized. 
+ */ +class DriverStatefulProcessorHandleImpl(timeMode: TimeMode) + extends StatefulProcessorHandleImplBase(timeMode) { + + private[sql] val columnFamilySchemaUtils = ColumnFamilySchemaUtilsV1 + + // Because this is only happening on the driver side, there is only + // one task modifying and accessing this map at a time + private[sql] val columnFamilySchemas: mutable.Map[String, ColumnFamilySchema] = + new mutable.HashMap[String, ColumnFamilySchema]() + + def getColumnFamilySchemas: Map[String, ColumnFamilySchema] = columnFamilySchemas.toMap + + override def getValueState[T](stateName: String, valEncoder: Encoder[T]): ValueState[T] = { + verifyStateVarOperations("get_value_state", PRE_INIT) + val colFamilySchema = columnFamilySchemaUtils. + getValueStateSchema(stateName, false) + columnFamilySchemas.put(stateName, colFamilySchema) + null.asInstanceOf[ValueState[T]] + } + + override def getValueState[T]( + stateName: String, + valEncoder: Encoder[T], + ttlConfig: TTLConfig): ValueState[T] = { + verifyStateVarOperations("get_value_state", PRE_INIT) + val colFamilySchema = columnFamilySchemaUtils. + getValueStateSchema(stateName, true) + columnFamilySchemas.put(stateName, colFamilySchema) + null.asInstanceOf[ValueState[T]] + } + + override def getListState[T](stateName: String, valEncoder: Encoder[T]): ListState[T] = { + verifyStateVarOperations("get_list_state", PRE_INIT) + val colFamilySchema = columnFamilySchemaUtils. + getListStateSchema(stateName, false) + columnFamilySchemas.put(stateName, colFamilySchema) + null.asInstanceOf[ListState[T]] + } + + override def getListState[T]( + stateName: String, + valEncoder: Encoder[T], + ttlConfig: TTLConfig): ListState[T] = { + verifyStateVarOperations("get_list_state", PRE_INIT) + val colFamilySchema = columnFamilySchemaUtils. + getListStateSchema(stateName, true) + columnFamilySchemas.put(stateName, colFamilySchema) + null.asInstanceOf[ListState[T]] + } + + override def getMapState[K, V]( + stateName: String, + userKeyEnc: Encoder[K], + valEncoder: Encoder[V]): MapState[K, V] = { + verifyStateVarOperations("get_map_state", PRE_INIT) + val colFamilySchema = columnFamilySchemaUtils. + getMapStateSchema(stateName, userKeyEnc, false) + columnFamilySchemas.put(stateName, colFamilySchema) + null.asInstanceOf[MapState[K, V]] + } + + override def getMapState[K, V]( + stateName: String, + userKeyEnc: Encoder[K], + valEncoder: Encoder[V], + ttlConfig: TTLConfig): MapState[K, V] = { + verifyStateVarOperations("get_map_state", PRE_INIT) + val colFamilySchema = columnFamilySchemaUtils. + getMapStateSchema(stateName, userKeyEnc, true) + columnFamilySchemas.put(stateName, colFamilySchema) + null.asInstanceOf[MapState[K, V]] + } + + /** Function to return queryInfo for currently running task */ + override def getQueryInfo(): QueryInfo = { + new QueryInfoImpl(UUID.randomUUID(), UUID.randomUUID(), 0L) + } + + /** + * Methods that are only included to satisfy the interface. + * These methods will fail if called from the driver side, as the handle + * will be in the PRE_INIT phase, and all these timer operations need to be + * called from the INITIALIZED phase. 
+ */ + override def registerTimer(expiryTimestampMs: Long): Unit = { + verifyTimerOperations("register_timer") + } + + override def deleteTimer(expiryTimestampMs: Long): Unit = { + verifyTimerOperations("delete_timer") + } + + override def listTimers(): Iterator[Long] = { + verifyTimerOperations("list_timers") + Iterator.empty + } + + override def deleteIfExists(stateName: String): Unit = { + verifyStateVarOperations("delete_if_exists", PRE_INIT) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImplBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImplBase.scala new file mode 100644 index 0000000000000..3b952967e35d9 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StatefulProcessorHandleImplBase.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.streaming + +import org.apache.spark.sql.catalyst.plans.logical.NoTime +import org.apache.spark.sql.execution.streaming.StatefulProcessorHandleState.{INITIALIZED, PRE_INIT, StatefulProcessorHandleState, TIMER_PROCESSED} +import org.apache.spark.sql.execution.streaming.state.StateStoreErrors +import org.apache.spark.sql.streaming.{StatefulProcessorHandle, TimeMode} + +abstract class StatefulProcessorHandleImplBase(timeMode: TimeMode) + extends StatefulProcessorHandle { + + protected var currState: StatefulProcessorHandleState = PRE_INIT + + def setHandleState(newState: StatefulProcessorHandleState): Unit = { + currState = newState + } + + def getHandleState: StatefulProcessorHandleState = currState + + def verifyTimerOperations(operationType: String): Unit = { + if (timeMode == NoTime) { + throw StateStoreErrors.cannotPerformOperationWithInvalidTimeMode(operationType, + timeMode.toString) + } + + if (currState < INITIALIZED || currState >= TIMER_PROCESSED) { + throw StateStoreErrors.cannotPerformOperationWithInvalidHandleState(operationType, + currState.toString) + } + } + + def verifyStateVarOperations( + operationType: String, + requiredState: StatefulProcessorHandleState): Unit = { + if (currState != requiredState) { + throw StateStoreErrors.cannotPerformOperationWithInvalidHandleState(operationType, + currState.toString) + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 50a73082a8c4a..81f7acdb755bc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -32,7 +32,8 @@ import 
com.google.common.util.concurrent.UncheckedExecutionException import org.apache.hadoop.fs.Path import org.apache.spark.{JobArtifactSet, SparkContext, SparkException, SparkThrowable} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CHECKPOINT_PATH, CHECKPOINT_ROOT, LOGICAL_PLAN, PATH, PRETTY_ID_STRING, SPARK_DATA_STREAM} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ @@ -260,7 +261,8 @@ abstract class StreamExecution( * has been posted to all the listeners. */ def start(): Unit = { - logInfo(s"Starting $prettyIdString. Use $resolvedCheckpointRoot to store the query checkpoint.") + logInfo(log"Starting ${MDC(PRETTY_ID_STRING, prettyIdString)}. " + + log"Use ${MDC(CHECKPOINT_ROOT, resolvedCheckpointRoot)} to store the query checkpoint.") queryExecutionThread.setDaemon(true) queryExecutionThread.start() startLatch.await() // Wait until thread started and QueryStart event has been posted @@ -318,6 +320,10 @@ abstract class StreamExecution( batchWatermarkMs = 0, batchTimestampMs = 0, sparkSessionForStream.conf) if (state.compareAndSet(INITIALIZING, ACTIVE)) { + // Log logical plan at the start of the query to help debug issues related to + // plan changes. + logInfo(log"Finish initializing with logical plan:\n${MDC(LOGICAL_PLAN, logicalPlan)}") + // Unblock `awaitInitialization` initializationLatch.countDown() runActivatedStream(sparkSessionForStream) @@ -366,7 +372,7 @@ abstract class StreamExecution( case _ => None } - logError(s"Query $prettyIdString terminated with error", e) + logError(log"Query ${MDC(PRETTY_ID_STRING, prettyIdString)} terminated with error", e) getLatestExecutionContext().updateStatusMessage(s"Terminated with exception: $message") // Rethrow the fatal errors to allow the user using `Thread.UncaughtExceptionHandler` to // handle them @@ -405,13 +411,13 @@ abstract class StreamExecution( .getConf(SQLConf.FORCE_DELETE_TEMP_CHECKPOINT_LOCATION) || exception.isEmpty)) { val checkpointPath = new Path(resolvedCheckpointRoot) try { - logInfo(s"Deleting checkpoint $checkpointPath.") + logInfo(log"Deleting checkpoint ${MDC(CHECKPOINT_PATH, checkpointPath)}.") fileManager.delete(checkpointPath) } catch { case NonFatal(e) => // Deleting temp checkpoint folder is best effort, don't throw non fatal exceptions // when we cannot delete them. - logWarning(s"Cannot delete $checkpointPath", e) + logWarning(log"Cannot delete ${MDC(PATH, checkpointPath)}", e) } } } finally { @@ -446,7 +452,8 @@ abstract class StreamExecution( source.stop() } catch { case NonFatal(e) => - logWarning(s"Failed to stop streaming source: $source. Resources may have leaked.", e) + logWarning(log"Failed to stop streaming source: ${MDC(SPARK_DATA_STREAM, source)}. 
" + + log"Resources may have leaked.", e) } } } @@ -682,7 +689,7 @@ object StreamExecution { classOf[ClosedByInterruptException].getName) val PROXY_ERROR = ( "py4j.protocol.Py4JJavaError: An error occurred while calling" + - s".+(\\r\\n|\\r|\\n): (${IO_EXCEPTION_NAMES.mkString("|")})").r + s"((.|\\r\\n|\\r|\\n)*)(${IO_EXCEPTION_NAMES.mkString("|")})").r @scala.annotation.tailrec def isInterruptionException(e: Throwable, sc: SparkContext): Boolean = e match { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala index 978cb3c34f606..84519150ca42b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamMetadata.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataInputStream, Path import org.json4s.{Formats, NoTypeHints} import org.json4s.jackson.Serialization -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream @@ -60,7 +60,7 @@ object StreamMetadata extends Logging { Some(metadata) } catch { case NonFatal(e) => - logError(s"Error reading stream metadata from $metadataFile", e) + logError(log"Error reading stream metadata from ${MDC(LogKeys.PATH, metadataFile)}", e) throw e } finally { IOUtils.closeQuietly(input) @@ -91,7 +91,8 @@ object StreamMetadata extends Logging { if (output != null) { output.cancel() } - logError(s"Error writing stream metadata $metadata to $metadataFile", e) + logError(log"Error writing stream metadata ${MDC(LogKeys.METADATA, metadata)} to " + + log"${MDC(LogKeys.PATH, metadataFile)}", e) throw e } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index 20a05a1000338..ea275a28780ef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.streaming import java.util.concurrent.TimeUnit.NANOSECONDS +import org.apache.hadoop.conf.Configuration + import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, GenericInternalRow, JoinedRow, Literal, Predicate, UnsafeProjection, UnsafeRow} @@ -31,6 +33,7 @@ import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper import org.apache.spark.sql.execution.streaming.state._ import org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager.KeyToValuePair import org.apache.spark.sql.internal.{SessionState, SQLConf} +import org.apache.spark.sql.types.StructType import org.apache.spark.util.{CompletionIterator, SerializableConfiguration} @@ -243,6 +246,26 @@ case class StreamingSymmetricHashJoinExec( watermarkUsedForStateCleanup && watermarkHasChanged } + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, + batchId: Long, + stateSchemaVersion: Int): Array[String] = { + var result: Map[String, (StructType, 
StructType)] = Map.empty + // get state schema for state stores on left side of the join + result ++= SymmetricHashJoinStateManager.getSchemaForStateStores(LeftSide, + left.output, leftKeys, stateFormatVersion) + + // get state schema for state stores on right side of the join + result ++= SymmetricHashJoinStateManager.getSchemaForStateStores(RightSide, + right.output, rightKeys, stateFormatVersion) + + // validate and maybe evolve schema for all state stores across both sides of the join + result.iterator.flatMap { case (stateStoreName, (keySchema, valueSchema)) => + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + keySchema, valueSchema, session.sessionState, storeName = stateStoreName) + }.toArray + } + protected override def doExecute(): RDD[InternalRow] = { val stateStoreCoord = session.sessionState.streamingQueryManager.stateStoreCoordinator val stateStoreNames = SymmetricHashJoinStateManager.allStateStoreNames(LeftSide, RightSide) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TTLState.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TTLState.scala index b245f8fc14d47..02efcefe19ca6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TTLState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TTLState.scala @@ -27,6 +27,10 @@ object StateTTLSchema { val TTL_KEY_ROW_SCHEMA: StructType = new StructType() .add("expirationMs", LongType) .add("groupingKey", BinaryType) + val TTL_COMPOSITE_KEY_ROW_SCHEMA: StructType = new StructType() + .add("expirationMs", LongType) + .add("groupingKey", BinaryType) + .add("userKey", BinaryType) val TTL_VALUE_ROW_SCHEMA: StructType = StructType(Array(StructField("__dummy__", NullType))) } @@ -41,6 +45,18 @@ case class SingleKeyTTLRow( groupingKey: Array[Byte], expirationMs: Long) +/** + * Encapsulates the ttl row information stored in [[CompositeKeyTTLStateImpl]]. + * + * @param groupingKey grouping key for which ttl is set + * @param userKey user key for which ttl is set + * @param expirationMs expiration time for the grouping key + */ +case class CompositeKeyTTLRow( + groupingKey: Array[Byte], + userKey: Array[Byte], + expirationMs: Long) + /** * Represents the underlying state for secondary TTL Index for a user defined * state variable. @@ -59,23 +75,6 @@ trait TTLState { * @return number of values cleaned up. */ def clearExpiredState(): Long - - /** - * Clears the user state associated with this grouping key - * if it has expired. This function is called by Spark to perform - * cleanup at the end of transformWithState processing. - * - * Spark uses a secondary index to determine if the user state for - * this grouping key has expired. However, its possible that the user - * has updated the TTL and secondary index is out of date. Implementations - * must validate that the user State has actually expired before cleanup based - * on their own State data. - * - * @param groupingKey grouping key for which cleanup should be performed. - * - * @return how many state objects were cleaned up. - */ - def clearIfExpired(groupingKey: Array[Byte]): Long } /** @@ -99,6 +98,18 @@ abstract class SingleKeyTTLStateImpl( store.createColFamilyIfAbsent(ttlColumnFamilyName, TTL_KEY_ROW_SCHEMA, TTL_VALUE_ROW_SCHEMA, RangeKeyScanStateEncoderSpec(TTL_KEY_ROW_SCHEMA, Seq(0)), isInternal = true) + /** + * This function will be called when clear() on State Variables + * with ttl enabled is called. 
This function should clear any + * associated ttlState, since we are clearing the user state. + */ + def clearTTLState(): Unit = { + val iterator = store.iterator(ttlColumnFamilyName) + iterator.foreach { kv => + store.remove(kv.key, ttlColumnFamilyName) + } + } + def upsertTTLForStateKey( expirationMs: Long, groupingKey: Array[Byte]): Unit = { @@ -163,6 +174,112 @@ } } } + + /** + * Clears the user state associated with this grouping key + * if it has expired. This function is called by Spark to perform + * cleanup at the end of transformWithState processing. + * + * Spark uses a secondary index to determine if the user state for + * this grouping key has expired. However, its possible that the user + * has updated the TTL and secondary index is out of date. Implementations + * must validate that the user State has actually expired before cleanup based + * on their own State data. + * + * @param groupingKey grouping key for which cleanup should be performed. + * + * @return how many state objects were cleaned up. + */ + def clearIfExpired(groupingKey: Array[Byte]): Long +} + +/** + * Manages the ttl information for user state keyed with a composite key (grouping key and user key). + */ +abstract class CompositeKeyTTLStateImpl( + stateName: String, + store: StateStore, + ttlExpirationMs: Long) + extends TTLState { + + import org.apache.spark.sql.execution.streaming.StateTTLSchema._ + + private val ttlColumnFamilyName = s"_ttl_$stateName" + private val ttlKeyEncoder = UnsafeProjection.create(TTL_COMPOSITE_KEY_ROW_SCHEMA) + + // empty row used for values + private val EMPTY_ROW = + UnsafeProjection.create(Array[DataType](NullType)).apply(InternalRow.apply(null)) + + store.createColFamilyIfAbsent(ttlColumnFamilyName, TTL_COMPOSITE_KEY_ROW_SCHEMA, + TTL_VALUE_ROW_SCHEMA, RangeKeyScanStateEncoderSpec(TTL_COMPOSITE_KEY_ROW_SCHEMA, + Seq(0)), isInternal = true) + + def clearTTLState(): Unit = { + val iterator = store.iterator(ttlColumnFamilyName) + iterator.foreach { kv => + store.remove(kv.key, ttlColumnFamilyName) + } + } + + def upsertTTLForStateKey( + expirationMs: Long, + groupingKey: Array[Byte], + userKey: Array[Byte]): Unit = { + val encodedTtlKey = ttlKeyEncoder(InternalRow(expirationMs, groupingKey, userKey)) + store.put(encodedTtlKey, EMPTY_ROW, ttlColumnFamilyName) + } + + /** + * Clears any state which has ttl older than [[ttlExpirationMs]]. + */ + override def clearExpiredState(): Long = { + val iterator = store.iterator(ttlColumnFamilyName) + var numRemovedElements = 0L + iterator.takeWhile { kv => + val expirationMs = kv.key.getLong(0) + StateTTL.isExpired(expirationMs, ttlExpirationMs) + }.foreach { kv => + val groupingKey = kv.key.getBinary(1) + val userKey = kv.key.getBinary(2) + numRemovedElements += clearIfExpired(groupingKey, userKey) + store.remove(kv.key, ttlColumnFamilyName) + } + numRemovedElements + } + + private[sql] def ttlIndexIterator(): Iterator[CompositeKeyTTLRow] = { + val ttlIterator = store.iterator(ttlColumnFamilyName) + + new Iterator[CompositeKeyTTLRow] { + override def hasNext: Boolean = ttlIterator.hasNext + + override def next(): CompositeKeyTTLRow = { + val kv = ttlIterator.next() + CompositeKeyTTLRow( + expirationMs = kv.key.getLong(0), + groupingKey = kv.key.getBinary(1), + userKey = kv.key.getBinary(2) + ) + } + } + } + + /** + * Clears the user state associated with this grouping key + * if it has expired. This function is called by Spark to perform + * cleanup at the end of transformWithState processing. 
+ * + * Spark uses a secondary index to determine if the user state for + * this grouping key has expired. However, its possible that the user + * has updated the TTL and secondary index is out of date. Implementations + * must validate that the user State has actually expired before cleanup based + * on their own State data. + * + * @param groupingKey grouping key for which cleanup should be performed. + * @param userKey user key for which cleanup should be performed. + */ + def clearIfExpired(groupingKey: Array[Byte], userKey: Array[Byte]): Long } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TimerStateImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TimerStateImpl.scala index e83c83df53229..a9c5a70e995d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TimerStateImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TimerStateImpl.scala @@ -16,7 +16,8 @@ */ package org.apache.spark.sql.execution.streaming -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{EXPIRY_TIMESTAMP, KEY} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ @@ -130,8 +131,8 @@ class TimerStateImpl( def registerTimer(expiryTimestampMs: Long): Unit = { val groupingKey = getGroupingKey(keyToTsCFName) if (exists(groupingKey, expiryTimestampMs)) { - logWarning(s"Failed to register timer for key=$groupingKey and " + - s"timestamp=$expiryTimestampMs since it already exists") + logWarning(log"Failed to register timer for key=${MDC(KEY, groupingKey)} and " + + log"timestamp=${MDC(EXPIRY_TIMESTAMP, expiryTimestampMs)} ms since it already exists") } else { store.put(encodeKey(groupingKey, expiryTimestampMs), EMPTY_ROW, keyToTsCFName) store.put(encodeSecIndexKey(groupingKey, expiryTimestampMs), EMPTY_ROW, tsToKeyCFName) @@ -147,8 +148,8 @@ class TimerStateImpl( val groupingKey = getGroupingKey(keyToTsCFName) if (!exists(groupingKey, expiryTimestampMs)) { - logWarning(s"Failed to delete timer for key=$groupingKey and " + - s"timestamp=$expiryTimestampMs since it does not exist") + logWarning(log"Failed to delete timer for key=${MDC(KEY, groupingKey)} and " + + log"timestamp=${MDC(EXPIRY_TIMESTAMP, expiryTimestampMs)} ms since it does not exist") } else { store.remove(encodeKey(groupingKey, expiryTimestampMs), keyToTsCFName) store.remove(encodeSecIndexKey(groupingKey, expiryTimestampMs), tsToKeyCFName) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala index f5d2610d78d9e..60a9f54628a66 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TransformWithStateExec.scala @@ -19,6 +19,9 @@ package org.apache.spark.sql.execution.streaming import java.util.UUID import java.util.concurrent.TimeUnit.NANOSECONDS +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow @@ -78,15 +81,66 @@ case class TransformWithStateExec( override def shortName: String = "transformWithStateExec" override def 
shouldRunAnotherBatch(newInputWatermark: Long): Boolean = { + if (timeMode == ProcessingTime) { + // TODO: check if we can return true only if actual timers are registered, or there is + // expired state + true + } else if (outputMode == OutputMode.Append || outputMode == OutputMode.Update) { + eventTimeWatermarkForEviction.isDefined && + newInputWatermark > eventTimeWatermarkForEviction.get + } else { + false + } + } + + /** + * We initialize this processor handle in the driver to run the init function + * and fetch the schemas of the state variables initialized in this processor. + * @return a new instance of the driver processor handle + */ + private def getDriverProcessorHandle(): DriverStatefulProcessorHandleImpl = { + val driverProcessorHandle = new DriverStatefulProcessorHandleImpl(timeMode) + driverProcessorHandle.setHandleState(StatefulProcessorHandleState.PRE_INIT) + statefulProcessor.setHandle(driverProcessorHandle) + statefulProcessor.init(outputMode, timeMode) + driverProcessorHandle + } + + /** + * Fetching the columnFamilySchemas from the StatefulProcessorHandle + * after init is called. + */ + private def getColFamilySchemas(): Map[String, ColumnFamilySchema] = { + val columnFamilySchemas = getDriverProcessorHandle().getColumnFamilySchemas + closeProcessorHandle() + columnFamilySchemas + } + + /** + * This method is used for the driver-side stateful processor after we + * have collected all the necessary schemas. + * This instance of the stateful processor won't be used again. + */ + private def closeProcessorHandle(): Unit = { + statefulProcessor.close() + statefulProcessor.setHandle(null) + } + + /** + * Controls watermark propagation to downstream modes. If timeMode is + * ProcessingTime, the output rows cannot be interpreted in eventTime, hence + * this node will not propagate watermark in this timeMode. + * + * For timeMode EventTime, output watermark is same as input Watermark because + * transformWithState does not allow users to set the event time column to be + * earlier than the watermark. 
+ */ + override def produceOutputWatermark(inputWatermarkMs: Long): Option[Long] = { timeMode match { case ProcessingTime => - // TODO: check if we can return true only if actual timers are registered, or there is - // expired state - true - case EventTime => - eventTimeWatermarkForEviction.isDefined && - newInputWatermark > eventTimeWatermarkForEviction.get - case _ => false + None + case _ => + Some(inputWatermarkMs) } } @@ -313,11 +367,55 @@ case class TransformWithStateExec( "Number of value state variables with TTL"), StatefulOperatorCustomSumMetric("numListStateWithTTLVars", "Number of list state variables with TTL"), + StatefulOperatorCustomSumMetric("numMapStateWithTTLVars", + "Number of map state variables with TTL"), StatefulOperatorCustomSumMetric("numValuesRemovedDueToTTLExpiry", "Number of values removed due to TTL expiry") ) } + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, + batchId: Long, + stateSchemaVersion: Int): Array[String] = { + assert(stateSchemaVersion >= 3) + val newColumnFamilySchemas = getColFamilySchemas() + val schemaFile = new StateSchemaV3File( + hadoopConf, stateSchemaDirPath(StateStoreId.DEFAULT_STORE_NAME).toString) + // TODO: [SPARK-48849] Read the schema path from the OperatorStateMetadata file + // and validate it with the new schema + + // Write the new schema to the schema file + val schemaPath = schemaFile.addWithUUID(batchId, newColumnFamilySchemas.values.toList) + Array(schemaPath.toString) + } + + private def validateSchemas( + oldSchemas: List[ColumnFamilySchema], + newSchemas: Map[String, ColumnFamilySchema]): Unit = { + oldSchemas.foreach { case oldSchema: ColumnFamilySchemaV1 => + newSchemas.get(oldSchema.columnFamilyName).foreach { + case newSchema: ColumnFamilySchemaV1 => + StateSchemaCompatibilityChecker.check( + (oldSchema.keySchema, oldSchema.valueSchema), + (newSchema.keySchema, newSchema.valueSchema), + ignoreValueSchema = false + ) + } + } + } + + private def stateSchemaDirPath(storeName: String): Path = { + assert(storeName == StateStoreId.DEFAULT_STORE_NAME) + def stateInfo = getStateInfo + val stateCheckpointPath = + new Path(getStateInfo.checkpointLocation, + s"${stateInfo.operatorId.toString}") + + val storeNamePath = new Path(stateCheckpointPath, storeName) + new Path(new Path(storeNamePath, "_metadata"), "schema") + } + override protected def doExecute(): RDD[InternalRow] = { metrics // force lazy init at driver diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TriggerExecutor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TriggerExecutor.scala index 143230759724a..bfa838e43e288 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TriggerExecutor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/TriggerExecutor.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution.streaming -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{ELAPSED_TIME, TRIGGER_INTERVAL} import org.apache.spark.util.{Clock, SystemClock} trait TriggerExecutor { @@ -98,8 +99,9 @@ case class ProcessingTimeExecutor( /** Called when a batch falls behind */ def notifyBatchFallingBehind(realElapsedTimeMs: Long): Unit = { - logWarning("Current batch is falling behind. The trigger interval is " + - s"${intervalMs} milliseconds, but spent ${realElapsedTimeMs} milliseconds") + logWarning(log"Current batch is falling behind. 
The trigger interval is " + + log"${MDC(TRIGGER_INTERVAL, intervalMs)} milliseconds, but spent " + + log"${MDC(ELAPSED_TIME, realElapsedTimeMs)} milliseconds") } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ValueStateImplWithTTL.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ValueStateImplWithTTL.scala index dbfa4586dc0a6..0ed5a6f29a984 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ValueStateImplWithTTL.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ValueStateImplWithTTL.scala @@ -96,6 +96,7 @@ class ValueStateImplWithTTL[S]( /** Function to remove state for given key */ override def clear(): Unit = { store.remove(stateTypesEncoder.encodeGroupingKey(), stateName) + clearTTLState() } def clearIfExpired(groupingKey: Array[Byte]): Long = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkTracker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkTracker.scala index b0f8cf9cd1846..54c47ec4e6ed8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkTracker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/WatermarkTracker.scala @@ -21,7 +21,8 @@ import java.util.Locale import scala.collection.mutable -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.RuntimeConfig import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.internal.SQLConf @@ -113,7 +114,9 @@ case class WatermarkTracker(policy: MultipleWatermarkPolicy) extends Logging { // `org.apache.spark.sql.execution.streaming.MultipleWatermarkPolicy` implementations. 
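For context on the chooseGlobalWatermark call that follows: when a query has several event-time operators, the policy configured by spark.sql.streaming.multipleWatermarkPolicy decides how their watermarks are combined, "min" (the default) following the slowest operator and "max" the fastest. A hedged approximation of what those two policies compute, not the actual MultipleWatermarkPolicy implementations:

// Approximation only: combine per-operator watermarks under a "min" or "max"
// policy. The real implementations live behind the MultipleWatermarkPolicy trait.
def combineWatermarks(operatorWatermarksMs: Seq[Long], policy: String): Long = {
  require(operatorWatermarksMs.nonEmpty, "need at least one per-operator watermark")
  policy match {
    case "min" => operatorWatermarksMs.min
    case "max" => operatorWatermarksMs.max
  }
}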
val chosenGlobalWatermark = policy.chooseGlobalWatermark(operatorToWatermarkMap.values.toSeq) if (chosenGlobalWatermark > globalWatermarkMs) { - logInfo(s"Updating event-time watermark from $globalWatermarkMs to $chosenGlobalWatermark ms") + logInfo(log"Updating event-time watermark from " + + log"${MDC(GLOBAL_WATERMARK, globalWatermarkMs)} " + + log"to ${MDC(CHOSEN_WATERMARK, chosenGlobalWatermark)} ms") globalWatermarkMs = chosenGlobalWatermark } else { logDebug(s"Event time watermark didn't move: $chosenGlobalWatermark < $globalWatermarkMs") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 920a7c68314b7..633aaf2682dbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -25,6 +25,8 @@ import java.util.function.UnaryOperator import scala.collection.mutable.{Map => MutableMap} import org.apache.spark.SparkEnv +import org.apache.spark.internal.LogKeys._ +import org.apache.spark.internal.MDC import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.{CurrentDate, CurrentTimestampLike, LocalTimestamp} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -83,7 +85,9 @@ class ContinuousExecution( v2ToRelationMap.getOrElseUpdate(s, { val metadataPath = s"$resolvedCheckpointRoot/sources/$nextSourceId" nextSourceId += 1 - logInfo(s"Reading table [$table] from DataSourceV2 named '$sourceName' $dsStr") + logInfo(log"Reading table [${MDC(STREAMING_TABLE, table)}] " + + log"from DataSourceV2 named '${MDC(STREAMING_DATA_SOURCE_NAME, sourceName)}' " + + log"${MDC(STREAMING_DATA_SOURCE_DESCRIPTION, dsStr)}") // TODO: operator pushdown. val scan = table.newScanBuilder(options).build() val stream = scan.toContinuousStream(metadataPath) @@ -276,7 +280,7 @@ class ContinuousExecution( false } else if (isActive) { execCtx.batchId = epochEndpoint.askSync[Long](IncrementAndGetEpoch) - logInfo(s"New epoch ${execCtx.batchId} is starting.") + logInfo(log"New epoch ${MDC(BATCH_ID, execCtx.batchId)} is starting.") true } else { false @@ -307,7 +311,8 @@ class ContinuousExecution( } catch { case t: Throwable if StreamExecution.isInterruptionException(t, sparkSession.sparkContext) && state.get() == RECONFIGURING => - logInfo(s"Query $id ignoring exception from reconfiguring: $t") + logInfo(log"Query ${MDC(QUERY_ID, id)} ignoring exception from reconfiguring: " + + log"${MDC(ERROR, t)}") // interrupted by reconfiguration - swallow exception so we can restart the query } finally { // The above execution may finish before getting interrupted, for example, a Spark job having @@ -440,7 +445,8 @@ class ContinuousExecution( */ def stopInNewThread(error: Throwable): Unit = { if (failure.compareAndSet(null, error)) { - logError(s"Query $prettyIdString received exception $error") + logError(log"Query ${MDC(PRETTY_ID_STRING, prettyIdString)} received exception " + + log"${MDC(ERROR, error)}") stopInNewThread() } } @@ -476,7 +482,7 @@ class ContinuousExecution( // We just need to interrupt the long running job. 
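The stop and reconfigure paths in this area deliberately treat interruption-related exceptions as expected, because stopping a continuous query works by interrupting its long-running threads. A condensed sketch of that pattern with invented names (the real checks go through StreamExecution.isInterruptionException and the query's state):

import scala.util.control.NonFatal

// Illustrative shutdown loop: an interrupt during an intentional stop is
// expected and ignored; an unexpected interrupt still propagates (NonFatal
// does not match InterruptedException); other non-fatal errors are returned.
def runUntilStopped(stopRequested: () => Boolean)(body: () => Unit): Option[Throwable] = {
  try {
    while (!stopRequested()) body()
    None
  } catch {
    case _: InterruptedException if stopRequested() => None
    case NonFatal(t) => Some(t)
  }
}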
interruptAndAwaitExecutionThreadTermination() } - logInfo(s"Query $prettyIdString was stopped") + logInfo(log"Query ${MDC(PRETTY_ID_STRING, prettyIdString)} was stopped") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousQueuedDataReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousQueuedDataReader.scala index 8e5548ca2acad..398df496d15ff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousQueuedDataReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousQueuedDataReader.scala @@ -23,7 +23,8 @@ import java.util.concurrent.{ArrayBlockingQueue, TimeUnit} import scala.util.control.NonFatal import org.apache.spark.{SparkEnv, TaskContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeProjection import org.apache.spark.sql.connector.read.PartitionReader @@ -159,7 +160,8 @@ class ContinuousQueuedDataReader( } catch { case _: InterruptedException => // Continuous shutdown always involves an interrupt; do nothing and shut down quietly. - logInfo(s"shutting down interrupted data reader thread $getName") + logInfo(log"shutting down interrupted data reader thread " + + log"${MDC(THREAD_NAME, getName)}") case NonFatal(t) => failureReason = t diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTextSocketSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTextSocketSource.scala index b41b3c329712f..420c3e3be16d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTextSocketSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousTextSocketSource.scala @@ -29,7 +29,8 @@ import org.json4s.{DefaultFormats, Formats, NoTypeHints} import org.json4s.jackson.Serialization import org.apache.spark.SparkEnv -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{HOST, PORT} import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder @@ -179,7 +180,7 @@ class TextSocketContinuousStream( val line = reader.readLine() if (line == null) { // End of file reached - logWarning(s"Stream closed by $host:$port") + logWarning(log"Stream closed by ${MDC(HOST, host)}:${MDC(PORT, port)}") return } TextSocketContinuousStream.this.synchronized { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousWriteRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousWriteRDD.scala index 1d6ba87145d4a..d5daa9a875f83 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousWriteRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousWriteRDD.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.execution.streaming.continuous import org.apache.spark.{Partition, SparkEnv, TaskContext} +import org.apache.spark.internal.{LogKeys, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rdd.RDD import 
org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.write.DataWriter @@ -68,8 +70,8 @@ class ContinuousWriteRDD(var prev: RDD[InternalRow], writerFactory: StreamingDat } CustomMetrics.updateMetrics( dataWriter.currentMetricsValues.toImmutableArraySeq, customMetrics) - logInfo(s"Writer for partition ${context.partitionId()} " + - s"in epoch ${EpochTracker.getCurrentEpoch.get} is committing.") + logInfo(log"Writer for partition ${MDC(PARTITION_ID, context.partitionId())} " + + log"in epoch ${MDC(EPOCH, EpochTracker.getCurrentEpoch.get)} is committing.") val msg = dataWriter.commit() epochCoordinator.send( CommitPartitionEpoch( @@ -77,8 +79,8 @@ class ContinuousWriteRDD(var prev: RDD[InternalRow], writerFactory: StreamingDat EpochTracker.getCurrentEpoch.get, msg) ) - logInfo(s"Writer for partition ${context.partitionId()} " + - s"in epoch ${EpochTracker.getCurrentEpoch.get} committed.") + logInfo(log"Writer for partition ${MDC(PARTITION_ID, context.partitionId())} " + + log"in epoch ${MDC(EPOCH, EpochTracker.getCurrentEpoch.get)} committed.") EpochTracker.incrementCurrentEpoch() } catch { case _: InterruptedException => @@ -87,9 +89,11 @@ class ContinuousWriteRDD(var prev: RDD[InternalRow], writerFactory: StreamingDat })(catchBlock = { // If there is an error, abort this writer. We enter this callback in the middle of // rethrowing an exception, so compute() will stop executing at this point. - logError(s"Writer for partition ${context.partitionId()} is aborting.") + logError(log"Writer for partition ${MDC(LogKeys.PARTITION_ID, context.partitionId())} " + + log"is aborting.") if (dataWriter != null) dataWriter.abort() - logError(s"Writer for partition ${context.partitionId()} aborted.") + logError(log"Writer for partition ${MDC(LogKeys.PARTITION_ID, context.partitionId())} " + + log"aborted.") }, finallyBlock = { if (dataWriter != null) dataWriter.close() }) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSourceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSourceExec.scala index 2b7d68f9b98bf..42ce32e1bc674 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSourceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/WriteToContinuousDataSourceExec.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution.streaming.continuous -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute @@ -47,8 +48,8 @@ case class WriteToContinuousDataSourceExec(write: StreamingWrite, query: SparkPl PhysicalWriteInfoImpl(queryRdd.getNumPartitions)) val rdd = new ContinuousWriteRDD(queryRdd, writerFactory, metrics) - logInfo(s"Start processing data source write support: $write. " + - s"The input RDD has ${rdd.partitions.length} partitions.") + logInfo(log"Start processing data source write support: ${MDC(STREAMING_WRITE, write)}. 
" + + log"The input RDD has ${MDC(NUM_PARTITIONS, rdd.partitions.length)} partitions.") EpochCoordinatorRef.get( sparkContext.getLocalProperty(ContinuousExecution.EPOCH_COORDINATOR_ID_KEY), sparkContext.env) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala index 60cbaa7e79b95..c687caafdef37 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/ForeachBatchSink.scala @@ -63,7 +63,7 @@ class ForeachBatchSink[T](batchWriter: (Dataset[T], Long) => Unit, encoder: Expr /** * Exception that wraps the exception thrown in the user provided function in ForeachBatch sink. */ -private[streaming] case class ForeachBatchUserFuncException(cause: Throwable) +private[sql] case class ForeachBatchUserFuncException(cause: Throwable) extends SparkException( errorClass = "FOREACH_BATCH_USER_FUNCTION_ERROR", messageParameters = Map("reason" -> Option(cause.getMessage).getOrElse("")), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamMicroBatchStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamMicroBatchStream.scala index 5640c7d3ca769..6705201c67316 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamMicroBatchStream.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/RateStreamMicroBatchStream.scala @@ -23,7 +23,8 @@ import java.util.concurrent.TimeUnit import org.apache.commons.io.IOUtils -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils @@ -92,7 +93,7 @@ class RateStreamMicroBatchStream( metadataLog.get(0).getOrElse { val offset = LongOffset(clock.getTimeMillis()) metadataLog.add(0, offset) - logInfo(s"Start time: $offset") + logInfo(log"Start time: ${MDC(TIME_UNITS, offset)}") offset }.offset } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala index a01f40bead893..597b981ebe556 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala @@ -25,7 +25,8 @@ import javax.annotation.concurrent.GuardedBy import scala.collection.mutable.ListBuffer -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{HOST, PORT} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader, PartitionReaderFactory} @@ -79,7 +80,7 @@ class TextSocketMicroBatchStream(host: String, port: Int, numPartitions: Int) val line = reader.readLine() if (line == null) { // End of file reached - logWarning(s"Stream closed by $host:$port") + logWarning(log"Stream closed by ${MDC(HOST, host)}:${MDC(PORT, port)}") return } 
TextSocketMicroBatchStream.this.synchronized { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreMap.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreMap.scala index 32ff87f754d74..fe59703a1f458 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreMap.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreMap.scala @@ -32,7 +32,6 @@ trait HDFSBackedStateStoreMap { def remove(key: UnsafeRow): UnsafeRow def iterator(): Iterator[UnsafeRowPair] def prefixScan(prefixKey: UnsafeRow): Iterator[UnsafeRowPair] - def clear(): Unit } object HDFSBackedStateStoreMap { @@ -80,8 +79,6 @@ class NoPrefixHDFSBackedStateStoreMap extends HDFSBackedStateStoreMap { override def prefixScan(prefixKey: UnsafeRow): Iterator[UnsafeRowPair] = { throw SparkUnsupportedOperationException() } - - override def clear(): Unit = map.clear() } class PrefixScannableHDFSBackedStateStoreMap( @@ -170,9 +167,4 @@ class PrefixScannableHDFSBackedStateStoreMap( .iterator .map { key => unsafeRowPair.withRows(key, map.get(key)) } } - - override def clear(): Unit = { - map.clear() - prefixKeyToKeysMap.clear() - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala index 2ecfa0931042b..c4a41ceb4caf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/HDFSBackedStateStoreProvider.scala @@ -32,7 +32,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.spark.{SparkConf, SparkEnv} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC, MessageWithContext} import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.errors.QueryExecutionErrors @@ -71,7 +71,8 @@ import org.apache.spark.util.ArrayImplicits._ * to ensure re-executed RDD operations re-apply updates on the correct past version of the * store. 
*/ -private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with Logging { +private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with Logging + with SupportsFineGrainedReplay { private val providerName = "HDFSBackedStateStoreProvider" @@ -169,7 +170,9 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with verify(state == UPDATING, "Cannot commit after already committed or aborted") commitUpdates(newVersion, mapToUpdate, compressedStream) state = COMMITTED - logInfo(s"Committed version $newVersion for $this to file $finalDeltaFile") + logInfo(log"Committed version ${MDC(LogKeys.COMMITTED_VERSION, newVersion)} " + + log"for ${MDC(LogKeys.STATE_STORE_PROVIDER, this)} to file " + + log"${MDC(LogKeys.FILE_NAME, finalDeltaFile)}") newVersion } catch { case e: Throwable => @@ -187,7 +190,8 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with } else { state = ABORTED } - logInfo(s"Aborted version $newVersion for $this") + logInfo(log"Aborted version ${MDC(LogKeys.STATE_STORE_VERSION, newVersion)} " + + log"for ${MDC(LogKeys.STATE_STORE_PROVIDER, this)}") } /** @@ -253,14 +257,16 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with /** Get the state store for making updates to create a new `version` of the store. */ override def getStore(version: Long): StateStore = { val newMap = getLoadedMapForStore(version) - logInfo(s"Retrieved version $version of ${HDFSBackedStateStoreProvider.this} for update") + logInfo(log"Retrieved version ${MDC(LogKeys.STATE_STORE_VERSION, version)} " + + log"of ${MDC(LogKeys.STATE_STORE_PROVIDER, HDFSBackedStateStoreProvider.this)} for update") new HDFSBackedStateStore(version, newMap) } /** Get the state store for reading to specific `version` of the store. */ override def getReadStore(version: Long): ReadStateStore = { val newMap = getLoadedMapForStore(version) - logInfo(s"Retrieved version $version of ${HDFSBackedStateStoreProvider.this} for readonly") + logInfo(log"Retrieved version ${MDC(LogKeys.STATE_STORE_VERSION, version)} of " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, HDFSBackedStateStoreProvider.this)} for readonly") new HDFSBackedReadStateStore(version, newMap) } @@ -337,16 +343,19 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with /** Do maintenance backing data files, including creating snapshots and cleaning up old files */ override def doMaintenance(): Unit = { try { - doSnapshot() + doSnapshot("maintenance") cleanup() } catch { case NonFatal(e) => - logWarning(s"Error performing snapshot and cleaning up $this") + logWarning(log"Error performing snapshot and cleaning up " + toMessageWithContext) } } override def close(): Unit = { - synchronized { loadedMaps.values.asScala.foreach(_.clear()) } + // Clearing the map resets the TreeMap.root to null, and therefore entries inside the + // `loadedMaps` will be de-referenced and GCed automatically when their reference + // counts become 0. 
+ synchronized { loadedMaps.clear() } } override def supportedCustomMetrics: Seq[StateStoreCustomMetric] = { @@ -354,9 +363,14 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with Nil } + private def toMessageWithContext: MessageWithContext = { + log"HDFSStateStoreProvider[id = (op=${MDC(LogKeys.OP_ID, stateStoreId.operatorId)}," + + log"part=${MDC(LogKeys.PARTITION_ID, stateStoreId.partitionId)})," + + log"dir = ${MDC(LogKeys.PATH, baseDir)}]" + } + override def toString(): String = { - s"HDFSStateStoreProvider[" + - s"id = (op=${stateStoreId.operatorId},part=${stateStoreId.partitionId}),dir = $baseDir]" + toMessageWithContext.message } /* Internal fields and methods */ @@ -428,6 +442,27 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with private def putStateIntoStateCacheMap( newVersion: Long, map: HDFSBackedStateStoreMap): Unit = synchronized { + val loadedEntries = loadedMaps.size() + val earliestLoadedVersion: Option[Long] = if (loadedEntries > 0) { + Some(loadedMaps.lastKey()) + } else { + None + } + + if (earliestLoadedVersion.isDefined) { + logInfo(log"Trying to add version=${MDC(LogKeys.STATE_STORE_VERSION, newVersion)} to state " + + log"cache map with current_size=${MDC(LogKeys.NUM_LOADED_ENTRIES, loadedEntries)} and " + + log"earliest_loaded_version=" + + log"${MDC(LogKeys.EARLIEST_LOADED_VERSION, earliestLoadedVersion.get)}} " + + log"and max_versions_to_retain_in_memory=" + + log"${MDC(LogKeys.NUM_VERSIONS_RETAIN, numberOfVersionsToRetainInMemory)}") + } else { + logInfo(log"Trying to add version=${MDC(LogKeys.STATE_STORE_VERSION, newVersion)} to state " + + log"cache map with current_size=${MDC(LogKeys.NUM_LOADED_ENTRIES, loadedEntries)} and " + + log"max_versions_to_retain_in_memory=" + + log"${MDC(LogKeys.NUM_VERSIONS_RETAIN, numberOfVersionsToRetainInMemory)}") + } + if (numberOfVersionsToRetainInMemory <= 0) { if (loadedMaps.size() > 0) loadedMaps.clear() return @@ -463,9 +498,9 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with return loadedCurrentVersionMap.get } - logWarning(s"The state for version $version doesn't exist in loadedMaps. " + - "Reading snapshot file and delta files if needed..." + - "Note that this is normal for the first batch of starting query.") + logWarning(log"The state for version ${MDC(LogKeys.FILE_VERSION, version)} doesn't exist in " + + log"loadedMaps. Reading snapshot file and delta files if needed..." 
+ + log"Note that this is normal for the first batch of starting query.") loadedMapCacheMissCount.increment() @@ -585,10 +620,14 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with } finally { if (input != null) input.close() } - logInfo(s"Read delta file for version $version of $this from $fileToRead") + logInfo(log"Read delta file for version ${MDC(LogKeys.FILE_VERSION, version)} " + + log"of ${MDC(LogKeys.STATE_STORE_PROVIDER, this)} from ${MDC(LogKeys.FILE_NAME, fileToRead)}") } - private def writeSnapshotFile(version: Long, map: HDFSBackedStateStoreMap): Unit = { + private def writeSnapshotFile( + version: Long, + map: HDFSBackedStateStoreMap, + opType: String): Unit = { val targetFile = snapshotFile(version) var rawOutput: CancellableFSDataOutputStream = null var output: DataOutputStream = null @@ -612,7 +651,9 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with cancelDeltaFile(compressedStream = output, rawStream = rawOutput) throw e } - logInfo(s"Written snapshot file for version $version of $this at $targetFile") + logInfo(log"Written snapshot file for version ${MDC(LogKeys.FILE_VERSION, version)} of " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, this)} at ${MDC(LogKeys.FILE_NAME, targetFile)} " + + log"for ${MDC(LogKeys.OP_TYPE, opType)}") } /** @@ -637,11 +678,17 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with // SPARK-42668 - Catch and log any other exception thrown while trying to cancel // raw stream or close compressed stream. case NonFatal(ex) => - logInfo(s"Failed to cancel delta file for provider=$stateStoreId " + - s"with exception=$ex") + logInfo(log"Failed to cancel delta file for " + + log"provider=${MDC(LogKeys.STATE_STORE_ID, stateStoreId)} " + + log"with exception=${MDC(LogKeys.ERROR, ex)}") } } + /** + * Try to read the snapshot file. If the snapshot file is not available, return [[None]]. 
+ * + * @param version the version of the snapshot file + */ private def readSnapshotFile(version: Long): Option[HDFSBackedStateStoreMap] = { val fileToRead = snapshotFile(version) val map = HDFSBackedStateStoreMap.create(keySchema, numColsPrefixKey) @@ -687,7 +734,8 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with } } } - logInfo(s"Read snapshot file for version $version of $this from $fileToRead") + logInfo(log"Read snapshot file for version ${MDC(LogKeys.SNAPSHOT_VERSION, version)} of " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, this)} from ${MDC(LogKeys.FILE_NAME, fileToRead)}") Some(map) } catch { case _: FileNotFoundException => @@ -699,7 +747,7 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with /** Perform a snapshot of the store to allow delta files to be consolidated */ - private def doSnapshot(): Unit = { + private def doSnapshot(opType: String): Unit = { try { val (files, e1) = Utils.timeTakenMs(fetchFiles()) logDebug(s"fetchFiles() took $e1 ms.") @@ -711,7 +759,7 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with synchronized { Option(loadedMaps.get(lastVersion)) } match { case Some(map) => if (deltaFilesForLastVersion.size > storeConf.minDeltasForSnapshot) { - val (_, e2) = Utils.timeTakenMs(writeSnapshotFile(lastVersion, map)) + val (_, e2) = Utils.timeTakenMs(writeSnapshotFile(lastVersion, map, opType)) logDebug(s"writeSnapshotFile() took $e2 ms.") } case None => @@ -720,7 +768,7 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with } } catch { case NonFatal(e) => - logWarning(s"Error doing snapshots for $this", e) + logWarning(log"Error doing snapshots for " + toMessageWithContext, e) } } @@ -745,13 +793,15 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with } } logDebug(s"deleting files took $e2 ms.") - logInfo(s"Deleted files older than ${earliestFileToRetain.version} for $this: " + - filesToDelete.mkString(", ")) + logInfo(log"Deleted files older than " + + log"${MDC(LogKeys.FILE_VERSION, earliestFileToRetain.version)} for " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, this)}: " + + log"${MDC(LogKeys.FILE_NAME, filesToDelete.mkString(", "))}") } } } catch { case NonFatal(e) => - logWarning(s"Error cleaning up files for $this", e) + logWarning(log"Error cleaning up files for " + toMessageWithContext, e) } } @@ -804,8 +854,8 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with } case "snapshot" => versionToFiles.put(version, StoreFile(version, path, isSnapshot = true)) - case _ => - logWarning(s"Could not identify file $path for $this") + case _ => logWarning( + log"Could not identify file ${MDC(LogKeys.PATH, path)} for " + toMessageWithContext) } } } @@ -839,4 +889,93 @@ private[sql] class HDFSBackedStateStoreProvider extends StateStoreProvider with throw new IllegalStateException(msg) } } + + /** + * Get the state store of endVersion by applying delta files on the snapshot of snapshotVersion. + * If snapshot for snapshotVersion does not exist, an error will be thrown. 
+ * + * @param snapshotVersion checkpoint version of the snapshot to start with + * @param endVersion checkpoint version to end with + * @return [[HDFSBackedStateStore]] + */ + override def replayStateFromSnapshot(snapshotVersion: Long, endVersion: Long): StateStore = { + val newMap = replayLoadedMapFromSnapshot(snapshotVersion, endVersion) + logInfo(log"Retrieved snapshot at version " + + log"${MDC(LogKeys.STATE_STORE_VERSION, snapshotVersion)} and apply delta files to version " + + log"${MDC(LogKeys.STATE_STORE_VERSION, endVersion)} of " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, HDFSBackedStateStoreProvider.this)} for update") + new HDFSBackedStateStore(endVersion, newMap) + } + + /** + * Get the state store of endVersion for reading by applying delta files on the snapshot of + * snapshotVersion. If snapshot for snapshotVersion does not exist, an error will be thrown. + * + * @param snapshotVersion checkpoint version of the snapshot to start with + * @param endVersion checkpoint version to end with + * @return [[HDFSBackedReadStateStore]] + */ + override def replayReadStateFromSnapshot(snapshotVersion: Long, endVersion: Long): + ReadStateStore = { + val newMap = replayLoadedMapFromSnapshot(snapshotVersion, endVersion) + logInfo(log"Retrieved snapshot at version " + + log"${MDC(LogKeys.STATE_STORE_VERSION, snapshotVersion)} and apply delta files to version " + + log"${MDC(LogKeys.STATE_STORE_VERSION, endVersion)} of " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, HDFSBackedStateStoreProvider.this)} for read-only") + new HDFSBackedReadStateStore(endVersion, newMap) + } + + /** + * Construct the state map at endVersion from snapshot of version snapshotVersion. + * Returns a new [[HDFSBackedStateStoreMap]] + * @param snapshotVersion checkpoint version of the snapshot to start with + * @param endVersion checkpoint version to end with + */ + private def replayLoadedMapFromSnapshot(snapshotVersion: Long, endVersion: Long): + HDFSBackedStateStoreMap = synchronized { + try { + if (snapshotVersion < 1) { + throw QueryExecutionErrors.unexpectedStateStoreVersion(snapshotVersion) + } + if (endVersion < snapshotVersion) { + throw QueryExecutionErrors.unexpectedStateStoreVersion(endVersion) + } + + val newMap = HDFSBackedStateStoreMap.create(keySchema, numColsPrefixKey) + newMap.putAll(constructMapFromSnapshot(snapshotVersion, endVersion)) + + newMap + } + catch { + case e: Throwable => throw QueryExecutionErrors.cannotLoadStore(e) + } + } + + private def constructMapFromSnapshot(snapshotVersion: Long, endVersion: Long): + HDFSBackedStateStoreMap = { + val (result, elapsedMs) = Utils.timeTakenMs { + val startVersionMap = synchronized { Option(loadedMaps.get(snapshotVersion)) } match { + case Some(value) => Option(value) + case None => readSnapshotFile(snapshotVersion) + } + if (startVersionMap.isEmpty) { + throw StateStoreErrors.stateStoreSnapshotFileNotFound( + snapshotFile(snapshotVersion).toString, toString()) + } + + // Load all the deltas from the version after the start version up to the end version. 
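The loop that follows applies each delta file in version order on top of the map loaded from the snapshot. A minimal standalone sketch of that replay idea, assuming hypothetical loadSnapshot and applyDelta helpers in place of readSnapshotFile and updateFromDeltaFile:

    // Sketch only: replay deltas (snapshotVersion + 1 .. endVersion) onto the
    // state loaded from the snapshot, mirroring constructMapFromSnapshot.
    def replay[S](
        snapshotVersion: Long,
        endVersion: Long,
        loadSnapshot: Long => S,
        applyDelta: (Long, S) => Unit): S = {
      require(snapshotVersion >= 1 && endVersion >= snapshotVersion)
      val state = loadSnapshot(snapshotVersion)
      (snapshotVersion + 1 to endVersion).foreach(v => applyDelta(v, state))
      state
    }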
+ val resultMap = HDFSBackedStateStoreMap.create(keySchema, numColsPrefixKey) + resultMap.putAll(startVersionMap.get) + for (deltaVersion <- snapshotVersion + 1 to endVersion) { + updateFromDeltaFile(deltaVersion, resultMap) + } + + resultMap + } + + logDebug(s"Loading snapshot at version $snapshotVersion and apply delta files to version " + + s"$endVersion takes $elapsedMs ms.") + + result + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/OperatorStateMetadata.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/OperatorStateMetadata.scala index b58c805af9d60..8ce883038401d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/OperatorStateMetadata.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/OperatorStateMetadata.scala @@ -27,7 +27,7 @@ import org.apache.hadoop.fs.{FSDataOutputStream, Path} import org.json4s.{Formats, NoTypeHints} import org.json4s.jackson.Serialization -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, MetadataVersionUtil} /** @@ -105,7 +105,8 @@ class OperatorStateMetadataWriter(stateCheckpointPath: Path, hadoopConf: Configu outputStream.close() } catch { case e: Throwable => - logError(s"Fail to write state metadata file to $metadataFilePath", e) + logError( + log"Fail to write state metadata file to ${MDC(LogKeys.META_FILE, metadataFilePath)}", e) outputStream.cancel() throw e } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala index 61c3d349655fd..28ad197ffb4af 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala @@ -23,7 +23,7 @@ import java.util.concurrent.TimeUnit import javax.annotation.concurrent.GuardedBy import scala.collection.{mutable, Map} -import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.{ArrayBuffer, ListBuffer} import scala.jdk.CollectionConverters._ import scala.ref.WeakReference import scala.util.Try @@ -36,7 +36,7 @@ import org.rocksdb.CompressionType._ import org.rocksdb.TickerType._ import org.apache.spark.TaskContext -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{LogEntry, Logging, LogKeys, MDC} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.util.{NextIterator, Utils} @@ -74,7 +74,11 @@ class RocksDB( loggingId: String = "", useColumnFamilies: Boolean = false) extends Logging { - case class RocksDBSnapshot(checkpointDir: File, version: Long, numKeys: Long) { + case class RocksDBSnapshot( + checkpointDir: File, + version: Long, + numKeys: Long, + capturedFileMappings: RocksDBFileMappings) { def close(): Unit = { silentDeleteRecursively(checkpointDir, s"Free up local checkpoint of snapshot $version") } @@ -82,6 +86,7 @@ class RocksDB( @volatile private var latestSnapshot: Option[RocksDBSnapshot] = None @volatile private var lastSnapshotVersion = 0L + private val oldSnapshots = new ListBuffer[RocksDBSnapshot] RocksDBLoader.loadLibrary() @@ -177,10 +182,13 @@ class RocksDB( assert(version >= 0) acquire(LoadStore) recordedMetrics = None - logInfo(s"Loading $version") + 
logInfo(log"Loading ${MDC(LogKeys.VERSION_NUM, version)}") try { if (loadedVersion != version) { closeDB() + // deep copy is needed to avoid race condition + // between maintenance and task threads + fileManager.copyFileMapping() val latestSnapshotVersion = fileManager.getLatestSnapshotVersion(version) val metadata = fileManager.loadCheckpointFromDfs(latestSnapshotVersion, workingDir) loadedVersion = latestSnapshotVersion @@ -189,7 +197,6 @@ class RocksDB( if (lastSnapshotVersion > latestSnapshotVersion) { // discard any newer snapshots lastSnapshotVersion = 0L - latestSnapshot = None } openDB() @@ -212,7 +219,7 @@ class RocksDB( if (conf.resetStatsOnLoad) { nativeStats.reset } - logInfo(s"Loaded $version") + logInfo(log"Loaded ${MDC(LogKeys.VERSION_NUM, version)}") } catch { case t: Throwable => loadedVersion = -1 // invalidate loaded data @@ -226,12 +233,88 @@ class RocksDB( this } + /** + * Load from the start snapshot version and apply all the changelog records to reach the + * end version. Note that this will copy all the necessary files from DFS to local disk as needed, + * and possibly restart the native RocksDB instance. + * + * @param snapshotVersion version of the snapshot to start with + * @param endVersion end version + * @return A RocksDB instance loaded with the state endVersion replayed from snapshotVersion. + * Note that the instance will be read-only since this method is only used in State Data + * Source. + */ + def loadFromSnapshot(snapshotVersion: Long, endVersion: Long): RocksDB = { + assert(snapshotVersion >= 0 && endVersion >= snapshotVersion) + acquire(LoadStore) + recordedMetrics = None + logInfo( + log"Loading snapshot at version ${MDC(LogKeys.VERSION_NUM, snapshotVersion)} and apply " + + log"changelog files to version ${MDC(LogKeys.VERSION_NUM, endVersion)}.") + try { + replayFromCheckpoint(snapshotVersion, endVersion) + + logInfo( + log"Loaded snapshot at version ${MDC(LogKeys.VERSION_NUM, snapshotVersion)} and apply " + + log"changelog files to version ${MDC(LogKeys.VERSION_NUM, endVersion)}.") + } catch { + case t: Throwable => + loadedVersion = -1 // invalidate loaded data + throw t + } + this + } + + /** + * Load from the start checkpoint version and apply all the changelog records to reach the + * end version. + * If the start version does not exist, it will throw an exception. + * + * @param snapshotVersion start checkpoint version + * @param endVersion end version + */ + private def replayFromCheckpoint(snapshotVersion: Long, endVersion: Long): Any = { + closeDB() + val metadata = fileManager.loadCheckpointFromDfs(snapshotVersion, workingDir) + loadedVersion = snapshotVersion + + // reset last snapshot version + if (lastSnapshotVersion > snapshotVersion) { + // discard any newer snapshots + lastSnapshotVersion = 0L + latestSnapshot = None + } + openDB() + + numKeysOnWritingVersion = if (!conf.trackTotalNumberOfRows) { + // we don't track the total number of rows - discard the number being track + -1L + } else if (metadata.numKeys < 0) { + // we track the total number of rows, but the snapshot doesn't have tracking number + // need to count keys now + countKeys() + } else { + metadata.numKeys + } + if (loadedVersion != endVersion) replayChangelog(endVersion) + // After changelog replay the numKeysOnWritingVersion will be updated to + // the correct number of keys in the loaded version. 
+ numKeysOnLoadedVersion = numKeysOnWritingVersion + fileManagerMetrics = fileManager.latestLoadCheckpointMetrics + + if (conf.resetStatsOnLoad) { + nativeStats.reset + } + } + /** * Replay change log from the loaded version to the target version. */ private def replayChangelog(endVersion: Long): Unit = { for (v <- loadedVersion + 1 to endVersion) { - logInfo(s"replaying changelog from version $loadedVersion -> $endVersion") + logInfo(log"replaying changelog from version " + + log"${MDC(LogKeys.LOADED_VERSION, loadedVersion)} -> " + + log"${MDC(LogKeys.END_VERSION, endVersion)}") var changelogReader: StateStoreChangelogReader = null try { changelogReader = fileManager.getChangelogReader(v, useColumnFamilies) @@ -461,7 +544,7 @@ class RocksDB( verifyColFamilyOperations("iterator", colFamilyName) val iter = db.newIterator(colFamilyNameToHandleMap(colFamilyName)) - logInfo(s"Getting iterator from version $loadedVersion") + logInfo(log"Getting iterator from version ${MDC(LogKeys.LOADED_VERSION, loadedVersion)}") iter.seekToFirst() // Attempt to close this iterator if there is a task failure, or a task interruption. @@ -491,7 +574,8 @@ class RocksDB( val iter = db.newIterator(colFamilyNameToHandleMap(colFamilyName)) try { - logInfo(s"Counting keys - getting iterator from version $loadedVersion") + logInfo(log"Counting keys - getting iterator from version " + + log"${MDC(LogKeys.LOADED_VERSION, loadedVersion)}") iter.seekToFirst() @@ -545,7 +629,7 @@ class RocksDB( val newVersion = loadedVersion + 1 try { - logInfo(s"Flushing updates for $newVersion") + logInfo(log"Flushing updates for ${MDC(LogKeys.VERSION_NUM, newVersion)}") var compactTimeMs = 0L var flushTimeMs = 0L @@ -553,7 +637,7 @@ class RocksDB( if (shouldCreateSnapshot()) { // Need to flush the change to disk before creating a checkpoint // because rocksdb wal is disabled. - logInfo(s"Flushing updates for $newVersion") + logInfo(log"Flushing updates for ${MDC(LogKeys.VERSION_NUM, newVersion)}") flushTimeMs = timeTakenMs { // Flush updates to all available column families assert(!colFamilyNameToHandleMap.isEmpty) @@ -571,7 +655,8 @@ class RocksDB( checkpointTimeMs = timeTakenMs { val checkpointDir = createTempDir("checkpoint") - logInfo(s"Creating checkpoint for $newVersion in $checkpointDir") + logInfo(log"Creating checkpoint for ${MDC(LogKeys.VERSION_NUM, newVersion)} " + + log"in ${MDC(LogKeys.PATH, checkpointDir)}") // Make sure the directory does not exist. Native RocksDB fails if the directory to // checkpoint exists. Utils.deleteRecursively(checkpointDir) @@ -584,14 +669,21 @@ class RocksDB( // inside the uploadSnapshot() called below. // If changelog checkpointing is enabled, snapshot will be uploaded asynchronously // during state store maintenance. 
- latestSnapshot.foreach(_.close()) - latestSnapshot = Some( - RocksDBSnapshot(checkpointDir, newVersion, numKeysOnWritingVersion)) - lastSnapshotVersion = newVersion + synchronized { + if (latestSnapshot.isDefined) { + oldSnapshots += latestSnapshot.get + } + latestSnapshot = Some( + RocksDBSnapshot(checkpointDir, + newVersion, + numKeysOnWritingVersion, + fileManager.captureFileMapReference())) + lastSnapshotVersion = newVersion + } } } - logInfo(s"Syncing checkpoint for $newVersion to DFS") + logInfo(log"Syncing checkpoint for ${MDC(LogKeys.VERSION_NUM, newVersion)} to DFS") val fileSyncTimeMs = timeTakenMs { if (enableChangelogCheckpointing) { try { @@ -615,7 +707,8 @@ class RocksDB( "fileSync" -> fileSyncTimeMs ) recordedMetrics = Some(metrics) - logInfo(s"Committed $newVersion, stats = ${recordedMetrics.get.json}") + logInfo(log"Committed ${MDC(LogKeys.VERSION_NUM, newVersion)}, " + + log"stats = ${MDC(LogKeys.METRICS_JSON, recordedMetrics.get.json)}") loadedVersion } catch { case t: Throwable => @@ -638,22 +731,36 @@ class RocksDB( } private def uploadSnapshot(): Unit = { + var oldSnapshotsImmutable: List[RocksDBSnapshot] = Nil val localCheckpoint = synchronized { val checkpoint = latestSnapshot latestSnapshot = None + + // Convert mutable list buffer to immutable to prevent + // race condition with commit where old snapshot is added + oldSnapshotsImmutable = oldSnapshots.toList + oldSnapshots.clear() + checkpoint } localCheckpoint match { - case Some(RocksDBSnapshot(localDir, version, numKeys)) => + case Some(RocksDBSnapshot(localDir, version, numKeys, capturedFileMappings)) => try { val uploadTime = timeTakenMs { - fileManager.saveCheckpointToDfs(localDir, version, numKeys) + fileManager.saveCheckpointToDfs(localDir, version, numKeys, capturedFileMappings) fileManagerMetrics = fileManager.latestSaveCheckpointMetrics } - logInfo(s"$loggingId: Upload snapshot of version $version," + - s" time taken: $uploadTime ms") + logInfo(log"${MDC(LogKeys.LOG_ID, loggingId)}: Upload snapshot of version " + + log"${MDC(LogKeys.VERSION_NUM, version)}," + + log" time taken: ${MDC(LogKeys.TIME_UNITS, uploadTime)} ms") } finally { localCheckpoint.foreach(_.close()) + + // Clean up old latestSnapshots + for (snapshot <- oldSnapshotsImmutable) { + snapshot.close() + } + } case _ => } @@ -670,30 +777,17 @@ class RocksDB( // Make sure changelogWriter gets recreated next time. changelogWriter = None release(RollbackStore) - logInfo(s"Rolled back to $loadedVersion") + logInfo(log"Rolled back to ${MDC(LogKeys.VERSION_NUM, loadedVersion)}") } def doMaintenance(): Unit = { if (enableChangelogCheckpointing) { - // There is race to update latestSnapshot between load(), commit() - // and uploadSnapshot(). - // The load method will reset latestSnapshot to discard any snapshots taken - // from newer versions (when a old version is reloaded). - // commit() method deletes the existing snapshot while creating a new snapshot. - // In order to ensure that the snapshot being uploaded would not be modified - // concurrently, we need to synchronize the snapshot access between task thread - // and maintenance thread. 
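With the coarse StoreMaintenance lock removed in the hunk that follows, the coordination moves into commit() and uploadSnapshot() themselves: commit() parks the superseded snapshot in oldSnapshots instead of closing it, and the maintenance thread closes parked snapshots only after its upload has finished. A rough sketch of that hand-off, not the patch's exact code, with any AutoCloseable standing in for RocksDBSnapshot:

    import scala.collection.mutable.ListBuffer

    // Illustrative hand-off between a committing task thread and a maintenance
    // thread: publish() defers closing the previous snapshot; drainAndUpload()
    // uploads the latest one and only then closes everything it took over.
    class SnapshotHandOff[T <: AutoCloseable] {
      private var latest: Option[T] = None
      private val parked = new ListBuffer[T]

      def publish(snapshot: T): Unit = synchronized {
        latest.foreach(parked += _)   // superseded snapshot is parked, not closed
        latest = Some(snapshot)
      }

      def drainAndUpload(upload: T => Unit): Unit = {
        val (toUpload, toClose) = synchronized {
          val taken = (latest, parked.toList)
          latest = None
          parked.clear()
          taken
        }
        try toUpload.foreach(upload)
        finally (toUpload.toList ++ toClose).foreach(_.close())
      }
    }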
- acquire(StoreMaintenance) - try { - uploadSnapshot() - } finally { - release(StoreMaintenance) - } + uploadSnapshot() } val cleanupTime = timeTakenMs { fileManager.deleteOldVersions(conf.minVersionsToRetain) } - logInfo(s"Cleaned old data, time taken: $cleanupTime ms") + logInfo(log"Cleaned old data, time taken: ${MDC(LogKeys.TIME_UNITS, cleanupTime)} ms") } /** Release all resources */ @@ -771,10 +865,19 @@ class RocksDB( .keys.filter(checkInternalColumnFamilies(_)).size val numExternalColFamilies = colFamilyNameToHandleMap.keys.size - numInternalColFamilies + // if bounded memory usage is enabled, we share the block cache across all state providers + // running on the same node and account the usage to this single cache. In this case, its not + // possible to provide partition level or query level memory usage. + val memoryUsage = if (conf.boundedMemoryUsage) { + 0L + } else { + readerMemUsage + memTableMemUsage + blockCacheUsage + } + RocksDBMetrics( numKeysOnLoadedVersion, numKeysOnWritingVersion, - readerMemUsage + memTableMemUsage + blockCacheUsage, + memoryUsage, pinnedBlocksMemUsage, totalSSTFilesBytes, nativeOpsLatencyMicros, @@ -800,7 +903,7 @@ class RocksDB( rocksDBMetricsOpt = recordedMetrics } catch { case ex: Exception => - logInfo(s"Failed to acquire metrics with exception=$ex") + logInfo(log"Failed to acquire metrics with exception=${MDC(LogKeys.ERROR, ex)}") } finally { release(ReportStoreMetrics) } @@ -838,7 +941,8 @@ class RocksDB( Option(TaskContext.get()).foreach(_.addTaskCompletionListener[Unit] { _ => this.release(StoreTaskCompletionListener) }) - logInfo(s"RocksDB instance was acquired by $acquiredThreadInfo for opType=${opType.toString}") + logInfo(log"RocksDB instance was acquired by ${MDC(LogKeys.THREAD, acquiredThreadInfo)} " + + log"for opType=${MDC(LogKeys.OP_TYPE, opType.toString)}") } } @@ -849,7 +953,8 @@ class RocksDB( * @param opType - operation type releasing the lock */ private def release(opType: RocksDBOpType): Unit = acquireLock.synchronized { - logInfo(s"RocksDB instance was released by $acquiredThreadInfo for opType=${opType.toString}") + logInfo(log"RocksDB instance was released by ${MDC(LogKeys.THREAD, acquiredThreadInfo)} " + + log"for opType=${MDC(LogKeys.OP_TYPE, opType.toString)}") acquiredThreadInfo = null acquireLock.notifyAll() } @@ -887,7 +992,7 @@ class RocksDB( colFamilyHandles.asScala.toList.foreach { handle => colFamilyNameToHandleMap(handle.getName.map(_.toChar).mkString) = handle } - logInfo(s"Opened DB with conf ${conf}") + logInfo(log"Opened DB with conf ${MDC(LogKeys.CONFIG, conf)}") } private def closeDB(): Unit = { @@ -911,13 +1016,14 @@ class RocksDB( // Map DB log level to log4j levels // Warn is mapped to info because RocksDB warn is too verbose // (e.g. dumps non-warning stuff like stats) - val loggingFunc: ( => String) => Unit = infoLogLevel match { + val loggingFunc: ( => LogEntry) => Unit = infoLogLevel match { case InfoLogLevel.FATAL_LEVEL | InfoLogLevel.ERROR_LEVEL => logError(_) case InfoLogLevel.WARN_LEVEL | InfoLogLevel.INFO_LEVEL => logInfo(_) case InfoLogLevel.DEBUG_LEVEL => logDebug(_) case _ => logTrace(_) } - loggingFunc(s"[NativeRocksDB-${infoLogLevel.getValue}] $logMsg") + loggingFunc(log"[NativeRocksDB-${MDC(LogKeys.ROCKS_DB_LOG_LEVEL, infoLogLevel.getValue)}]" + + log" ${MDC(LogKeys.ROCKS_DB_LOG_MESSAGE, logMsg)}") } } @@ -930,7 +1036,7 @@ class RocksDB( // customized logger. We still set it as it might show up in RocksDB config file or logging. 
dbOptions.setInfoLogLevel(dbLogLevel) dbOptions.setLogger(dbLogger) - logInfo(s"Set RocksDB native logging level to $dbLogLevel") + logInfo(log"Set RocksDB native logging level to ${MDC(LogKeys.ROCKS_DB_LOG_LEVEL, dbLogLevel)}") dbLogger } @@ -945,7 +1051,8 @@ class RocksDB( Utils.deleteRecursively(file) } catch { case e: Exception => - logWarning(s"Error recursively deleting local dir $file while $msg", e) + logWarning(log"Error recursively deleting local dir ${MDC(LogKeys.PATH, file)} " + + log"while ${MDC(LogKeys.ERROR, msg)}", e) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala index bd1daa48f809b..fe7aeeb6fd3f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala @@ -38,7 +38,7 @@ import org.json4s.{Formats, NoTypeHints} import org.json4s.jackson.Serialization import org.apache.spark.{SparkConf, SparkEnv} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC, MessageWithContext} import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.streaming.CheckpointFileManager @@ -133,16 +133,6 @@ class RocksDBFileManager( import RocksDBImmutableFile._ - private val versionToRocksDBFiles = new ConcurrentHashMap[Long, Seq[RocksDBImmutableFile]] - - - // used to keep a mapping of the exact Dfs file that was used to create a local SST file. - // The reason this is a separate map because versionToRocksDBFiles can contain multiple similar - // SST files to a particular local file (for example 1.sst can map to 1-UUID1.sst in v1 and - // 1-UUID2.sst in v2). We need to capture the exact file used to ensure Version ID compatibility - // across SST files and RocksDB manifest. 
- private[sql] val localFilesToDfsFiles = new ConcurrentHashMap[String, RocksDBImmutableFile] - private lazy val fm = CheckpointFileManager.create(new Path(dfsRootDir), hadoopConf) private val fs = new Path(dfsRootDir).getFileSystem(hadoopConf) private val onlyZipFiles = new PathFilter { @@ -157,6 +147,29 @@ class RocksDBFileManager( private def codec = CompressionCodec.createCodec(sparkConf, codecName) @volatile private var rootDirChecked: Boolean = false + @volatile private var fileMappings = RocksDBFileMappings( + new ConcurrentHashMap[Long, Seq[RocksDBImmutableFile]], + new ConcurrentHashMap[String, RocksDBImmutableFile] + ) + + /** + * Make a deep copy of versionToRocksDBFiles and localFilesToDfsFiles to avoid + * current task thread from overwriting the file mapping whenever background maintenance + * thread attempts to upload a snapshot + */ + def copyFileMapping() : Unit = { + val newVersionToRocksDBFiles = new ConcurrentHashMap[Long, Seq[RocksDBImmutableFile]] + val newLocalFilesToDfsFiles = new ConcurrentHashMap[String, RocksDBImmutableFile] + + newVersionToRocksDBFiles.putAll(fileMappings.versionToRocksDBFiles) + newLocalFilesToDfsFiles.putAll(fileMappings.localFilesToDfsFiles) + + fileMappings = RocksDBFileMappings(newVersionToRocksDBFiles, newLocalFilesToDfsFiles) + } + + def captureFileMapReference(): RocksDBFileMappings = { + fileMappings + } def getChangeLogWriter( version: Long, @@ -204,14 +217,20 @@ class RocksDBFileManager( def latestSaveCheckpointMetrics: RocksDBFileManagerMetrics = saveCheckpointMetrics /** Save all the files in given local checkpoint directory as a committed version in DFS */ - def saveCheckpointToDfs(checkpointDir: File, version: Long, numKeys: Long): Unit = { - logFilesInDir(checkpointDir, s"Saving checkpoint files for version $version") + def saveCheckpointToDfs( + checkpointDir: File, + version: Long, + numKeys: Long, + capturedFileMappings: RocksDBFileMappings): Unit = { + logFilesInDir(checkpointDir, log"Saving checkpoint files " + + log"for version ${MDC(LogKeys.VERSION_NUM, version)}") val (localImmutableFiles, localOtherFiles) = listRocksDBFiles(checkpointDir) - val rocksDBFiles = saveImmutableFilesToDfs(version, localImmutableFiles) + val rocksDBFiles = saveImmutableFilesToDfs(version, localImmutableFiles, capturedFileMappings) val metadata = RocksDBCheckpointMetadata(rocksDBFiles, numKeys) val metadataFile = localMetadataFile(checkpointDir) metadata.writeToFile(metadataFile) - logInfo(s"Written metadata for version $version:\n${metadata.prettyJson}") + logInfo(log"Written metadata for version ${MDC(LogKeys.VERSION_NUM, version)}:\n" + + log"${MDC(LogKeys.METADATA_JSON, metadata.prettyJson)}") if (version <= 1 && numKeys <= 0) { // If we're writing the initial version and there's no data, we have to explicitly initialize @@ -227,7 +246,7 @@ class RocksDBFileManager( } } zipToDfsFile(localOtherFiles :+ metadataFile, dfsBatchZipFile(version)) - logInfo(s"Saved checkpoint file for version $version") + logInfo(log"Saved checkpoint file for version ${MDC(LogKeys.VERSION_NUM, version)}") } /** @@ -237,14 +256,14 @@ class RocksDBFileManager( * local directory. */ def loadCheckpointFromDfs(version: Long, localDir: File): RocksDBCheckpointMetadata = { - logInfo(s"Loading checkpoint files for version $version") + logInfo(log"Loading checkpoint files for version ${MDC(LogKeys.VERSION_NUM, version)}") // The unique ids of SST files are checked when opening a rocksdb instance. 
The SST files // in larger versions can't be reused even if they have the same size and name because // they belong to another rocksdb instance. - versionToRocksDBFiles.keySet().removeIf(_ >= version) + fileMappings.versionToRocksDBFiles.keySet().removeIf(_ >= version) val metadata = if (version == 0) { if (localDir.exists) Utils.deleteRecursively(localDir) - localFilesToDfsFiles.clear() + fileMappings.localFilesToDfsFiles.clear() localDir.mkdirs() RocksDBCheckpointMetadata(Seq.empty, 0) } else { @@ -255,13 +274,15 @@ class RocksDBFileManager( // Copy the necessary immutable files val metadataFile = localMetadataFile(localDir) val metadata = RocksDBCheckpointMetadata.readFromFile(metadataFile) - logInfo(s"Read metadata for version $version:\n${metadata.prettyJson}") + logInfo(log"Read metadata for version ${MDC(LogKeys.VERSION_NUM, version)}:\n" + + log"${MDC(LogKeys.METADATA_JSON, metadata.prettyJson)}") loadImmutableFilesFromDfs(metadata.immutableFiles, localDir) - versionToRocksDBFiles.put(version, metadata.immutableFiles) + fileMappings.versionToRocksDBFiles.put(version, metadata.immutableFiles) metadataFile.delete() metadata } - logFilesInDir(localDir, s"Loaded checkpoint files for version $version") + logFilesInDir(localDir, log"Loaded checkpoint files " + + log"for version ${MDC(LogKeys.VERSION_NUM, version)}") metadata } @@ -327,8 +348,9 @@ class RocksDBFileManager( val orphanFiles = fileModificationTimes .filter(_._2 < oldestTrackedFileModificationTime).keys.toSeq if (orphanFiles.nonEmpty) { - logInfo(s"Found ${orphanFiles.size} orphan files: ${orphanFiles.take(20).mkString(", ")}" + - "... (display at most 20 filenames) that should be deleted.") + logInfo(log"Found ${MDC(LogKeys.NUM_FILES, orphanFiles.size)} orphan files: " + + log"${MDC(LogKeys.FILE_MODIFICATION_TIME, orphanFiles.take(20).mkString(", "))}" + + log"... 
(display at most 20 filenames) that should be deleted.") } orphanFiles } else { @@ -340,10 +362,11 @@ class RocksDBFileManager( versionsToDelete.foreach { version => try { fm.delete(dfsChangelogFile(version)) - logInfo(s"Deleted changelog file $version") + logInfo(log"Deleted changelog file ${MDC(LogKeys.VERSION_NUM, version)}") } catch { case e: Exception => - logWarning(s"Error deleting changelog file for version $version", e) + logWarning( + log"Error deleting changelog file for version ${MDC(LogKeys.FILE_VERSION, version)}", e) } } } @@ -411,9 +434,9 @@ class RocksDBFileManager( // Resolve RocksDB files for all the versions and find the max version each file is used val fileToMaxUsedVersion = new mutable.HashMap[String, Long] sortedSnapshotVersions.foreach { version => - val files = Option(versionToRocksDBFiles.get(version)).getOrElse { + val files = Option(fileMappings.versionToRocksDBFiles.get(version)).getOrElse { val newResolvedFiles = getImmutableFilesFromVersionZip(version) - versionToRocksDBFiles.put(version, newResolvedFiles) + fileMappings.versionToRocksDBFiles.put(version, newResolvedFiles) newResolvedFiles } files.foreach(f => fileToMaxUsedVersion(f.dfsFileName) = @@ -431,7 +454,8 @@ class RocksDBFileManager( val allLogFiles = if (fm.exists(logDir)) fm.list(logDir).toImmutableArraySeq else Seq.empty filesToDelete ++= findOrphanFiles(fileToMaxUsedVersion.keys.toSeq, allSstFiles ++ allLogFiles) .map(_ -> -1L) - logInfo(s"Deleting ${filesToDelete.size} files not used in versions >= $minVersionToRetain") + logInfo(log"Deleting ${MDC(LogKeys.NUM_FILES, filesToDelete.size)} " + + log"files not used in versions >= ${MDC(LogKeys.VERSION_NUM, minVersionToRetain)}") var failedToDelete = 0 filesToDelete.foreach { case (dfsFileName, maxUsedVersion) => try { @@ -446,9 +470,10 @@ class RocksDBFileManager( case e: Exception => failedToDelete += 1 if (maxUsedVersion == -1) { - logWarning(s"Error deleting orphan file $dfsFileName", e) + logWarning(log"Error deleting orphan file ${MDC(LogKeys.PATH, dfsFileName)}", e) } else { - logWarning(s"Error deleting file $dfsFileName, last used in version $maxUsedVersion", e) + logWarning(log"Error deleting file ${MDC(LogKeys.PATH, dfsFileName)}, " + + log"last used in version ${MDC(LogKeys.MAX_FILE_VERSION, maxUsedVersion)}", e) } } } @@ -458,16 +483,18 @@ class RocksDBFileManager( val versionFile = dfsBatchZipFile(version) try { fm.delete(versionFile) - versionToRocksDBFiles.remove(version) + fileMappings.versionToRocksDBFiles.remove(version) logDebug(s"Deleted version $version") } catch { case e: Exception => - logWarning(s"Error deleting version file $versionFile for version $version", e) + logWarning(log"Error deleting version file ${MDC(LogKeys.PATH, versionFile)} for " + + log"version ${MDC(LogKeys.FILE_VERSION, version)}", e) } } - logInfo(s"Deleted ${filesToDelete.size - failedToDelete} files (failed to delete" + - s"$failedToDelete files) not used in versions >= $minVersionToRetain") - + logInfo(log"Deleted ${MDC(LogKeys.NUM_FILES, filesToDelete.size - failedToDelete)} files " + + log"(failed to delete" + + log"${MDC(LogKeys.NUM_FILES_FAILED_TO_DELETE, failedToDelete)} files) " + + log"not used in versions >= ${MDC(LogKeys.MIN_VERSION_NUM, minVersionToRetain)}") val changelogVersionsToDelete = changelogFiles .map(_.getName.stripSuffix(".changelog")).map(_.toLong) .filter(_ < minVersionToRetain) @@ -477,21 +504,24 @@ class RocksDBFileManager( /** Save immutable files to DFS directory */ private def saveImmutableFilesToDfs( version: Long, - 
localFiles: Seq[File]): Seq[RocksDBImmutableFile] = { + localFiles: Seq[File], + capturedFileMappings: RocksDBFileMappings): Seq[RocksDBImmutableFile] = { // Get the immutable files used in previous versions, as some of those uploaded files can be // reused for this version - logInfo(s"Saving RocksDB files to DFS for $version") + logInfo(log"Saving RocksDB files to DFS for ${MDC(LogKeys.VERSION_NUM, version)}") var bytesCopied = 0L var filesCopied = 0L var filesReused = 0L val immutableFiles = localFiles.map { localFile => - val existingDfsFile = localFilesToDfsFiles.asScala.get(localFile.getName) + val existingDfsFile = + capturedFileMappings.localFilesToDfsFiles.asScala.get(localFile.getName) if (existingDfsFile.isDefined && existingDfsFile.get.sizeBytes == localFile.length()) { val dfsFile = existingDfsFile.get filesReused += 1 - logInfo(s"reusing file $dfsFile for $localFile") + logInfo(log"reusing file ${MDC(LogKeys.DFS_FILE, dfsFile)} for " + + log"${MDC(LogKeys.FILE_NAME, localFile)}") RocksDBImmutableFile(localFile.getName, dfsFile.dfsFileName, dfsFile.sizeBytes) } else { val localFileName = localFile.getName @@ -504,19 +534,22 @@ class RocksDBFileManager( fs.copyFromLocalFile( new Path(localFile.getAbsoluteFile.toURI), dfsFile) val localFileSize = localFile.length() - logInfo(s"Copied $localFile to $dfsFile - $localFileSize bytes") + logInfo(log"Copied ${MDC(LogKeys.FILE_NAME, localFile)} to " + + log"${MDC(LogKeys.DFS_FILE, dfsFile)} - ${MDC(LogKeys.NUM_BYTES, localFileSize)} bytes") filesCopied += 1 bytesCopied += localFileSize val immutableDfsFile = RocksDBImmutableFile(localFile.getName, dfsFileName, localFileSize) - localFilesToDfsFiles.put(localFileName, immutableDfsFile) + capturedFileMappings.localFilesToDfsFiles.put(localFileName, immutableDfsFile) immutableDfsFile } } - logInfo(s"Copied $filesCopied files ($bytesCopied bytes) from local to" + - s" DFS for version $version. $filesReused files reused without copying.") - versionToRocksDBFiles.put(version, immutableFiles) + logInfo(log"Copied ${MDC(LogKeys.NUM_FILES_COPIED, filesCopied)} files " + + log"(${MDC(LogKeys.NUM_BYTES, bytesCopied)} bytes) from local to" + + log" DFS for version ${MDC(LogKeys.VERSION_NUM, version)}. " + + log"${MDC(LogKeys.NUM_FILES_REUSED, filesReused)} files reused without copying.") + capturedFileMappings.versionToRocksDBFiles.put(version, immutableFiles) // Cleanup locally deleted files from the localFilesToDfsFiles map // Locally, SST Files can be deleted due to RocksDB compaction. 
These files need @@ -556,7 +589,7 @@ class RocksDBFileManager( .foreach { existingFile => val existingFileSize = existingFile.length() val requiredFile = requiredFileNameToFileDetails.get(existingFile.getName) - val prevDfsFile = localFilesToDfsFiles.asScala.get(existingFile.getName) + val prevDfsFile = fileMappings.localFilesToDfsFiles.asScala.get(existingFile.getName) val isSameFile = if (requiredFile.isDefined && prevDfsFile.isDefined) { requiredFile.get.dfsFileName == prevDfsFile.get.dfsFileName && existingFile.length() == requiredFile.get.sizeBytes @@ -566,11 +599,14 @@ class RocksDBFileManager( if (!isSameFile) { existingFile.delete() - localFilesToDfsFiles.remove(existingFile.getName) - logInfo(s"Deleted local file $existingFile with size $existingFileSize mapped" + - s" to previous dfsFile ${prevDfsFile.getOrElse("null")}") + fileMappings.localFilesToDfsFiles.remove(existingFile.getName) + logInfo(log"Deleted local file ${MDC(LogKeys.FILE_NAME, existingFile)} " + + log"with size ${MDC(LogKeys.NUM_BYTES, existingFileSize)} mapped" + + log" to previous dfsFile ${MDC(LogKeys.DFS_FILE, prevDfsFile.getOrElse("null"))}") } else { - logInfo(s"reusing $prevDfsFile present at $existingFile for $requiredFile") + logInfo(log"reusing ${MDC(LogKeys.DFS_FILE, prevDfsFile)} present at " + + log"${MDC(LogKeys.EXISTING_FILE, existingFile)} " + + log"for ${MDC(LogKeys.FILE_NAME, requiredFile)}") } } @@ -595,14 +631,17 @@ class RocksDBFileManager( } filesCopied += 1 bytesCopied += localFileSize - localFilesToDfsFiles.put(localFileName, file) - logInfo(s"Copied $dfsFile to $localFile - $localFileSize bytes") + fileMappings.localFilesToDfsFiles.put(localFileName, file) + logInfo(log"Copied ${MDC(LogKeys.DFS_FILE, dfsFile)} to " + + log"${MDC(LogKeys.FILE_NAME, localFile)} - " + + log"${MDC(LogKeys.NUM_BYTES, localFileSize)} bytes") } else { filesReused += 1 } } - logInfo(s"Copied $filesCopied files ($bytesCopied bytes) from DFS to local with " + - s"$filesReused files reused.") + logInfo(log"Copied ${MDC(LogKeys.NUM_FILES_COPIED, filesCopied)} files " + + log"(${MDC(LogKeys.NUM_BYTES, bytesCopied)} bytes) from DFS to local with " + + log"${MDC(LogKeys.NUM_FILES_REUSED, filesReused)} files reused.") loadCheckpointMetrics = RocksDBFileManagerMetrics( bytesCopied = bytesCopied, @@ -613,13 +652,13 @@ class RocksDBFileManager( private def removeLocallyDeletedSSTFilesFromDfsMapping(localFiles: Seq[File]): Unit = { // clean up deleted SST files from the localFilesToDfsFiles Map val currentLocalFiles = localFiles.map(_.getName).toSet - val mappingsToClean = localFilesToDfsFiles.asScala + val mappingsToClean = fileMappings.localFilesToDfsFiles.asScala .keys .filterNot(currentLocalFiles.contains) mappingsToClean.foreach { f => - logInfo(s"cleaning $f from the localFilesToDfsFiles map") - localFilesToDfsFiles.remove(f) + logInfo(log"cleaning ${MDC(LogKeys.FILE_NAME, f)} from the localFilesToDfsFiles map") + fileMappings.localFilesToDfsFiles.remove(f) } } @@ -653,7 +692,8 @@ class RocksDBFileManager( totalBytes += bytes } zout.close() // so that any error in closing also cancels the output stream - logInfo(s"Zipped $totalBytes bytes (before compression) to $filesStr") + logInfo(log"Zipped ${MDC(LogKeys.NUM_BYTES, totalBytes)} bytes (before compression) to " + + log"${MDC(LogKeys.FILE_NAME, filesStr)}") // The other fields saveCheckpointMetrics should have been filled saveCheckpointMetrics = saveCheckpointMetrics.copy(zipFileBytesUncompressed = Some(totalBytes)) @@ -661,7 +701,7 @@ class RocksDBFileManager( case e: 
Exception => // Cancel the actual output stream first, so that zout.close() does not write the file out.cancel() - logError(s"Error zipping to $filesStr", e) + logError(log"Error zipping to ${MDC(LogKeys.FILE_NAME, filesStr)}", e) throw e } finally { // Close everything no matter what happened @@ -671,11 +711,12 @@ class RocksDBFileManager( } /** Log the files present in a directory. This is useful for debugging. */ - private def logFilesInDir(dir: File, msg: String): Unit = { + private def logFilesInDir(dir: File, msg: MessageWithContext): Unit = { lazy val files = Option(Utils.recursiveList(dir)).getOrElse(Array.empty).map { f => s"${f.getAbsolutePath} - ${f.length()} bytes" } - logInfo(s"$msg - ${files.length} files\n\t${files.mkString("\n\t")}") + logInfo(msg + log" - ${MDC(LogKeys.NUM_FILES, files.length)} files\n\t" + + log"${MDC(LogKeys.FILE_NAME, files.mkString("\n\t"))}") } private def newDFSFileName(localFileName: String): String = { @@ -727,6 +768,20 @@ class RocksDBFileManager( } } +/** + * Track file mappings in RocksDB across local and remote directories + * @param versionToRocksDBFiles Mapping of RocksDB files used across versions for maintenance + * @param localFilesToDfsFiles Mapping of the exact Dfs file used to create a local SST file + * The reason localFilesToDfsFiles is a separate map because versionToRocksDBFiles can contain + * multiple similar SST files to a particular local file (for example 1.sst can map to 1-UUID1.sst + * in v1 and 1-UUID2.sst in v2). We need to capture the exact file used to ensure Version ID + * compatibility across SST files and RocksDB manifest. + */ + +case class RocksDBFileMappings( + versionToRocksDBFiles: ConcurrentHashMap[Long, Seq[RocksDBImmutableFile]], + localFilesToDfsFiles: ConcurrentHashMap[String, RocksDBImmutableFile]) + /** * Metrics regarding RocksDB file sync between local and DFS. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBMemoryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBMemoryManager.scala index 38b9dc56838ee..273cbbc5e87d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBMemoryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBMemoryManager.scala @@ -19,7 +19,8 @@ package org.apache.spark.sql.execution.streaming.state import org.rocksdb._ -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ /** * Singleton responsible for managing cache and write buffer manager associated with all RocksDB @@ -47,8 +48,8 @@ object RocksDBMemoryManager extends Logging { } val totalMemoryUsageInBytes: Long = conf.totalMemoryUsageMB * 1024 * 1024 - logInfo(s"Creating RocksDB state store LRU cache with " + - s"total_size=$totalMemoryUsageInBytes") + logInfo(log"Creating RocksDB state store LRU cache with " + + log"total_size=${MDC(NUM_BYTES, totalMemoryUsageInBytes)}") // SPARK-44878 - avoid using strict limit to prevent insertion exception on cache full. 
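// Illustrative sketch, not part of the patch: the local-file reuse rule applied in the
// RocksDBFileManager hunk above. ImmutableDfsFile and FileMappings are simplified
// stand-ins for the patch's RocksDBImmutableFile and RocksDBFileMappings.
import java.io.File
import java.util.concurrent.ConcurrentHashMap
import scala.jdk.CollectionConverters._

case class ImmutableDfsFile(dfsFileName: String, sizeBytes: Long)
case class FileMappings(
    versionToDfsFiles: ConcurrentHashMap[Long, Seq[ImmutableDfsFile]],
    localFilesToDfsFiles: ConcurrentHashMap[String, ImmutableDfsFile])

// Reuse a local SST file only when it still maps to the exact DFS file required by the
// version being loaded and its size on disk is unchanged; otherwise it is deleted and
// re-downloaded, which is what the isSameFile check above decides.
def canReuseLocalFile(
    mappings: FileMappings,
    localFile: File,
    required: ImmutableDfsFile): Boolean = {
  mappings.localFilesToDfsFiles.asScala.get(localFile.getName).exists { prev =>
    prev.dfsFileName == required.dfsFileName && localFile.length() == required.sizeBytes
  }
}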
// Please refer to RocksDB issue here - https://github.com/facebook/rocksdb/issues/8670 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala index e05f9c24f7193..a555f9a40044a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala @@ -24,14 +24,16 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.spark.{SparkConf, SparkEnv} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils private[sql] class RocksDBStateStoreProvider - extends StateStoreProvider with Logging with Closeable { + extends StateStoreProvider with Logging with Closeable + with SupportsFineGrainedReplay { import RocksDBStateStoreProvider._ class RocksDBStateStore(lastVersion: Long) extends StateStore { @@ -164,7 +166,8 @@ private[sql] class RocksDBStateStoreProvider verify(state == UPDATING, "Cannot commit after already committed or aborted") val newVersion = rocksDB.commit() state = COMMITTED - logInfo(s"Committed $newVersion for $id") + logInfo(log"Committed ${MDC(VERSION_NUM, newVersion)} " + + log"for ${MDC(STATE_STORE_ID, id)}") newVersion } catch { case e: Throwable => @@ -174,7 +177,8 @@ private[sql] class RocksDBStateStoreProvider override def abort(): Unit = { verify(state == UPDATING || state == ABORTED, "Cannot abort after already committed") - logInfo(s"Aborting ${version + 1} for $id") + logInfo(log"Aborting ${MDC(VERSION_NUM, version + 1)} " + + log"for ${MDC(STATE_STORE_ID, id)}") rocksDB.rollback() state = ABORTED } @@ -238,7 +242,8 @@ private[sql] class RocksDBStateStoreProvider rocksDBMetrics.totalMemUsageBytes, stateStoreCustomMetrics) } else { - logInfo(s"Failed to collect metrics for store_id=$id and version=$version") + logInfo(log"Failed to collect metrics for store_id=${MDC(STATE_STORE_ID, id)} " + + log"and version=${MDC(VERSION_NUM, version)}") StateStoreMetrics(0, 0, Map.empty) } } @@ -363,6 +368,30 @@ private[sql] class RocksDBStateStoreProvider private def verify(condition: => Boolean, msg: String): Unit = { if (!condition) { throw new IllegalStateException(msg) } } + + /** + * Get the state store of endVersion by applying delta files on the snapshot of snapshotVersion. + * If snapshot for snapshotVersion does not exist, an error will be thrown. 
+ * + * @param snapshotVersion checkpoint version of the snapshot to start with + * @param endVersion checkpoint version to end with + * @return [[StateStore]] + */ + override def replayStateFromSnapshot(snapshotVersion: Long, endVersion: Long): StateStore = { + try { + if (snapshotVersion < 1) { + throw QueryExecutionErrors.unexpectedStateStoreVersion(snapshotVersion) + } + if (endVersion < snapshotVersion) { + throw QueryExecutionErrors.unexpectedStateStoreVersion(endVersion) + } + rocksDB.loadFromSnapshot(snapshotVersion, endVersion) + new RocksDBStateStore(endVersion) + } + catch { + case e: Throwable => throw QueryExecutionErrors.cannotLoadStore(e) + } + } } object RocksDBStateStoreProvider { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SchemaHelper.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SchemaHelper.scala index 2eef3d9fc22ed..0a8021ab3de2b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SchemaHelper.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SchemaHelper.scala @@ -20,6 +20,11 @@ package org.apache.spark.sql.execution.streaming.state import java.io.StringReader import org.apache.hadoop.fs.{FSDataInputStream, FSDataOutputStream} +import org.json4s.DefaultFormats +import org.json4s.JsonAST._ +import org.json4s.JsonDSL._ +import org.json4s.jackson.JsonMethods +import org.json4s.jackson.JsonMethods.{compact, render} import org.apache.spark.sql.execution.streaming.MetadataVersionUtil import org.apache.spark.sql.types.StructType @@ -28,6 +33,57 @@ import org.apache.spark.util.Utils /** * Helper classes for reading/writing state schema. */ +sealed trait ColumnFamilySchema extends Serializable { + def jsonValue: JValue + + def json: String + + def columnFamilyName: String +} + +case class ColumnFamilySchemaV1( + columnFamilyName: String, + keySchema: StructType, + valueSchema: StructType, + keyStateEncoderSpec: KeyStateEncoderSpec, + userKeyEncoder: Option[StructType] = None) extends ColumnFamilySchema { + def jsonValue: JValue = { + ("columnFamilyName" -> JString(columnFamilyName)) ~ + ("keySchema" -> JString(keySchema.json)) ~ + ("valueSchema" -> JString(valueSchema.json)) ~ + ("keyStateEncoderSpec" -> keyStateEncoderSpec.jsonValue) ~ + ("userKeyEncoder" -> userKeyEncoder.map(s => JString(s.json)).getOrElse(JNothing)) + } + + def json: String = { + compact(render(jsonValue)) + } +} + +object ColumnFamilySchemaV1 { + + /** + * Create a ColumnFamilySchemaV1 object from the Json string + * This function is to read the StateSchemaV3 file + */ + def fromJson(json: String): ColumnFamilySchema = { + implicit val formats: DefaultFormats.type = DefaultFormats + val colFamilyMap = JsonMethods.parse(json).extract[Map[String, Any]] + assert(colFamilyMap.isInstanceOf[Map[_, _]], + s"Expected Map but got ${colFamilyMap.getClass}") + val keySchema = StructType.fromString(colFamilyMap("keySchema").asInstanceOf[String]) + val valueSchema = StructType.fromString(colFamilyMap("valueSchema").asInstanceOf[String]) + ColumnFamilySchemaV1( + colFamilyMap("columnFamilyName").asInstanceOf[String], + keySchema, + valueSchema, + KeyStateEncoderSpec.fromJson(keySchema, colFamilyMap("keyStateEncoderSpec") + .asInstanceOf[Map[String, Any]]), + colFamilyMap.get("userKeyEncoder").map(_.asInstanceOf[String]).map(StructType.fromString) + ) + } +} + object SchemaHelper { sealed trait SchemaReader { diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala index a385c09b38fc3..8aabc0846fe61 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala @@ -17,17 +17,19 @@ package org.apache.spark.sql.execution.streaming.state +import scala.util.Try + import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.spark.internal.Logging -import org.apache.spark.sql.execution.streaming.CheckpointFileManager +import org.apache.spark.SparkUnsupportedOperationException +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.sql.catalyst.util.UnsafeRowUtils +import org.apache.spark.sql.execution.streaming.{CheckpointFileManager, StatefulOperatorStateInfo} import org.apache.spark.sql.execution.streaming.state.SchemaHelper.{SchemaReader, SchemaWriter} -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SessionState import org.apache.spark.sql.types.{DataType, StructType} -case class StateSchemaNotCompatible(message: String) extends Exception(message) - class StateSchemaCompatibilityChecker( providerId: StateStoreProviderId, hadoopConf: Configuration) extends Logging { @@ -40,54 +42,6 @@ class StateSchemaCompatibilityChecker( fm.mkdirs(schemaFileLocation.getParent) - def check(keySchema: StructType, valueSchema: StructType): Unit = { - check(keySchema, valueSchema, ignoreValueSchema = false) - } - - def check(keySchema: StructType, valueSchema: StructType, ignoreValueSchema: Boolean): Unit = { - if (fm.exists(schemaFileLocation)) { - logDebug(s"Schema file for provider $providerId exists. Comparing with provided schema.") - val (storedKeySchema, storedValueSchema) = readSchemaFile() - if (storedKeySchema.equals(keySchema) && - (ignoreValueSchema || storedValueSchema.equals(valueSchema))) { - // schema is exactly same - } else if (!schemasCompatible(storedKeySchema, keySchema) || - (!ignoreValueSchema && !schemasCompatible(storedValueSchema, valueSchema))) { - val errorMsgForKeySchema = s"- Provided key schema: $keySchema\n" + - s"- Existing key schema: $storedKeySchema\n" - - // If it is requested to skip checking the value schema, we also don't expose the value - // schema information to the error message. - val errorMsgForValueSchema = if (!ignoreValueSchema) { - s"- Provided value schema: $valueSchema\n" + - s"- Existing value schema: $storedValueSchema\n" - } else { - "" - } - val errorMsg = "Provided schema doesn't match to the schema for existing state! " + - "Please note that Spark allow difference of field name: check count of fields " + - "and data type of each field.\n" + - errorMsgForKeySchema + - errorMsgForValueSchema + - s"If you want to force running query without schema validation, please set " + - s"${SQLConf.STATE_SCHEMA_CHECK_ENABLED.key} to false.\n" + - "Please note running query with incompatible schema could cause indeterministic" + - " behavior." - logError(errorMsg) - throw StateSchemaNotCompatible(errorMsg) - } else { - logInfo("Detected schema change which is compatible. Allowing to put rows.") - } - } else { - // schema doesn't exist, create one now - logDebug(s"Schema file for provider $providerId doesn't exist. 
Creating one.") - createSchemaFile(keySchema, valueSchema) - } - } - - private def schemasCompatible(storedSchema: StructType, schema: StructType): Boolean = - DataType.equalsIgnoreNameAndCompatibleNullability(schema, storedSchema) - def readSchemaFile(): (StructType, StructType) = { val inStream = fm.open(schemaFileLocation) try { @@ -96,14 +50,27 @@ class StateSchemaCompatibilityChecker( schemaReader.read(inStream) } catch { case e: Throwable => - logError(s"Fail to read schema file from $schemaFileLocation", e) + logError(log"Fail to read schema file from ${MDC(LogKeys.PATH, schemaFileLocation)}", e) throw e } finally { inStream.close() } } - def createSchemaFile(keySchema: StructType, valueSchema: StructType): Unit = { + /** + * Function to read and return the existing key and value schema from the schema file, if it + * exists + * @return - Option of (keySchema, valueSchema) if the schema file exists, None otherwise + */ + private def getExistingKeyAndValueSchema(): Option[(StructType, StructType)] = { + if (fm.exists(schemaFileLocation)) { + Some(readSchemaFile()) + } else { + None + } + } + + private def createSchemaFile(keySchema: StructType, valueSchema: StructType): Unit = { createSchemaFile(keySchema, valueSchema, schemaWriter) } @@ -118,16 +85,123 @@ class StateSchemaCompatibilityChecker( outStream.close() } catch { case e: Throwable => - logError(s"Fail to write schema file to $schemaFileLocation", e) + logError(log"Fail to write schema file to ${MDC(LogKeys.PATH, schemaFileLocation)}", e) outStream.cancel() throw e } } + def validateAndMaybeEvolveStateSchema( + newKeySchema: StructType, + newValueSchema: StructType, + ignoreValueSchema: Boolean): Unit = { + val existingSchema = getExistingKeyAndValueSchema() + if (existingSchema.isEmpty) { + // write the schema file if it doesn't exist + createSchemaFile(newKeySchema, newValueSchema) + } else { + // validate if the new schema is compatible with the existing schema + StateSchemaCompatibilityChecker. + check(existingSchema.get, (newKeySchema, newValueSchema), ignoreValueSchema) + } + } + private def schemaFile(storeCpLocation: Path): Path = new Path(new Path(storeCpLocation, "_metadata"), "schema") } -object StateSchemaCompatibilityChecker { +object StateSchemaCompatibilityChecker extends Logging { val VERSION = 2 + + /** + * Function to check if new state store schema is compatible with the existing schema. + * @param oldSchema - old state schema + * @param newSchema - new state schema + * @param ignoreValueSchema - whether to ignore value schema or not + */ + def check( + oldSchema: (StructType, StructType), + newSchema: (StructType, StructType), + ignoreValueSchema: Boolean) : Unit = { + val (storedKeySchema, storedValueSchema) = oldSchema + val (keySchema, valueSchema) = newSchema + + if (storedKeySchema.equals(keySchema) && + (ignoreValueSchema || storedValueSchema.equals(valueSchema))) { + // schema is exactly same + } else if (!schemasCompatible(storedKeySchema, keySchema)) { + throw StateStoreErrors.stateStoreKeySchemaNotCompatible(storedKeySchema.toString, + keySchema.toString) + } else if (!ignoreValueSchema && !schemasCompatible(storedValueSchema, valueSchema)) { + throw StateStoreErrors.stateStoreValueSchemaNotCompatible(storedValueSchema.toString, + valueSchema.toString) + } else { + logInfo("Detected schema change which is compatible. 
Allowing to put rows.") + } + } + + private def schemasCompatible(storedSchema: StructType, schema: StructType): Boolean = + DataType.equalsIgnoreNameAndCompatibleNullability(schema, storedSchema) + + private def disallowBinaryInequalityColumn(schema: StructType): Unit = { + if (!UnsafeRowUtils.isBinaryStable(schema)) { + throw new SparkUnsupportedOperationException( + errorClass = "STATE_STORE_UNSUPPORTED_OPERATION_BINARY_INEQUALITY", + messageParameters = Map("schema" -> schema.json) + ) + } + } + + /** + * Function to validate the schema of the state store and maybe evolve it if needed. + * We also verify for binary inequality columns in the schema and disallow them. We then perform + * key and value schema validation. Depending on the passed configs, a warning might be logged + * or an exception might be thrown if the schema is not compatible. + * + * @param stateInfo - StatefulOperatorStateInfo containing the state store information + * @param hadoopConf - Hadoop configuration + * @param newKeySchema - New key schema + * @param newValueSchema - New value schema + * @param sessionState - session state used to retrieve session config + * @param extraOptions - any extra options to be passed for StateStoreConf creation + * @param storeName - optional state store name + */ + def validateAndMaybeEvolveStateSchema( + stateInfo: StatefulOperatorStateInfo, + hadoopConf: Configuration, + newKeySchema: StructType, + newValueSchema: StructType, + sessionState: SessionState, + extraOptions: Map[String, String] = Map.empty, + storeName: String = StateStoreId.DEFAULT_STORE_NAME): Array[String] = { + // SPARK-47776: collation introduces the concept of binary (in)equality, which means + // in some collation we no longer be able to just compare the binary format of two + // UnsafeRows to determine equality. For example, 'aaa' and 'AAA' can be "semantically" + // same in case insensitive collation. + // State store is basically key-value storage, and the most provider implementations + // rely on the fact that all the columns in the key schema support binary equality. + // We need to disallow using binary inequality column in the key schema, before we + // could support this in majority of state store providers (or high-level of state + // store.) + disallowBinaryInequalityColumn(newKeySchema) + + val storeConf = new StateStoreConf(sessionState.conf, extraOptions) + val providerId = StateStoreProviderId(StateStoreId(stateInfo.checkpointLocation, + stateInfo.operatorId, 0, storeName), stateInfo.queryRunId) + val checker = new StateSchemaCompatibilityChecker(providerId, hadoopConf) + // regardless of configuration, we check compatibility to at least write schema file + // if necessary + // if the format validation for value schema is disabled, we also disable the schema + // compatibility checker for value schema as well. 
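// Illustrative sketch, not part of the patch: what the relocated check above accepts and
// rejects. A schema that differs only in field names is treated as compatible, while a
// data type change raises the new error class. The schemas below are made up.
import org.apache.spark.sql.execution.streaming.state.StateSchemaCompatibilityChecker
import org.apache.spark.sql.types._

val storedKey = new StructType().add("groupKey", StringType)
val storedValue = new StructType().add("count", LongType)

// Same shape, different field name: compatible, returns normally.
StateSchemaCompatibilityChecker.check(
  oldSchema = (storedKey, storedValue),
  newSchema = (storedKey, new StructType().add("total", LongType)),
  ignoreValueSchema = false)

// Changed value data type: throws StateStoreValueSchemaNotCompatible.
StateSchemaCompatibilityChecker.check(
  oldSchema = (storedKey, storedValue),
  newSchema = (storedKey, new StructType().add("count", StringType)),
  ignoreValueSchema = false)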
+ val result = Try( + checker.validateAndMaybeEvolveStateSchema(newKeySchema, newValueSchema, + ignoreValueSchema = !storeConf.formatValidationCheckValue) + ).toEither.fold(Some(_), _ => None) + + // if schema validation is enabled and an exception is thrown, we re-throw it and fail the query + if (storeConf.stateSchemaCheckEnabled && result.isDefined) { + throw result.get + } + Array(checker.schemaFileLocation.toString) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaV3File.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaV3File.scala new file mode 100644 index 0000000000000..38e6484728126 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaV3File.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.streaming.state + +import java.io.{InputStream, OutputStream} +import java.nio.charset.StandardCharsets.UTF_8 +import java.util.UUID + +import scala.io.{Source => IOSource} + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path + +import org.apache.spark.sql.execution.streaming.CheckpointFileManager +import org.apache.spark.sql.execution.streaming.MetadataVersionUtil.validateVersion + +/** + * The StateSchemaV3File is used to write the schema of multiple column families. + * Right now, this is primarily used for the TransformWithState operator, which supports + * multiple column families to keep the data for multiple state variables. + * We only expect ColumnFamilySchemaV1 to be written and read from this file. + * @param hadoopConf Hadoop configuration that is used to read / write metadata files. + * @param path Path to the directory that will be used for writing metadata. 
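// Illustrative sketch, not part of the patch: the per-column-family JSON that the
// StateSchemaV3File described here persists, built with the ColumnFamilySchemaV1
// json/fromJson pair added earlier in this diff. "countState" is a made-up
// column family name.
import org.apache.spark.sql.execution.streaming.state.{ColumnFamilySchemaV1, NoPrefixKeyStateEncoderSpec}
import org.apache.spark.sql.types._

val keySchema = new StructType().add("key", StringType)
val valueSchema = new StructType().add("value", LongType)

val cfSchema = ColumnFamilySchemaV1(
  columnFamilyName = "countState",
  keySchema = keySchema,
  valueSchema = valueSchema,
  keyStateEncoderSpec = NoPrefixKeyStateEncoderSpec(keySchema))

val json = cfSchema.json                          // one JSON line per column family
val roundTripped = ColumnFamilySchemaV1.fromJson(json)
assert(roundTripped.columnFamilyName == "countState")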
+ */ +class StateSchemaV3File( + hadoopConf: Configuration, + path: String) { + + val metadataPath = new Path(path) + + protected val fileManager: CheckpointFileManager = + CheckpointFileManager.create(metadataPath, hadoopConf) + + if (!fileManager.exists(metadataPath)) { + fileManager.mkdirs(metadataPath) + } + + private def deserialize(in: InputStream): List[ColumnFamilySchema] = { + val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines() + + if (!lines.hasNext) { + throw new IllegalStateException("Incomplete log file in the offset commit log") + } + + val version = lines.next().trim + validateVersion(version, StateSchemaV3File.VERSION) + + lines.map(ColumnFamilySchemaV1.fromJson).toList + } + + private def serialize(schemas: List[ColumnFamilySchema], out: OutputStream): Unit = { + out.write(s"v${StateSchemaV3File.VERSION}".getBytes(UTF_8)) + out.write('\n') + out.write(schemas.map(_.json).mkString("\n").getBytes(UTF_8)) + } + + def addWithUUID(batchId: Long, metadata: List[ColumnFamilySchema]): Path = { + val schemaFilePath = new Path(metadataPath, s"${batchId}_${UUID.randomUUID().toString}") + write(schemaFilePath, out => serialize(metadata, out)) + schemaFilePath + } + + def getWithPath(schemaFilePath: Path): List[ColumnFamilySchema] = { + deserialize(fileManager.open(schemaFilePath)) + } + + protected def write( + batchMetadataFile: Path, + fn: OutputStream => Unit): Unit = { + val output = fileManager.createAtomic(batchMetadataFile, overwriteIfPossible = false) + try { + fn(output) + output.close() + } catch { + case e: Throwable => + output.cancel() + throw e + } + } +} + +object StateSchemaV3File { + val VERSION = 3 +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 85f7fed90c6c8..484a6850ce79e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -23,14 +23,17 @@ import java.util.concurrent.atomic.AtomicReference import javax.annotation.concurrent.GuardedBy import scala.collection.mutable -import scala.util.Try import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.json4s.{JInt, JString} +import org.json4s.JsonAST.JValue +import org.json4s.JsonDSL._ +import org.json4s.jackson.JsonMethods.{compact, render} -import org.apache.spark.{SparkContext, SparkEnv, SparkUnsupportedOperationException} -import org.apache.spark.internal.Logging +import org.apache.spark.{SparkContext, SparkEnv, SparkException} +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.util.UnsafeRowUtils import org.apache.spark.sql.errors.QueryExecutionErrors @@ -279,19 +282,33 @@ case class StateStoreCustomTimingMetric(name: String, desc: String) extends Stat SQLMetrics.createTimingMetric(sparkContext, desc) } -/** - * An exception thrown when an invalid UnsafeRow is detected in state store. - */ -class InvalidUnsafeRowException(error: String) - extends RuntimeException("The streaming query failed by state format invalidation. " + - "The following reasons may cause this: 1. An old Spark version wrote the checkpoint that is " + - "incompatible with the current one; 2. Broken checkpoint files; 3. The query is changed " + - "among restart. 
For the first case, you can try to restart the application without " + - s"checkpoint or use the legacy Spark version to process the streaming state.\n$error", null) +sealed trait KeyStateEncoderSpec { + def jsonValue: JValue + def json: String = compact(render(jsonValue)) +} -sealed trait KeyStateEncoderSpec +object KeyStateEncoderSpec { + def fromJson(keySchema: StructType, m: Map[String, Any]): KeyStateEncoderSpec = { + // match on type + m("keyStateEncoderType").asInstanceOf[String] match { + case "NoPrefixKeyStateEncoderSpec" => + NoPrefixKeyStateEncoderSpec(keySchema) + case "RangeKeyScanStateEncoderSpec" => + val orderingOrdinals = m("orderingOrdinals"). + asInstanceOf[List[_]].map(_.asInstanceOf[BigInt].toInt) + RangeKeyScanStateEncoderSpec(keySchema, orderingOrdinals) + case "PrefixKeyScanStateEncoderSpec" => + val numColsPrefixKey = m("numColsPrefixKey").asInstanceOf[BigInt].toInt + PrefixKeyScanStateEncoderSpec(keySchema, numColsPrefixKey) + } + } +} -case class NoPrefixKeyStateEncoderSpec(keySchema: StructType) extends KeyStateEncoderSpec +case class NoPrefixKeyStateEncoderSpec(keySchema: StructType) extends KeyStateEncoderSpec { + override def jsonValue: JValue = { + ("keyStateEncoderType" -> JString("NoPrefixKeyStateEncoderSpec")) + } +} case class PrefixKeyScanStateEncoderSpec( keySchema: StructType, @@ -299,6 +316,11 @@ case class PrefixKeyScanStateEncoderSpec( if (numColsPrefixKey == 0 || numColsPrefixKey >= keySchema.length) { throw StateStoreErrors.incorrectNumOrderingColsForPrefixScan(numColsPrefixKey.toString) } + + override def jsonValue: JValue = { + ("keyStateEncoderType" -> JString("PrefixKeyScanStateEncoderSpec")) ~ + ("numColsPrefixKey" -> JInt(numColsPrefixKey)) + } } /** Encodes rows so that they can be range-scanned based on orderingOrdinals */ @@ -308,6 +330,11 @@ case class RangeKeyScanStateEncoderSpec( if (orderingOrdinals.isEmpty || orderingOrdinals.length > keySchema.length) { throw StateStoreErrors.incorrectNumOrderingColsForRangeScan(orderingOrdinals.length.toString) } + + override def jsonValue: JValue = { + ("keyStateEncoderType" -> JString("RangeKeyScanStateEncoderSpec")) ~ + ("orderingOrdinals" -> orderingOrdinals.map(JInt(_))) + } } /** @@ -396,6 +423,12 @@ object StateStoreProvider { */ def create(providerClassName: String): StateStoreProvider = { val providerClass = Utils.classForName(providerClassName) + if (!classOf[StateStoreProvider].isAssignableFrom(providerClass)) { + throw new SparkException( + errorClass = "STATE_STORE_INVALID_PROVIDER", + messageParameters = Map("inputClass" -> providerClassName), + cause = null) + } providerClass.getConstructor().newInstance().asInstanceOf[StateStoreProvider] } @@ -428,17 +461,54 @@ object StateStoreProvider { conf: StateStoreConf): Unit = { if (conf.formatValidationEnabled) { val validationError = UnsafeRowUtils.validateStructuralIntegrityWithReason(keyRow, keySchema) - validationError.foreach { error => throw new InvalidUnsafeRowException(error) } + validationError.foreach { error => + throw StateStoreErrors.keyRowFormatValidationFailure(error) + } if (conf.formatValidationCheckValue) { val validationError = UnsafeRowUtils.validateStructuralIntegrityWithReason(valueRow, valueSchema) - validationError.foreach { error => throw new InvalidUnsafeRowException(error) } + validationError.foreach { error => + throw StateStoreErrors.valueRowFormatValidationFailure(error) + } } } } } +/** + * This is an optional trait to be implemented by [[StateStoreProvider]]s that can read fine + * grained state data which 
is replayed from a specific snapshot version. It is used by the + * snapshotStartBatchId option in state data source. + */ +trait SupportsFineGrainedReplay { + + /** + * Return an instance of [[StateStore]] representing state data of the given version. + * The State Store will be constructed from the snapshot at snapshotVersion, and applying delta + * files up to the endVersion. If there is no snapshot file at snapshotVersion, an exception will + * be thrown. + * + * @param snapshotVersion checkpoint version of the snapshot to start with + * @param endVersion checkpoint version to end with + */ + def replayStateFromSnapshot(snapshotVersion: Long, endVersion: Long): StateStore + + /** + * Return an instance of [[ReadStateStore]] representing state data of the given version. + * The State Store will be constructed from the snapshot at snapshotVersion, and applying delta + * files up to the endVersion. If there is no snapshot file at snapshotVersion, an exception will + * be thrown. + * Only implement this if there is read-only optimization for the state store. + * + * @param snapshotVersion checkpoint version of the snapshot to start with + * @param endVersion checkpoint version to end with + */ + def replayReadStateFromSnapshot(snapshotVersion: Long, endVersion: Long): ReadStateStore = { + new WrappedReadStateStore(replayStateFromSnapshot(snapshotVersion, endVersion)) + } +} + /** * Unique identifier for a provider, used to identify when providers can be reused. * Note that `queryRunId` is used uniquely identify a provider, so that the same provider @@ -524,9 +594,6 @@ object StateStore extends Logging { @GuardedBy("loadedProviders") private val loadedProviders = new mutable.HashMap[StateStoreProviderId, StateStoreProvider]() - @GuardedBy("loadedProviders") - private val schemaValidated = new mutable.HashMap[StateStoreProviderId, Option[Throwable]]() - private val maintenanceThreadPoolLock = new Object // Shared exception between threads in thread pool that the scheduling thread @@ -584,7 +651,21 @@ object StateStore extends Logging { } def stop(): Unit = { - threadPool.shutdown() + logInfo("Shutting down MaintenanceThreadPool") + threadPool.shutdown() // Disable new tasks from being submitted + + // Wait a while for existing tasks to terminate + if (!threadPool.awaitTermination(5 * 60, TimeUnit.SECONDS)) { + logWarning( + s"MaintenanceThreadPool is not able to be terminated within 300 seconds," + + " forcefully shutting down now.") + threadPool.shutdownNow() // Cancel currently executing tasks + + // Wait a while for tasks to respond to being cancelled + if (!threadPool.awaitTermination(60, TimeUnit.SECONDS)) { + logError("MaintenanceThreadPool did not terminate") + } + } } } @@ -635,15 +716,6 @@ object StateStore extends Logging { storeProvider.getStore(version) } - private def disallowBinaryInequalityColumn(schema: StructType): Unit = { - if (!UnsafeRowUtils.isBinaryStable(schema)) { - throw new SparkUnsupportedOperationException( - errorClass = "STATE_STORE_UNSUPPORTED_OPERATION_BINARY_INEQUALITY", - messageParameters = Map("schema" -> schema.json) - ) - } - } - private def getStateStoreProvider( storeProviderId: StateStoreProviderId, keySchema: StructType, @@ -656,40 +728,6 @@ object StateStore extends Logging { loadedProviders.synchronized { startMaintenanceIfNeeded(storeConf) - if (storeProviderId.storeId.partitionId == PARTITION_ID_TO_CHECK_SCHEMA) { - val result = schemaValidated.getOrElseUpdate(storeProviderId, { - // SPARK-47776: collation introduces the concept of binary 
(in)equality, which means - // in some collation we no longer be able to just compare the binary format of two - // UnsafeRows to determine equality. For example, 'aaa' and 'AAA' can be "semantically" - // same in case insensitive collation. - // State store is basically key-value storage, and the most provider implementations - // rely on the fact that all the columns in the key schema support binary equality. - // We need to disallow using binary inequality column in the key schema, before we - // could support this in majority of state store providers (or high-level of state - // store.) - disallowBinaryInequalityColumn(keySchema) - - val checker = new StateSchemaCompatibilityChecker(storeProviderId, hadoopConf) - // regardless of configuration, we check compatibility to at least write schema file - // if necessary - // if the format validation for value schema is disabled, we also disable the schema - // compatibility checker for value schema as well. - val ret = Try( - checker.check(keySchema, valueSchema, - ignoreValueSchema = !storeConf.formatValidationCheckValue) - ).toEither.fold(Some(_), _ => None) - if (storeConf.stateSchemaCheckEnabled) { - ret - } else { - None - } - }) - - if (result.isDefined) { - throw result.get - } - } - // SPARK-42567 - Track load time for state store provider and log warning if takes longer // than 2s. val (provider, loadTimeMs) = Utils.timeTakenMs { @@ -702,9 +740,10 @@ object StateStore extends Logging { } if (loadTimeMs > 2000L) { - logWarning(s"Loaded state store provider in loadTimeMs=$loadTimeMs " + - s"for storeId=${storeProviderId.storeId.toString} and " + - s"queryRunId=${storeProviderId.queryRunId}") + logWarning(log"Loaded state store provider in loadTimeMs=" + + log"${MDC(LogKeys.LOAD_TIME, loadTimeMs)} " + + log"for storeId=${MDC(LogKeys.STORE_ID, storeProviderId.storeId.toString)} and " + + log"queryRunId=${MDC(LogKeys.QUERY_RUN_ID, storeProviderId.queryRunId)}") } val otherProviderIds = loadedProviders.keys.filter(_ != storeProviderId).toSeq @@ -820,16 +859,18 @@ object StateStore extends Logging { provider.doMaintenance() if (!verifyIfStoreInstanceActive(id)) { unload(id) - logInfo(s"Unloaded $provider") + logInfo(log"Unloaded ${MDC(LogKeys.STATE_STORE_PROVIDER, provider)}") } } catch { case NonFatal(e) => - logWarning(s"Error managing $provider, stopping management thread", e) + logWarning(log"Error managing ${MDC(LogKeys.STATE_STORE_PROVIDER, provider)}, " + + log"stopping management thread", e) threadPoolException.set(e) } finally { val duration = System.currentTimeMillis() - startTime - val logMsg = s"Finished maintenance task for provider=$id" + - s" in elapsed_time=$duration\n" + val logMsg = + log"Finished maintenance task for provider=${MDC(LogKeys.STATE_STORE_PROVIDER, id)}" + + log" in elapsed_time=${MDC(LogKeys.TIME_UNITS, duration)}\n" if (duration > 5000) { logInfo(logMsg) } else { @@ -841,8 +882,9 @@ object StateStore extends Logging { } }) } else { - logInfo(s"Not processing partition ${id} for maintenance because it is currently " + - s"being processed") + logInfo(log"Not processing partition ${MDC(LogKeys.PARTITION_ID, id)} " + + log"for maintenance because it is currently " + + log"being processed") } } } @@ -856,8 +898,10 @@ object StateStore extends Logging { val providerIdsToUnload = coordinatorRef .map(_.reportActiveInstance(storeProviderId, host, executorId, otherProviderIds)) .getOrElse(Seq.empty[StateStoreProviderId]) - logInfo(s"Reported that the loaded instance $storeProviderId is active") - logDebug(s"The 
loaded instances are going to unload: ${providerIdsToUnload.mkString(", ")}") + logInfo(log"Reported that the loaded instance " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, storeProviderId)} is active") + logDebug(log"The loaded instances are going to unload: " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, providerIdsToUnload.mkString(", "))}") providerIdsToUnload } else { Seq.empty[StateStoreProviderId] @@ -888,7 +932,8 @@ object StateStore extends Logging { logDebug("Getting StateStoreCoordinatorRef") _coordRef = StateStoreCoordinatorRef.forExecutor(env) } - logInfo(s"Retrieved reference to StateStoreCoordinator: ${_coordRef}") + logInfo(log"Retrieved reference to StateStoreCoordinator: " + + log"${MDC(LogKeys.STATE_STORE_PROVIDER, _coordRef)}") Some(_coordRef) } else { _coordRef = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreChangelog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreChangelog.scala index 30cf49d8e56d4..b1860be41ac44 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreChangelog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreChangelog.scala @@ -25,7 +25,8 @@ import com.google.common.io.ByteStreams import org.apache.commons.io.IOUtils import org.apache.hadoop.fs.{FSError, Path} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.streaming.CheckpointFileManager @@ -108,8 +109,9 @@ abstract class StateStoreChangelogWriter( // IOException into FSError. 
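// Illustrative sketch, not part of the patch: the two-phase shutdown pattern that the
// MaintenanceThreadPool.stop() change a few hunks above follows. The pool here is a plain
// ExecutorService stand-in.
import java.util.concurrent.{Executors, TimeUnit}

val pool = Executors.newFixedThreadPool(2)

def stop(): Unit = {
  pool.shutdown()                                        // reject new maintenance tasks
  if (!pool.awaitTermination(5 * 60, TimeUnit.SECONDS)) {
    pool.shutdownNow()                                   // interrupt in-flight tasks
    if (!pool.awaitTermination(60, TimeUnit.SECONDS)) {
      // the pool could not be terminated; log and give up rather than block forever
    }
  }
}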
case e: FSError if e.getCause.isInstanceOf[IOException] => case NonFatal(ex) => - logInfo(s"Failed to cancel changelog file $file for state store provider " + - s"with exception=$ex") + logInfo(log"Failed to cancel changelog file ${MDC(FILE_NAME, file)} " + + log"for state store provider " + + log"with exception=${MDC(ERROR, ex)}") } finally { backingFileStream = null compressedStream = null @@ -174,7 +176,7 @@ class StateStoreChangelogWriterV1( } catch { case e: Throwable => abort() - logError(s"Fail to commit changelog file $file because of exception $e") + logError(log"Fail to commit changelog file ${MDC(PATH, file)} because of exception", e) throw e } finally { backingFileStream = null @@ -253,7 +255,7 @@ class StateStoreChangelogWriterV2( } catch { case e: Throwable => abort() - logError(s"Fail to commit changelog file $file because of exception $e") + logError(log"Fail to commit changelog file ${MDC(PATH, file)} because of exception", e) throw e } finally { backingFileStream = null diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreErrors.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreErrors.scala index b8ab32a00851f..4ac813291c00b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreErrors.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreErrors.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.streaming.state -import org.apache.spark.{SparkException, SparkUnsupportedOperationException} +import org.apache.spark.{SparkException, SparkRuntimeException, SparkUnsupportedOperationException} /** * Object for grouping error messages from (most) exceptions thrown from State API V2 @@ -39,6 +39,16 @@ object StateStoreErrors { ) } + def keyRowFormatValidationFailure(errorMsg: String): + StateStoreKeyRowFormatValidationFailure = { + new StateStoreKeyRowFormatValidationFailure(errorMsg) + } + + def valueRowFormatValidationFailure(errorMsg: String): + StateStoreValueRowFormatValidationFailure = { + new StateStoreValueRowFormatValidationFailure(errorMsg) + } + def unsupportedOperationOnMissingColumnFamily(operationName: String, colFamilyName: String): StateStoreUnsupportedOperationOnMissingColumnFamily = { new StateStoreUnsupportedOperationOnMissingColumnFamily(operationName, colFamilyName) @@ -127,6 +137,42 @@ object StateStoreErrors { stateName: String): StatefulProcessorTTLMustBePositive = { new StatefulProcessorTTLMustBePositive(operationType, stateName) } + + def stateStoreKeySchemaNotCompatible( + storedKeySchema: String, + newKeySchema: String): StateStoreKeySchemaNotCompatible = { + new StateStoreKeySchemaNotCompatible(storedKeySchema, newKeySchema) + } + + def stateStoreValueSchemaNotCompatible( + storedValueSchema: String, + newValueSchema: String): StateStoreValueSchemaNotCompatible = { + new StateStoreValueSchemaNotCompatible(storedValueSchema, newValueSchema) + } + + def stateStoreColumnFamilyMismatch( + columnFamilyName: String, + oldColumnFamilySchema: String, + newColumnFamilySchema: String): StateStoreColumnFamilyMismatch = { + new StateStoreColumnFamilyMismatch( + columnFamilyName, oldColumnFamilySchema, newColumnFamilySchema) + } + + def stateStoreSnapshotFileNotFound(fileToRead: String, clazz: String): + StateStoreSnapshotFileNotFound = { + new StateStoreSnapshotFileNotFound(fileToRead, clazz) + } + + def stateStoreSnapshotPartitionNotFound( + snapshotPartitionId: Long, operatorId: Int, 
checkpointLocation: String): + StateStoreSnapshotPartitionNotFound = { + new StateStoreSnapshotPartitionNotFound(snapshotPartitionId, operatorId, checkpointLocation) + } + + def stateStoreProviderDoesNotSupportFineGrainedReplay(inputClass: String): + StateStoreProviderDoesNotSupportFineGrainedReplay = { + new StateStoreProviderDoesNotSupportFineGrainedReplay(inputClass) + } } class StateStoreMultipleColumnFamiliesNotSupportedException(stateStoreProvider: String) @@ -156,6 +202,17 @@ class StateStoreUnsupportedOperationException(operationType: String, entity: Str messageParameters = Map("operationType" -> operationType, "entity" -> entity) ) +class StateStoreColumnFamilyMismatch( + columnFamilyName: String, + oldColumnFamilySchema: String, + newColumnFamilySchema: String) + extends SparkUnsupportedOperationException( + errorClass = "STATE_STORE_COLUMN_FAMILY_SCHEMA_INCOMPATIBLE", + messageParameters = Map( + "columnFamilyName" -> columnFamilyName, + "oldColumnFamilySchema" -> oldColumnFamilySchema, + "newColumnFamilySchema" -> newColumnFamilySchema)) + class StatefulProcessorCannotPerformOperationWithInvalidTimeMode( operationType: String, timeMode: String) @@ -214,3 +271,52 @@ class StatefulProcessorTTLMustBePositive( extends SparkUnsupportedOperationException( errorClass = "STATEFUL_PROCESSOR_TTL_DURATION_MUST_BE_POSITIVE", messageParameters = Map("operationType" -> operationType, "stateName" -> stateName)) + +class StateStoreKeySchemaNotCompatible( + storedKeySchema: String, + newKeySchema: String) + extends SparkUnsupportedOperationException( + errorClass = "STATE_STORE_KEY_SCHEMA_NOT_COMPATIBLE", + messageParameters = Map( + "storedKeySchema" -> storedKeySchema, + "newKeySchema" -> newKeySchema)) + +class StateStoreValueSchemaNotCompatible( + storedValueSchema: String, + newValueSchema: String) + extends SparkUnsupportedOperationException( + errorClass = "STATE_STORE_VALUE_SCHEMA_NOT_COMPATIBLE", + messageParameters = Map( + "storedValueSchema" -> storedValueSchema, + "newValueSchema" -> newValueSchema)) + +class StateStoreSnapshotFileNotFound(fileToRead: String, clazz: String) + extends SparkRuntimeException( + errorClass = "CANNOT_LOAD_STATE_STORE.CANNOT_READ_MISSING_SNAPSHOT_FILE", + messageParameters = Map( + "fileToRead" -> fileToRead, + "clazz" -> clazz)) + +class StateStoreSnapshotPartitionNotFound( + snapshotPartitionId: Long, operatorId: Int, checkpointLocation: String) + extends SparkRuntimeException( + errorClass = "CANNOT_LOAD_STATE_STORE.SNAPSHOT_PARTITION_ID_NOT_FOUND", + messageParameters = Map( + "snapshotPartitionId" -> snapshotPartitionId.toString, + "operatorId" -> operatorId.toString, + "checkpointLocation" -> checkpointLocation)) + +class StateStoreKeyRowFormatValidationFailure(errorMsg: String) + extends SparkRuntimeException( + errorClass = "STATE_STORE_KEY_ROW_FORMAT_VALIDATION_FAILURE", + messageParameters = Map("errorMsg" -> errorMsg)) + +class StateStoreValueRowFormatValidationFailure(errorMsg: String) + extends SparkRuntimeException( + errorClass = "STATE_STORE_VALUE_ROW_FORMAT_VALIDATION_FAILURE", + messageParameters = Map("errorMsg" -> errorMsg)) + +class StateStoreProviderDoesNotSupportFineGrainedReplay(inputClass: String) + extends SparkUnsupportedOperationException( + errorClass = "STATE_STORE_PROVIDER_DOES_NOT_SUPPORT_FINE_GRAINED_STATE_REPLAY", + messageParameters = Map("inputClass" -> inputClass)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingSessionWindowStateManager.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingSessionWindowStateManager.scala index 5130933f52efa..71df9dc65b419 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingSessionWindowStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StreamingSessionWindowStateManager.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql.execution.streaming.state -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Literal, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection @@ -261,7 +262,7 @@ class StreamingSessionWindowStateManagerImplV1( override def abortIfNeeded(store: StateStore): Unit = { if (!store.hasCommitted) { - logInfo(s"Aborted store ${store.id}") + logInfo(log"Aborted store ${MDC(STATE_STORE_ID, store.id)}") store.abort() } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 9802a4dce4e5c..4de3170f5db33 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -24,7 +24,8 @@ import scala.annotation.tailrec import org.apache.hadoop.conf.Configuration import org.apache.spark.TaskContext -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{END_INDEX, START_INDEX, STATE_STORE_ID} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, JoinedRow, Literal, SafeProjection, SpecificInternalRow, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes @@ -86,7 +87,8 @@ class SymmetricHashJoinStateManager( partitionId: Int, stateFormatVersion: Int, skippedNullValueCount: Option[SQLMetric] = None, - useStateStoreCoordinator: Boolean = true) extends Logging { + useStateStoreCoordinator: Boolean = true, + snapshotStartVersion: Option[Long] = None) extends Logging { import SymmetricHashJoinStateManager._ /* @@ -366,9 +368,9 @@ class SymmetricHashJoinStateManager( // If nulls were found at the end, log a warning for the range of null indices. 
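// Illustrative sketch, not part of the patch: the dispatch shape behind the
// snapshotStartVersion option introduced above and used in the hunk below.
// getStoreForReplay is a hypothetical helper written only for illustration.
import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreErrors, StateStoreProvider, SupportsFineGrainedReplay}

def getStoreForReplay(
    provider: StateStoreProvider,
    snapshotVersion: Long,
    endVersion: Long): StateStore = {
  provider match {
    case replayable: SupportsFineGrainedReplay =>
      // Rebuild state from the snapshot at snapshotVersion, then apply delta files up to
      // endVersion.
      replayable.replayStateFromSnapshot(snapshotVersion, endVersion)
    case other =>
      throw StateStoreErrors.stateStoreProviderDoesNotSupportFineGrainedReplay(
        other.getClass.toString)
  }
}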
if (nonNullIndex != numValues - 1) { - logWarning(s"`keyWithIndexToValue` returns a null value for indices " + - s"with range from startIndex=${nonNullIndex + 1} " + - s"and endIndex=${numValues - 1}.") + logWarning(log"`keyWithIndexToValue` returns a null value for indices " + + log"with range from startIndex=${MDC(START_INDEX, nonNullIndex + 1)} " + + log"and endIndex=${MDC(END_INDEX, numValues - 1)}.") } // Remove all null values from nonNullIndex + 1 onwards @@ -462,7 +464,7 @@ class SymmetricHashJoinStateManager( def abortIfNeeded(): Unit = { if (!stateStore.hasCommitted) { - logInfo(s"Aborted store ${stateStore.id}") + logInfo(log"Aborted store ${MDC(STATE_STORE_ID, stateStore.id)}") stateStore.abort() } // If this class manages a state store provider by itself, it should take care of closing @@ -479,6 +481,8 @@ class SymmetricHashJoinStateManager( val storeProviderId = StateStoreProviderId( stateInfo.get, partitionId, getStateStoreName(joinSide, stateStoreType)) val store = if (useStateStoreCoordinator) { + assert(snapshotStartVersion.isEmpty, "Should not use state store coordinator " + + "when reading state as data source.") StateStore.get( storeProviderId, keySchema, valueSchema, NoPrefixKeyStateEncoderSpec(keySchema), stateInfo.get.storeVersion, useColumnFamilies = false, storeConf, hadoopConf) @@ -488,9 +492,18 @@ class SymmetricHashJoinStateManager( storeProviderId, keySchema, valueSchema, NoPrefixKeyStateEncoderSpec(keySchema), useColumnFamilies = false, storeConf, hadoopConf, useMultipleValuesPerKey = false) - stateStoreProvider.getStore(stateInfo.get.storeVersion) + if (snapshotStartVersion.isDefined) { + if (!stateStoreProvider.isInstanceOf[SupportsFineGrainedReplay]) { + throw StateStoreErrors.stateStoreProviderDoesNotSupportFineGrainedReplay( + stateStoreProvider.getClass.toString) + } + stateStoreProvider.asInstanceOf[SupportsFineGrainedReplay] + .replayStateFromSnapshot(snapshotStartVersion.get, stateInfo.get.storeVersion) + } else { + stateStoreProvider.getStore(stateInfo.get.storeVersion) + } } - logInfo(s"Loaded store ${store.id}") + logInfo(log"Loaded store ${MDC(STATE_STORE_ID, store.id)}") store } } @@ -766,6 +779,35 @@ object SymmetricHashJoinStateManager { } } + def getSchemaForStateStores( + joinSide: JoinSide, + inputValueAttributes: Seq[Attribute], + joinKeys: Seq[Expression], + stateFormatVersion: Int): Map[String, (StructType, StructType)] = { + var result: Map[String, (StructType, StructType)] = Map.empty + + // get the key and value schema for the KeyToNumValues state store + val keySchema = StructType( + joinKeys.zipWithIndex.map { case (k, i) => StructField(s"field$i", k.dataType, k.nullable) }) + val longValueSchema = new StructType().add("value", "long") + result += (getStateStoreName(joinSide, KeyToNumValuesType) -> (keySchema, longValueSchema)) + + // get the key and value schema for the KeyWithIndexToValue state store + val keyWithIndexSchema = keySchema.add("index", LongType) + val valueSchema = if (stateFormatVersion == 1) { + inputValueAttributes + } else if (stateFormatVersion == 2) { + inputValueAttributes :+ AttributeReference("matched", BooleanType)() + } else { + throw new IllegalArgumentException("Incorrect state format version! 
" + + s"version=$stateFormatVersion") + } + result += (getStateStoreName(joinSide, KeyWithIndexToValueType) -> + (keyWithIndexSchema, valueSchema.toStructType)) + + result + } + private sealed trait StateStoreType private case object KeyToNumValuesType extends StateStoreType { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index 3bf833816bcc4..94d976b568a5e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -23,6 +23,8 @@ import java.util.concurrent.TimeUnit._ import scala.collection.mutable import scala.jdk.CollectionConverters._ +import org.apache.hadoop.conf.Configuration + import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.AnalysisException @@ -70,6 +72,13 @@ trait StatefulOperator extends SparkPlan { throw new IllegalStateException("State location not present for execution") } } + + // Function used to record state schema for the first time and validate it against proposed + // schema changes in the future. Runs as part of a planning rule on the driver. + // Returns the schema file path for operators that write this to the metadata file, + // otherwise None + def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): Array[String] } /** @@ -359,7 +368,7 @@ object WatermarkSupport { if (optionalWatermarkExpression.isEmpty || optionalWatermarkMs.isEmpty) return None val watermarkAttribute = optionalWatermarkExpression.get - // If we are evicting based on a window, use the end of the window. Otherwise just + // If we are evicting based on a window, use the end of the window. Otherwise just // use the attribute itself. 
val evictionExpression = if (watermarkAttribute.dataType.isInstanceOf[StructType]) { @@ -424,6 +433,13 @@ case class StateStoreRestoreExec( private[sql] val stateManager = StreamingAggregationStateManager.createStateManager( keyExpressions, child.output, stateFormatVersion) + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): + Array[String] = { + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + keyExpressions.toStructType, stateManager.getStateValueSchema, session.sessionState) + } + override protected def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") @@ -486,6 +502,13 @@ case class StateStoreSaveExec( private[sql] val stateManager = StreamingAggregationStateManager.createStateManager( keyExpressions, child.output, stateFormatVersion) + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): + Array[String] = { + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + keyExpressions.toStructType, stateManager.getStateValueSchema, session.sessionState) + } + override protected def doExecute(): RDD[InternalRow] = { metrics // force lazy init at driver assert(outputMode.nonEmpty, @@ -690,6 +713,13 @@ case class SessionWindowStateStoreRestoreExec( private val stateManager = StreamingSessionWindowStateManager.createStateManager( keyWithoutSessionExpressions, sessionExpression, child.output, stateFormatVersion) + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): + Array[String] = { + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + stateManager.getStateKeySchema, stateManager.getStateValueSchema, session.sessionState) + } + override protected def doExecute(): RDD[InternalRow] = { val numOutputRows = longMetric("numOutputRows") @@ -772,6 +802,13 @@ case class SessionWindowStateStoreSaveExec( private val stateManager = StreamingSessionWindowStateManager.createStateManager( keyWithoutSessionExpressions, sessionExpression, child.output, stateFormatVersion) + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): + Array[String] = { + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + stateManager.getStateKeySchema, stateManager.getStateValueSchema, session.sessionState) + } + override protected def doExecute(): RDD[InternalRow] = { metrics // force lazy init at driver assert(outputMode.nonEmpty, @@ -1079,6 +1116,13 @@ case class StreamingDeduplicateExec( override protected def withNewChildInternal(newChild: SparkPlan): StreamingDeduplicateExec = copy(child = newChild) + + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): + Array[String] = { + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + keyExpressions.toStructType, schemaForValueRow, session.sessionState, extraOptionOnStateStore) + } } object StreamingDeduplicateExec { @@ -1150,6 +1194,13 @@ case class StreamingDeduplicateWithinWatermarkExec( override def shortName: String = "dedupeWithinWatermark" + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): + Array[String] = { + 
StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + keyExpressions.toStructType, schemaForValueRow, session.sessionState, extraOptionOnStateStore) + } + override protected def withNewChildInternal( newChild: SparkPlan): StreamingDeduplicateWithinWatermarkExec = copy(child = newChild) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala index e0e3ee582bef0..7b3d393ec75d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/streamingLimits.scala @@ -18,12 +18,14 @@ package org.apache.spark.sql.execution.streaming import java.util.concurrent.TimeUnit.NANOSECONDS +import org.apache.hadoop.conf.Configuration + import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericInternalRow, SortOrder, UnsafeProjection, UnsafeRow} import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, Distribution, Partitioning} import org.apache.spark.sql.execution.{LimitExec, SparkPlan, UnaryExecNode} -import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, StateStoreOps} +import org.apache.spark.sql.execution.streaming.state.{NoPrefixKeyStateEncoderSpec, StateSchemaCompatibilityChecker, StateStoreOps} import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{LongType, NullType, StructField, StructType} import org.apache.spark.util.{CompletionIterator, NextIterator} @@ -45,6 +47,13 @@ case class StreamingGlobalLimitExec( private val keySchema = StructType(Array(StructField("key", NullType))) private val valueSchema = StructType(Array(StructField("value", LongType))) + override def validateAndMaybeEvolveStateSchema( + hadoopConf: Configuration, batchId: Long, stateSchemaVersion: Int): + Array[String] = { + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(getStateInfo, hadoopConf, + keySchema, valueSchema, session.sessionState) + } + override protected def doExecute(): RDD[InternalRow] = { metrics // force lazy init at driver diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala index 8a2a5282b69d9..bf33ba2c96f19 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala @@ -25,7 +25,8 @@ import scala.jdk.CollectionConverters._ import scala.util.control.NonFatal import org.apache.spark.{JobExecutionStatus, SparkConf} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.CLASS_NAME import org.apache.spark.internal.config.Status._ import org.apache.spark.scheduler._ import org.apache.spark.sql.connector.metric.CustomMetric @@ -222,9 +223,9 @@ class SQLAppStatusListener( method } catch { case NonFatal(e) => - logWarning(s"Unable to load custom metric object for class `$className`. " + - "Please make sure that the custom metric class is in the classpath and " + - "it has 0-arg constructor.", e) + logWarning(log"Unable to load custom metric object for class " + + log"`${MDC(CLASS_NAME, className)}`. 
Please make sure that the custom metric " + + log"class is in the classpath and it has 0-arg constructor.", e) // Cannot initialize custom metric object, we might be in history server that does // not have the custom metric class. val defaultMethod = (_: Array[Long], _: Array[Long]) => "N/A" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala index c48c8bbe4697b..88550fac7303f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Aggregator.scala @@ -49,6 +49,7 @@ import org.apache.spark.sql.execution.aggregate.TypedAggregateExpression * @tparam OUT The type of the final output result. * @since 1.6.0 */ +@SerialVersionUID(2093413866369130093L) abstract class Aggregator[-IN, BUF, OUT] extends Serializable { /** @@ -89,8 +90,7 @@ abstract class Aggregator[-IN, BUF, OUT] extends Serializable { def outputEncoder: Encoder[OUT] /** - * Returns this `Aggregator` as a `TypedColumn` that can be used in `Dataset`. - * operations. + * Returns this `Aggregator` as a `TypedColumn` that can be used in `Dataset` operations. * @since 1.6.0 */ def toColumn: TypedColumn[IN, OUT] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index e21375713b8a1..882918eb78c7f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -80,6 +80,7 @@ import org.apache.spark.util.Utils * @groupname struct_funcs Struct functions * @groupname csv_funcs CSV functions * @groupname json_funcs JSON functions + * @groupname variant_funcs VARIANT functions * @groupname xml_funcs XML functions * @groupname url_funcs URL functions * @groupname partition_transforms Partition transform functions @@ -1936,6 +1937,15 @@ object functions { */ def try_divide(left: Column, right: Column): Column = Column.fn("try_divide", left, right) + /** + * Returns the remainder of `dividend``/``divisor`. Its result is + * always null if `divisor` is 0. + * + * @group math_funcs + * @since 4.0.0 + */ + def try_remainder(left: Column, right: Column): Column = Column.fn("try_remainder", left, right) + /** * Returns `left``*``right` and the result is null on overflow. The acceptable input types are * the same with the `*` operator. @@ -4144,9 +4154,11 @@ object functions { /** * Splits str around matches of the given pattern. * - * @param str a string expression to split - * @param pattern a string representing a regular expression. The regex string should be - * a Java regular expression. + * @param str + * a string expression to split + * @param pattern + * a string representing a regular expression. The regex string should be a Java regular + * expression. * * @group string_funcs * @since 1.5.0 @@ -4156,17 +4168,31 @@ object functions { /** * Splits str around matches of the given pattern. * - * @param str a string expression to split - * @param pattern a string representing a regular expression. The regex string should be - * a Java regular expression. - * @param limit an integer expression which controls the number of times the regex is applied. - *

- *        <ul>
- *          <li>limit greater than 0: The resulting array's length will not be more than limit,
- *          and the resulting array's last entry will contain all input beyond the last
- *          matched regex.</li>
- *          <li>limit less than or equal to 0: `regex` will be applied as many times as
- *          possible, and the resulting array can be of any size.</li>
- *        </ul>
      + * @param str + * a string expression to split + * @param pattern + * a column of string representing a regular expression. The regex string should be a Java + * regular expression. + * + * @group string_funcs + * @since 4.0.0 + */ + def split(str: Column, pattern: Column): Column = Column.fn("split", str, pattern) + + /** + * Splits str around matches of the given pattern. + * + * @param str + * a string expression to split + * @param pattern + * a string representing a regular expression. The regex string should be a Java regular + * expression. + * @param limit + * an integer expression which controls the number of times the regex is applied.
+ *   <ul> <li>limit greater than 0: The resulting array's length will not be more than limit, and
+ *   the resulting array's last entry will contain all input beyond the last matched regex.</li>
+ *   <li>limit less than or equal to 0: `regex` will be applied as many times as possible, and
+ *   the resulting array can be of any size.</li> </ul>
      * * @group string_funcs * @since 3.0.0 @@ -4174,6 +4200,27 @@ object functions { def split(str: Column, pattern: String, limit: Int): Column = Column.fn("split", str, lit(pattern), lit(limit)) + /** + * Splits str around matches of the given pattern. + * + * @param str + * a string expression to split + * @param pattern + * a column of string representing a regular expression. The regex string should be a Java + * regular expression. + * @param limit + * a column of integer expression which controls the number of times the regex is applied. + *
+ *   <ul> <li>limit greater than 0: The resulting array's length will not be more than limit,
+ *   and the resulting array's last entry will contain all input beyond the last matched
+ *   regex.</li> <li>limit less than or equal to 0: `regex` will be applied as many times as
+ *   possible, and the resulting array can be of any size.</li> </ul>
      + * + * @group string_funcs + * @since 4.0.0 + */ + def split(str: Column, pattern: Column, limit: Column): Column = + Column.fn("split", str, pattern, limit) + /** * Substring starts at `pos` and is of length `len` when str is String type or * returns the slice of byte array that starts at `pos` in byte and is of length `len` @@ -4187,6 +4234,19 @@ object functions { def substring(str: Column, pos: Int, len: Int): Column = Column.fn("substring", str, lit(pos), lit(len)) + /** + * Substring starts at `pos` and is of length `len` when str is String type or + * returns the slice of byte array that starts at `pos` in byte and is of length `len` + * when str is Binary type + * + * @note The position is not zero based, but 1 based index. + * + * @group string_funcs + * @since 4.0.0 + */ + def substring(str: Column, pos: Column, len: Column): Column = + Column.fn("substring", str, pos, len) + /** * Returns the substring from string str before count occurrences of the delimiter delim. * If count is positive, everything the left of the final delimiter (counting from left) is @@ -5694,6 +5754,27 @@ object functions { */ def timestamp_micros(e: Column): Column = Column.fn("timestamp_micros", e) + /** + * Gets the difference between the timestamps in the specified units by truncating + * the fraction part. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def timestamp_diff(unit: String, start: Column, end: Column): Column = withExpr { + TimestampDiff(unit, start.expr, end.expr) + } + + /** + * Adds the specified number of units to the given timestamp. + * + * @group datetime_funcs + * @since 4.0.0 + */ + def timestamp_add(unit: String, quantity: Column, ts: Column): Column = withExpr { + TimestampAdd(unit, quantity.expr, ts.expr) + } + /** * Parses the `timestamp` expression with the `format` expression * to a timestamp without time zone. Returns null with invalid input. @@ -6595,15 +6676,91 @@ object functions { } /** - * Parses a JSON string and constructs a Variant value. + * Parses a JSON string and constructs a Variant value. Returns null if the input string is not + * a valid JSON value. * * @param json a string column that contains JSON data. * - * @group json_funcs + * @group variant_funcs + * @since 4.0.0 + */ + def try_parse_json(json: Column): Column = Column.fn("try_parse_json", json) + + /** + * Parses a JSON string and constructs a Variant value. + * + * @param json + * a string column that contains JSON data. + * @group variant_funcs * @since 4.0.0 */ def parse_json(json: Column): Column = Column.fn("parse_json", json) + /** + * Check if a variant value is a variant null. Returns true if and only if the input is a + * variant null and false otherwise (including in the case of SQL NULL). + * + * @param v + * a variant column. + * @group variant_funcs + * @since 4.0.0 + */ + def is_variant_null(v: Column): Column = Column.fn("is_variant_null", v) + + /** + * Extracts a sub-variant from `v` according to `path`, and then cast the sub-variant to + * `targetType`. Returns null if the path does not exist. Throws an exception if the cast fails. + * + * @param v + * a variant column. + * @param path + * the extraction path. A valid path should start with `$` and is followed by zero or more + * segments like `[123]`, `.name`, `['name']`, or `["name"]`. + * @param targetType + * the target data type to cast into, in a DDL-formatted string. 
+ * @group variant_funcs + * @since 4.0.0 + */ + def variant_get(v: Column, path: String, targetType: String): Column = + Column.fn("variant_get", v, lit(path), lit(targetType)) + + /** + * Extracts a sub-variant from `v` according to `path`, and then cast the sub-variant to + * `targetType`. Returns null if the path does not exist or the cast fails.. + * + * @param v + * a variant column. + * @param path + * the extraction path. A valid path should start with `$` and is followed by zero or more + * segments like `[123]`, `.name`, `['name']`, or `["name"]`. + * @param targetType + * the target data type to cast into, in a DDL-formatted string. + * @group variant_funcs + * @since 4.0.0 + */ + def try_variant_get(v: Column, path: String, targetType: String): Column = + Column.fn("try_variant_get", v, lit(path), lit(targetType)) + + /** + * Returns schema in the SQL format of a variant. + * + * @param v + * a variant column. + * @group variant_funcs + * @since 4.0.0 + */ + def schema_of_variant(v: Column): Column = Column.fn("schema_of_variant", v) + + /** + * Returns the merged schema in the SQL format of a variant column. + * + * @param v + * a variant column. + * @group variant_funcs + * @since 4.0.0 + */ + def schema_of_variant_agg(v: Column): Column = Column.fn("schema_of_variant_agg", v) + /** * Parses a JSON string and infers its schema in DDL format. * @@ -6817,9 +6974,9 @@ object functions { /** * Returns length of array or map. * - * The function returns null for null input if spark.sql.legacy.sizeOfNull is set to false or - * spark.sql.ansi.enabled is set to true. Otherwise, the function returns -1 for null input. - * With the default settings, the function returns -1 for null input. + * This function returns -1 for null input only if spark.sql.ansi.enabled is false and + * spark.sql.legacy.sizeOfNull is true. Otherwise, it returns null for null input. + * With the default settings, the function returns null for null input. * * @group collection_funcs * @since 1.5.0 @@ -6829,9 +6986,9 @@ object functions { /** * Returns length of array or map. This is an alias of `size` function. * - * The function returns null for null input if spark.sql.legacy.sizeOfNull is set to false or - * spark.sql.ansi.enabled is set to true. Otherwise, the function returns -1 for null input. - * With the default settings, the function returns -1 for null input. + * This function returns -1 for null input only if spark.sql.ansi.enabled is false and + * spark.sql.legacy.sizeOfNull is true. Otherwise, it returns null for null input. + * With the default settings, the function returns null for null input. 
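[Editor's usage sketch] The functions.scala hunk above adds several 4.0.0 Column helpers: try_remainder, Column-based split/substring overloads, timestamp_diff/timestamp_add, and the VARIANT family (parse_json, try_parse_json, variant_get, try_variant_get, schema_of_variant, schema_of_variant_agg). A minimal usage sketch, assuming a running SparkSession named `spark`; the literals and column names are illustrative only:

    import org.apache.spark.sql.functions._

    // Illustrative data: a JSON string, a comma-separated string, and two integers.
    val df = spark.sql(
      """SELECT '{"a": 1, "b": "x"}' AS js, 'a,b,c' AS csv, 7 AS n, 0 AS d""")

    df.select(
      try_remainder(col("n"), col("d")),                 // null instead of a divide-by-zero error
      split(col("csv"), lit(","), lit(2)),               // pattern and limit may now be Columns
      substring(col("csv"), lit(1), lit(3)),             // pos/len may now be Columns too
      variant_get(parse_json(col("js")), "$.a", "int"),  // extract $.a from a VARIANT and cast to int
      schema_of_variant(parse_json(col("js")))           // SQL-formatted schema of the variant value
    ).show(truncate = false)

Per the scaladoc above, try_parse_json and try_variant_get behave like their non-try counterparts but return null instead of failing on malformed input or a failed cast.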
* * @group collection_funcs * @since 3.5.0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala index 63c0d116ba3a4..4660970814e21 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/BaseSessionStateBuilder.scala @@ -224,6 +224,7 @@ abstract class BaseSessionStateBuilder( TableCapabilityCheck +: CommandCheck +: CollationCheck +: + ViewSyncSchemaToMetaStore +: customCheckRules } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index df7c4ab1a0c7d..3e20a23a0a066 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -734,9 +734,8 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { // same way as how a permanent view is handled. This also avoids a potential issue where a // dependent view becomes invalid because of the above while its data is still cached. val viewText = viewDef.desc.viewText - val plan = sparkSession.sessionState.executePlan(viewDef) - sparkSession.sharedState.cacheManager.uncacheQuery( - sparkSession, plan.analyzed, cascade = viewText.isDefined) + val df = Dataset.ofRows(sparkSession, viewDef) + sparkSession.sharedState.cacheManager.uncacheQuery(df, cascade = viewText.isDefined) } catch { case NonFatal(_) => // ignore } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 164710cdd8839..2b1451493398f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -29,7 +29,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FsUrlStreamHandlerFactory, Path} import org.apache.spark.{SparkConf, SparkContext} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CONFIG, CONFIG2, PATH, VALUE} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.CacheManager @@ -168,11 +169,12 @@ private[sql] class SharedState( wrapped } + val globalTempDB = conf.get(GLOBAL_TEMP_DATABASE) + /** * A manager for global temporary views. 
*/ lazy val globalTempViewManager: GlobalTempViewManager = { - val globalTempDB = conf.get(GLOBAL_TEMP_DATABASE) if (externalCatalog.databaseExists(globalTempDB)) { throw QueryExecutionErrors.databaseNameConflictWithSystemPreservedDatabaseError(globalTempDB) } @@ -258,8 +260,9 @@ object SharedState extends Logging { val sparkWarehouseOption = initialConfigs.get(WAREHOUSE_PATH.key).orElse(sparkConf.getOption(WAREHOUSE_PATH.key)) if (initialConfigs.contains(HIVE_WAREHOUSE_CONF_NAME)) { - logWarning(s"Not allowing to set $HIVE_WAREHOUSE_CONF_NAME in SparkSession's " + - s"options, please use ${WAREHOUSE_PATH.key} to set statically for cross-session usages") + logWarning(log"Not allowing to set ${MDC(CONFIG, HIVE_WAREHOUSE_CONF_NAME)} in " + + log"SparkSession's options, please use ${MDC(CONFIG2, WAREHOUSE_PATH.key)} to " + + log"set statically for cross-session usages") } // hive.metastore.warehouse.dir only stay in hadoopConf sparkConf.remove(HIVE_WAREHOUSE_CONF_NAME) @@ -268,8 +271,10 @@ object SharedState extends Logging { if (hiveWarehouseDir != null && sparkWarehouseOption.isEmpty) { // If hive.metastore.warehouse.dir is set and spark.sql.warehouse.dir is not set, // we will respect the value of hive.metastore.warehouse.dir. - logInfo(s"${WAREHOUSE_PATH.key} is not set, but $HIVE_WAREHOUSE_CONF_NAME is set. " + - s"Setting ${WAREHOUSE_PATH.key} to the value of $HIVE_WAREHOUSE_CONF_NAME.") + logInfo(log"${MDC(CONFIG, WAREHOUSE_PATH.key)} is not set, but " + + log"${MDC(CONFIG2, HIVE_WAREHOUSE_CONF_NAME)} is set. " + + log"Setting ${MDC(CONFIG, WAREHOUSE_PATH.key)} to " + + log"the value of ${MDC(CONFIG2, HIVE_WAREHOUSE_CONF_NAME)}.") hiveWarehouseDir } else { // If spark.sql.warehouse.dir is set, we will override hive.metastore.warehouse.dir using @@ -277,8 +282,9 @@ object SharedState extends Logging { // When neither spark.sql.warehouse.dir nor hive.metastore.warehouse.dir is set // we will set hive.metastore.warehouse.dir to the default value of spark.sql.warehouse.dir. val sparkWarehouseDir = sparkWarehouseOption.getOrElse(WAREHOUSE_PATH.defaultValueString) - logInfo(s"Setting $HIVE_WAREHOUSE_CONF_NAME ('$hiveWarehouseDir') to the value of " + - s"${WAREHOUSE_PATH.key}.") + logInfo(log"Setting ${MDC(CONFIG, HIVE_WAREHOUSE_CONF_NAME)} " + + log"('${MDC(VALUE, hiveWarehouseDir)}') to the value of " + + log"${MDC(CONFIG2, WAREHOUSE_PATH.key)}.") sparkWarehouseDir } } @@ -286,7 +292,7 @@ object SharedState extends Logging { def qualifyWarehousePath(hadoopConf: Configuration, warehousePath: String): String = { val tempPath = new Path(warehousePath) val qualified = tempPath.getFileSystem(hadoopConf).makeQualified(tempPath).toString - logInfo(s"Warehouse path is '$qualified'.") + logInfo(log"Warehouse path is '${MDC(PATH, qualified)}'.") qualified } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala index 8f537aacebe5f..5e79dbbb4d72e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/AggregatedDialect.scala @@ -26,7 +26,8 @@ import org.apache.spark.sql.types.{DataType, MetadataBuilder} * * @param dialects List of dialects. 
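[Editor's note] The SharedState, SQLAppStatusListener, and StreamingQueryManager hunks migrate plain string interpolation to the structured logging API, where each interpolated value is wrapped in an MDC tied to a stable LogKeys entry. A minimal sketch of the same pattern, assuming code compiled inside Spark with access to the internal Logging trait; the class and message below are illustrative only:

    import org.apache.spark.internal.{Logging, MDC}
    import org.apache.spark.internal.LogKeys.{CONFIG, PATH}

    class WarehouseSetup extends Logging {
      def report(confKey: String, resolvedPath: String): Unit = {
        // MDC(<log key>, <value>) tags the value so JSON log layouts can emit it
        // as a structured field instead of burying it in the message text.
        logInfo(log"Resolved ${MDC(CONFIG, confKey)} to warehouse path " +
          log"'${MDC(PATH, resolvedPath)}'.")
      }
    }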
*/ -private class AggregatedDialect(dialects: List[JdbcDialect]) extends JdbcDialect { +private class AggregatedDialect(dialects: List[JdbcDialect]) + extends JdbcDialect with NoLegacyJDBCError { require(dialects.nonEmpty) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 31a7c783ba60e..8ccf94166a70e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -24,13 +24,14 @@ import scala.util.control.NonFatal import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.connector.expressions.Expression import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.types._ -private case class DB2Dialect() extends JdbcDialect { +private case class DB2Dialect() extends JdbcDialect with SQLConfHelper with NoLegacyJDBCError { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:db2") @@ -86,6 +87,8 @@ private case class DB2Dialect() extends JdbcDialect { typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = sqlType match { + case Types.SMALLINT if !conf.legacyDB2numericMappingEnabled => + Option(ShortType) case Types.REAL => Option(FloatType) case Types.OTHER => typeName match { @@ -99,7 +102,9 @@ private case class DB2Dialect() extends JdbcDialect { override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { case StringType => Option(JdbcType("CLOB", java.sql.Types.CLOB)) - case BooleanType => Option(JdbcType("CHAR(1)", java.sql.Types.CHAR)) + case BooleanType if conf.legacyDB2BooleanMappingEnabled => + Option(JdbcType("CHAR(1)", java.sql.Types.CHAR)) + case BooleanType => Option(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) case ShortType | ByteType => Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala index 54b8c2622827e..af77f8575dd86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo import org.apache.spark.sql.types._ -private case class DatabricksDialect() extends JdbcDialect { +private case class DatabricksDialect() extends JdbcDialect with NoLegacyJDBCError { override def canHandle(url: String): Boolean = { url.startsWith("jdbc:databricks") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala index 36af0e6aeaf14..7b65a01b5e702 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors import org.apache.spark.sql.types._ -private case class DerbyDialect() extends JdbcDialect { +private case class DerbyDialect() 
extends JdbcDialect with NoLegacyJDBCError { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:derby") @@ -48,9 +48,15 @@ private case class DerbyDialect() extends JdbcDialect { case ByteType => Option(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) case ShortType => Option(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) case BooleanType => Option(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) - // 31 is the maximum precision and 5 is the default scale for a Derby DECIMAL - case t: DecimalType if t.precision > 31 => - Option(JdbcType("DECIMAL(31,5)", java.sql.Types.DECIMAL)) + // 31 is the maximum precision + // https://db.apache.org/derby/docs/10.13/ref/rrefsqlj15260.html + case t: DecimalType => + val (p, s) = if (t.precision > 31) { + (31, math.max(t.scale - (t.precision - 31), 0)) + } else { + (t.precision, t.scale) + } + Option(JdbcType(s"DECIMAL($p,$s)", java.sql.Types.DECIMAL)) case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index ebfc6093dc167..3ece44ece9e6a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, N import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.types.{BooleanType, ByteType, DataType, DecimalType, MetadataBuilder, ShortType, StringType, TimestampType} -private[sql] case class H2Dialect() extends JdbcDialect { +private[sql] case class H2Dialect() extends JdbcDialect with NoLegacyJDBCError { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:h2") @@ -259,13 +259,6 @@ private[sql] case class H2Dialect() extends JdbcDialect { } class H2SQLBuilder extends JDBCSQLBuilder { - override def escapeSpecialCharsForLikePattern(str: String): String = { - str.map { - case '_' => "\\_" - case '%' => "\\%" - case c => c.toString - }.mkString - } override def visitAggregateFunction( funcName: String, isDistinct: Boolean, inputs: Array[String]): String = diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 5f69d18cad756..290665020f883 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -391,10 +391,10 @@ abstract class JdbcDialect extends Serializable with Logging { quoteIdentifier(namedRef.fieldNames.head) } - override def visitCast(l: String, dataType: DataType): String = { + override def visitCast(expr: String, exprDataType: DataType, dataType: DataType): String = { val databaseTypeDefinition = getJDBCType(dataType).map(_.databaseTypeDefinition).getOrElse(dataType.typeName) - s"CAST($l AS $databaseTypeDefinition)" + s"CAST($expr AS $databaseTypeDefinition)" } override def visitSQLFunction(funcName: String, inputs: Array[String]): String = { @@ -841,6 +841,23 @@ abstract class JdbcDialect extends Serializable with Logging { metadata: MetadataBuilder): Unit = {} } +/** + * Make the `classifyException` method throw out the original exception + */ +trait NoLegacyJDBCError extends JdbcDialect { + + override def classifyException( + e: Throwable, + errorClass: String, + messageParameters: Map[String, String], + description: String): 
AnalysisException = { + new AnalysisException( + errorClass = errorClass, + messageParameters = messageParameters, + cause = Some(e)) + } +} + /** * :: DeveloperApi :: * Registry of dialects that apply to every new jdbc `org.apache.spark.sql.DataFrame`. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 862e99adc3b0d..d03602b0338c7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.jdbc.MsSqlServerDialect.{GEOGRAPHY, GEOMETRY} import org.apache.spark.sql.types._ -private case class MsSqlServerDialect() extends JdbcDialect { +private case class MsSqlServerDialect() extends JdbcDialect with NoLegacyJDBCError { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:sqlserver") @@ -86,9 +86,13 @@ private case class MsSqlServerDialect() extends JdbcDialect { // We shouldn't propagate these queries to MsSqlServer expr match { case e: Predicate => e.name() match { - case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">=" - if e.children().exists(_.isInstanceOf[Predicate]) => - super.visitUnexpectedExpr(expr) + case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">=" => + val Array(l, r) = e.children().map { + case p: Predicate => s"CASE WHEN ${inputToSQL(p)} THEN 1 ELSE 0 END" + case o => inputToSQL(o) + } + visitBinaryComparison(e.name(), l, r) + case "CASE_WHEN" => visitCaseWhen(expressionsToStringArray(e.children())) + " = 1" case _ => super.build(expr) } case _ => super.build(expr) @@ -109,22 +113,22 @@ private case class MsSqlServerDialect() extends JdbcDialect { override def getCatalystType( sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { - if (typeName.contains("datetimeoffset")) { - // String is recommend by Microsoft SQL Server for datetimeoffset types in non-MS clients - Option(StringType) - } else { - if (SQLConf.get.legacyMsSqlServerNumericMappingEnabled) { - None - } else { - sqlType match { - // Data range of TINYINT is 0-255 so it needs to be stored in ShortType. - // Reference doc: https://learn.microsoft.com/en-us/sql/t-sql/data-types - case java.sql.Types.SMALLINT | java.sql.Types.TINYINT => Some(ShortType) - case java.sql.Types.REAL => Some(FloatType) - case GEOMETRY | GEOGRAPHY => Some(BinaryType) - case _ => None + sqlType match { + case _ if typeName.contains("datetimeoffset") => + if (SQLConf.get.legacyMsSqlServerDatetimeOffsetMappingEnabled) { + Some(StringType) + } else { + Some(TimestampType) } - } + case java.sql.Types.SMALLINT | java.sql.Types.TINYINT + if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => + // Data range of TINYINT is 0-255 so it needs to be stored in ShortType. 
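[Editor's usage sketch] NoLegacyJDBCError, defined in the JdbcDialects.scala hunk above, overrides classifyException so the original driver exception is kept as the cause of the AnalysisException instead of going through the legacy error path; every built-in dialect in this diff mixes it in. A sketch of how a hypothetical external dialect could opt in (the dialect name and JDBC prefix are made up):

    import java.util.Locale

    import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects, NoLegacyJDBCError}

    // Hypothetical dialect; only canHandle must be implemented, error
    // classification comes from the NoLegacyJDBCError mix-in.
    case class AcmeDbDialect() extends JdbcDialect with NoLegacyJDBCError {
      override def canHandle(url: String): Boolean =
        url.toLowerCase(Locale.ROOT).startsWith("jdbc:acmedb")
    }

    // Registered the same way as any other dialect:
    // JdbcDialects.registerDialect(AcmeDbDialect())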
+ // Reference doc: https://learn.microsoft.com/en-us/sql/t-sql/data-types + Some(ShortType) + case java.sql.Types.REAL if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => + Some(FloatType) + case GEOMETRY | GEOGRAPHY => Some(BinaryType) + case _ => None } } @@ -136,6 +140,7 @@ private case class MsSqlServerDialect() extends JdbcDialect { case BinaryType => Some(JdbcType("VARBINARY(MAX)", java.sql.Types.VARBINARY)) case ShortType if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => Some(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) + case ByteType => Some(JdbcType("SMALLINT", java.sql.Types.TINYINT)) case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index d98fcdfd0b23f..0f1bccbb01d51 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} import org.apache.spark.sql.types._ -private case class MySQLDialect() extends JdbcDialect with SQLConfHelper { +private case class MySQLDialect() extends JdbcDialect with SQLConfHelper with NoLegacyJDBCError { override def canHandle(url : String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:mysql") @@ -66,6 +66,21 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper { } } + override def visitStartsWith(l: String, r: String): String = { + val value = r.substring(1, r.length() - 1) + s"$l LIKE '${escapeSpecialCharsForLikePattern(value)}%' ESCAPE '\\\\'" + } + + override def visitEndsWith(l: String, r: String): String = { + val value = r.substring(1, r.length() - 1) + s"$l LIKE '%${escapeSpecialCharsForLikePattern(value)}' ESCAPE '\\\\'" + } + + override def visitContains(l: String, r: String): String = { + val value = r.substring(1, r.length() - 1) + s"$l LIKE '%${escapeSpecialCharsForLikePattern(value)}%' ESCAPE '\\\\'" + } + override def visitAggregateFunction( funcName: String, isDistinct: Boolean, inputs: Array[String]): String = if (isDistinct && distinctUnsupportedAggregateFunctions.contains(funcName)) { @@ -142,7 +157,7 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper { // https://github.com/mysql/mysql-connector-j/blob/8.3.0/src/main/core-api/java/com/mysql/cj/MysqlType.java#L251 // scalastyle:on line.size.limit Some(getTimestampType(md.build())) - case Types.TIMESTAMP => Some(TimestampType) + case Types.TIMESTAMP if !conf.legacyMySqlTimestampNTZMappingEnabled => Some(TimestampType) case _ => None } } @@ -228,7 +243,8 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper { // In MYSQL, DATETIME is TIMESTAMP WITHOUT TIME ZONE // https://github.com/mysql/mysql-connector-j/blob/8.3.0/src/main/core-api/java/com/mysql/cj/MysqlType.java#L251 // scalastyle:on line.size.limit - case TimestampNTZType => Option(JdbcType("DATETIME", java.sql.Types.TIMESTAMP)) + case TimestampNTZType if !conf.legacyMySqlTimestampNTZMappingEnabled => + Option(JdbcType("DATETIME", java.sql.Types.TIMESTAMP)) case _ => JdbcUtils.getCommonJDBCType(dt) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 26c816294b52d..627007e275599 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.jdbc.OracleDialect._ import org.apache.spark.sql.types._ -private case class OracleDialect() extends JdbcDialect with SQLConfHelper { +private case class OracleDialect() extends JdbcDialect with SQLConfHelper with NoLegacyJDBCError { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:oracle") @@ -121,6 +121,7 @@ private case class OracleDialect() extends JdbcDialect with SQLConfHelper { case ByteType => Some(JdbcType("NUMBER(3)", java.sql.Types.SMALLINT)) case ShortType => Some(JdbcType("NUMBER(5)", java.sql.Types.SMALLINT)) case StringType => Some(JdbcType("VARCHAR2(255)", java.sql.Types.VARCHAR)) + case VarcharType(n) => Some(JdbcType(s"VARCHAR2($n)", java.sql.Types.VARCHAR)) case TimestampType if !conf.legacyOracleTimestampMappingEnabled => Some(JdbcType("TIMESTAMP WITH LOCAL TIME ZONE", TIMESTAMP_LTZ)) case _ => None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index c2c430a7b39d7..03fefd82802ef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -24,6 +24,8 @@ import java.util.Locale import scala.util.Using +import org.apache.spark.internal.LogKeys.COLUMN_NAME +import org.apache.spark.internal.MDC import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.analysis.{IndexAlreadyExistsException, NonEmptyNamespaceException, NoSuchIndexException} @@ -35,7 +37,8 @@ import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo import org.apache.spark.sql.types._ -private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { +private case class PostgresDialect() + extends JdbcDialect with SQLConfHelper with NoLegacyJDBCError { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:postgresql") @@ -59,8 +62,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { // money type seems to be broken but one workaround is to handle it as string. // See SPARK-34333 and https://github.com/pgjdbc/pgjdbc/issues/100 Some(StringType) - case Types.TIMESTAMP - if "timestamptz".equalsIgnoreCase(typeName) => + case Types.TIMESTAMP if "timestamptz".equalsIgnoreCase(typeName) && + !conf.legacyPostgresDatetimeMappingEnabled => // timestamptz represents timestamp with time zone, currently it maps to Types.TIMESTAMP. // We need to change to Types.TIMESTAMP_WITH_TIMEZONE if the upstream changes. 
Some(TimestampType) @@ -147,6 +150,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { case FloatType => Some(JdbcType("FLOAT4", Types.FLOAT)) case DoubleType => Some(JdbcType("FLOAT8", Types.DOUBLE)) case ShortType | ByteType => Some(JdbcType("SMALLINT", Types.SMALLINT)) + case TimestampType if !conf.legacyPostgresDatetimeMappingEnabled => + Some(JdbcType("TIMESTAMP WITH TIME ZONE", Types.TIMESTAMP)) case t: DecimalType => Some( JdbcType(s"NUMERIC(${t.precision},${t.scale})", java.sql.Types.NUMERIC)) case ArrayType(et, _) if et.isInstanceOf[AtomicType] || et.isInstanceOf[ArrayType] => @@ -368,7 +373,8 @@ private case class PostgresDialect() extends JdbcDialect with SQLConfHelper { } } catch { case e: SQLException => - logWarning(s"Failed to get array dimension for column $columnName", e) + logWarning( + log"Failed to get array dimension for column ${MDC(COLUMN_NAME, columnName)}", e) } case _ => } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/SnowflakeDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/SnowflakeDialect.scala index 276364d5d89ed..a443a798db7c8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/SnowflakeDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/SnowflakeDialect.scala @@ -22,7 +22,7 @@ import java.util.Locale import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils import org.apache.spark.sql.types.{BooleanType, DataType} -private case class SnowflakeDialect() extends JdbcDialect { +private case class SnowflakeDialect() extends JdbcDialect with NoLegacyJDBCError { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:snowflake") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala index 7acd22a3f10be..322b259485f56 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.types._ -private case class TeradataDialect() extends JdbcDialect { +private case class TeradataDialect() extends JdbcDialect with NoLegacyJDBCError { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:teradata") @@ -42,6 +42,7 @@ private case class TeradataDialect() extends JdbcDialect { override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { case StringType => Some(JdbcType("VARCHAR(255)", java.sql.Types.VARCHAR)) case BooleanType => Option(JdbcType("CHAR(1)", java.sql.Types.CHAR)) + case ByteType => Option(JdbcType("BYTEINT", java.sql.Types.TINYINT)) case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/package.scala b/sql/core/src/main/scala/org/apache/spark/sql/package.scala index 1444eea09b27e..96b5e2193f270 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/package.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/package.scala @@ -20,7 +20,7 @@ package org.apache.spark import java.util.regex.Pattern import org.apache.spark.annotation.{DeveloperApi, Unstable} -import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} +import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin, PySparkCurrentOrigin} import org.apache.spark.sql.execution.SparkStrategy import org.apache.spark.sql.internal.SQLConf @@ -78,31 +78,6 @@ package object sql { */ 
private[sql] val SPARK_LEGACY_INT96_METADATA_KEY = "org.apache.spark.legacyINT96" - /** - * Captures the current Java stack trace up to a specified depth defined by the - * `spark.sql.stackTracesInDataFrameContext` configuration. This method helps in identifying - * the call sites in Spark code by filtering out the stack frames until it reaches the - * user code calling into Spark. This method is intended to be used for enhancing debuggability - * by providing detailed context about where in the Spark source code a particular operation - * was called from. - * - * This functionality is crucial for both debugging purposes and for providing more insightful - * logging and error messages. By capturing the stack trace up to a certain depth, it enables - * a more precise pinpointing of the execution flow, especially useful when troubleshooting - * complex interactions within Spark. - * - * @return An array of `StackTraceElement` representing the filtered stack trace. - */ - private def captureStackTrace(): Array[StackTraceElement] = { - val st = Thread.currentThread().getStackTrace - var i = 0 - // Find the beginning of Spark code traces - while (i < st.length && !sparkCode(st(i))) i += 1 - // Stop at the end of the first Spark code traces - while (i < st.length && sparkCode(st(i))) i += 1 - st.slice(from = i - 1, until = i + SQLConf.get.stackTracesInDataFrameContext) - } - /** * This helper function captures the Spark API and its call site in the user code from the current * stacktrace. @@ -123,45 +98,16 @@ package object sql { if (CurrentOrigin.get.stackTrace.isDefined) { f } else { - val origin = Origin(stackTrace = Some(captureStackTrace())) - CurrentOrigin.withOrigin(origin)(f) - } - } - - /** - * This overloaded helper function captures the call site information specifically for PySpark, - * using provided PySpark logging information instead of capturing the current Java stack trace. - * - * This method is designed to enhance the debuggability of PySpark by including PySpark-specific - * logging information (e.g., method names and call sites within PySpark scripts) in debug logs, - * without the overhead of capturing and processing Java stack traces that are less relevant - * to PySpark developers. - * - * The `pysparkErrorContext` parameter allows for passing PySpark call site information, which - * is then included in the Origin context. This facilitates more precise and useful logging for - * troubleshooting PySpark applications. - * - * This method should be used in places where PySpark API calls are made, and PySpark logging - * information is available and beneficial for debugging purposes. - * - * @param pysparkErrorContext Optional PySpark logging information including the call site, - * represented as a (String, String). - * This may contain keys like "fragment" and "callSite" to provide - * detailed context about the PySpark call site. - * @param f The function that can utilize the modified Origin context with - * PySpark logging information. - * @return The result of executing `f` within the context of the provided PySpark logging - * information. 
- */ - private[sql] def withOrigin[T]( - pysparkErrorContext: Option[(String, String)] = None)(f: => T): T = { - if (CurrentOrigin.get.stackTrace.isDefined) { - f - } else { - val origin = Origin( - stackTrace = Some(captureStackTrace()), - pysparkErrorContext = pysparkErrorContext - ) + val st = Thread.currentThread().getStackTrace + var i = 0 + // Find the beginning of Spark code traces + while (i < st.length && !sparkCode(st(i))) i += 1 + // Stop at the end of the first Spark code traces + while (i < st.length && sparkCode(st(i))) i += 1 + val origin = Origin(stackTrace = Some(st.slice( + from = i - 1, + until = i + SQLConf.get.stackTracesInDataFrameContext)), + pysparkErrorContext = PySparkCurrentOrigin.get()) CurrentOrigin.withOrigin(origin)(f) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala index 484ed0245ddf6..c1ceed048ae2c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.streaming import java.util.UUID +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule} import org.json4s.{JObject, JString} import org.json4s.JsonAST.JValue import org.json4s.JsonDSL.{jobject2assoc, pair2Assoc} @@ -140,6 +142,21 @@ object StreamingQueryListener extends Serializable { } } + private[spark] object QueryStartedEvent { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.registerModule(DefaultScalaModule) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret + } + + private[spark] def jsonString(event: QueryStartedEvent): String = + mapper.writeValueAsString(event) + + private[spark] def fromJson(json: String): QueryStartedEvent = + mapper.readValue[QueryStartedEvent](json) + } + /** * Event representing any progress updates in a query. * @param progress The query progress updates. @@ -154,6 +171,21 @@ object StreamingQueryListener extends Serializable { private def jsonValue: JValue = JObject("progress" -> progress.jsonValue) } + private[spark] object QueryProgressEvent { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.registerModule(DefaultScalaModule) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret + } + + private[spark] def jsonString(event: QueryProgressEvent): String = + mapper.writeValueAsString(event) + + private[spark] def fromJson(json: String): QueryProgressEvent = + mapper.readValue[QueryProgressEvent](json) + } + /** * Event representing that query is idle and waiting for new data to process. * @@ -177,6 +209,21 @@ object StreamingQueryListener extends Serializable { } } + private[spark] object QueryIdleEvent { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.registerModule(DefaultScalaModule) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret + } + + private[spark] def jsonString(event: QueryTerminatedEvent): String = + mapper.writeValueAsString(event) + + private[spark] def fromJson(json: String): QueryTerminatedEvent = + mapper.readValue[QueryTerminatedEvent](json) + } + /** * Event representing that termination of a query. 
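[Editor's note] The StreamingQueryListener hunk above gives each event a private companion object that round-trips it through Jackson, with the Scala module registered and unknown properties ignored so serialized events tolerate fields added in other Spark versions. A standalone sketch of the same mapper setup, using an illustrative payload class rather than the real listener events:

    import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
    import com.fasterxml.jackson.module.scala.{ClassTagExtensions, DefaultScalaModule}

    // Illustrative payload; the real code serializes the listener events themselves.
    case class DemoEvent(id: String, runId: String, name: Option[String])

    object DemoEvent {
      private val mapper = {
        val ret = new ObjectMapper() with ClassTagExtensions
        ret.registerModule(DefaultScalaModule)
        // Tolerate fields added by newer builds when deserializing.
        ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
        ret
      }

      def jsonString(event: DemoEvent): String = mapper.writeValueAsString(event)
      def fromJson(json: String): DemoEvent = mapper.readValue[DemoEvent](json)
    }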
* @@ -211,4 +258,19 @@ object StreamingQueryListener extends Serializable { ("errorClassOnException" -> JString(errorClassOnException.orNull)) } } + + private[spark] object QueryTerminatedEvent { + private val mapper = { + val ret = new ObjectMapper() with ClassTagExtensions + ret.registerModule(DefaultScalaModule) + ret.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + ret + } + + private[spark] def jsonString(event: QueryTerminatedEvent): String = + mapper.writeValueAsString(event) + + private[spark] def fromJson(json: String): QueryTerminatedEvent = + mapper.readValue[QueryTerminatedEvent](json) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala index 225f9d1f19a55..55d2e639a56b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryManager.scala @@ -25,7 +25,8 @@ import scala.collection.mutable import scala.jdk.CollectionConverters._ import org.apache.spark.annotation.Evolving -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{CLASS_NAME, QUERY_ID, RUN_ID} import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.streaming.{WriteToStream, WriteToStreamStatement} @@ -77,7 +78,7 @@ class StreamingQueryManager private[sql] ( Utils.loadExtensions(classOf[StreamingQueryListener], classNames, sparkSession.sparkContext.conf).foreach { listener => addListener(listener) - logInfo(s"Registered listener ${listener.getClass.getName}") + logInfo(log"Registered listener ${MDC(CLASS_NAME, listener.getClass.getName)}") } } } @@ -367,8 +368,8 @@ class StreamingQueryManager private[sql] ( if (activeOption.isDefined) { if (shouldStopActiveRun) { val oldQuery = activeOption.get - logWarning(s"Stopping existing streaming query [id=${query.id}, " + - s"runId=${oldQuery.runId}], as a new run is being started.") + logWarning(log"Stopping existing streaming query [id=${MDC(QUERY_ID, query.id)}, " + + log"runId=${MDC(RUN_ID, oldQuery.runId)}], as a new run is being started.") Some(oldQuery) } else { throw new IllegalStateException( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala index 117daea7d1971..05323d9d03811 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala @@ -294,7 +294,7 @@ private object SafeJsonSerializer { /** Convert map to JValue while handling empty maps. Also, this sorts the keys. 
*/ def safeMapToJValue[T](map: ju.Map[String, T], valueToJValue: T => JValue): JValue = { - if (map.isEmpty) return JNothing + if (map == null || map.isEmpty) return JNothing val keys = map.asScala.keySet.toSeq.sorted keys.map { k => k -> valueToJValue(map.get(k)) : JObject }.reduce(_ ~ _) } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java index 237ca0ba88092..2a0c8c00574a1 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDataFrameReaderWriterSuite.java @@ -1,19 +1,19 @@ /* -* Licensed to the Apache Software Foundation (ASF) under one or more -* contributor license agreements. See the NOTICE file distributed with -* this work for additional information regarding copyright ownership. -* The ASF licenses this file to You under the Apache License, Version 2.0 -* (the "License"); you may not use this file except in compliance with -* the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package test.org.apache.spark.sql; diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/TestStatefulProcessor.java b/sql/core/src/test/java/test/org/apache/spark/sql/TestStatefulProcessor.java index e53e977da1494..b9841ee0f9735 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/TestStatefulProcessor.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/TestStatefulProcessor.java @@ -24,6 +24,8 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.streaming.*; +import static org.junit.jupiter.api.Assertions.*; + /** * A test stateful processor used with transformWithState arbitrary stateful operator in * Structured Streaming. 
The processor primarily aims to test various functionality of the Java API @@ -74,7 +76,7 @@ public scala.collection.Iterator handleInputRows( } else { keyCountMap.updateValue(value, 1L); } - assert(keyCountMap.containsKey(value)); + assertTrue(keyCountMap.containsKey(value)); keysList.appendValue(value); sb.append(value); } @@ -82,13 +84,13 @@ public scala.collection.Iterator handleInputRows( scala.collection.Iterator keys = keysList.get(); while (keys.hasNext()) { String keyVal = keys.next(); - assert(keyCountMap.containsKey(keyVal)); - assert(keyCountMap.getValue(keyVal) > 0); + assertTrue(keyCountMap.containsKey(keyVal)); + assertTrue(keyCountMap.getValue(keyVal) > 0); } count += numRows; countState.update(count); - assert (countState.get() == count); + assertEquals(count, (long) countState.get()); result.add(sb.toString()); } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/TestStatefulProcessorWithInitialState.java b/sql/core/src/test/java/test/org/apache/spark/sql/TestStatefulProcessorWithInitialState.java index bfa542e81e354..55046a7c0d3df 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/TestStatefulProcessorWithInitialState.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/TestStatefulProcessorWithInitialState.java @@ -24,6 +24,8 @@ import org.apache.spark.sql.Encoders; import org.apache.spark.sql.streaming.*; +import static org.junit.jupiter.api.Assertions.assertFalse; + /** * A test stateful processor concatenates all input rows for a key and emits the result. * Primarily used for testing the Java API for arbitrary stateful operator in structured streaming @@ -71,7 +73,7 @@ public scala.collection.Iterator handleInputRows( } testState.clear(); - assert(testState.exists() == false); + assertFalse(testState.exists()); result.add(sb.toString()); } diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2WithV2Filter.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2WithV2Filter.java index 0e3f6aed3b681..07bef16cdf2da 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2WithV2Filter.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2WithV2Filter.java @@ -34,6 +34,8 @@ import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import static org.junit.jupiter.api.Assertions.assertInstanceOf; + public class JavaAdvancedDataSourceV2WithV2Filter implements TestingV2Source { @Override @@ -66,9 +68,9 @@ public StructType readSchema() { public Predicate[] pushPredicates(Predicate[] predicates) { Predicate[] supported = Arrays.stream(predicates).filter(f -> { if (f.name().equals(">")) { - assert(f.children()[0] instanceof FieldReference); + assertInstanceOf(FieldReference.class, f.children()[0]); FieldReference column = (FieldReference) f.children()[0]; - assert(f.children()[1] instanceof LiteralValue); + assertInstanceOf(LiteralValue.class, f.children()[1]); Literal value = (Literal) f.children()[1]; return column.describe().equals("i") && value.value() instanceof Integer; } else { @@ -78,9 +80,9 @@ public Predicate[] pushPredicates(Predicate[] predicates) { Predicate[] unsupported = Arrays.stream(predicates).filter(f -> { if (f.name().equals(">")) { - assert(f.children()[0] instanceof FieldReference); + assertInstanceOf(FieldReference.class, f.children()[0]); FieldReference column = (FieldReference) f.children()[0]; - assert(f.children()[1] 
instanceof LiteralValue); + assertInstanceOf(LiteralValue.class, f.children()[1]); Literal value = (LiteralValue) f.children()[1]; return !column.describe().equals("i") || !(value.value() instanceof Integer); } else { @@ -125,9 +127,9 @@ public InputPartition[] planInputPartitions() { Integer lowerBound = null; for (Predicate predicate : predicates) { if (predicate.name().equals(">")) { - assert(predicate.children()[0] instanceof FieldReference); + assertInstanceOf(FieldReference.class, predicate.children()[0]); FieldReference column = (FieldReference) predicate.children()[0]; - assert(predicate.children()[1] instanceof LiteralValue); + assertInstanceOf(LiteralValue.class, predicate.children()[1]); Literal value = (Literal) predicate.children()[1]; if ("i".equals(column.describe()) && value.value() instanceof Integer integer) { lowerBound = integer; diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java b/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java index 70c9269962089..00cd8e5478a25 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/execution/datasources/orc/FakeKeyProvider.java @@ -1,13 +1,12 @@ /* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, diff --git a/sql/core/src/test/resources/collations/ICU-collations-map.md b/sql/core/src/test/resources/collations/ICU-collations-map.md new file mode 100644 index 0000000000000..a704034c694aa --- /dev/null +++ b/sql/core/src/test/resources/collations/ICU-collations-map.md @@ -0,0 +1,144 @@ + +## ICU locale ids to name map +| Locale id | Locale name | +| --------- | ----------- | +| 0 | UNICODE | +| 1 | af | +| 2 | am | +| 3 | ar | +| 4 | ar_SAU | +| 5 | as | +| 6 | az | +| 7 | be | +| 8 | bg | +| 9 | bn | +| 10 | bo | +| 11 | br | +| 12 | bs | +| 13 | bs_Cyrl | +| 14 | ca | +| 15 | ceb | +| 16 | chr | +| 17 | cs | +| 18 | cy | +| 19 | da | +| 20 | de | +| 21 | de_AUT | +| 22 | dsb | +| 23 | dz | +| 24 | ee | +| 25 | el | +| 26 | en | +| 27 | en_USA | +| 28 | eo | +| 29 | es | +| 30 | et | +| 31 | fa | +| 32 | fa_AFG | +| 33 | ff | +| 34 | ff_Adlm | +| 35 | fi | +| 36 | fil | +| 37 | fo | +| 38 | fr | +| 39 | fr_CAN | +| 40 | fy | +| 41 | ga | +| 42 | gl | +| 43 | gu | +| 44 | ha | +| 45 | haw | +| 46 | he | +| 47 | he_ISR | +| 48 | hi | +| 49 | hr | +| 50 | hsb | +| 51 | hu | +| 52 | hy | +| 53 | id | +| 54 | id_IDN | +| 55 | ig | +| 56 | is | +| 57 | it | +| 58 | ja | +| 59 | ka | +| 60 | kk | +| 61 | kl | +| 62 | km | +| 63 | kn | +| 64 | ko | +| 65 | kok | +| 66 | ku | +| 67 | ky | +| 68 | lb | +| 69 | lij | +| 70 | lkt | +| 71 | ln | +| 72 | lo | +| 73 | lt | +| 74 | lv | +| 75 | mk | +| 76 | ml | +| 77 | mn | +| 78 | mr | +| 79 | ms | +| 80 | mt | +| 81 | my | +| 82 | nb | +| 83 | nb_NOR | +| 84 | ne | +| 85 | nl | +| 86 | nn | +| 87 | no | +| 88 | om | +| 89 | or | +| 90 | pa | +| 91 | pa_Guru | +| 92 | pa_Guru_IND | +| 93 | pl | +| 94 | ps | +| 95 | pt | +| 96 | ro | +| 97 | ru | +| 98 | sa | +| 99 | se | +| 100 | si | +| 101 | sk | +| 102 | sl | +| 103 | smn | +| 104 | sq | +| 105 | sr | +| 106 | sr_Cyrl | +| 107 | sr_Cyrl_BIH | +| 108 | sr_Cyrl_MNE | +| 109 | sr_Cyrl_SRB | +| 110 | sr_Latn | +| 111 | sr_Latn_BIH | +| 112 | sr_Latn_SRB | +| 113 | sv | +| 114 | sw | +| 115 | ta | +| 116 | te | +| 117 | th | +| 118 | tk | +| 119 | to | +| 120 | tr | +| 121 | ug | +| 122 | uk | +| 123 | ur | +| 124 | uz | +| 125 | vi | +| 126 | wae | +| 127 | wo | +| 128 | xh | +| 129 | yi | +| 130 | yo | +| 131 | zh | +| 132 | zh_Hans | +| 133 | zh_Hans_CHN | +| 134 | zh_Hans_SGP | +| 135 | zh_Hant | +| 136 | zh_Hant_HKG | +| 137 | zh_Hant_MAC | +| 138 | zh_Hant_TWN | +| 139 | zu | diff --git a/sql/core/src/test/resources/log4j2.properties b/sql/core/src/test/resources/log4j2.properties index 7ab47c16d4f94..fdbc35e70ab44 100644 --- a/sql/core/src/test/resources/log4j2.properties +++ b/sql/core/src/test/resources/log4j2.properties @@ -29,6 +29,12 @@ appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %maxLen{%m}{512}%n%ex{ appender.console.filter.threshold.type = ThresholdFilter appender.console.filter.threshold.level = warn +appender.structured.type = File +appender.structured.name = structured +appender.structured.fileName = target/LogQuerySuite.log +appender.structured.layout.type = JsonTemplateLayout +appender.structured.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json + #File Appender appender.file.type = File appender.file.name = File @@ -72,3 +78,9 @@ logger.parquet1.level = error 
logger.parquet2.name = parquet.CorruptStatistics logger.parquet2.level = error + +# Custom loggers +logger.structured.name = org.apache.spark.sql.LogQuerySuite +logger.structured.level = trace +logger.structured.appenderRefs = structured +logger.structured.appenderRef.structured.ref = structured diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 8b70c88332dfb..cf218becdf1d4 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -81,7 +81,7 @@ | org.apache.spark.sql.catalyst.expressions.Chr | char | SELECT char(65) | struct | | org.apache.spark.sql.catalyst.expressions.Chr | chr | SELECT chr(65) | struct | | org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct | -| org.apache.spark.sql.catalyst.expressions.CollateExpressionBuilder | collate | SELECT COLLATION('Spark SQL' collate UTF8_BINARY_LCASE) | struct | +| org.apache.spark.sql.catalyst.expressions.CollateExpressionBuilder | collate | SELECT COLLATION('Spark SQL' collate UTF8_LCASE) | struct | | org.apache.spark.sql.catalyst.expressions.Collation | collation | SELECT collation('Spark SQL') | struct | | org.apache.spark.sql.catalyst.expressions.Concat | concat | SELECT concat('Spark', 'SQL') | struct | | org.apache.spark.sql.catalyst.expressions.ConcatWs | concat_ws | SELECT concat_ws(' ', 'Spark', 'SQL') | struct | @@ -174,6 +174,7 @@ | org.apache.spark.sql.catalyst.expressions.IsNaN | isnan | SELECT isnan(cast('NaN' as double)) | struct | | org.apache.spark.sql.catalyst.expressions.IsNotNull | isnotnull | SELECT isnotnull(1) | struct<(1 IS NOT NULL):boolean> | | org.apache.spark.sql.catalyst.expressions.IsNull | isnull | SELECT isnull(1) | struct<(1 IS NULL):boolean> | +| org.apache.spark.sql.catalyst.expressions.IsValidUTF8 | is_valid_utf8 | SELECT is_valid_utf8('Spark') | struct | | org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | SELECT json_object_keys('{}') | struct> | | org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct> | | org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct | @@ -207,6 +208,7 @@ | org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct | | org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZExpressionBuilder | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct | | org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZExpressionBuilder | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct | +| org.apache.spark.sql.catalyst.expressions.MakeValidUTF8 | make_valid_utf8 | SELECT make_valid_utf8('Spark') | struct | | org.apache.spark.sql.catalyst.expressions.MakeYMInterval | make_ym_interval | SELECT make_ym_interval(1, 2) | struct | | org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct> | | org.apache.spark.sql.catalyst.expressions.MapContainsKey | map_contains_key | SELECT map_contains_key(map(1, 'a', 2, 'b'), 1) | struct | @@ -289,8 +291,11 @@ | org.apache.spark.sql.catalyst.expressions.Sha1 | sha | SELECT sha('Spark') | struct | | 
org.apache.spark.sql.catalyst.expressions.Sha1 | sha1 | SELECT sha1('Spark') | struct | | org.apache.spark.sql.catalyst.expressions.Sha2 | sha2 | SELECT sha2('Spark', 256) | struct | +| org.apache.spark.sql.catalyst.expressions.ShiftLeft | << | SELECT shiftleft(2, 1) | struct | | org.apache.spark.sql.catalyst.expressions.ShiftLeft | shiftleft | SELECT shiftleft(2, 1) | struct | +| org.apache.spark.sql.catalyst.expressions.ShiftRight | >> | SELECT shiftright(4, 1) | struct | | org.apache.spark.sql.catalyst.expressions.ShiftRight | shiftright | SELECT shiftright(4, 1) | struct | +| org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | >>> | SELECT shiftrightunsigned(4, 1) | struct | | org.apache.spark.sql.catalyst.expressions.ShiftRightUnsigned | shiftrightunsigned | SELECT shiftrightunsigned(4, 1) | struct | | org.apache.spark.sql.catalyst.expressions.Shuffle | shuffle | SELECT shuffle(array(1, 20, 3, 5)) | struct> | | org.apache.spark.sql.catalyst.expressions.Signum | sign | SELECT sign(40) | struct | @@ -349,10 +354,12 @@ | org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct | | org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.TryReflect | try_reflect | SELECT try_reflect('java.util.UUID', 'randomUUID') | struct | +| org.apache.spark.sql.catalyst.expressions.TryRemainder | try_remainder | SELECT try_remainder(3, 2) | struct | | org.apache.spark.sql.catalyst.expressions.TrySubtract | try_subtract | SELECT try_subtract(2, 1) | struct | | org.apache.spark.sql.catalyst.expressions.TryToBinary | try_to_binary | SELECT try_to_binary('abc', 'utf-8') | struct | | org.apache.spark.sql.catalyst.expressions.TryToNumber | try_to_number | SELECT try_to_number('454', '999') | struct | | org.apache.spark.sql.catalyst.expressions.TryToTimestampExpressionBuilder | try_to_timestamp | SELECT try_to_timestamp('2016-12-31 00:12:00') | struct | +| org.apache.spark.sql.catalyst.expressions.TryValidateUTF8 | try_validate_utf8 | SELECT try_validate_utf8('Spark') | struct | | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct | | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | @@ -368,6 +375,7 @@ | org.apache.spark.sql.catalyst.expressions.UrlDecode | url_decode | SELECT url_decode('https%3A%2F%2Fspark.apache.org') | struct | | org.apache.spark.sql.catalyst.expressions.UrlEncode | url_encode | SELECT url_encode('https://spark.apache.org') | struct | | org.apache.spark.sql.catalyst.expressions.Uuid | uuid | SELECT uuid() | struct | +| org.apache.spark.sql.catalyst.expressions.ValidateUTF8 | validate_utf8 | SELECT validate_utf8('Spark') | struct | | org.apache.spark.sql.catalyst.expressions.WeekDay | weekday | SELECT weekday('2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.WeekOfYear | weekofyear | SELECT weekofyear('2008-02-20') | struct | | org.apache.spark.sql.catalyst.expressions.WidthBucket | width_bucket | SELECT width_bucket(5.3, 0.2, 10.6, 5) | struct | @@ -436,9 +444,11 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop | var_pop | SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | var_samp | SELECT var_samp(col) 
FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | variance | SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | -| org.apache.spark.sql.catalyst.expressions.variant.ParseJson | parse_json | SELECT parse_json('{"a":1,"b":0.8}') | struct | +| org.apache.spark.sql.catalyst.expressions.variant.IsVariantNull | is_variant_null | SELECT is_variant_null(parse_json('null')) | struct | +| org.apache.spark.sql.catalyst.expressions.variant.ParseJsonExpressionBuilder | parse_json | SELECT parse_json('{"a":1,"b":0.8}') | struct | | org.apache.spark.sql.catalyst.expressions.variant.SchemaOfVariant | schema_of_variant | SELECT schema_of_variant(parse_json('null')) | struct | | org.apache.spark.sql.catalyst.expressions.variant.SchemaOfVariantAgg | schema_of_variant_agg | SELECT schema_of_variant_agg(parse_json(j)) FROM VALUES ('1'), ('2'), ('3') AS tab(j) | struct | +| org.apache.spark.sql.catalyst.expressions.variant.TryParseJsonExpressionBuilder | try_parse_json | SELECT try_parse_json('{"a":1,"b":0.8}') | struct | | org.apache.spark.sql.catalyst.expressions.variant.TryVariantGetExpressionBuilder | try_variant_get | SELECT try_variant_get(parse_json('{"a": 1}'), '$.a', 'int') | struct | | org.apache.spark.sql.catalyst.expressions.variant.VariantGetExpressionBuilder | variant_get | SELECT variant_get(parse_json('{"a": 1}'), '$.a', 'int') | struct | | org.apache.spark.sql.catalyst.expressions.xml.XPathBoolean | xpath_boolean | SELECT xpath_boolean('
      1','a/b') | struct1, a/b):boolean> | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out index 2998803698c35..57108c4582f45 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out @@ -8,7 +8,7 @@ create temporary view data as select * from values CreateViewCommand `data`, select * from values ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) - as data(a, b, c), false, false, LocalTempView, true + as data(a, b, c), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x, c#x] +- SubqueryAlias data +- LocalRelation [a#x, b#x, c#x] @@ -97,7 +97,7 @@ CreateViewCommand `primitive_arrays`, select * from values ( float_array, date_array, timestamp_array -), false, false, LocalTempView, true +), false, false, LocalTempView, UNSUPPORTED, true +- Project [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] +- SubqueryAlias primitive_arrays +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/date.sql.out index 7acfc9277679e..fd927b99c6456 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/date.sql.out @@ -2,7 +2,7 @@ -- !query create temporary view date_view as select '2011-11-11' date_str, '1' int_str -- !query analysis -CreateViewCommand `date_view`, select '2011-11-11' date_str, '1' int_str, false, false, LocalTempView, true +CreateViewCommand `date_view`, select '2011-11-11' date_str, '1' int_str, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-11-11 AS date_str#x, 1 AS int_str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-disabled.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-disabled.sql.out index d73b72eca3e21..f7b0e3370f9f4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-disabled.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-disabled.sql.out @@ -276,7 +276,7 @@ Project [hello AS hello#x] -- !query CREATE TEMPORARY VIEW v(c1 COMMENT "hello") AS SELECT 1 -- !query analysis -CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, true +CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation @@ -304,7 +304,7 @@ Project [hello AS hello#x] -- !query CREATE TEMPORARY VIEW v(c1 COMMENT 'hello') AS SELECT 1 -- !query analysis -CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, true +CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-enabled.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-enabled.sql.out index c37acb7879c4a..f241f9bd6867c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-enabled.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-enabled.sql.out @@ -380,7 +380,7 @@ Project [hello AS hello#x] -- !query CREATE TEMPORARY VIEW v(c1 COMMENT 'hello') AS SELECT 1 -- !query analysis -CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, true +CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation @@ -409,7 +409,7 @@ CreateNamespace false CREATE TEMPORARY VIEW "myview"("c1") AS WITH "v"("a") AS (SELECT 1) SELECT "a" FROM "v" -- !query analysis -CreateViewCommand `myview`, [(c1,None)], WITH "v"("a") AS (SELECT 1) SELECT "a" FROM "v", false, false, LocalTempView, true +CreateViewCommand `myview`, [(c1,None)], WITH "v"("a") AS (SELECT 1) SELECT "a" FROM "v", false, false, LocalTempView, UNSUPPORTED, true +- WithCTE :- CTERelationDef xxxx, false : +- SubqueryAlias v diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/higher-order-functions.sql.out index 693cb2a046319..c06d1e5534aed 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/higher-order-functions.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `nested`, values (1, array(32, 97), array(array(12, 99), array(123, 42), array(1))), (2, array(77, -76), array(array(6, 96, 65), array(-1, -2))), (3, array(12), array(array(17))) - as t(x, ys, zs), false, true, LocalTempView, true + as t(x, ys, zs), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [x#x, ys#x, zs#x] @@ -35,6 +35,26 @@ org.apache.spark.sql.AnalysisException } +-- !query +select ceil(x -> x) as v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_LAMBDA_FUNCTION_CALL.NON_HIGHER_ORDER_FUNCTION", + "sqlState" : "42K0D", + "messageParameters" : { + "class" : "org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder$" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "ceil(x -> x)" + } ] +} + + -- !query select transform(zs, z -> z) as v from nested -- !query analysis @@ -258,7 +278,7 @@ create or replace temporary view nested as values CreateViewCommand `nested`, values (1, map(1, 1, 2, 2, 3, 3)), (2, map(4, 4, 5, 5, 6, 6)) - as t(x, ys), false, true, LocalTempView, true + as t(x, ys), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [x#x, ys#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out index 03183e6c40005..12756576ded9b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/interval.sql.out @@ -1605,7 +1605,7 @@ Project [cast(cast(4 12:12:12 as timestamp) + INTERVAL '4 22:12' DAY TO MINUTE a -- !query create temporary view interval_view 
as select '1' str -- !query analysis -CreateViewCommand `interval_view`, select '1' str, false, false, LocalTempView, true +CreateViewCommand `interval_view`, select '1' str, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/math.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/math.sql.out index 7eb7fcff356a4..1fa7b7513993d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/math.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/math.sql.out @@ -431,3 +431,80 @@ SELECT conv('-9223372036854775807', 36, 10) -- !query analysis Project [conv(-9223372036854775807, 36, 10, true) AS conv(-9223372036854775807, 36, 10)#x] +- OneRowRelation + + +-- !query +SELECT BIN(0) +-- !query analysis +Project [bin(cast(0 as bigint)) AS bin(0)#x] ++- OneRowRelation + + +-- !query +SELECT BIN(25) +-- !query analysis +Project [bin(cast(25 as bigint)) AS bin(25)#x] ++- OneRowRelation + + +-- !query +SELECT BIN(25L) +-- !query analysis +Project [bin(25) AS bin(25)#x] ++- OneRowRelation + + +-- !query +SELECT BIN(25.5) +-- !query analysis +Project [bin(cast(25.5 as bigint)) AS bin(25.5)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(0Y) +-- !query analysis +Project [positive(0) AS (+ 0)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(25) +-- !query analysis +Project [positive(25) AS (+ 25)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(-25L) +-- !query analysis +Project [positive(-25) AS (+ -25)#xL] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(25.5) +-- !query analysis +Project [positive(25.5) AS (+ 25.5)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE("25.5") +-- !query analysis +Project [positive(cast(25.5 as double)) AS (+ 25.5)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE("invalid") +-- !query analysis +Project [positive(cast(invalid as double)) AS (+ invalid)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(null) +-- !query analysis +Project [positive(cast(null as double)) AS (+ NULL)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/string-functions.sql.out index 7ffd3cbd8bac6..98664dedf820c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/string-functions.sql.out @@ -384,21 +384,21 @@ Project [btrim(xyxtrimyyx, xy) AS btrim(xyxtrimyyx, xy)#x] -- !query SELECT btrim(encode(" xyz ", 'utf-8')) -- !query analysis -Project [btrim(encode( xyz , utf-8, false)) AS btrim(encode( xyz , utf-8))#x] +Project [btrim(encode( xyz , utf-8)) AS btrim(encode( xyz , utf-8))#x] +- OneRowRelation -- !query SELECT btrim(encode('yxTomxx', 'utf-8'), encode('xyz', 'utf-8')) -- !query analysis -Project [btrim(encode(yxTomxx, utf-8, false), encode(xyz, utf-8, false)) AS btrim(encode(yxTomxx, utf-8), encode(xyz, utf-8))#x] +Project [btrim(encode(yxTomxx, utf-8), encode(xyz, utf-8)) AS btrim(encode(yxTomxx, utf-8), encode(xyz, utf-8))#x] +- OneRowRelation -- !query SELECT btrim(encode('xxxbarxxx', 'utf-8'), encode('x', 'utf-8')) -- !query analysis -Project [btrim(encode(xxxbarxxx, utf-8, false), encode(x, utf-8, false)) AS btrim(encode(xxxbarxxx, utf-8), encode(x, utf-8))#x] +Project [btrim(encode(xxxbarxxx, utf-8), encode(x, utf-8)) AS btrim(encode(xxxbarxxx, utf-8), 
encode(x, utf-8))#x] +- OneRowRelation @@ -649,14 +649,14 @@ SetCommand (spark.sql.legacy.javaCharsets,Some(true)) -- !query select encode('hello', 'WINDOWS-1252') -- !query analysis -Project [encode(hello, WINDOWS-1252, true) AS encode(hello, WINDOWS-1252)#x] +Project [encode(hello, WINDOWS-1252) AS encode(hello, WINDOWS-1252)#x] +- OneRowRelation -- !query select encode(scol, ecol) from values('hello', 'WINDOWS-1252') as t(scol, ecol) -- !query analysis -Project [encode(scol#x, ecol#x, true) AS encode(scol, ecol)#x] +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] +- SubqueryAlias t +- LocalRelation [scol#x, ecol#x] @@ -670,14 +670,14 @@ SetCommand (spark.sql.legacy.javaCharsets,Some(false)) -- !query select encode('hello', 'WINDOWS-1252') -- !query analysis -Project [encode(hello, WINDOWS-1252, false) AS encode(hello, WINDOWS-1252)#x] +Project [encode(hello, WINDOWS-1252) AS encode(hello, WINDOWS-1252)#x] +- OneRowRelation -- !query select encode(scol, ecol) from values('hello', 'WINDOWS-1252') as t(scol, ecol) -- !query analysis -Project [encode(scol#x, ecol#x, false) AS encode(scol, ecol)#x] +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] +- SubqueryAlias t +- LocalRelation [scol#x, ecol#x] @@ -685,18 +685,95 @@ Project [encode(scol#x, ecol#x, false) AS encode(scol, ecol)#x] -- !query select encode('hello', 'Windows-xxx') -- !query analysis -Project [encode(hello, Windows-xxx, false) AS encode(hello, Windows-xxx)#x] +Project [encode(hello, Windows-xxx) AS encode(hello, Windows-xxx)#x] +- OneRowRelation -- !query select encode(scol, ecol) from values('hello', 'Windows-xxx') as t(scol, ecol) -- !query analysis -Project [encode(scol#x, ecol#x, false) AS encode(scol, ecol)#x] +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] +- SubqueryAlias t +- LocalRelation [scol#x, ecol#x] +-- !query +set spark.sql.legacy.codingErrorAction=true +-- !query analysis +SetCommand (spark.sql.legacy.codingErrorAction,Some(true)) + + +-- !query +select encode('渭城朝雨浥轻尘', 'US-ASCII') +-- !query analysis +Project [encode(渭城朝雨浥轻尘, US-ASCII) AS encode(渭城朝雨浥轻尘, US-ASCII)#x] ++- OneRowRelation + + +-- !query +select encode(scol, ecol) from values('渭城朝雨浥轻尘', 'US-ASCII') as t(scol, ecol) +-- !query analysis +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] ++- SubqueryAlias t + +- LocalRelation [scol#x, ecol#x] + + +-- !query +set spark.sql.legacy.codingErrorAction=false +-- !query analysis +SetCommand (spark.sql.legacy.codingErrorAction,Some(false)) + + +-- !query +select encode('客舍青青柳色新', 'US-ASCII') +-- !query analysis +Project [encode(客舍青青柳色新, US-ASCII) AS encode(客舍青青柳色新, US-ASCII)#x] ++- OneRowRelation + + +-- !query +select encode(scol, ecol) from values('客舍青青柳色新', 'US-ASCII') as t(scol, ecol) +-- !query analysis +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] ++- SubqueryAlias t + +- LocalRelation [scol#x, ecol#x] + + +-- !query +select encode(decode(encode('白日依山尽,黄河入海流。欲穷千里目,更上一层楼。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(白日依山尽,黄河入海流。欲穷千里目,更上一层楼。, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(白日依山尽,黄河入海流。欲穷千里目,更上一层楼。, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + +-- !query +select encode(decode(encode('南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + +-- !query +select encode(decode(encode('세계에서 가장 인기 있는 
빅데이터 처리 프레임워크인 Spark', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + +-- !query +select encode(decode(encode('το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + +-- !query +select encode(decode(encode('Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + -- !query select decode() -- !query analysis @@ -746,7 +823,14 @@ org.apache.spark.sql.AnalysisException -- !query select decode(encode('abc', 'utf-8'), 'utf-8') -- !query analysis -Project [decode(encode(abc, utf-8, false), utf-8) AS decode(encode(abc, utf-8), utf-8)#x] +Project [decode(encode(abc, utf-8), utf-8) AS decode(encode(abc, utf-8), utf-8)#x] ++- OneRowRelation + + +-- !query +select decode(encode('大千世界', 'utf-32'), 'utf-32') +-- !query analysis +Project [decode(encode(大千世界, utf-32), utf-32) AS decode(encode(大千世界, utf-32), utf-32)#x] +- OneRowRelation @@ -856,6 +940,48 @@ Project [decode(scol#x, ecol#x) AS decode(scol, ecol)#x] +- LocalRelation [scol#x, ecol#x] +-- !query +set spark.sql.legacy.codingErrorAction=true +-- !query analysis +SetCommand (spark.sql.legacy.codingErrorAction,Some(true)) + + +-- !query +select decode(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') +-- !query analysis +Project [decode(0xE58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592, US-ASCII) AS decode(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', US-ASCII)#x] ++- OneRowRelation + + +-- !query +select decode(scol, ecol) from values(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') as t(scol, ecol) +-- !query analysis +Project [decode(scol#x, ecol#x) AS decode(scol, ecol)#x] ++- SubqueryAlias t + +- LocalRelation [scol#x, ecol#x] + + +-- !query +set spark.sql.legacy.codingErrorAction=false +-- !query analysis +SetCommand (spark.sql.legacy.codingErrorAction,Some(false)) + + +-- !query +select decode(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') +-- !query analysis +Project [decode(0xE8A5BFE587BAE998B3E585B3E697A0E69585E4BABA, US-ASCII) AS decode(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', US-ASCII)#x] ++- OneRowRelation + + +-- !query +select decode(scol, ecol) from values(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') as t(scol, ecol) +-- !query analysis +Project [decode(scol#x, ecol#x) AS decode(scol, ecol)#x] ++- SubqueryAlias t + +- LocalRelation [scol#x, ecol#x] + + -- !query SELECT CONTAINS(null, 'Spark') -- !query analysis @@ -1428,7 +1554,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query CREATE TEMPORARY VIEW fmtTable(fmtField) AS SELECT * FROM VALUES ('invalidFormat') -- !query analysis -CreateViewCommand `fmtTable`, [(fmtField,None)], SELECT * FROM VALUES ('invalidFormat'), false, false, LocalTempView, true +CreateViewCommand `fmtTable`, [(fmtField,None)], SELECT * 
FROM VALUES ('invalidFormat'), false, false, LocalTempView, UNSUPPORTED, true +- Project [col1#x] +- LocalRelation [col1#x] @@ -1586,3 +1712,87 @@ select luhn_check(123.456) -- !query analysis Project [luhn_check(cast(123.456 as string)) AS luhn_check(123.456)#x] +- OneRowRelation + + +-- !query +select is_valid_utf8('') +-- !query analysis +Project [is_valid_utf8() AS is_valid_utf8()#x] ++- OneRowRelation + + +-- !query +select is_valid_utf8('abc') +-- !query analysis +Project [is_valid_utf8(abc) AS is_valid_utf8(abc)#x] ++- OneRowRelation + + +-- !query +select is_valid_utf8(x'80') +-- !query analysis +Project [is_valid_utf8(cast(0x80 as string)) AS is_valid_utf8(X'80')#x] ++- OneRowRelation + + +-- !query +select make_valid_utf8('') +-- !query analysis +Project [make_valid_utf8() AS make_valid_utf8()#x] ++- OneRowRelation + + +-- !query +select make_valid_utf8('abc') +-- !query analysis +Project [make_valid_utf8(abc) AS make_valid_utf8(abc)#x] ++- OneRowRelation + + +-- !query +select make_valid_utf8(x'80') +-- !query analysis +Project [make_valid_utf8(cast(0x80 as string)) AS make_valid_utf8(X'80')#x] ++- OneRowRelation + + +-- !query +select validate_utf8('') +-- !query analysis +Project [validate_utf8() AS validate_utf8()#x] ++- OneRowRelation + + +-- !query +select validate_utf8('abc') +-- !query analysis +Project [validate_utf8(abc) AS validate_utf8(abc)#x] ++- OneRowRelation + + +-- !query +select validate_utf8(x'80') +-- !query analysis +Project [validate_utf8(cast(0x80 as string)) AS validate_utf8(X'80')#x] ++- OneRowRelation + + +-- !query +select try_validate_utf8('') +-- !query analysis +Project [try_validate_utf8() AS try_validate_utf8()#x] ++- OneRowRelation + + +-- !query +select try_validate_utf8('abc') +-- !query analysis +Project [try_validate_utf8(abc) AS try_validate_utf8(abc)#x] ++- OneRowRelation + + +-- !query +select try_validate_utf8(x'80') +-- !query analysis +Project [try_validate_utf8(cast(0x80 as string)) AS try_validate_utf8(X'80')#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/timestamp.sql.out index ecfc286b79435..bf34490d657e3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/timestamp.sql.out @@ -236,7 +236,7 @@ create temporary view ttf1 as select * from values CreateViewCommand `ttf1`, select * from values (1, 2), (2, 3) - as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, true + as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, UNSUPPORTED, true +- Project [current_date#x, current_timestamp#x] +- SubqueryAlias ttf1 +- LocalRelation [current_date#x, current_timestamp#x] @@ -263,7 +263,7 @@ create temporary view ttf2 as select * from values CreateViewCommand `ttf2`, select * from values (1, 2), (2, 3) - as ttf2(a, b), false, false, LocalTempView, true + as ttf2(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias ttf2 +- LocalRelation [a#x, b#x] @@ -575,7 +575,7 @@ select null - timestamp'2011-11-11 11:11:11' -- !query create temporary view ts_view as select '2011-11-11 11:11:11' str -- !query analysis -CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, true +CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-11-11 
11:11:11 AS str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/try_arithmetic.sql.out index ef17f6b50b90a..30654d1d71e2b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/try_arithmetic.sql.out @@ -13,6 +13,20 @@ Project [try_add(2147483647, 1) AS try_add(2147483647, 1)#x] +- OneRowRelation +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query analysis +Project [try_add(2147483647, cast(1 as decimal(10,0))) AS try_add(2147483647, 1)#x] ++- OneRowRelation + + +-- !query +SELECT try_add(2147483647, "1") +-- !query analysis +Project [try_add(2147483647, 1) AS try_add(2147483647, 1)#xL] ++- OneRowRelation + + -- !query SELECT try_add(-2147483648, -1) -- !query analysis @@ -211,6 +225,20 @@ Project [try_divide(1, (1.0 / 0.0)) AS try_divide(1, (1.0 / 0.0))#x] +- OneRowRelation +-- !query +SELECT try_divide(1, decimal(0)) +-- !query analysis +Project [try_divide(1, cast(0 as decimal(10,0))) AS try_divide(1, 0)#x] ++- OneRowRelation + + +-- !query +SELECT try_divide(1, "0") +-- !query analysis +Project [try_divide(1, 0) AS try_divide(1, 0)#x] ++- OneRowRelation + + -- !query SELECT try_divide(interval 2 year, 2) -- !query analysis @@ -267,6 +295,20 @@ Project [try_subtract(2147483647, -1) AS try_subtract(2147483647, -1)#x] +- OneRowRelation +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query analysis +Project [try_subtract(2147483647, cast(-1 as decimal(10,0))) AS try_subtract(2147483647, -1)#x] ++- OneRowRelation + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query analysis +Project [try_subtract(2147483647, -1) AS try_subtract(2147483647, -1)#xL] ++- OneRowRelation + + -- !query SELECT try_subtract(-2147483648, 1) -- !query analysis @@ -351,6 +393,20 @@ Project [try_multiply(2147483647, -2) AS try_multiply(2147483647, -2)#x] +- OneRowRelation +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query analysis +Project [try_multiply(2147483647, cast(-2 as decimal(10,0))) AS try_multiply(2147483647, -2)#x] ++- OneRowRelation + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query analysis +Project [try_multiply(2147483647, -2) AS try_multiply(2147483647, -2)#xL] ++- OneRowRelation + + -- !query SELECT try_multiply(-2147483648, 2) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out index 459c5613e9196..fb331089d7545 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out @@ -8,7 +8,7 @@ create temporary view data as select * from values CreateViewCommand `data`, select * from values ("one", array(11, 12, 13), array(array(111, 112, 113), array(121, 122, 123))), ("two", array(21, 22, 23), array(array(211, 212, 213), array(221, 222, 223))) - as data(a, b, c), false, false, LocalTempView, true + as data(a, b, c), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x, c#x] +- SubqueryAlias data +- LocalRelation [a#x, b#x, c#x] @@ -97,7 +97,7 @@ CreateViewCommand `primitive_arrays`, select * from values ( float_array, date_array, timestamp_array -), false, false, LocalTempView, true +), false, false, LocalTempView, UNSUPPORTED, true +- Project [boolean_array#x, tinyint_array#x, 
smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] +- SubqueryAlias primitive_arrays +- LocalRelation [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out new file mode 100644 index 0000000000000..fe61e684a7ff5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary.sql.out @@ -0,0 +1,34 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query analysis +Project [0x AS X''#x] ++- OneRowRelation + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query analysis +Project [0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333 AS X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'#x] ++- OneRowRelation + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query analysis +Project [cast(Spark as binary) AS CAST(Spark AS BINARY)#x] ++- OneRowRelation + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query analysis +Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x] ++- OneRowRelation + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query analysis +Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out new file mode 100644 index 0000000000000..fe61e684a7ff5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_base64.sql.out @@ -0,0 +1,34 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query analysis +Project [0x AS X''#x] ++- OneRowRelation + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query analysis +Project [0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333 AS X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'#x] ++- OneRowRelation + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query analysis +Project [cast(Spark as binary) AS CAST(Spark AS BINARY)#x] ++- OneRowRelation + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query analysis +Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x] ++- OneRowRelation + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query analysis +Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out new file mode 100644 index 0000000000000..fe61e684a7ff5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_basic.sql.out @@ -0,0 +1,34 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query analysis +Project [0x AS X''#x] ++- OneRowRelation + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query analysis +Project [0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333 AS X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'#x] ++- OneRowRelation + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query analysis +Project [cast(Spark as binary) AS CAST(Spark AS BINARY)#x] ++- OneRowRelation + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query analysis +Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x] ++- OneRowRelation + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query analysis +Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out new file mode 100644 index 0000000000000..fe61e684a7ff5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex.sql.out @@ -0,0 +1,34 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query analysis +Project [0x AS X''#x] ++- OneRowRelation + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query analysis +Project [0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333 AS X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'#x] ++- OneRowRelation + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query analysis +Project [cast(Spark as binary) AS CAST(Spark AS BINARY)#x] ++- OneRowRelation + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query analysis +Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x] ++- OneRowRelation + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query analysis +Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out new 
file mode 100644 index 0000000000000..fe61e684a7ff5 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/binary_hex_discrete.sql.out @@ -0,0 +1,34 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query analysis +Project [0x AS X''#x] ++- OneRowRelation + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query analysis +Project [0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333 AS X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'#x] ++- OneRowRelation + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query analysis +Project [cast(Spark as binary) AS CAST(Spark AS BINARY)#x] ++- OneRowRelation + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query analysis +Project [array(0x, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333, cast(Spark as binary)) AS array(X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST(Spark AS BINARY))#x] ++- OneRowRelation + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query analysis +Project [to_csv(named_struct(n, 1, info, 0x4561736F6E2059616F20323031382D31312D31373A31333A33333A3333), Some(America/Los_Angeles)) AS to_csv(named_struct(n, 1, info, X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out index 8220aa4bd25bd..1267a984565ad 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/bitwise.sql.out @@ -182,7 +182,7 @@ CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES CreateViewCommand `bitwise_test`, SELECT * FROM VALUES (1, 1, 1, 1L), (2, 3, 4, null), - (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4), false, true, LocalTempView, true + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4), false, true, LocalTempView, UNSUPPORTED, true +- Project [b1#x, b2#x, b3#x, b4#xL] +- SubqueryAlias bitwise_test +- LocalRelation [b1#x, b2#x, b3#x, b4#xL] @@ -306,3 +306,136 @@ select getbit(11L, 64) -- !query analysis Project [getbit(11, 64) AS getbit(11, 64)#x] +- OneRowRelation + + +-- !query +SELECT 20181117 >> 2 +-- !query analysis +Project [(20181117 >> 2) AS (20181117 >> 2)#x] ++- OneRowRelation + + +-- !query +SELECT 20181117 << 2 +-- !query analysis +Project [(20181117 << 2) AS (20181117 << 2)#x] ++- OneRowRelation + + +-- !query +SELECT 20181117 >>> 2 +-- !query analysis +Project [(20181117 >>> 2) AS (20181117 >>> 2)#x] ++- OneRowRelation + + +-- !query +SELECT 20181117 > > 2 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'>'", + "hint" : "" + } +} + + +-- !query +SELECT 20181117 < < 2 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'<'", + "hint" : "" + } +} + + +-- !query +SELECT 20181117 > >> 2 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'>>'", + "hint" : "" + } +} + + +-- !query +SELECT 
20181117 <<< 2 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'<'", + "hint" : "" + } +} + + +-- !query +SELECT 20181117 >>>> 2 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'>'", + "hint" : "" + } +} + + +-- !query +select cast(null as array>), 20181117 >> 2 +-- !query analysis +Project [cast(null as array>) AS NULL#x, (20181117 >> 2) AS (20181117 >> 2)#x] ++- OneRowRelation + + +-- !query +select cast(null as array>), 20181117 >>> 2 +-- !query analysis +Project [cast(null as array>) AS NULL#x, (20181117 >>> 2) AS (20181117 >>> 2)#x] ++- OneRowRelation + + +-- !query +select cast(null as map>), 20181117 >> 2 +-- !query analysis +Project [cast(null as map>) AS NULL#x, (20181117 >> 2) AS (20181117 >> 2)#x] ++- OneRowRelation + + +-- !query +select 1 << 1 + 2 as plus_over_shift +-- !query analysis +Project [(1 << (1 + 2)) AS plus_over_shift#x] ++- OneRowRelation + + +-- !query +select 2 >> 1 << 1 as left_to_right +-- !query analysis +Project [((2 >> 1) << 1) AS left_to_right#x] ++- OneRowRelation + + +-- !query +select 1 & 2 >> 1 as shift_over_ampersand +-- !query analysis +Project [(1 & (2 >> 1)) AS shift_over_ampersand#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/change-column.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/change-column.sql.out index 07edfa5e95e1b..645057b85c000 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/change-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/change-column.sql.out @@ -204,7 +204,7 @@ DescribeTableCommand `spark_catalog`.`default`.`test_change`, false, [col_name#x -- !query CREATE TEMPORARY VIEW temp_view(a, b) AS SELECT 1, "one" -- !query analysis -CreateViewCommand `temp_view`, [(a,None), (b,None)], SELECT 1, "one", false, false, LocalTempView, true +CreateViewCommand `temp_view`, [(a,None), (b,None)], SELECT 1, "one", false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x, one AS one#x] +- OneRowRelation @@ -233,7 +233,7 @@ org.apache.spark.sql.AnalysisException -- !query CREATE GLOBAL TEMPORARY VIEW global_temp_view(a, b) AS SELECT 1, "one" -- !query analysis -CreateViewCommand `global_temp_view`, [(a,None), (b,None)], SELECT 1, "one", false, false, GlobalTempView, true +CreateViewCommand `global_temp_view`, [(a,None), (b,None)], SELECT 1, "one", false, false, GlobalTempView, UNSUPPORTED, true +- Project [1 AS 1#x, one AS one#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out index 02f09e0831d25..5c1417f7c0aae 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out @@ -174,7 +174,7 @@ alter view char_view as select * from char_tbl2 AlterViewAsCommand `spark_catalog`.`default`.`char_view`, select * from char_tbl2, true +- Project [c#x, v#x] +- SubqueryAlias spark_catalog.default.char_tbl2 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c#x, 5, true, false, true) AS c#x, v#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c#x, 5)) AS 
c#x, v#x] +- Relation spark_catalog.default.char_tbl2[c#x,v#x] parquet @@ -348,7 +348,7 @@ CreateViewCommand `str_view`, select c, v from values ('NetE', 'Spar'), ('NetEa ', 'Spark '), ('NetEas ', 'Spark'), - ('NetEase', 'Spark-') t(c, v), false, false, LocalTempView, true + ('NetEase', 'Spark-') t(c, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [c#x, v#x] +- SubqueryAlias t +- LocalRelation [c#x, v#x] @@ -364,7 +364,7 @@ CreateDataSourceTableCommand `spark_catalog`.`default`.`char_tbl4`, false insert into char_tbl4 select c, c, v, c from str_view -- !query analysis InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/char_tbl4, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/char_tbl4], Append, `spark_catalog`.`default`.`char_tbl4`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/char_tbl4), [c7, c8, v, s] -+- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, charTypeWriteSideCheck, cast(c#x as string), 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, charTypeWriteSideCheck, cast(c#x as string), 8, true, false, true) AS c8#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, varcharTypeWriteSideCheck, cast(v#x as string), 6, true, false, true) AS v#x, cast(c#x as string) AS s#x] ++- Project [static_invoke(CharVarcharCodegenUtils.charTypeWriteSideCheck(cast(c#x as string), 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.charTypeWriteSideCheck(cast(c#x as string), 8)) AS c8#x, static_invoke(CharVarcharCodegenUtils.varcharTypeWriteSideCheck(cast(v#x as string), 6)) AS v#x, cast(c#x as string) AS s#x] +- Project [c#x, c#x, v#x, c#x] +- SubqueryAlias str_view +- View (`str_view`, [c#x, v#x]) @@ -379,7 +379,7 @@ select c7, c8, v, s from char_tbl4 -- !query analysis Project [c7#x, c8#x, v#x, s#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -389,7 +389,7 @@ select c7, c8, v, s from char_tbl4 where c7 = c8 Project [c7#x, c8#x, v#x, s#x] +- Filter (rpad(c7#x, 8, ) = c8#x) +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -399,7 +399,7 @@ select c7, c8, v, s from char_tbl4 where c7 = v Project [c7#x, c8#x, v#x, s#x] +- Filter (c7#x = v#x) +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project 
[staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -409,7 +409,7 @@ select c7, c8, v, s from char_tbl4 where c7 = s Project [c7#x, c8#x, v#x, s#x] +- Filter (c7#x = s#x) +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -419,7 +419,7 @@ select c7, c8, v, s from char_tbl4 where c7 = 'NetEase ' Project [c7#x, c8#x, v#x, s#x] +- Filter (rpad(c7#x, 22, ) = NetEase ) +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -429,7 +429,7 @@ select c7, c8, v, s from char_tbl4 where v = 'Spark ' Project [c7#x, c8#x, v#x, s#x] +- Filter (v#x = Spark ) +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -439,7 +439,7 @@ select c7, c8, v, s from char_tbl4 order by c7 Sort [c7#x ASC NULLS FIRST], true +- Project [c7#x, c8#x, v#x, s#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -449,7 +449,7 @@ select c7, c8, v, s from char_tbl4 order 
by v Sort [v#x ASC NULLS FIRST], true +- Project [c7#x, c8#x, v#x, s#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -458,7 +458,7 @@ select ascii(c7), ascii(c8), ascii(v), ascii(s) from char_tbl4 -- !query analysis Project [ascii(c7#x) AS ascii(c7)#x, ascii(c8#x) AS ascii(c8)#x, ascii(v#x) AS ascii(v)#x, ascii(s#x) AS ascii(s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -467,7 +467,7 @@ select base64(c7), base64(c8), base64(v), ascii(s) from char_tbl4 -- !query analysis Project [base64(cast(c7#x as binary)) AS base64(c7)#x, base64(cast(c8#x as binary)) AS base64(c8)#x, base64(cast(v#x as binary)) AS base64(v)#x, ascii(s#x) AS ascii(s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -476,7 +476,7 @@ select bit_length(c7), bit_length(c8), bit_length(v), bit_length(s) from char_tb -- !query analysis Project [bit_length(c7#x) AS bit_length(c7)#x, bit_length(c8#x) AS bit_length(c8)#x, bit_length(v#x) AS bit_length(v)#x, bit_length(s#x) AS bit_length(s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -485,7 +485,7 @@ select char_length(c7), char_length(c8), char_length(v), char_length(s) from cha -- !query analysis Project [char_length(c7#x) AS char_length(c7)#x, char_length(c8#x) AS char_length(c8)#x, char_length(v#x) AS char_length(v)#x, char_length(s#x) AS char_length(s)#x] +- 
SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -494,7 +494,7 @@ select octet_length(c7), octet_length(c8), octet_length(v), octet_length(s) from -- !query analysis Project [octet_length(c7#x) AS octet_length(c7)#x, octet_length(c8#x) AS octet_length(c8)#x, octet_length(v#x) AS octet_length(v)#x, octet_length(s#x) AS octet_length(s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -503,7 +503,7 @@ select concat_ws('|', c7, c8), concat_ws('|', c7, v), concat_ws('|', c7, s), con -- !query analysis Project [concat_ws(|, c7#x, c8#x) AS concat_ws(|, c7, c8)#x, concat_ws(|, c7#x, v#x) AS concat_ws(|, c7, v)#x, concat_ws(|, c7#x, s#x) AS concat_ws(|, c7, s)#x, concat_ws(|, v#x, s#x) AS concat_ws(|, v, s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -512,7 +512,7 @@ select concat(c7, c8), concat(c7, v), concat(c7, s), concat(v, s) from char_tbl4 -- !query analysis Project [concat(c7#x, c8#x) AS concat(c7, c8)#x, concat(c7#x, v#x) AS concat(c7, v)#x, concat(c7#x, s#x) AS concat(c7, s)#x, concat(v#x, s#x) AS concat(v, s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -521,7 +521,7 @@ select like(c7, 'Ne _'), like(c8, 'Ne _') from char_tbl4 -- !query analysis Project [c7#x LIKE Ne _ AS c7 LIKE Ne _#x, c8#x LIKE Ne _ AS c8 LIKE Ne _#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project 
[staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -530,7 +530,7 @@ select like(v, 'Spark_') from char_tbl4 -- !query analysis Project [v#x LIKE Spark_ AS v LIKE Spark_#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -540,7 +540,7 @@ select c7 = c8, upper(c7) = upper(c8), lower(c7) = lower(c8) from char_tbl4 wher Project [(rpad(c7#x, 8, ) = c8#x) AS (c7 = c8)#x, (upper(c7#x) = upper(c8#x)) AS (upper(c7) = upper(c8))#x, (lower(c7#x) = lower(c8#x)) AS (lower(c7) = lower(c8))#x] +- Filter (s#x = NetEase) +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -550,7 +550,7 @@ select c7 = s, upper(c7) = upper(s), lower(c7) = lower(s) from char_tbl4 where s Project [(c7#x = s#x) AS (c7 = s)#x, (upper(c7#x) = upper(s#x)) AS (upper(c7) = upper(s))#x, (lower(c7#x) = lower(s#x)) AS (lower(c7) = lower(s))#x] +- Filter (s#x = NetEase) +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -560,7 +560,7 @@ select c7 = 'NetEase', upper(c7) = upper('NetEase'), lower(c7) = lower('NetEase' Project [(c7#x = NetEase) AS (c7 = NetEase)#x, (upper(c7#x) = upper(NetEase)) AS (upper(c7) = upper(NetEase))#x, (lower(c7#x) = lower(NetEase)) AS (lower(c7) = lower(NetEase))#x] +- Filter (s#x = NetEase) +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS 
c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -569,7 +569,7 @@ select printf('Hey, %s%s%s%s', c7, c8, v, s) from char_tbl4 -- !query analysis Project [printf(Hey, %s%s%s%s, c7#x, c8#x, v#x, s#x) AS printf(Hey, %s%s%s%s, c7, c8, v, s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -578,7 +578,7 @@ select repeat(c7, 2), repeat(c8, 2), repeat(v, 2), repeat(s, 2) from char_tbl4 -- !query analysis Project [repeat(c7#x, 2) AS repeat(c7, 2)#x, repeat(c8#x, 2) AS repeat(c8, 2)#x, repeat(v#x, 2) AS repeat(v, 2)#x, repeat(s#x, 2) AS repeat(s, 2)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -587,7 +587,7 @@ select replace(c7, 'Net', 'Apache'), replace(c8, 'Net', 'Apache'), replace(v, 'S -- !query analysis Project [replace(c7#x, Net, Apache) AS replace(c7, Net, Apache)#x, replace(c8#x, Net, Apache) AS replace(c8, Net, Apache)#x, replace(v#x, Spark, Kyuubi) AS replace(v, Spark, Kyuubi)#x, replace(s#x, Net, Apache) AS replace(s, Net, Apache)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -596,7 +596,7 @@ select rpad(c7, 10), rpad(c8, 5), rpad(v, 5), rpad(s, 5) from char_tbl4 -- !query analysis Project [rpad(c7#x, 10, ) AS rpad(c7, 10, )#x, rpad(c8#x, 5, ) AS rpad(c8, 5, )#x, rpad(v#x, 5, ) AS rpad(v, 5, )#x, rpad(s#x, 5, ) AS rpad(s, 5, )#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class 
org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -605,7 +605,7 @@ select rtrim(c7), rtrim(c8), rtrim(v), rtrim(s) from char_tbl4 -- !query analysis Project [rtrim(c7#x, None) AS rtrim(c7)#x, rtrim(c8#x, None) AS rtrim(c8)#x, rtrim(v#x, None) AS rtrim(v)#x, rtrim(s#x, None) AS rtrim(s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -614,7 +614,7 @@ select split(c7, 'e'), split(c8, 'e'), split(v, 'a'), split(s, 'e') from char_tb -- !query analysis Project [split(c7#x, e, -1) AS split(c7, e, -1)#x, split(c8#x, e, -1) AS split(c8, e, -1)#x, split(v#x, a, -1) AS split(v, a, -1)#x, split(s#x, e, -1) AS split(s, e, -1)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -623,7 +623,7 @@ select substring(c7, 2), substring(c8, 2), substring(v, 3), substring(s, 2) from -- !query analysis Project [substring(c7#x, 2, 2147483647) AS substring(c7, 2, 2147483647)#x, substring(c8#x, 2, 2147483647) AS substring(c8, 2, 2147483647)#x, substring(v#x, 3, 2147483647) AS substring(v, 3, 2147483647)#x, substring(s#x, 2, 2147483647) AS substring(s, 2, 2147483647)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -632,7 +632,7 @@ select left(c7, 2), left(c8, 2), left(v, 3), left(s, 2) from char_tbl4 -- !query analysis Project [left(c7#x, 2) AS left(c7, 2)#x, left(c8#x, 2) AS left(c8, 2)#x, left(v#x, 3) AS left(v, 3)#x, left(s#x, 2) AS left(s, 2)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, 
true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -641,7 +641,7 @@ select right(c7, 2), right(c8, 2), right(v, 3), right(s, 2) from char_tbl4 -- !query analysis Project [right(c7#x, 2) AS right(c7, 2)#x, right(c8#x, 2) AS right(c8, 2)#x, right(v#x, 3) AS right(v, 3)#x, right(s#x, 2) AS right(s, 2)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -652,7 +652,7 @@ GlobalLimit 1 +- LocalLimit 1 +- Project [typeof(c7#x) AS typeof(c7)#x, typeof(c8#x) AS typeof(c8)#x, typeof(v#x) AS typeof(v)#x, typeof(s#x) AS typeof(s)#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet @@ -661,7 +661,7 @@ select cast(c7 as char(1)), cast(c8 as char(10)), cast(v as char(1)), cast(v as -- !query analysis Project [cast(c7#x as string) AS c7#x, cast(c8#x as string) AS c8#x, cast(v#x as string) AS v#x, cast(v#x as string) AS v#x, cast(s#x as string) AS s#x] +- SubqueryAlias spark_catalog.default.char_tbl4 - +- Project [staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c7#x, 7, true, false, true) AS c7#x, staticinvoke(class org.apache.spark.sql.catalyst.util.CharVarcharCodegenUtils, StringType, readSidePadding, c8#x, 8, true, false, true) AS c8#x, v#x, s#x] + +- Project [static_invoke(CharVarcharCodegenUtils.readSidePadding(c7#x, 7)) AS c7#x, static_invoke(CharVarcharCodegenUtils.readSidePadding(c8#x, 8)) AS c8#x, v#x, s#x] +- Relation spark_catalog.default.char_tbl4[c7#x,c8#x,v#x,s#x] parquet diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out index d242a60a17c18..e6409806bad7a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by SQLQueryTestSuite -- !query -create table t1(utf8_binary string collate utf8_binary, utf8_binary_lcase string collate utf8_binary_lcase) using parquet +create table t1(utf8_binary string collate utf8_binary, 
utf8_binary_lcase string collate utf8_binary_lcase) using parquet +create table t1(utf8_binary string collate utf8_binary,
utf8_lcase string collate utf8_lcase) using parquet -- !query analysis CreateDataSourceTableCommand `spark_catalog`.`default`.`t1`, false @@ -8,32 +8,32 @@ CreateDataSourceTableCommand `spark_catalog`.`default`.`t1`, false -- !query insert into t1 values('aaa', 'aaa') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [utf8_binary, utf8_binary_lcase] -+- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_BINARY_LCASE) AS utf8_binary_lcase#x] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [utf8_binary, utf8_lcase] ++- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_LCASE) AS utf8_lcase#x] +- LocalRelation [col1#x, col2#x] -- !query insert into t1 values('AAA', 'AAA') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [utf8_binary, utf8_binary_lcase] -+- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_BINARY_LCASE) AS utf8_binary_lcase#x] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [utf8_binary, utf8_lcase] ++- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_LCASE) AS utf8_lcase#x] +- LocalRelation [col1#x, col2#x] -- !query insert into t1 values('bbb', 'bbb') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [utf8_binary, utf8_binary_lcase] -+- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_BINARY_LCASE) AS utf8_binary_lcase#x] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [utf8_binary, utf8_lcase] ++- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_LCASE) AS utf8_lcase#x] +- LocalRelation [col1#x, col2#x] -- !query insert into t1 values('BBB', 'BBB') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in 
comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [utf8_binary, utf8_binary_lcase] -+- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_BINARY_LCASE) AS utf8_binary_lcase#x] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [utf8_binary, utf8_lcase] ++- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_LCASE) AS utf8_lcase#x] +- LocalRelation [col1#x, col2#x] @@ -48,68 +48,68 @@ select count(*) from t1 group by utf8_binary -- !query analysis Aggregate [utf8_binary#x], [count(1) AS count(1)#xL] +- SubqueryAlias spark_catalog.default.t1 - +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet -- !query -select count(*) from t1 group by utf8_binary_lcase +select count(*) from t1 group by utf8_lcase -- !query analysis -Aggregate [utf8_binary_lcase#x], [count(1) AS count(1)#xL] +Aggregate [utf8_lcase#x], [count(1) AS count(1)#xL] +- SubqueryAlias spark_catalog.default.t1 - +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet -- !query select * from t1 where utf8_binary = 'aaa' -- !query analysis -Project [utf8_binary#x, utf8_binary_lcase#x] +Project [utf8_binary#x, utf8_lcase#x] +- Filter (utf8_binary#x = aaa) +- SubqueryAlias spark_catalog.default.t1 - +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet -- !query -select * from t1 where utf8_binary_lcase = 'aaa' collate utf8_binary_lcase +select * from t1 where utf8_lcase = 'aaa' collate utf8_lcase -- !query analysis -Project [utf8_binary#x, utf8_binary_lcase#x] -+- Filter (utf8_binary_lcase#x = collate(aaa, utf8_binary_lcase)) +Project [utf8_binary#x, utf8_lcase#x] ++- Filter (utf8_lcase#x = collate(aaa, utf8_lcase)) +- SubqueryAlias spark_catalog.default.t1 - +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet -- !query select * from t1 where utf8_binary < 'bbb' -- !query analysis -Project [utf8_binary#x, utf8_binary_lcase#x] +Project [utf8_binary#x, utf8_lcase#x] +- Filter (utf8_binary#x < bbb) +- SubqueryAlias spark_catalog.default.t1 - +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet -- !query -select * from t1 where utf8_binary_lcase < 'bbb' collate utf8_binary_lcase +select * from t1 where utf8_lcase < 'bbb' collate utf8_lcase -- !query analysis -Project [utf8_binary#x, utf8_binary_lcase#x] -+- Filter (utf8_binary_lcase#x < collate(bbb, utf8_binary_lcase)) +Project [utf8_binary#x, utf8_lcase#x] ++- Filter (utf8_lcase#x < collate(bbb, utf8_lcase)) +- SubqueryAlias spark_catalog.default.t1 - +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet -- !query -select 
l.utf8_binary, r.utf8_binary_lcase from t1 l join t1 r on l.utf8_binary_lcase = r.utf8_binary_lcase +select l.utf8_binary, r.utf8_lcase from t1 l join t1 r on l.utf8_lcase = r.utf8_lcase -- !query analysis -Project [utf8_binary#x, utf8_binary_lcase#x] -+- Join Inner, (utf8_binary_lcase#x = utf8_binary_lcase#x) +Project [utf8_binary#x, utf8_lcase#x] ++- Join Inner, (utf8_lcase#x = utf8_lcase#x) :- SubqueryAlias l : +- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + : +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet +- SubqueryAlias r +- SubqueryAlias spark_catalog.default.t1 - +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet -- !query -create table t2(utf8_binary string collate utf8_binary, utf8_binary_lcase string collate utf8_binary_lcase) using parquet +create table t2(utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet -- !query analysis CreateDataSourceTableCommand `spark_catalog`.`default`.`t2`, false @@ -117,28 +117,28 @@ CreateDataSourceTableCommand `spark_catalog`.`default`.`t2`, false -- !query insert into t2 values('aaa', 'aaa') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t2, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t2], Append, `spark_catalog`.`default`.`t2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t2), [utf8_binary, utf8_binary_lcase] -+- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_BINARY_LCASE) AS utf8_binary_lcase#x] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t2, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t2], Append, `spark_catalog`.`default`.`t2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t2), [utf8_binary, utf8_lcase] ++- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_LCASE) AS utf8_lcase#x] +- LocalRelation [col1#x, col2#x] -- !query insert into t2 values('bbb', 'bbb') -- !query analysis -InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t2, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t2], Append, `spark_catalog`.`default`.`t2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t2), [utf8_binary, utf8_binary_lcase] -+- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_BINARY_LCASE) AS utf8_binary_lcase#x] +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t2, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t2], Append, `spark_catalog`.`default`.`t2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t2), [utf8_binary, utf8_lcase] ++- Project [cast(col1#x as string) AS utf8_binary#x, cast(col2#x as string collate UTF8_LCASE) AS utf8_lcase#x] +- LocalRelation [col1#x, col2#x] -- !query -select * from t1 anti join t2 on t1.utf8_binary_lcase = t2.utf8_binary_lcase +select * from t1 anti join t2 on t1.utf8_lcase = t2.utf8_lcase -- !query analysis -Project [utf8_binary#x, 
utf8_binary_lcase#x] -+- Join LeftAnti, (utf8_binary_lcase#x = utf8_binary_lcase#x) +Project [utf8_binary#x, utf8_lcase#x] ++- Join LeftAnti, (utf8_lcase#x = utf8_lcase#x) :- SubqueryAlias spark_catalog.default.t1 - : +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_binary_lcase#x] parquet + : +- Relation spark_catalog.default.t1[utf8_binary#x,utf8_lcase#x] parquet +- SubqueryAlias spark_catalog.default.t2 - +- Relation spark_catalog.default.t2[utf8_binary#x,utf8_binary_lcase#x] parquet + +- Relation spark_catalog.default.t2[utf8_binary#x,utf8_lcase#x] parquet -- !query @@ -156,75 +156,75 @@ DropTable false, false -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query analysis Except false -:- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] +:- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] : +- LocalRelation [col1#x] -+- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] ++- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] +- LocalRelation [col1#x] -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except all select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except all select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query analysis Except All true -:- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] +:- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] : +- LocalRelation [col1#x] -+- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] ++- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] +- LocalRelation [col1#x] -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query analysis Distinct +- Union false, false - :- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] + :- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] : +- LocalRelation [col1#x] - +- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] + +- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] +- LocalRelation [col1#x] -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union all select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union all select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query analysis Union false, false -:- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] +:- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] : +- LocalRelation [col1#x] -+- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] ++- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] +- LocalRelation [col1#x] -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'), ('BBB'), ('zzz'), 
('ZZZ') intersect select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') intersect select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query analysis Intersect false -:- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] +:- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] : +- LocalRelation [col1#x] -+- Project [collate(col1#x, utf8_binary_lcase) AS collate(col1)#x] ++- Project [collate(col1#x, utf8_lcase) AS collate(col1)#x] +- LocalRelation [col1#x] -- !query -create table t1 (c1 struct<utf8_binary: string collate utf8_binary, utf8_binary_lcase: string collate utf8_binary_lcase>) USING PARQUET +create table t1 (c1 struct<utf8_binary: string collate utf8_binary, utf8_lcase: string collate utf8_lcase>) USING PARQUET -- !query analysis CreateDataSourceTableCommand `spark_catalog`.`default`.`t1`, false -- !query -insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')) +insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_lcase', 'aaa')) -- !query analysis InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [c1] -+- Project [named_struct(utf8_binary, col1#x.utf8_binary, utf8_binary_lcase, cast(col1#x.utf8_binary_lcase as string collate UTF8_BINARY_LCASE)) AS c1#x] ++- Project [named_struct(utf8_binary, col1#x.utf8_binary, utf8_lcase, cast(col1#x.utf8_lcase as string collate UTF8_LCASE)) AS c1#x] +- LocalRelation [col1#x] -- !query -insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_binary_lcase', 'AAA')) +insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_lcase', 'AAA')) -- !query analysis InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [c1] -+- Project [named_struct(utf8_binary, col1#x.utf8_binary, utf8_binary_lcase, cast(col1#x.utf8_binary_lcase as string collate UTF8_BINARY_LCASE)) AS c1#x] ++- Project [named_struct(utf8_binary, col1#x.utf8_binary, utf8_lcase, cast(col1#x.utf8_lcase as string collate UTF8_LCASE)) AS c1#x] +- LocalRelation [col1#x] @@ -237,9 +237,9 @@ Aggregate [c1#x.utf8_binary], [count(1) AS count(1)#xL] -- !query -select count(*) from t1 group by c1.utf8_binary_lcase +select count(*) from t1 group by c1.utf8_lcase -- !query analysis -Aggregate [c1#x.utf8_binary_lcase], [count(1) AS count(1)#xL] +Aggregate [c1#x.utf8_lcase], [count(1) AS count(1)#xL] +- SubqueryAlias spark_catalog.default.t1 +- Relation spark_catalog.default.t1[c1#x] parquet @@ -252,63 +252,140 @@ DropTable false, false -- !query -select array_contains(ARRAY('aaa' collate utf8_binary_lcase),'AAA' collate utf8_binary_lcase) +select array_contains(ARRAY('aaa' collate utf8_lcase),'AAA' collate utf8_lcase) -- !query analysis -Project [array_contains(array(collate(aaa, utf8_binary_lcase)), collate(AAA, utf8_binary_lcase)) AS array_contains(array(collate(aaa)), collate(AAA))#x] +Project [array_contains(array(collate(aaa, utf8_lcase)), collate(AAA, utf8_lcase)) AS array_contains(array(collate(aaa)), collate(AAA))#x] +- OneRowRelation -- !query -select array_position(ARRAY('aaa' collate utf8_binary_lcase, 'bbb' collate utf8_binary_lcase),'BBB' collate
utf8_binary_lcase) +select array_position(ARRAY('aaa' collate utf8_lcase, 'bbb' collate utf8_lcase),'BBB' collate utf8_lcase) -- !query analysis -Project [array_position(array(collate(aaa, utf8_binary_lcase), collate(bbb, utf8_binary_lcase)), collate(BBB, utf8_binary_lcase)) AS array_position(array(collate(aaa), collate(bbb)), collate(BBB))#xL] +Project [array_position(array(collate(aaa, utf8_lcase), collate(bbb, utf8_lcase)), collate(BBB, utf8_lcase)) AS array_position(array(collate(aaa), collate(bbb)), collate(BBB))#xL] +- OneRowRelation -- !query -select nullif('aaa' COLLATE utf8_binary_lcase, 'AAA' COLLATE utf8_binary_lcase) +select nullif('aaa' COLLATE utf8_lcase, 'AAA' COLLATE utf8_lcase) -- !query analysis -Project [nullif(collate(aaa, utf8_binary_lcase), collate(AAA, utf8_binary_lcase)) AS nullif(collate(aaa), collate(AAA))#x] +Project [nullif(collate(aaa, utf8_lcase), collate(AAA, utf8_lcase)) AS nullif(collate(aaa), collate(AAA))#x] +- OneRowRelation -- !query -select least('aaa' COLLATE utf8_binary_lcase, 'AAA' collate utf8_binary_lcase, 'a' collate utf8_binary_lcase) +select least('aaa' COLLATE utf8_lcase, 'AAA' collate utf8_lcase, 'a' collate utf8_lcase) -- !query analysis -Project [least(collate(aaa, utf8_binary_lcase), collate(AAA, utf8_binary_lcase), collate(a, utf8_binary_lcase)) AS least(collate(aaa), collate(AAA), collate(a))#x] +Project [least(collate(aaa, utf8_lcase), collate(AAA, utf8_lcase), collate(a, utf8_lcase)) AS least(collate(aaa), collate(AAA), collate(a))#x] +- OneRowRelation -- !query -select arrays_overlap(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)) +select arrays_overlap(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)) -- !query analysis -Project [arrays_overlap(array(collate(aaa, utf8_binary_lcase)), array(collate(AAA, utf8_binary_lcase))) AS arrays_overlap(array(collate(aaa)), array(collate(AAA)))#x] +Project [arrays_overlap(array(collate(aaa, utf8_lcase)), array(collate(AAA, utf8_lcase))) AS arrays_overlap(array(collate(aaa)), array(collate(AAA)))#x] +- OneRowRelation -- !query -select array_distinct(array('aaa' collate utf8_binary_lcase, 'AAA' collate utf8_binary_lcase)) +select array_distinct(array('aaa' collate utf8_lcase, 'AAA' collate utf8_lcase)) -- !query analysis -Project [array_distinct(array(collate(aaa, utf8_binary_lcase), collate(AAA, utf8_binary_lcase))) AS array_distinct(array(collate(aaa), collate(AAA)))#x] +Project [array_distinct(array(collate(aaa, utf8_lcase), collate(AAA, utf8_lcase))) AS array_distinct(array(collate(aaa), collate(AAA)))#x] +- OneRowRelation -- !query -select array_union(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)) +select array_union(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)) -- !query analysis -Project [array_union(array(collate(aaa, utf8_binary_lcase)), array(collate(AAA, utf8_binary_lcase))) AS array_union(array(collate(aaa)), array(collate(AAA)))#x] +Project [array_union(array(collate(aaa, utf8_lcase)), array(collate(AAA, utf8_lcase))) AS array_union(array(collate(aaa)), array(collate(AAA)))#x] +- OneRowRelation -- !query -select array_intersect(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)) +select array_intersect(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)) -- !query analysis -Project [array_intersect(array(collate(aaa, utf8_binary_lcase)), array(collate(AAA, utf8_binary_lcase))) AS array_intersect(array(collate(aaa)), array(collate(AAA)))#x] 
+Project [array_intersect(array(collate(aaa, utf8_lcase)), array(collate(AAA, utf8_lcase))) AS array_intersect(array(collate(aaa)), array(collate(AAA)))#x] +- OneRowRelation -- !query -select array_except(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)) +select array_except(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)) -- !query analysis -Project [array_except(array(collate(aaa, utf8_binary_lcase)), array(collate(AAA, utf8_binary_lcase))) AS array_except(array(collate(aaa)), array(collate(AAA)))#x] +Project [array_except(array(collate(aaa, utf8_lcase)), array(collate(AAA, utf8_lcase))) AS array_except(array(collate(aaa)), array(collate(AAA)))#x] ++- OneRowRelation + + +-- !query +select 'a' collate unicode < 'A' +-- !query analysis +Project [(collate(a, unicode) < cast(A as string collate UNICODE)) AS (collate(a) < A)#x] ++- OneRowRelation + + +-- !query +select 'a' collate unicode_ci = 'A' +-- !query analysis +Project [(collate(a, unicode_ci) = cast(A as string collate UNICODE_CI)) AS (collate(a) = A)#x] ++- OneRowRelation + + +-- !query +select 'a' collate unicode_ai = 'å' +-- !query analysis +Project [(collate(a, unicode_ai) = cast(å as string collate UNICODE_AI)) AS (collate(a) = å)#x] ++- OneRowRelation + + +-- !query +select 'a' collate unicode_ci_ai = 'Å' +-- !query analysis +Project [(collate(a, unicode_ci_ai) = cast(Å as string collate UNICODE_CI_AI)) AS (collate(a) = Å)#x] ++- OneRowRelation + + +-- !query +select 'a' collate en < 'A' +-- !query analysis +Project [(collate(a, en) < cast(A as string collate en)) AS (collate(a) < A)#x] ++- OneRowRelation + + +-- !query +select 'a' collate en_ci = 'A' +-- !query analysis +Project [(collate(a, en_ci) = cast(A as string collate en_CI)) AS (collate(a) = A)#x] ++- OneRowRelation + + +-- !query +select 'a' collate en_ai = 'å' +-- !query analysis +Project [(collate(a, en_ai) = cast(å as string collate en_AI)) AS (collate(a) = å)#x] ++- OneRowRelation + + +-- !query +select 'a' collate en_ci_ai = 'Å' +-- !query analysis +Project [(collate(a, en_ci_ai) = cast(Å as string collate en_CI_AI)) AS (collate(a) = Å)#x] ++- OneRowRelation + + +-- !query +select 'Kypper' collate sv < 'Köpfe' +-- !query analysis +Project [(collate(Kypper, sv) < cast(Köpfe as string collate sv)) AS (collate(Kypper) < Köpfe)#x] ++- OneRowRelation + + +-- !query +select 'Kypper' collate de > 'Köpfe' +-- !query analysis +Project [(collate(Kypper, de) > cast(Köpfe as string collate de)) AS (collate(Kypper) > Köpfe)#x] ++- OneRowRelation + + +-- !query +select 'I' collate tr_ci = 'ı' +-- !query analysis +Project [(collate(I, tr_ci) = cast(ı as string collate tr_CI)) AS (collate(I) = ı)#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out index 39ab8aa835c48..b3bfec1fe3a8e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW v1 AS VALUES (1, 1, 1), (2, 2, 1) AS t(a, b, k) -- !query analysis -CreateViewCommand `v1`, VALUES (1, 1, 1), (2, 2, 1) AS t(a, b, k), false, false, LocalTempView, true +CreateViewCommand `v1`, VALUES (1, 1, 1), (2, 2, 1) AS t(a, b, k), false, false, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [a#x, b#x, k#x] @@ -10,7 +10,7 
@@ CreateViewCommand `v1`, VALUES (1, 1, 1), (2, 2, 1) AS t(a, b, k), false, false, -- !query CREATE TEMPORARY VIEW v2 AS VALUES (1, 1, 1), (2, 2, 1) AS t(x, y, all) -- !query analysis -CreateViewCommand `v2`, VALUES (1, 1, 1), (2, 2, 1) AS t(x, y, all), false, false, LocalTempView, true +CreateViewCommand `v2`, VALUES (1, 1, 1), (2, 2, 1) AS t(x, y, all), false, false, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [x#x, y#x, all#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-sort.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-sort.sql.out index e4f6a5935dc66..e6fc6b3cf0c8a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-sort.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-sort.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW v1 AS VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, k) -- !query analysis -CreateViewCommand `v1`, VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, k), false, false, LocalTempView, true +CreateViewCommand `v1`, VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, k), false, false, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [a#x, b#x, k#x] @@ -10,7 +10,7 @@ CreateViewCommand `v1`, VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, k), false, false, -- !query CREATE TEMPORARY VIEW v2 AS VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, all) -- !query analysis -CreateViewCommand `v2`, VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, all), false, false, LocalTempView, true +CreateViewCommand `v2`, VALUES (1, 2, 2), (2, 1, 1) AS t(a, b, all), false, false, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [a#x, b#x, all#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/columnresolution-negative.sql.out index 7348313c970c4..f16d42fac7226 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/columnresolution-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/columnresolution-negative.sql.out @@ -387,7 +387,7 @@ SetCatalogAndNamespace -- !query CREATE VIEW v1 AS SELECT * FROM t1 -- !query analysis -CreateViewCommand `spark_catalog`.`mydb1`.`v1`, SELECT * FROM t1, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`mydb1`.`v1`, SELECT * FROM t1, false, false, PersistedView, COMPENSATION, true +- Project [i1#x] +- SubqueryAlias spark_catalog.mydb1.t1 +- Relation spark_catalog.mydb1.t1[i1#x] parquet @@ -435,7 +435,7 @@ SetCatalogAndNamespace -- !query CREATE TEMP VIEW v2 AS SELECT * FROM t1 -- !query analysis -CreateViewCommand `v2`, SELECT * FROM t1, false, false, LocalTempView, true +CreateViewCommand `v2`, SELECT * FROM t1, false, false, LocalTempView, UNSUPPORTED, true +- Project [i1#x] +- SubqueryAlias spark_catalog.mydb2.t1 +- Relation spark_catalog.mydb2.t1[i1#x] parquet diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/columnresolution-views.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/columnresolution-views.sql.out index ea852537903ee..96f6469a8fcb5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/columnresolution-views.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/columnresolution-views.sql.out @@ -2,7 +2,7 @@ -- !query CREATE OR REPLACE TEMPORARY VIEW view1 AS SELECT 2 AS i1 -- !query analysis -CreateViewCommand `view1`, SELECT 2 AS i1, false, true, 
LocalTempView, true +CreateViewCommand `view1`, SELECT 2 AS i1, false, true, LocalTempView, UNSUPPORTED, true +- Project [2 AS i1#x] +- OneRowRelation @@ -84,7 +84,7 @@ DropTempViewCommand view1 -- !query CREATE OR REPLACE GLOBAL TEMPORARY VIEW view1 as SELECT 1 as i1 -- !query analysis -CreateViewCommand `view1`, SELECT 1 as i1, false, true, GlobalTempView, true +CreateViewCommand `view1`, SELECT 1 as i1, false, true, GlobalTempView, UNSUPPORTED, true +- Project [1 AS i1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/count.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/count.sql.out index acfe447d8bf4e..732b714615792 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/count.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/count.sql.out @@ -6,7 +6,7 @@ AS testData(a, b) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) -AS testData(a, b), false, true, LocalTempView, true +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cross-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cross-join.sql.out index 6a1fec839af2b..818093ab4ca66 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cross-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cross-join.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `nt1`, select * from values ("one", 1), ("two", 2), ("three", 3) - as nt1(k, v1), false, false, LocalTempView, true + as nt1(k, v1), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- LocalRelation [k#x, v1#x] @@ -27,7 +27,7 @@ CreateViewCommand `nt2`, select * from values ("one", 1), ("two", 22), ("one", 5) - as nt2(k, v2), false, false, LocalTempView, true + as nt2(k, v2), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v2#x] +- SubqueryAlias nt2 +- LocalRelation [k#x, v2#x] @@ -142,7 +142,7 @@ Project [key#x, key#x] -- !query create temporary view A(a, va) as select * from nt1 -- !query analysis -CreateViewCommand `A`, [(a,None), (va,None)], select * from nt1, false, false, LocalTempView, true +CreateViewCommand `A`, [(a,None), (va,None)], select * from nt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- View (`nt1`, [k#x, v1#x]) @@ -155,7 +155,7 @@ CreateViewCommand `A`, [(a,None), (va,None)], select * from nt1, false, false, L -- !query create temporary view B(b, vb) as select * from nt1 -- !query analysis -CreateViewCommand `B`, [(b,None), (vb,None)], select * from nt1, false, false, LocalTempView, true +CreateViewCommand `B`, [(b,None), (vb,None)], select * from nt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- View (`nt1`, [k#x, v1#x]) @@ -168,7 +168,7 @@ CreateViewCommand `B`, [(b,None), (vb,None)], select * from nt1, false, false, L -- !query create temporary view C(c, vc) as select * from nt1 -- !query analysis -CreateViewCommand `C`, [(c,None), (vc,None)], select * from nt1, false, false, LocalTempView, true +CreateViewCommand `C`, [(c,None), (vc,None)], select * from nt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- View (`nt1`, [k#x, v1#x]) @@ -181,7 +181,7 @@ CreateViewCommand `C`, [(c,None), 
(vc,None)], select * from nt1, false, false, L -- !query create temporary view D(d, vd) as select * from nt1 -- !query analysis -CreateViewCommand `D`, [(d,None), (vd,None)], select * from nt1, false, false, LocalTempView, true +CreateViewCommand `D`, [(d,None), (vd,None)], select * from nt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- View (`nt1`, [k#x, v1#x]) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/csv-functions.sql.out index d12644b907123..4149f5f09947c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/csv-functions.sql.out @@ -173,7 +173,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query CREATE TEMPORARY VIEW csvTable(csvField, a) AS SELECT * FROM VALUES ('1,abc', 'a') -- !query analysis -CreateViewCommand `csvTable`, [(csvField,None), (a,None)], SELECT * FROM VALUES ('1,abc', 'a'), false, false, LocalTempView, true +CreateViewCommand `csvTable`, [(csvField,None), (a,None)], SELECT * FROM VALUES ('1,abc', 'a'), false, false, LocalTempView, UNSUPPORTED, true +- Project [col1#x, col2#x] +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out index 4aac75ec45a93..0b539267e720f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-command.sql.out @@ -24,7 +24,7 @@ Project [col#x] -- !query CREATE TEMPORARY VIEW cte_view AS WITH s AS (SELECT 42 AS col) SELECT * FROM s -- !query analysis -CreateViewCommand `cte_view`, WITH s AS (SELECT 42 AS col) SELECT * FROM s, false, false, LocalTempView, true +CreateViewCommand `cte_view`, WITH s AS (SELECT 42 AS col) SELECT * FROM s, false, false, LocalTempView, UNSUPPORTED, true +- WithCTE :- CTERelationDef xxxx, false : +- SubqueryAlias s diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out index 594a30b054edd..f9b78e94236fb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-legacy.sql.out @@ -43,6 +43,30 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] +- OneRowRelation +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [] AS scalarsubquery()#x] +: +- Project [1 AS 1#x] +: +- OneRowRelation ++- Range (0, 1, step=1) + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL] +: +- Project [outer(id#xL)] +: +- OneRowRelation ++- Range (0, 1, step=1) + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out index f1a302b06f2a8..3a9fc5ea1297f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out @@ -58,6 +58,40 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] +- OneRowRelation +-- !query 
+SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias unreferenced +: : +- Project [outer(id#xL)] +: : +- OneRowRelation +: +- Project [1 AS 1#x] +: +- OneRowRelation ++- Range (0, 1, step=1) + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias unreferenced +: : +- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [outer(id#xL)] +: +- OneRowRelation ++- Range (0, 1, step=1) + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out index 6e55c6fa83cd9..e8640c3cbb6bd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out @@ -58,6 +58,40 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] +- OneRowRelation +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#x] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias unreferenced +: : +- Project [outer(id#xL)] +: : +- OneRowRelation +: +- Project [1 AS 1#x] +: +- OneRowRelation ++- Range (0, 1, step=1) + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query analysis +Project [scalar-subquery#x [id#xL] AS scalarsubquery(id)#xL] +: +- WithCTE +: :- CTERelationDef xxxx, false +: : +- SubqueryAlias unreferenced +: : +- Project [1 AS 1#x] +: : +- OneRowRelation +: +- Project [outer(id#xL)] +: +- OneRowRelation ++- Range (0, 1, step=1) + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out index 1d9d812875c44..155308ee0d7ea 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out @@ -2,7 +2,7 @@ -- !query create temporary view t as select * from values 0, 1, 2 as t(id) -- !query analysis -CreateViewCommand `t`, select * from values 0, 1, 2 as t(id), false, false, LocalTempView, true +CreateViewCommand `t`, select * from values 0, 1, 2 as t(id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x] +- SubqueryAlias t +- LocalRelation [id#x] @@ -11,7 +11,7 @@ CreateViewCommand `t`, select * from values 0, 1, 2 as t(id), false, false, Loca -- !query create temporary view t2 as select * from values 0, 1 as t(id) -- !query analysis -CreateViewCommand `t2`, select * from values 0, 1 as t(id), false, false, LocalTempView, true +CreateViewCommand `t2`, select * from values 0, 1 as t(id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x] +- SubqueryAlias t +- LocalRelation [id#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/date.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/date.sql.out index 3d20b9641f99d..48137e06467e8 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/date.sql.out @@ -2,7 +2,7 @@ -- !query 
create temporary view date_view as select '2011-11-11' date_str, '1' int_str -- !query analysis -CreateViewCommand `date_view`, select '2011-11-11' date_str, '1' int_str, false, false, LocalTempView, true +CreateViewCommand `date_view`, select '2011-11-11' date_str, '1' int_str, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-11-11 AS date_str#x, 1 AS int_str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out index bc33537b3a8e5..7c5f1260b6487 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting-legacy.sql.out @@ -16,7 +16,7 @@ CreateViewCommand `v`, select col from values (timestamp '1996-04-01 00:33:33.123Australia/Darwin'), (timestamp '2018-11-17 13:33:33.123Z'), (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'), - (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col), false, false, LocalTempView, true + (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col), false, false, LocalTempView, UNSUPPORTED, true +- Project [col#x] +- SubqueryAlias t +- LocalRelation [col#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out index bc33537b3a8e5..7c5f1260b6487 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-formatting.sql.out @@ -16,7 +16,7 @@ CreateViewCommand `v`, select col from values (timestamp '1996-04-01 00:33:33.123Australia/Darwin'), (timestamp '2018-11-17 13:33:33.123Z'), (timestamp '2020-01-01 01:33:33.123Asia/Shanghai'), - (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col), false, false, LocalTempView, true + (timestamp '2100-01-01 01:33:33.123America/Los_Angeles') t(col), false, false, LocalTempView, UNSUPPORTED, true +- Project [col#x] +- SubqueryAlias t +- LocalRelation [col#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-legacy.sql.out index 12a15cc9b8967..1e49f4df8267a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/datetime-legacy.sql.out @@ -2,7 +2,7 @@ -- !query create temporary view date_view as select '2011-11-11' date_str, '1' int_str -- !query analysis -CreateViewCommand `date_view`, select '2011-11-11' date_str, '1' int_str, false, false, LocalTempView, true +CreateViewCommand `date_view`, select '2011-11-11' date_str, '1' int_str, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-11-11 AS date_str#x, 1 AS int_str#x] +- OneRowRelation @@ -1267,7 +1267,7 @@ create temporary view ttf1 as select * from values CreateViewCommand `ttf1`, select * from values (1, 2), (2, 3) - as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, true + as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, UNSUPPORTED, true +- Project [current_date#x, current_timestamp#x] +- SubqueryAlias ttf1 +- LocalRelation [current_date#x, current_timestamp#x] @@ -1294,7 +1294,7 @@ create temporary view ttf2 as select * from values CreateViewCommand `ttf2`, 
select * from values (1, 2), (2, 3) - as ttf2(a, b), false, false, LocalTempView, true + as ttf2(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias ttf2 +- LocalRelation [a#x, b#x] @@ -1642,7 +1642,7 @@ select null - timestamp'2011-11-11 11:11:11' -- !query create temporary view ts_view as select '2011-11-11 11:11:11' str -- !query analysis -CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, true +CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-11-11 11:11:11 AS str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/decimalArithmeticOperations.sql.out index 5e6b27bc84411..4a2199033f819 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/decimalArithmeticOperations.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1.0 as a, 0.0 as b -- !query analysis -CreateViewCommand `t`, SELECT 1.0 as a, 0.0 as b, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1.0 as a, 0.0 as b, false, false, LocalTempView, UNSUPPORTED, true +- Project [1.0 AS a#x, 0.0 AS b#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out index b9fe5c1d74c1d..ff0935bfd03ec 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out @@ -12,7 +12,7 @@ CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false -- !query CREATE TEMPORARY VIEW temp_v AS SELECT * FROM t -- !query analysis -CreateViewCommand `temp_v`, SELECT * FROM t, false, false, LocalTempView, true +CreateViewCommand `temp_v`, SELECT * FROM t, false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x, c#x, d#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[a#x,b#x,c#x,d#x] parquet @@ -32,7 +32,7 @@ CreateTempViewUsing [tableIdent:`temp_Data_Source_View` replace:false provider:o -- !query CREATE VIEW v AS SELECT * FROM t -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x, c#x, d#x] +- SubqueryAlias spark_catalog.default.t +- Relation spark_catalog.default.t[a#x,b#x,c#x,d#x] parquet diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/double-quoted-identifiers.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/double-quoted-identifiers.sql.out index d73b72eca3e21..f7b0e3370f9f4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/double-quoted-identifiers.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/double-quoted-identifiers.sql.out @@ -276,7 +276,7 @@ Project [hello AS hello#x] -- !query CREATE TEMPORARY VIEW v(c1 COMMENT "hello") AS SELECT 1 -- !query analysis -CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, true +CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation @@ -304,7 
+304,7 @@ Project [hello AS hello#x] -- !query CREATE TEMPORARY VIEW v(c1 COMMENT 'hello') AS SELECT 1 -- !query analysis -CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, true +CreateViewCommand `v`, [(c1,Some(hello))], SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/except-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/except-all.sql.out index d16fc0beaf5da..3972cde51bb72 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/except-all.sql.out @@ -4,7 +4,7 @@ CREATE TEMPORARY VIEW tab1 AS SELECT * FROM VALUES (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1) -- !query analysis CreateViewCommand `tab1`, SELECT * FROM VALUES - (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1), false, false, LocalTempView, true + (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1), false, false, LocalTempView, UNSUPPORTED, true +- Project [c1#x] +- SubqueryAlias tab1 +- LocalRelation [c1#x] @@ -15,7 +15,7 @@ CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES (1), (2), (2), (3), (5), (5), (null) AS tab2(c1) -- !query analysis CreateViewCommand `tab2`, SELECT * FROM VALUES - (1), (2), (2), (3), (5), (5), (null) AS tab2(c1), false, false, LocalTempView, true + (1), (2), (2), (3), (5), (5), (null) AS tab2(c1), false, false, LocalTempView, UNSUPPORTED, true +- Project [c1#x] +- SubqueryAlias tab2 +- LocalRelation [c1#x] @@ -36,7 +36,7 @@ CreateViewCommand `tab3`, SELECT * FROM VALUES (1, 3), (2, 3), (2, 2) - AS tab3(k, v), false, false, LocalTempView, true + AS tab3(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias tab3 +- LocalRelation [k#x, v#x] @@ -57,7 +57,7 @@ CreateViewCommand `tab4`, SELECT * FROM VALUES (2, 2), (2, 2), (2, 20) - AS tab4(k, v), false, false, LocalTempView, true + AS tab4(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias tab4 +- LocalRelation [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/except.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/except.sql.out index 6244a8bed5609..9828956ab843d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/except.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/except.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `t1`, select * from values ("two", 2), ("three", 3), ("one", NULL) - as t1(k, v), false, false, LocalTempView, true + as t1(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias t1 +- LocalRelation [k#x, v#x] @@ -33,7 +33,7 @@ CreateViewCommand `t2`, select * from values ("one", 5), ("one", NULL), (NULL, 5) - as t2(k, v), false, false, LocalTempView, true + as t2(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias t2 +- LocalRelation [k#x, v#x] @@ -250,7 +250,7 @@ Except false -- !query CREATE OR REPLACE TEMPORARY VIEW t3 AS VALUES (decimal(1)) tbl(v) -- !query analysis -CreateViewCommand `t3`, VALUES (decimal(1)) tbl(v), false, true, LocalTempView, true +CreateViewCommand `t3`, VALUES (decimal(1)) tbl(v), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias tbl +- LocalRelation [v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate.sql.out index 1597a29a4d6c4..78bf1ccb1678c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/execute-immediate.sql.out @@ -18,7 +18,7 @@ CreateViewCommand `tbl_view`, SELECT * FROM VALUES (50, 'name5', named_struct('f1', 5, 's2', named_struct('f2', 505, 'f3', 'e'))), (60, 'name6', named_struct('f1', 6, 's2', named_struct('f2', 606, 'f3', 'f'))), (70, 'name7', named_struct('f1', 7, 's2', named_struct('f2', 707, 'f3', 'g'))) -AS tbl_view(id, name, data), false, false, LocalTempView, true +AS tbl_view(id, name, data), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view +- LocalRelation [id#x, name#x, data#x] @@ -54,7 +54,7 @@ SetCommand (spark.sql.ansi.enabled,Some(true)) -- !query EXECUTE IMMEDIATE 'CREATE TEMPORARY VIEW IDENTIFIER(:tblName) AS SELECT id, name FROM tbl_view' USING 'tbl_view_tmp' as tblName -- !query analysis -CreateViewCommand `tbl_view_tmp`, SELECT id, name FROM tbl_view, false, false, LocalTempView, true +CreateViewCommand `tbl_view_tmp`, SELECT id, name FROM tbl_view, false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, name#x] +- SubqueryAlias tbl_view +- View (`tbl_view`, [id#x, name#x, data#x]) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out index 26f40c1011140..3aea86b232cba 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/explain-aqe.sql.out @@ -162,7 +162,7 @@ EXPLAIN FORMATTED CREATE VIEW explain_view AS SELECT key, val FROM explain_temp1 -- !query analysis -ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, FormattedMode +ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, FormattedMode -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out index 26f40c1011140..3aea86b232cba 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/explain.sql.out @@ -162,7 +162,7 @@ EXPLAIN FORMATTED CREATE VIEW explain_view AS SELECT key, val FROM explain_temp1 -- !query analysis -ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, FormattedMode +ExplainCommand 'CreateView SELECT key, val FROM explain_temp1, false, false, COMPENSATION, FormattedMode -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/extract.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/extract.sql.out index 886d1f8616c77..c42f2db3f0f9f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/extract.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/extract.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS select '2011-05-06 07:08:09.1234567' as c, to_timestamp_ntz('2011-05-06 07:08:09.1234567') as ntz, interval 10 year 20 month as i, interval 30 day 40 hour 50 minute 6.7890 second as j -- !query analysis -CreateViewCommand `t`, select '2011-05-06 07:08:09.1234567' as c, to_timestamp_ntz('2011-05-06 07:08:09.1234567') as ntz, interval 10 year 20 month as i, interval 30 day 40 hour 50 
minute 6.7890 second as j, false, false, LocalTempView, true +CreateViewCommand `t`, select '2011-05-06 07:08:09.1234567' as c, to_timestamp_ntz('2011-05-06 07:08:09.1234567') as ntz, interval 10 year 20 month as i, interval 30 day 40 hour 50 minute 6.7890 second as j, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-05-06 07:08:09.1234567 AS c#x, to_timestamp_ntz(2011-05-06 07:08:09.1234567, None, TimestampNTZType, Some(America/Los_Angeles), false) AS ntz#x, INTERVAL '11-8' YEAR TO MONTH AS i#x, INTERVAL '31 16:50:06.789' DAY TO SECOND AS j#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out index b692c5c374632..cdb6372bec099 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-analytics.sql.out @@ -6,7 +6,7 @@ AS testData(a, b) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) -AS testData(a, b), false, true, LocalTempView, true +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] @@ -75,7 +75,7 @@ AS courseSales(course, year, earnings) -- !query analysis CreateViewCommand `courseSales`, SELECT * FROM VALUES ("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) -AS courseSales(course, year, earnings), false, true, LocalTempView, true +AS courseSales(course, year, earnings), false, true, LocalTempView, UNSUPPORTED, true +- Project [course#x, year#x, earnings#x] +- SubqueryAlias courseSales +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all-duckdb.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all-duckdb.sql.out index 13649989b4127..5f1cbea709891 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all-duckdb.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all-duckdb.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `integers`, select * from values (0, 2), (1, 3), (1, NULL) - as integers(g, i), false, false, LocalTempView, true + as integers(g, i), false, false, LocalTempView, UNSUPPORTED, true +- Project [g#x, i#x] +- SubqueryAlias integers +- LocalRelation [g#x, i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all-mosha.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all-mosha.sql.out index 76ef52ecd8b63..da3f3de3fb448 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all-mosha.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all-mosha.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `stuff`, select * from values (42, 9.75, 'hello world', '1970-08-07', '13.37', array(1,20,300)), (1337, 1.2345, 'oh no', '2000-01-01', '42.0', array(4000,50000,600000)), (42, 13.37, 'test', '1970-08-07', '1234567890', array(7000000,80000000,900000000)) - as stuff(i, f, s, t, d, a), false, false, LocalTempView, true + as stuff(i, f, s, t, d, a), false, false, LocalTempView, UNSUPPORTED, true +- Project [i#x, f#x, s#x, t#x, d#x, a#x] +- SubqueryAlias stuff +- LocalRelation [i#x, f#x, s#x, t#x, d#x, a#x] diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all.sql.out index 79de44697b5f6..c2c77db6c3b1c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-all.sql.out @@ -18,7 +18,7 @@ CreateViewCommand `data`, select * from values ("China", "Shanghai", "Shanghaiese", 5, 15.0), ("Korea", "Seoul", "Hyukjin", 6, 16.0), ("UK", "London", "Sean", 7, 17.0) - as data(country, city, name, id, power), false, false, LocalTempView, true + as data(country, city, name, id, power), false, false, LocalTempView, UNSUPPORTED, true +- Project [country#x, city#x, name#x, id#x, power#x] +- SubqueryAlias data +- LocalRelation [country#x, city#x, name#x, id#x, power#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-filter.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-filter.sql.out index 1e06a01dc07ea..94d39111b29ed 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-filter.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-filter.sql.out @@ -6,7 +6,7 @@ AS testData(a, b) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) -AS testData(a, b), false, true, LocalTempView, true +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] @@ -35,7 +35,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, true, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -58,7 +58,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, true, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, true, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -75,7 +75,7 @@ CreateViewCommand `FilterExpressionTestData`, SELECT * FROM VALUES (1, 2, "asd"), (3, 4, "fgh"), (5, 6, "jkl") -AS FilterExpressionTestData(num1, num2, str), false, true, LocalTempView, true +AS FilterExpressionTestData(num1, num2, str), false, true, LocalTempView, UNSUPPORTED, true +- Project [num1#x, num2#x, str#x] +- SubqueryAlias FilterExpressionTestData +- LocalRelation [num1#x, num2#x, str#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out index 6aa32ed3a8866..904b35559ced8 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out @@ -16,7 +16,7 @@ CreateViewCommand `data`, select * from values (2, 2), (3, 1), (3, 2) - as data(a, b), false, false, LocalTempView, true + as 
data(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias data +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out index 324a0a366d85e..46b40e4515260 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out @@ -6,7 +6,7 @@ AS testData(a, b) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) -AS testData(a, b), false, true, LocalTempView, true +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] @@ -273,7 +273,7 @@ CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM V (1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v) -- !query analysis CreateViewCommand `testDataHasSameNameWithAlias`, SELECT * FROM VALUES -(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v), false, true, LocalTempView, true +(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, a#x, v#x] +- SubqueryAlias testDataHasSameNameWithAlias +- LocalRelation [k#x, a#x, v#x] @@ -395,7 +395,7 @@ SELECT 1 FROM range(10) HAVING true -- !query analysis Filter cast(true as boolean) +- Aggregate [1 AS 1#x] - +- Range (0, 10, step=1, splits=None) + +- Range (0, 10, step=1) -- !query @@ -404,7 +404,7 @@ SELECT 1 FROM range(10) HAVING MAX(id) > 0 Project [1#x] +- Filter (max(id#xL)#xL > cast(0 as bigint)) +- Aggregate [1 AS 1#x, max(id#xL) AS max(id#xL)#xL] - +- Range (0, 10, step=1, splits=None) + +- Range (0, 10, step=1) -- !query @@ -435,7 +435,7 @@ SELECT 1 FROM range(10) HAVING true -- !query analysis Filter cast(true as boolean) +- Project [1 AS 1#x] - +- Range (0, 10, step=1, splits=None) + +- Range (0, 10, step=1) -- !query @@ -464,7 +464,7 @@ SELECT id FROM range(10) HAVING id > 0 -- !query analysis Filter (id#xL > cast(0 as bigint)) +- Project [id#xL] - +- Range (0, 10, step=1, splits=None) + +- Range (0, 10, step=1) -- !query @@ -486,7 +486,7 @@ CreateViewCommand `test_agg`, SELECT * FROM VALUES (2, true), (3, false), (3, null), (4, null), (4, null), - (5, null), (5, true), (5, false) AS test_agg(k, v), false, true, LocalTempView, true + (5, null), (5, true), (5, false) AS test_agg(k, v), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias test_agg +- LocalRelation [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/grouping_set.sql.out index b82275b5e3129..b73ee16c8bdef 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/grouping_set.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/grouping_set.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `grouping`, SELECT * FROM VALUES ("1", "2", "3", 1), ("4", "5", "6", 1), ("7", "8", "9", 1) - as grouping(a, b, c, d), false, false, LocalTempView, true + as grouping(a, b, c, d), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x, c#x, d#x] +- SubqueryAlias grouping +- LocalRelation [a#x, b#x, c#x, d#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/having.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/having.sql.out index 889ca3d244ccd..78cf1223da50d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/having.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `hav`, select * from values ("two", 2), ("three", 3), ("one", 5) - as hav(k, v), false, false, LocalTempView, true + as hav(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias hav +- LocalRelation [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/higher-order-functions.sql.out index ec6d7271cc235..1281b19eb2f86 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/higher-order-functions.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `nested`, values (1, array(32, 97), array(array(12, 99), array(123, 42), array(1))), (2, array(77, -76), array(array(6, 96, 65), array(-1, -2))), (3, array(12), array(array(17))) - as t(x, ys, zs), false, true, LocalTempView, true + as t(x, ys, zs), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [x#x, ys#x, zs#x] @@ -35,6 +35,26 @@ org.apache.spark.sql.AnalysisException } +-- !query +select ceil(x -> x) as v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_LAMBDA_FUNCTION_CALL.NON_HIGHER_ORDER_FUNCTION", + "sqlState" : "42K0D", + "messageParameters" : { + "class" : "org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder$" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "ceil(x -> x)" + } ] +} + + -- !query select transform(zs, z -> z) as v from nested -- !query analysis @@ -258,7 +278,7 @@ create or replace temporary view nested as values CreateViewCommand `nested`, values (1, map(1, 1, 2, 2, 3, 3)), (2, map(4, 4, 5, 5, 6, 6)) - as t(x, ys), false, true, LocalTempView, true + as t(x, ys), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [x#x, ys#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index c4221dd5773de..b3e2cd5ada950 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -198,7 +198,7 @@ Project [abs(c1#x) AS abs(c1)#x] SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1) -- !query analysis Project [id#xL] -+- Range (0, 1, step=1, splits=None) ++- Range (0, 1, step=1) -- !query @@ -394,7 +394,7 @@ DropTable true, false -- !query CREATE OR REPLACE VIEW IDENTIFIER('v')(c1) AS VALUES(1) -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`v`, [(c1,None)], VALUES(1), false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`v`, [(c1,None)], VALUES(1), false, true, PersistedView, COMPENSATION, true +- LocalRelation [col1#x] @@ -424,7 +424,7 @@ DropTableCommand `spark_catalog`.`default`.`v`, false, true, false -- !query CREATE TEMPORARY VIEW IDENTIFIER('v')(c1) AS VALUES(1) -- !query analysis -CreateViewCommand `v`, [(c1,None)], VALUES(1), false, false, LocalTempView, true +CreateViewCommand `v`, [(c1,None)], VALUES(1), false, false, 
LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x] @@ -732,7 +732,7 @@ org.apache.spark.sql.AnalysisException -- !query -CREATE TABLE IDENTIFIER(1)(c1 INT) +CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv -- !query analysis org.apache.spark.sql.AnalysisException { @@ -754,7 +754,7 @@ org.apache.spark.sql.AnalysisException -- !query -CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) +CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv -- !query analysis org.apache.spark.sql.AnalysisException { @@ -926,6 +926,65 @@ org.apache.spark.sql.catalyst.parser.ParseException } +-- !query +create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query analysis +CreateViewCommand `v1`, (select my_col from (values (1), (2), (1) as (my_col)) group by 1), false, false, LocalTempView, UNSUPPORTED, true + +- Aggregate [my_col#x], [my_col#x] + +- SubqueryAlias __auto_generated_subquery_name + +- SubqueryAlias as + +- LocalRelation [my_col#x] + + +-- !query +cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query analysis +CacheTableAsSelect t1, (select my_col from (values (1), (2), (1) as (my_col)) group by 1), false, true + +- Aggregate [my_col#x], [my_col#x] + +- SubqueryAlias __auto_generated_subquery_name + +- SubqueryAlias as + +- LocalRelation [my_col#x] + + +-- !query +create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`t2`, ErrorIfExists, [my_col] + +- Aggregate [my_col#x], [my_col#x] + +- SubqueryAlias __auto_generated_subquery_name + +- SubqueryAlias as + +- LocalRelation [my_col#x] + + +-- !query +insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1 +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t2, false, CSV, [path=file:[not included in comparison]/{warehouse_dir}/t2], Append, `spark_catalog`.`default`.`t2`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t2), [my_col] ++- Aggregate [my_col#x], [my_col#x] + +- SubqueryAlias __auto_generated_subquery_name + +- SubqueryAlias as + +- LocalRelation [my_col#x] + + +-- !query +drop view v1 +-- !query analysis +DropTempViewCommand v1 + + +-- !query +drop table t1 +-- !query analysis +DropTempViewCommand t1 + + +-- !query +drop table t2 +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t2 + + -- !query SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ilike-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ilike-all.sql.out index dc36fc2be1a27..cf9c2e12cc72e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ilike-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ilike-all.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `ilike_all_table`, SELECT * FROM (VALUES ('gOOgle', '%oo%'), ('facebook', '%OO%'), ('liNkedin', '%In')) - as t1(company, pat), false, true, LocalTempView, true + as t1(company, pat), false, true, LocalTempView, UNSUPPORTED, true +- Project [company#x, pat#x] +- SubqueryAlias t1 +- Project [col1#x AS company#x, col2#x AS pat#x] diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/ilike-any.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ilike-any.sql.out index ac25d786cda57..e20000f1463d1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ilike-any.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ilike-any.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `ilike_any_table`, SELECT * FROM (VALUES ('Google', '%Oo%'), ('FaceBook', '%oO%'), ('linkedIn', '%IN')) - as t1(company, pat), false, true, LocalTempView, true + as t1(company, pat), false, true, LocalTempView, UNSUPPORTED, true +- Project [company#x, pat#x] +- SubqueryAlias t1 +- Project [col1#x AS company#x, col2#x AS pat#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/inner-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/inner-join.sql.out index 698b1056f73ee..ae123fa61e3d1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/inner-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/inner-join.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -11,7 +11,7 @@ CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, L -- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -20,7 +20,7 @@ CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, L -- !query CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t3`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t3`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -29,7 +29,7 @@ CreateViewCommand `t3`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, fal -- !query CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t4`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t4`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -43,7 +43,7 @@ SELECT a, 'b' AS tag FROM t2 -- !query analysis CreateViewCommand `ta`, SELECT a, 'a' AS tag FROM t1 UNION ALL -SELECT a, 'b' AS tag FROM t2, false, false, LocalTempView, true +SELECT a, 'b' AS tag FROM t2, false, false, LocalTempView, UNSUPPORTED, true +- Union false, false :- Project [a#x, a AS tag#x] : +- SubqueryAlias t1 @@ -69,7 +69,7 @@ SELECT a, 'b' AS tag FROM t4 -- !query analysis CreateViewCommand `tb`, SELECT a, 'a' AS tag FROM t3 UNION ALL -SELECT a, 'b' AS tag FROM t4, false, false, LocalTempView, true +SELECT a, 'b' AS tag FROM t4, false, false, LocalTempView, UNSUPPORTED, true +- Union 
false, false :- Project [a#x, a AS tag#x] : +- SubqueryAlias t3 diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/intersect-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/intersect-all.sql.out index b4d791c2772b2..69b4001ff3481 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/intersect-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/intersect-all.sql.out @@ -18,7 +18,7 @@ CreateViewCommand `tab1`, SELECT * FROM VALUES (2, 3), (null, null), (null, null) - AS tab1(k, v), false, false, LocalTempView, true + AS tab1(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias tab1 +- LocalRelation [k#x, v#x] @@ -41,7 +41,7 @@ CreateViewCommand `tab2`, SELECT * FROM VALUES (3, 4), (null, null), (null, null) - AS tab2(k, v), false, false, LocalTempView, true + AS tab2(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias tab2 +- LocalRelation [k#x, v#x] @@ -593,7 +593,7 @@ SetCommand (spark.sql.legacy.setopsPrecedence.enabled,Some(false)) -- !query CREATE OR REPLACE TEMPORARY VIEW tab3 AS VALUES (decimal(1)), (decimal(2)) tbl3(v) -- !query analysis -CreateViewCommand `tab3`, VALUES (decimal(1)), (decimal(2)) tbl3(v), false, true, LocalTempView, true +CreateViewCommand `tab3`, VALUES (decimal(1)), (decimal(2)) tbl3(v), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias tbl3 +- LocalRelation [v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out index 783255db337a1..290e55052931d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/interval.sql.out @@ -1605,7 +1605,7 @@ Project [cast(cast(4 12:12:12 as timestamp) + INTERVAL '4 22:12' DAY TO MINUTE a -- !query create temporary view interval_view as select '1' str -- !query analysis -CreateViewCommand `interval_view`, select '1' str, false, false, LocalTempView, true +CreateViewCommand `interval_view`, select '1' str, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/join-empty-relation.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/join-empty-relation.sql.out index a4fae64952048..850255a065b3e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/join-empty-relation.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/join-empty-relation.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -11,7 +11,7 @@ CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, L -- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -20,7 +20,7 @@ CreateViewCommand 
`t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, L -- !query CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false -- !query analysis -CreateViewCommand `empty_table`, SELECT a FROM t2 WHERE false, false, false, LocalTempView, true +CreateViewCommand `empty_table`, SELECT a FROM t2 WHERE false, false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- Filter false +- SubqueryAlias t2 diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out index a7bc1de1530a9..e81ee769f57d6 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out @@ -2,28 +2,28 @@ -- !query CREATE VIEW t1(c1, c2) AS VALUES (0, 1), (1, 2) -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t1`, [(c1,None), (c2,None)], VALUES (0, 1), (1, 2), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`t1`, [(c1,None), (c2,None)], VALUES (0, 1), (1, 2), false, false, PersistedView, COMPENSATION, true +- LocalRelation [col1#x, col2#x] -- !query CREATE VIEW t2(c1, c2) AS VALUES (0, 2), (0, 3) -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t2`, [(c1,None), (c2,None)], VALUES (0, 2), (0, 3), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`t2`, [(c1,None), (c2,None)], VALUES (0, 2), (0, 3), false, false, PersistedView, COMPENSATION, true +- LocalRelation [col1#x, col2#x] -- !query CREATE VIEW t3(c1, c2) AS VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)) -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t3`, [(c1,None), (c2,None)], VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`t3`, [(c1,None), (c2,None)], VALUES (0, ARRAY(0, 1)), (1, ARRAY(2)), (2, ARRAY()), (null, ARRAY(4)), false, false, PersistedView, COMPENSATION, true +- LocalRelation [col1#x, col2#x] -- !query CREATE VIEW t4(c1, c2) AS VALUES (0, 1), (0, 2), (1, 1), (1, 3) -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t4`, [(c1,None), (c2,None)], VALUES (0, 1), (0, 2), (1, 1), (1, 3), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`t4`, [(c1,None), (c2,None)], VALUES (0, 1), (0, 2), (1, 1), (1, 3), false, false, PersistedView, COMPENSATION, true +- LocalRelation [col1#x, col2#x] @@ -2315,7 +2315,7 @@ SELECT * FROM t1, LATERAL RANGE(3) -- !query analysis Project [c1#x, c2#x, id#xL] +- LateralJoin lateral-subquery#x [], Inner - : +- Range (0, 3, step=1, splits=None) + : +- Range (0, 3, step=1) +- SubqueryAlias spark_catalog.default.t1 +- View (`spark_catalog`.`default`.`t1`, [c1#x, c2#x]) +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] @@ -2507,7 +2507,7 @@ CREATE OR REPLACE TEMPORARY VIEW array_struct(id, arr) AS VALUES CreateViewCommand `array_struct`, [(id,None), (arr,None)], VALUES (1, ARRAY(STRUCT(1, 'a'), STRUCT(2, 'b'))), (2, ARRAY()), - (3, ARRAY(STRUCT(3, 'c'))), false, true, LocalTempView, true + (3, ARRAY(STRUCT(3, 'c'))), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -2714,7 +2714,7 @@ CreateViewCommand `json_table`, [(key,None), (jstring,None)], VALUES ('3', '{"f1": 3, "f4": "4", "f3": "3", "f2": 2, "f5": 5.01}'), ('4', cast(null as string)), ('5', '{"f1": null, "f5": ""}'), - ('6', 
'[invalid JSON string]'), false, true, LocalTempView, true + ('6', '[invalid JSON string]'), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/json-functions.sql.out index 48b3bb07ef37b..0d7c6b2056231 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/json-functions.sql.out @@ -241,7 +241,7 @@ Project [c0#x, c1#x, c2#x, c3#x] -- !query CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a') -- !query analysis -CreateViewCommand `jsonTable`, [(jsonField,None), (a,None)], SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a'), false, false, LocalTempView, true +CreateViewCommand `jsonTable`, [(jsonField,None), (a,None)], SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a'), false, false, LocalTempView, UNSUPPORTED, true +- Project [col1#x, col2#x] +- LocalRelation [col1#x, col2#x] @@ -449,7 +449,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query CREATE TEMPORARY VIEW jsonTable(jsonField, a) AS SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a') -- !query analysis -CreateViewCommand `jsonTable`, [(jsonField,None), (a,None)], SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a'), false, false, LocalTempView, true +CreateViewCommand `jsonTable`, [(jsonField,None), (a,None)], SELECT * FROM VALUES ('{"a": 1, "b": 2}', 'a'), false, false, LocalTempView, UNSUPPORTED, true +- Project [col1#x, col2#x] +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/like-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/like-all.sql.out index cc5967a3fc59c..dab3e27be69aa 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/like-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/like-all.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `like_all_table`, SELECT * FROM (VALUES ('google', '%oo%'), ('facebook', '%oo%'), ('linkedin', '%in')) - as t1(company, pat), false, true, LocalTempView, true + as t1(company, pat), false, true, LocalTempView, UNSUPPORTED, true +- Project [company#x, pat#x] +- SubqueryAlias t1 +- Project [col1#x AS company#x, col2#x AS pat#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/like-any.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/like-any.sql.out index 91adb137e3679..e075fe6c0a69c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/like-any.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/like-any.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `like_any_table`, SELECT * FROM (VALUES ('google', '%oo%'), ('facebook', '%oo%'), ('linkedin', '%in')) - as t1(company, pat), false, true, LocalTempView, true + as t1(company, pat), false, true, LocalTempView, UNSUPPORTED, true +- Project [company#x, pat#x] +- SubqueryAlias t1 +- Project [col1#x AS company#x, col2#x AS pat#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/limit.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/limit.sql.out index 19307fc859927..e92dcfbc069a7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/limit.sql.out @@ -191,7 +191,7 @@ Project [id#xL] +- GlobalLimit 5 +- LocalLimit 5 +- Project [id#xL] - +- Range (0, 10, 
step=1, splits=None) + +- Range (0, 10, step=1) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/linear-regression.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/linear-regression.sql.out index fa7d0d331eda3..5f130cd1d422c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/linear-regression.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/linear-regression.sql.out @@ -6,7 +6,7 @@ AS testRegression(k, y, x) -- !query analysis CreateViewCommand `testRegression`, SELECT * FROM VALUES (1, 10, null), (2, 10, 11), (2, 20, 22), (2, 25, null), (2, 30, 35) -AS testRegression(k, y, x), false, true, LocalTempView, true +AS testRegression(k, y, x), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, y#x, x#x] +- SubqueryAlias testRegression +- LocalRelation [k#x, y#x, x#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/math.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/math.sql.out index e4dd1994b2c9e..5fe1b69352f57 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/math.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/math.sql.out @@ -431,3 +431,80 @@ SELECT conv('-9223372036854775807', 36, 10) -- !query analysis Project [conv(-9223372036854775807, 36, 10, false) AS conv(-9223372036854775807, 36, 10)#x] +- OneRowRelation + + +-- !query +SELECT BIN(0) +-- !query analysis +Project [bin(cast(0 as bigint)) AS bin(0)#x] ++- OneRowRelation + + +-- !query +SELECT BIN(25) +-- !query analysis +Project [bin(cast(25 as bigint)) AS bin(25)#x] ++- OneRowRelation + + +-- !query +SELECT BIN(25L) +-- !query analysis +Project [bin(25) AS bin(25)#x] ++- OneRowRelation + + +-- !query +SELECT BIN(25.5) +-- !query analysis +Project [bin(cast(25.5 as bigint)) AS bin(25.5)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(0Y) +-- !query analysis +Project [positive(0) AS (+ 0)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(25) +-- !query analysis +Project [positive(25) AS (+ 25)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(-25L) +-- !query analysis +Project [positive(-25) AS (+ -25)#xL] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(25.5) +-- !query analysis +Project [positive(25.5) AS (+ 25.5)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE("25.5") +-- !query analysis +Project [positive(cast(25.5 as double)) AS (+ 25.5)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE("invalid") +-- !query analysis +Project [positive(cast(invalid as double)) AS (+ invalid)#x] ++- OneRowRelation + + +-- !query +SELECT POSITIVE(null) +-- !query analysis +Project [positive(cast(null as double)) AS (+ NULL)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out index 539a348584217..e30bdf12f4a36 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/misc-functions.sql.out @@ -92,7 +92,7 @@ Project [assert_true(false, custom error message) AS assert_true(false, custom e -- !query CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) -- !query analysis -CreateViewCommand `tbl_misc`, SELECT * FROM (VALUES (1), (8), (2)) AS T(v), false, false, LocalTempView, true +CreateViewCommand `tbl_misc`, SELECT * FROM (VALUES (1), (8), (2)) AS T(v), false, false, LocalTempView, UNSUPPORTED, true +- 
Project [v#x] +- SubqueryAlias T +- Project [col1#x AS v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out index 2508b9b5fdd98..d6ecbc72a7178 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out @@ -38,7 +38,7 @@ CreateViewCommand `basic_pays`, SELECT * FROM VALUES ('Pamela Castillo','SCM',11303), ('Larry Bott','SCM',11798), ('Barry Jones','SCM',10586) -AS basic_pays(employee_name, department, salary), false, true, LocalTempView, true +AS basic_pays(employee_name, department, salary), false, true, LocalTempView, UNSUPPORTED, true +- Project [employee_name#x, department#x, salary#x] +- SubqueryAlias basic_pays +- LocalRelation [employee_name#x, department#x, salary#x] @@ -471,7 +471,7 @@ CreateViewCommand `intervals`, SELECT * FROM VALUES (2, INTERVAL '30' MONTH, INTERVAL '30' SECOND, INTERVAL '30' MINUTE), (3, INTERVAL '60' MONTH, INTERVAL '60' SECOND, INTERVAL '60' MINUTE), (4, null, null, null) -AS intervals(k, dt, ym, dt2), false, true, LocalTempView, true +AS intervals(k, dt, ym, dt2), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, dt#x, ym#x, dt2#x] +- SubqueryAlias intervals +- LocalRelation [k#x, dt#x, ym#x, dt2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/named-function-arguments.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/named-function-arguments.sql.out index f0b78dc46873b..72d9b454036d6 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/named-function-arguments.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/named-function-arguments.sql.out @@ -58,7 +58,7 @@ CreateViewCommand `t2`, select * from values ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -192,9 +192,9 @@ org.apache.spark.sql.AnalysisException -- !query CREATE OR REPLACE TEMPORARY VIEW v AS SELECT id FROM range(0, 8) -- !query analysis -CreateViewCommand `v`, SELECT id FROM range(0, 8), false, true, LocalTempView, true +CreateViewCommand `v`, SELECT id FROM range(0, 8), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#xL] - +- Range (0, 8, step=1, splits=None) + +- Range (0, 8, step=1) -- !query @@ -205,7 +205,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_TABLE_ARGUMENT", "sqlState" : "0A000", "messageParameters" : { - "treeNode" : "'Generate explode(table-argument#x []), false\n: +- SubqueryAlias v\n: +- View (`v`, [id#xL])\n: +- Project [cast(id#xL as bigint) AS id#xL]\n: +- Project [id#xL]\n: +- Range (0, 8, step=1, splits=None)\n+- OneRowRelation\n" + "treeNode" : "'Generate explode(table-argument#x []), false\n: +- SubqueryAlias v\n: +- View (`v`, [id#xL])\n: +- Project [cast(id#xL as bigint) AS id#xL]\n: +- 
Project [id#xL]\n: +- Range (0, 8, step=1)\n+- OneRowRelation\n" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out index f8da5c5196357..857c574af3d23 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/natural-join.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `nt1`, select * from values ("one", 1), ("two", 2), ("three", 3) - as nt1(k, v1), false, false, LocalTempView, true + as nt1(k, v1), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- LocalRelation [k#x, v1#x] @@ -27,7 +27,7 @@ CreateViewCommand `nt2`, select * from values ("one", 1), ("two", 22), ("one", 5) - as nt2(k, v2), false, false, LocalTempView, true + as nt2(k, v2), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v2#x] +- SubqueryAlias nt2 +- LocalRelation [k#x, v2#x] @@ -44,7 +44,7 @@ CreateViewCommand `nt3`, select * from values ("one", 4), ("two", 5), ("one", 6) - as nt3(k, v3), false, false, LocalTempView, true + as nt3(k, v3), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v3#x] +- SubqueryAlias nt3 +- LocalRelation [k#x, v3#x] @@ -61,7 +61,7 @@ CreateViewCommand `nt4`, select * from values ("one", 7), ("two", 8), ("one", 9) - as nt4(k, v4), false, false, LocalTempView, true + as nt4(k, v4), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v4#x] +- SubqueryAlias nt4 +- LocalRelation [k#x, v4#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out index b80bed6f7c2aa..6b2c60f25bae3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out @@ -13,11 +13,11 @@ SELECT -- !query analysis Project [scalar-subquery#x [] AS scalarsubquery()#xL, scalar-subquery#x [] AS scalarsubquery()#xL, scalar-subquery#x [] AS scalarsubquery()#xL] : :- Aggregate [min(id#xL) AS min(id)#xL] -: : +- Range (0, 10, step=1, splits=None) +: : +- Range (0, 10, step=1) : :- Aggregate [sum(id#xL) AS sum(id)#xL] -: : +- Range (0, 10, step=1, splits=None) +: : +- Range (0, 10, step=1) : +- Aggregate [count(distinct id#xL) AS count(DISTINCT id)#xL] -: +- Range (0, 10, step=1, splits=None) +: +- Range (0, 10, step=1) +- OneRowRelation @@ -40,15 +40,15 @@ WithCTE : +- SubqueryAlias tmp : +- Intersect false : :- Project [id#xL] -: : +- Range (0, 2, step=1, splits=None) +: : +- Range (0, 2, step=1) : +- Project [id#xL] -: +- Range (0, 4, step=1, splits=None) +: +- Range (0, 4, step=1) +- Project [id#xL] +- Filter (id#xL > scalar-subquery#x []) : +- Aggregate [max(id#xL) AS max(id)#xL] : +- SubqueryAlias tmp : +- CTERelationRef xxxx, true, [id#xL], false - +- Range (0, 3, step=1, splits=None) + +- Range (0, 3, step=1) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-all.sql.out index 729fea0cf858b..b1447307d8549 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-all.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `data`, select * from values (0, 2), (1, 
3), (1, NULL) - as data(g, i), false, false, LocalTempView, true + as data(g, i), false, false, LocalTempView, UNSUPPORTED, true +- Project [g#x, i#x] +- SubqueryAlias data +- LocalRelation [g#x, i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-ordinal.sql.out index 4bf21d02f9a88..f1f189517dea7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/order-by-ordinal.sql.out @@ -16,7 +16,7 @@ CreateViewCommand `data`, select * from values (2, 2), (3, 1), (3, 2) - as data(a, b), false, false, LocalTempView, true + as data(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias data +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/outer-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/outer-join.sql.out index 32088db51ac6c..1ee1cb1fb8fcd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/outer-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/outer-join.sql.out @@ -6,7 +6,7 @@ as t1(int_col1) -- !query analysis CreateViewCommand `t1`, SELECT * FROM VALUES (-234), (145), (367), (975), (298) -as t1(int_col1), false, true, LocalTempView, true +as t1(int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col1#x] +- SubqueryAlias t1 +- LocalRelation [int_col1#x] @@ -19,7 +19,7 @@ as t2(int_col0, int_col1) -- !query analysis CreateViewCommand `t2`, SELECT * FROM VALUES (-769, -244), (-800, -409), (940, 86), (-507, 304), (-367, 158) -as t2(int_col0, int_col1), false, true, LocalTempView, true +as t2(int_col0, int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col0#x, int_col1#x] +- SubqueryAlias t2 +- LocalRelation [int_col0#x, int_col1#x] @@ -57,7 +57,7 @@ Filter (sum(coalesce(int_col1, int_col0))#xL > cast((coalesce(int_col1, int_col0 -- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (97) as t1(int_col1) -- !query analysis -CreateViewCommand `t1`, SELECT * FROM VALUES (97) as t1(int_col1), false, true, LocalTempView, true +CreateViewCommand `t1`, SELECT * FROM VALUES (97) as t1(int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col1#x] +- SubqueryAlias t1 +- LocalRelation [int_col1#x] @@ -66,7 +66,7 @@ CreateViewCommand `t1`, SELECT * FROM VALUES (97) as t1(int_col1), false, true, -- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (0) as t2(int_col1) -- !query analysis -CreateViewCommand `t2`, SELECT * FROM VALUES (0) as t2(int_col1), false, true, LocalTempView, true +CreateViewCommand `t2`, SELECT * FROM VALUES (0) as t2(int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col1#x] +- SubqueryAlias t2 +- LocalRelation [int_col1#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out index e763ed48b1364..4a31cff8c7d0f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/percentiles.sql.out @@ -6,7 +6,7 @@ AS aggr(k, v) -- !query analysis CreateViewCommand `aggr`, SELECT * FROM VALUES (0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null) -AS aggr(k, v), false, 
true, LocalTempView, true +AS aggr(k, v), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias aggr +- LocalRelation [k#x, v#x] @@ -51,7 +51,7 @@ CreateViewCommand `basic_pays`, SELECT * FROM VALUES ('Pamela Castillo','SCM',11303), ('Larry Bott','SCM',11798), ('Barry Jones','SCM',10586) -AS basic_pays(employee_name, department, salary), false, true, LocalTempView, true +AS basic_pays(employee_name, department, salary), false, true, LocalTempView, UNSUPPORTED, true +- Project [employee_name#x, department#x, salary#x] +- SubqueryAlias basic_pays +- LocalRelation [employee_name#x, department#x, salary#x] @@ -885,7 +885,7 @@ CreateViewCommand `intervals`, SELECT * FROM VALUES (2, INTERVAL '30' MONTH, INTERVAL '30' SECOND, INTERVAL '30' MINUTE), (3, INTERVAL '60' MONTH, INTERVAL '60' SECOND, INTERVAL '60' MINUTE), (4, null, null, null) -AS intervals(k, dt, ym, dt2), false, true, LocalTempView, true +AS intervals(k, dt, ym, dt2), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, dt#x, ym#x, dt2#x] +- SubqueryAlias intervals +- LocalRelation [k#x, dt#x, ym#x, dt2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out index c0a02dc29d606..93f2e240a0191 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pivot.sql.out @@ -14,7 +14,7 @@ CreateViewCommand `courseSales`, select * from values ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) - as courseSales(course, year, earnings), false, false, LocalTempView, true + as courseSales(course, year, earnings), false, false, LocalTempView, UNSUPPORTED, true +- Project [course#x, year#x, earnings#x] +- SubqueryAlias courseSales +- LocalRelation [course#x, year#x, earnings#x] @@ -29,7 +29,7 @@ create temporary view years as select * from values CreateViewCommand `years`, select * from values (2012, 1), (2013, 2) - as years(y, s), false, false, LocalTempView, true + as years(y, s), false, false, LocalTempView, UNSUPPORTED, true +- Project [y#x, s#x] +- SubqueryAlias years +- LocalRelation [y#x, s#x] @@ -44,7 +44,7 @@ create temporary view yearsWithComplexTypes as select * from values CreateViewCommand `yearsWithComplexTypes`, select * from values (2012, array(1, 1), map('1', 1), struct(1, 'a')), (2013, array(2, 2), map('2', 2), struct(2, 'b')) - as yearsWithComplexTypes(y, a, m, s), false, false, LocalTempView, true + as yearsWithComplexTypes(y, a, m, s), false, false, LocalTempView, UNSUPPORTED, true +- Project [y#x, a#x, m#x, s#x] +- SubqueryAlias yearsWithComplexTypes +- LocalRelation [y#x, a#x, m#x, s#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/aggregates_part1.sql.out index 75eb7df4e2121..0577d73ea6a3c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/aggregates_part1.sql.out @@ -154,70 +154,70 @@ Aggregate [stddev_pop(cast(cast(3.0 as decimal(38,0)) as double)) AS stddev_pop( select sum(CAST(null AS int)) from range(1,4) -- !query analysis Aggregate [sum(cast(null as int)) AS sum(CAST(NULL AS INT))#xL] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select sum(CAST(null AS long)) from range(1,4) -- !query analysis Aggregate 
[sum(cast(null as bigint)) AS sum(CAST(NULL AS BIGINT))#xL] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select sum(CAST(null AS Decimal(38,0))) from range(1,4) -- !query analysis Aggregate [sum(cast(null as decimal(38,0))) AS sum(CAST(NULL AS DECIMAL(38,0)))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select sum(CAST(null AS DOUBLE)) from range(1,4) -- !query analysis Aggregate [sum(cast(null as double)) AS sum(CAST(NULL AS DOUBLE))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(CAST(null AS int)) from range(1,4) -- !query analysis Aggregate [avg(cast(null as int)) AS avg(CAST(NULL AS INT))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(CAST(null AS long)) from range(1,4) -- !query analysis Aggregate [avg(cast(null as bigint)) AS avg(CAST(NULL AS BIGINT))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(CAST(null AS Decimal(38,0))) from range(1,4) -- !query analysis Aggregate [avg(cast(null as decimal(38,0))) AS avg(CAST(NULL AS DECIMAL(38,0)))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(CAST(null AS DOUBLE)) from range(1,4) -- !query analysis Aggregate [avg(cast(null as double)) AS avg(CAST(NULL AS DOUBLE))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select sum(CAST('NaN' AS DOUBLE)) from range(1,4) -- !query analysis Aggregate [sum(cast(NaN as double)) AS sum(CAST(NaN AS DOUBLE))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(CAST('NaN' AS DOUBLE)) from range(1,4) -- !query analysis Aggregate [avg(cast(NaN as double)) AS avg(CAST(NaN AS DOUBLE))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query @@ -355,7 +355,7 @@ Aggregate [corr(cast(b#x as double), cast(a#x as double)) AS corr(b, a)#x] -- !query CREATE TEMPORARY VIEW regr_test AS SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y) -- !query analysis -CreateViewCommand `regr_test`, SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y), false, false, LocalTempView, true +CreateViewCommand `regr_test`, SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y), false, false, LocalTempView, UNSUPPORTED, true +- Project [x#x, y#x] +- SubqueryAlias regr_test +- LocalRelation [x#x, y#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/aggregates_part2.sql.out index 84c5a88e6b2aa..98b0a27f04505 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/aggregates_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/aggregates_part2.sql.out @@ -14,7 +14,7 @@ CreateViewCommand `int4_tbl`, select * from values (-123456), (2147483647), (-2147483647) - as int4_tbl(f1), false, false, LocalTempView, true + as int4_tbl(f1), false, false, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias int4_tbl +- LocalRelation [f1#x] @@ -29,7 +29,7 @@ CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES CreateViewCommand `bitwise_test`, SELECT * FROM VALUES (1, 1, 1, 1L), (3, 3, 3, null), - (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4), false, true, LocalTempView, true + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4), false, true, LocalTempView, 
UNSUPPORTED, true +- Project [b1#x, b2#x, b3#x, b4#xL] +- SubqueryAlias bitwise_test +- LocalRelation [b1#x, b2#x, b3#x, b4#xL] @@ -178,7 +178,7 @@ CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES CreateViewCommand `bool_test`, SELECT * FROM VALUES (TRUE, null, FALSE, null), (FALSE, TRUE, null, null), - (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4), false, true, LocalTempView, true + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4), false, true, LocalTempView, UNSUPPORTED, true +- Project [b1#x, b2#x, b3#x, b4#x] +- SubqueryAlias bool_test +- LocalRelation [b1#x, b2#x, b3#x, b4#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out index b200f255ff13e..e4e4be8fee7d3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out @@ -16,7 +16,7 @@ CREATE VIEW toyemp AS FROM emp -- !query analysis CreateViewCommand `spark_catalog`.`default`.`toyemp`, SELECT name, age, /* location ,*/ 12*salary AS annualsal - FROM emp, false, false, PersistedView, true + FROM emp, false, false, PersistedView, COMPENSATION, true +- Project [name#x, age#x, (12 * salary#x) AS annualsal#x] +- SubqueryAlias spark_catalog.default.emp +- Relation spark_catalog.default.emp[name#x,age#x,salary#x,manager#x] parquet @@ -102,7 +102,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d CREATE OR REPLACE VIEW viewtest AS SELECT * FROM viewtest_tbl -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT * FROM viewtest_tbl, false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT * FROM viewtest_tbl, false, true, PersistedView, COMPENSATION, true +- Project [a#x, b#x] +- SubqueryAlias spark_catalog.default.viewtest_tbl +- Relation spark_catalog.default.viewtest_tbl[a#x,b#x] parquet @@ -112,7 +112,7 @@ CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT * FROM viewtest_t CREATE OR REPLACE VIEW viewtest AS SELECT * FROM viewtest_tbl WHERE a > 10 -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT * FROM viewtest_tbl WHERE a > 10, false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT * FROM viewtest_tbl WHERE a > 10, false, true, PersistedView, COMPENSATION, true +- Project [a#x, b#x] +- Filter (a#x > 10) +- SubqueryAlias spark_catalog.default.viewtest_tbl @@ -136,7 +136,7 @@ Project [a#x, b#x] CREATE OR REPLACE VIEW viewtest AS SELECT a, b FROM viewtest_tbl WHERE a > 5 ORDER BY b DESC -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a, b FROM viewtest_tbl WHERE a > 5 ORDER BY b DESC, false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a, b FROM viewtest_tbl WHERE a > 5 ORDER BY b DESC, false, true, PersistedView, COMPENSATION, true +- Sort [b#x DESC NULLS LAST], true +- Project [a#x, b#x] +- Filter (a#x > 5) @@ -162,7 +162,7 @@ Project [a#x, b#x] CREATE OR REPLACE VIEW viewtest AS SELECT a FROM viewtest_tbl WHERE a <> 20 -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a FROM viewtest_tbl WHERE a <> 20, false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a FROM viewtest_tbl WHERE a <> 20, false, true, 
PersistedView, COMPENSATION, true +- Project [a#x] +- Filter NOT (a#x = 20) +- SubqueryAlias spark_catalog.default.viewtest_tbl @@ -173,7 +173,7 @@ CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a FROM viewtest_t CREATE OR REPLACE VIEW viewtest AS SELECT 1, * FROM viewtest_tbl -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT 1, * FROM viewtest_tbl, false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT 1, * FROM viewtest_tbl, false, true, PersistedView, COMPENSATION, true +- Project [1 AS 1#x, a#x, b#x] +- SubqueryAlias spark_catalog.default.viewtest_tbl +- Relation spark_catalog.default.viewtest_tbl[a#x,b#x] parquet @@ -183,7 +183,7 @@ CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT 1, * FROM viewtes CREATE OR REPLACE VIEW viewtest AS SELECT a, decimal(b) FROM viewtest_tbl -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a, decimal(b) FROM viewtest_tbl, false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a, decimal(b) FROM viewtest_tbl, false, true, PersistedView, COMPENSATION, true +- Project [a#x, cast(b#x as decimal(10,0)) AS b#x] +- SubqueryAlias spark_catalog.default.viewtest_tbl +- Relation spark_catalog.default.viewtest_tbl[a#x,b#x] parquet @@ -193,7 +193,7 @@ CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a, decimal(b) FRO CREATE OR REPLACE VIEW viewtest AS SELECT a, b, 0 AS c FROM viewtest_tbl -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a, b, 0 AS c FROM viewtest_tbl, false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`viewtest`, SELECT a, b, 0 AS c FROM viewtest_tbl, false, true, PersistedView, COMPENSATION, true +- Project [a#x, b#x, 0 AS c#x] +- SubqueryAlias spark_catalog.default.viewtest_tbl +- Relation spark_catalog.default.viewtest_tbl[a#x,b#x] parquet @@ -243,7 +243,7 @@ CREATE TEMPORARY VIEW temp_table AS SELECT * FROM VALUES (1, 1) as temp_table(a, id) -- !query analysis CreateViewCommand `temp_table`, SELECT * FROM VALUES - (1, 1) as temp_table(a, id), false, false, LocalTempView, true + (1, 1) as temp_table(a, id), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, id#x] +- SubqueryAlias temp_table +- LocalRelation [a#x, id#x] @@ -252,7 +252,7 @@ CreateViewCommand `temp_table`, SELECT * FROM VALUES -- !query CREATE VIEW v1 AS SELECT * FROM base_table -- !query analysis -CreateViewCommand `spark_catalog`.`temp_view_test`.`v1`, SELECT * FROM base_table, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`temp_view_test`.`v1`, SELECT * FROM base_table, false, false, PersistedView, COMPENSATION, true +- Project [a#x, id#x] +- SubqueryAlias spark_catalog.temp_view_test.base_table +- Relation spark_catalog.temp_view_test.base_table[a#x,id#x] parquet @@ -283,7 +283,7 @@ org.apache.spark.sql.AnalysisException -- !query CREATE TEMP VIEW v2_temp AS SELECT * FROM base_table -- !query analysis -CreateViewCommand `v2_temp`, SELECT * FROM base_table, false, false, LocalTempView, true +CreateViewCommand `v2_temp`, SELECT * FROM base_table, false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, id#x] +- SubqueryAlias spark_catalog.temp_view_test.base_table +- Relation spark_catalog.temp_view_test.base_table[a#x,id#x] parquet @@ -298,7 +298,7 @@ DescribeTableCommand `v2_temp`, true, [col_name#x, data_type#x, comment#x] -- !query CREATE VIEW temp_view_test.v2 AS SELECT * FROM 
base_table -- !query analysis -CreateViewCommand `spark_catalog`.`temp_view_test`.`v2`, SELECT * FROM base_table, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`temp_view_test`.`v2`, SELECT * FROM base_table, false, false, PersistedView, COMPENSATION, true +- Project [a#x, id#x] +- SubqueryAlias spark_catalog.temp_view_test.base_table +- Relation spark_catalog.temp_view_test.base_table[a#x,id#x] parquet @@ -334,7 +334,7 @@ CREATE VIEW v3 AS -- !query analysis CreateViewCommand `spark_catalog`.`temp_view_test`.`v3`, SELECT t1.a AS t1_a, t2.a AS t2_a FROM base_table t1, base_table2 t2 - WHERE t1.id = t2.id, false, false, PersistedView, true + WHERE t1.id = t2.id, false, false, PersistedView, COMPENSATION, true +- Project [a#x AS t1_a#x, a#x AS t2_a#x] +- Filter (id#x = id#x) +- Join Inner @@ -393,7 +393,7 @@ org.apache.spark.sql.AnalysisException -- !query CREATE VIEW v4 AS SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) -- !query analysis -CreateViewCommand `spark_catalog`.`temp_view_test`.`v4`, SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`temp_view_test`.`v4`, SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2), false, false, PersistedView, COMPENSATION, true +- Project [a#x, id#x] +- Filter id#x IN (list#x []) : +- Project [id#x] @@ -412,7 +412,7 @@ DescribeTableCommand `spark_catalog`.`temp_view_test`.`v4`, true, [col_name#x, d -- !query CREATE VIEW v5 AS SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 -- !query analysis -CreateViewCommand `spark_catalog`.`temp_view_test`.`v5`, SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`temp_view_test`.`v5`, SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2, false, false, PersistedView, COMPENSATION, true +- Project [id#x, a#x] +- Join Inner :- SubqueryAlias t1 @@ -433,7 +433,7 @@ DescribeTableCommand `spark_catalog`.`temp_view_test`.`v5`, true, [col_name#x, d -- !query CREATE VIEW v6 AS SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) -- !query analysis -CreateViewCommand `spark_catalog`.`temp_view_test`.`v6`, SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`temp_view_test`.`v6`, SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2), false, false, PersistedView, COMPENSATION, true +- Project [a#x, id#x] +- Filter exists#x [] : +- Project [1 AS 1#x] @@ -452,7 +452,7 @@ DescribeTableCommand `spark_catalog`.`temp_view_test`.`v6`, true, [col_name#x, d -- !query CREATE VIEW v7 AS SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) -- !query analysis -CreateViewCommand `spark_catalog`.`temp_view_test`.`v7`, SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`temp_view_test`.`v7`, SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2), false, false, PersistedView, COMPENSATION, true +- Project [a#x, id#x] +- Filter NOT exists#x [] : +- Project [1 AS 1#x] @@ -471,7 +471,7 @@ DescribeTableCommand `spark_catalog`.`temp_view_test`.`v7`, true, [col_name#x, d -- !query CREATE VIEW v8 AS SELECT * FROM base_table WHERE EXISTS (SELECT 1) -- !query analysis -CreateViewCommand `spark_catalog`.`temp_view_test`.`v8`, SELECT * FROM base_table WHERE EXISTS 
(SELECT 1), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`temp_view_test`.`v8`, SELECT * FROM base_table WHERE EXISTS (SELECT 1), false, false, PersistedView, COMPENSATION, true +- Project [a#x, id#x] +- Filter exists#x [] : +- Project [1 AS 1#x] @@ -641,7 +641,7 @@ CREATE TEMP VIEW tt AS SELECT * FROM VALUES (1, 'a') AS tt(num2, value) -- !query analysis CreateViewCommand `tt`, SELECT * FROM VALUES - (1, 'a') AS tt(num2, value), false, false, LocalTempView, true + (1, 'a') AS tt(num2, value), false, false, LocalTempView, UNSUPPORTED, true +- Project [num2#x, value#x] +- SubqueryAlias tt +- LocalRelation [num2#x, value#x] @@ -650,7 +650,7 @@ CreateViewCommand `tt`, SELECT * FROM VALUES -- !query CREATE VIEW nontemp1 AS SELECT * FROM t1 CROSS JOIN t2 -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp1`, SELECT * FROM t1 CROSS JOIN t2, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp1`, SELECT * FROM t1 CROSS JOIN t2, false, false, PersistedView, COMPENSATION, true +- Project [num#x, name#x, num2#x, value#x] +- Join Cross :- SubqueryAlias spark_catalog.testviewschm2.t1 @@ -684,7 +684,7 @@ org.apache.spark.sql.AnalysisException -- !query CREATE VIEW nontemp2 AS SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp2`, SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp2`, SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2, false, false, PersistedView, COMPENSATION, true +- Project [num#x, name#x, num2#x, value#x] +- Join Inner, (num#x = num2#x) :- SubqueryAlias spark_catalog.testviewschm2.t1 @@ -718,7 +718,7 @@ org.apache.spark.sql.AnalysisException -- !query CREATE VIEW nontemp3 AS SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp3`, SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp3`, SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2, false, false, PersistedView, COMPENSATION, true +- Project [num#x, name#x, num2#x, value#x] +- Join LeftOuter, (num#x = num2#x) :- SubqueryAlias spark_catalog.testviewschm2.t1 @@ -752,7 +752,7 @@ org.apache.spark.sql.AnalysisException -- !query CREATE VIEW nontemp4 AS SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp4`, SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx', false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`nontemp4`, SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx', false, false, PersistedView, COMPENSATION, true +- Project [num#x, name#x, num2#x, value#x] +- Join LeftOuter, ((num#x = num2#x) AND (value#x = xxx)) :- SubqueryAlias spark_catalog.testviewschm2.t1 @@ -844,7 +844,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`pubview`, SELECT * FROM tbl1 WHERE tbl1.a BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) -AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f), false, false, PersistedView, true +AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f), false, false, PersistedView, COMPENSATION, 
true +- Project [a#x, b#x] +- Filter (between(a#x, scalar-subquery#x [], scalar-subquery#x []) AND exists#x []) : :- Project [d#x] @@ -880,7 +880,7 @@ AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) CreateViewCommand `spark_catalog`.`testviewschm2`.`mytempview`, SELECT * FROM tbl1 WHERE tbl1.a BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) -AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j), false, false, PersistedView, true +AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j), false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x] +- Filter ((between(a#x, scalar-subquery#x [], scalar-subquery#x []) AND exists#x []) AND NOT exists#x []) : :- Project [d#x] @@ -925,7 +925,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`tt1`, SELECT * FROM ( VALUES ('abc', '0123456789', 42, 'abcd'), ('0123456789', 'abc', 42.12, 'abc') - ) vv(a,b,c,d), false, false, PersistedView, true + ) vv(a,b,c,d), false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x, c#x, d#x] +- SubqueryAlias vv +- Project [col1#x AS a#x, col2#x AS b#x, col3#x AS c#x, col4#x AS d#x] @@ -988,7 +988,7 @@ CREATE VIEW aliased_view_1 AS where exists (select 1 from tx1 where tt1.f1 = tx1.x1) -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`aliased_view_1`, select * from tt1 - where exists (select 1 from tx1 where tt1.f1 = tx1.x1), false, false, PersistedView, true + where exists (select 1 from tx1 where tt1.f1 = tx1.x1), false, false, PersistedView, COMPENSATION, true +- Project [f1#x, f2#x, f3#x] +- Filter exists#x [f1#x] : +- Project [1 AS 1#x] @@ -1005,7 +1005,7 @@ CREATE VIEW aliased_view_2 AS where exists (select 1 from tx1 where a1.f1 = tx1.x1) -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`aliased_view_2`, select * from tt1 a1 - where exists (select 1 from tx1 where a1.f1 = tx1.x1), false, false, PersistedView, true + where exists (select 1 from tx1 where a1.f1 = tx1.x1), false, false, PersistedView, COMPENSATION, true +- Project [f1#x, f2#x, f3#x] +- Filter exists#x [f1#x] : +- Project [1 AS 1#x] @@ -1023,7 +1023,7 @@ CREATE VIEW aliased_view_3 AS where exists (select 1 from tx1 a2 where tt1.f1 = a2.x1) -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`aliased_view_3`, select * from tt1 - where exists (select 1 from tx1 a2 where tt1.f1 = a2.x1), false, false, PersistedView, true + where exists (select 1 from tx1 a2 where tt1.f1 = a2.x1), false, false, PersistedView, COMPENSATION, true +- Project [f1#x, f2#x, f3#x] +- Filter exists#x [f1#x] : +- Project [1 AS 1#x] @@ -1041,7 +1041,7 @@ CREATE VIEW aliased_view_4 AS where exists (select 1 from tt1 where temp_view_test.tt1.y1 = tt1.f1) -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`aliased_view_4`, select * from temp_view_test.tt1 - where exists (select 1 from tt1 where temp_view_test.tt1.y1 = tt1.f1), false, false, PersistedView, true + where exists (select 1 from tt1 where temp_view_test.tt1.y1 = tt1.f1), false, false, PersistedView, COMPENSATION, true +- Project [y1#x, f2#x, f3#x] +- Filter exists#x [y1#x] : +- Project [1 AS 1#x] @@ -1180,7 +1180,7 @@ select * from -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`view_of_joins`, select * from (select * from (tbl1 cross join tbl2) same) ss, - (tbl3 cross join tbl4) same, false, false, PersistedView, true + (tbl3 cross join 
tbl4) same, false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x, c#x, d#x, e#x, f#x, g#x, h#x] +- Join Inner :- SubqueryAlias ss @@ -1220,7 +1220,7 @@ CreateDataSourceTableCommand `spark_catalog`.`testviewschm2`.`tt4`, false -- !query create view v1 as select * from tt2 natural join tt3 -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`v1`, select * from tt2 natural join tt3, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`v1`, select * from tt2 natural join tt3, false, false, PersistedView, COMPENSATION, true +- Project [b#x, c#x, a#x, ax#xL] +- Project [b#x, c#x, a#x, ax#xL] +- Join Inner, ((b#x = cast(b#x as int)) AND (cast(c#x as decimal(10,0)) = c#x)) @@ -1233,7 +1233,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`v1`, select * from tt2 natura -- !query create view v1a as select * from (tt2 natural join tt3) j -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`v1a`, select * from (tt2 natural join tt3) j, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`v1a`, select * from (tt2 natural join tt3) j, false, false, PersistedView, COMPENSATION, true +- Project [b#x, c#x, a#x, ax#xL] +- SubqueryAlias j +- Project [b#x, c#x, a#x, ax#xL] @@ -1247,7 +1247,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`v1a`, select * from (tt2 natu -- !query create view v2 as select * from tt2 join tt3 using (b,c) join tt4 using (b) -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`v2`, select * from tt2 join tt3 using (b,c) join tt4 using (b), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`v2`, select * from tt2 join tt3 using (b,c) join tt4 using (b), false, false, PersistedView, COMPENSATION, true +- Project [b#x, c#x, a#x, ax#xL, ay#x, q#x] +- Project [b#x, c#x, a#x, ax#xL, ay#x, q#x] +- Join Inner, (b#x = b#x) @@ -1264,7 +1264,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`v2`, select * from tt2 join t -- !query create view v2a as select * from (tt2 join tt3 using (b,c) join tt4 using (b)) j -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`v2a`, select * from (tt2 join tt3 using (b,c) join tt4 using (b)) j, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`v2a`, select * from (tt2 join tt3 using (b,c) join tt4 using (b)) j, false, false, PersistedView, COMPENSATION, true +- Project [b#x, c#x, a#x, ax#xL, ay#x, q#x] +- SubqueryAlias j +- Project [b#x, c#x, a#x, ax#xL, ay#x, q#x] @@ -1282,7 +1282,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`v2a`, select * from (tt2 join -- !query create view v3 as select * from tt2 join tt3 using (b,c) full join tt4 using (b) -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`v3`, select * from tt2 join tt3 using (b,c) full join tt4 using (b), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`v3`, select * from tt2 join tt3 using (b,c) full join tt4 using (b), false, false, PersistedView, COMPENSATION, true +- Project [b#x, c#x, a#x, ax#xL, ay#x, q#x] +- Project [coalesce(b#x, b#x) AS b#x, c#x, a#x, ax#xL, ay#x, q#x] +- Join FullOuter, (b#x = b#x) @@ -1438,7 +1438,7 @@ CreateDataSourceTableCommand `spark_catalog`.`testviewschm2`.`tt6`, false -- !query create view vv1 as select * from (tt5 cross join tt6) j(aa,bb,cc,dd) -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`vv1`, select * from (tt5 cross join tt6) 
j(aa,bb,cc,dd), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`vv1`, select * from (tt5 cross join tt6) j(aa,bb,cc,dd), false, false, PersistedView, COMPENSATION, true +- Project [aa#x, bb#x, cc#x, dd#x] +- SubqueryAlias j +- Project [a#x AS aa#x, b#x AS bb#x, c#x AS cc#x, d#x AS dd#x] @@ -1499,7 +1499,7 @@ select * from tt7 full join tt8 using (x), tt8 tt8x -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`vv2`, select * from (values(1,2,3,4,5)) v(a,b,c,d,e) union all -select * from tt7 full join tt8 using (x), tt8 tt8x, false, false, PersistedView, true +select * from tt7 full join tt8 using (x), tt8 tt8x, false, false, PersistedView, COMPENSATION, true +- Union false, false :- Project [a#x, b#x, c#x, d#x, e#x] : +- SubqueryAlias v @@ -1536,7 +1536,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`vv3`, select * from (values(1 union all select * from tt7 full join tt8 using (x), - tt7 tt7x full join tt8 tt8x using (x), false, false, PersistedView, true + tt7 tt7x full join tt8 tt8x using (x), false, false, PersistedView, COMPENSATION, true +- Union false, false :- Project [a#x, b#x, c#x, x#x, e#x, f#x] : +- SubqueryAlias v @@ -1578,7 +1578,7 @@ CreateViewCommand `spark_catalog`.`testviewschm2`.`vv4`, select * from (values(1 union all select * from tt7 full join tt8 using (x), - tt7 tt7x full join tt8 tt8x using (x) full join tt8 tt8y using (x), false, false, PersistedView, true + tt7 tt7x full join tt8 tt8x using (x) full join tt8 tt8y using (x), false, false, PersistedView, COMPENSATION, true +- Union false, false :- Project [a#x, b#x, c#x, x#x, e#x, f#x, g#x] : +- SubqueryAlias v @@ -1669,7 +1669,7 @@ select * from tt7a left join tt8a using (x), tt8a tt8ax -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`vv2a`, select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e) union all -select * from tt7a left join tt8a using (x), tt8a tt8ax, false, false, PersistedView, true +select * from tt7a left join tt8a using (x), tt8a tt8ax, false, false, PersistedView, COMPENSATION, true +- Union false, false :- Project [a#x, b#x, c#x, d#x, e#x] : +- SubqueryAlias v @@ -1716,7 +1716,7 @@ CreateDataSourceTableCommand `spark_catalog`.`testviewschm2`.`tt10`, false -- !query create view vv5 as select x,y,z from tt9 join tt10 using(x) -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`vv5`, select x,y,z from tt9 join tt10 using(x), false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`vv5`, select x,y,z from tt9 join tt10 using(x), false, false, PersistedView, COMPENSATION, true +- Project [x#x, y#x, z#x] +- Project [x#x, xx#x, y#x, z#x] +- Join Inner, (x#x = x#x) @@ -1761,7 +1761,7 @@ create view vv6 as select x,y,z,q from (tt11 join tt12 using(x)) join tt13 using(z) -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`vv6`, select x,y,z,q from - (tt11 join tt12 using(x)) join tt13 using(z), false, false, PersistedView, true + (tt11 join tt12 using(x)) join tt13 using(z), false, false, PersistedView, COMPENSATION, true +- Project [x#x, y#x, z#x, q#x] +- Project [z#x, x#x, y#x, q#x] +- Join Inner, (z#x = z#x) @@ -1807,7 +1807,7 @@ create view tt18v as -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`tt18v`, select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy union all - select * from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz, false, false, PersistedView, true + select * 
from int8_tbl xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxz, false, false, PersistedView, COMPENSATION, true +- Union false, false :- Project [q1#x, q2#x] : +- SubqueryAlias xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy @@ -1829,7 +1829,7 @@ DescribeTableCommand `spark_catalog`.`testviewschm2`.`tt18v`, false, [col_name#x create view tt21v as select * from tt5 natural inner join tt6 -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`tt21v`, select * from tt5 natural inner join tt6, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`tt21v`, select * from tt5 natural inner join tt6, false, false, PersistedView, COMPENSATION, true +- Project [c#x, a#x, b#x, cc#x, d#x] +- Project [c#x, a#x, b#x, cc#x, d#x] +- Join Inner, (c#x = c#x) @@ -1849,7 +1849,7 @@ DescribeTableCommand `spark_catalog`.`testviewschm2`.`tt21v`, false, [col_name#x create view tt22v as select * from tt5 natural left join tt6 -- !query analysis -CreateViewCommand `spark_catalog`.`testviewschm2`.`tt22v`, select * from tt5 natural left join tt6, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`testviewschm2`.`tt22v`, select * from tt5 natural left join tt6, false, false, PersistedView, COMPENSATION, true +- Project [c#x, a#x, b#x, cc#x, d#x] +- Project [c#x, a#x, b#x, cc#x, d#x] +- Join LeftOuter, (c#x = c#x) @@ -1873,7 +1873,7 @@ select 42, 43 -- !query analysis CreateViewCommand `spark_catalog`.`testviewschm2`.`tt23v`, [(col_a,None), (col_b,None)], select q1 as other_name1, q2 as other_name2 from int8_tbl union -select 42, 43, false, false, PersistedView, true +select 42, 43, false, false, PersistedView, COMPENSATION, true +- Distinct +- Union false, false :- Project [q1#x AS other_name1#x, q2#x AS other_name2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/float8.sql.out index d30f971628e4d..2f2beda4f1cec 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/float8.sql.out @@ -403,7 +403,7 @@ FROM FLOAT8_TBL -- !query analysis CreateViewCommand `UPDATED_FLOAT8_TBL`, SELECT CASE WHEN FLOAT8_TBL.f1 > '0.0' THEN FLOAT8_TBL.f1 * '-1' ELSE FLOAT8_TBL.f1 END AS f1 -FROM FLOAT8_TBL, false, false, LocalTempView, true +FROM FLOAT8_TBL, false, false, LocalTempView, UNSUPPORTED, true +- Project [CASE WHEN (f1#x > cast(0.0 as double)) THEN (f1#x * cast(-1 as double)) ELSE f1#x END AS f1#x] +- SubqueryAlias spark_catalog.default.float8_tbl +- Relation spark_catalog.default.float8_tbl[f1#x] parquet diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/groupingsets.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/groupingsets.sql.out index 173a324dc456c..27e9707425833 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/groupingsets.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/groupingsets.sql.out @@ -9,7 +9,7 @@ create temp view gstest1(a,b,v) CreateViewCommand `gstest1`, [(a,None), (b,None), (v,None)], values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14), (2,3,15), (3,3,16),(3,4,17), - (4,1,18),(4,1,19), false, false, LocalTempView, true + (4,1,18),(4,1,19), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x] @@ -320,7 +320,7 @@ CreateViewCommand 
`int8_tbl`, SELECT * FROM VALUES (123L, 4567890123456789L), (4567890123456789L, 123L), (4567890123456789L, 4567890123456789L), - (4567890123456789L, -4567890123456789L) as int8_tbl(q1, q2), false, false, LocalTempView, true + (4567890123456789L, -4567890123456789L) as int8_tbl(q1, q2), false, false, LocalTempView, UNSUPPORTED, true +- Project [q1#xL, q2#xL] +- SubqueryAlias int8_tbl +- LocalRelation [q1#xL, q2#xL] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/int8.sql.out index c7f3f7bdbbb80..72972469fa6ef 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/int8.sql.out @@ -626,7 +626,7 @@ Project [q1#xL, q2#xL, (q1#xL & q2#xL) AS and#xL, (q1#xL | q2#xL) AS or#xL, ~q1# SELECT * FROM range(bigint('+4567890123456789'), bigint('+4567890123456799')) -- !query analysis Project [id#xL] -+- Range (4567890123456789, 4567890123456799, step=1, splits=None) ++- Range (4567890123456789, 4567890123456799, step=1) -- !query @@ -653,7 +653,7 @@ org.apache.spark.sql.AnalysisException SELECT * FROM range(bigint('+4567890123456789'), bigint('+4567890123456799'), 2) -- !query analysis Project [id#xL] -+- Range (4567890123456789, 4567890123456799, step=2, splits=None) ++- Range (4567890123456789, 4567890123456799, step=2) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out index 0147e84cb5a9c..37ec8291c4e4b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/join.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `INT2_TBL`, [(f1,None)], VALUES (smallint(trim(' 1234 '))), (smallint(trim(' -1234'))), (smallint('32767')), - (smallint('-32767')), false, true, LocalTempView, true + (smallint('-32767')), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x] @@ -23,7 +23,7 @@ CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM -- !query analysis CreateViewCommand `INT4_TBL`, SELECT * FROM (VALUES (0), (123456), (-123456), (2147483647), (-2147483647)) - AS v(f1), false, true, LocalTempView, true + AS v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -47,7 +47,7 @@ CreateViewCommand `INT8_TBL`, SELECT * FROM (4567890123456789, 123), (4567890123456789, 4567890123456789), (4567890123456789, -4567890123456789)) - AS v(q1, q2), false, true, LocalTempView, true + AS v(q1, q2), false, true, LocalTempView, UNSUPPORTED, true +- Project [q1#xL, q2#xL] +- SubqueryAlias v +- Project [col1#xL AS q1#xL, col2#xL AS q2#xL] @@ -63,7 +63,7 @@ CREATE OR REPLACE TEMPORARY VIEW FLOAT8_TBL AS SELECT * FROM CreateViewCommand `FLOAT8_TBL`, SELECT * FROM (VALUES (0.0), (1004.30), (-34.84), (cast('1.2345678901234e+200' as double)), (cast('1.2345678901234e-200' as double))) - AS v(f1), false, true, LocalTempView, true + AS v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -77,7 +77,7 @@ CREATE OR REPLACE TEMPORARY VIEW TEXT_TBL AS SELECT * FROM -- !query analysis CreateViewCommand `TEXT_TBL`, SELECT * FROM (VALUES ('doh!'), ('hi de ho neighbor')) - AS v(f1), false, true, LocalTempView, true + AS v(f1), false, true, 
LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -87,7 +87,7 @@ CreateViewCommand `TEXT_TBL`, SELECT * FROM -- !query CREATE OR REPLACE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1 -- !query analysis -CreateViewCommand `tenk2`, SELECT * FROM tenk1, false, true, LocalTempView, true +CreateViewCommand `tenk2`, SELECT * FROM tenk1, false, true, LocalTempView, UNSUPPORTED, true +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x] +- SubqueryAlias spark_catalog.default.tenk1 +- Relation spark_catalog.default.tenk1[unique1#x,unique2#x,two#x,four#x,ten#x,twenty#x,hundred#x,thousand#x,twothousand#x,fivethous#x,tenthous#x,odd#x,even#x,stringu1#x,stringu2#x,string4#x] parquet @@ -1001,7 +1001,7 @@ create or replace temporary view x as select * from -- !query analysis CreateViewCommand `x`, select * from (values (1,11), (2,22), (3,null), (4,44), (5,null)) - as v(x1, x2), false, true, LocalTempView, true + as v(x1, x2), false, true, LocalTempView, UNSUPPORTED, true +- Project [x1#x, x2#x] +- SubqueryAlias v +- Project [col1#x AS x1#x, col2#x AS x2#x] @@ -1015,7 +1015,7 @@ create or replace temporary view y as select * from -- !query analysis CreateViewCommand `y`, select * from (values (1,111), (2,222), (3,333), (4,null)) - as v(y1, y2), false, true, LocalTempView, true + as v(y1, y2), false, true, LocalTempView, UNSUPPORTED, true +- Project [y1#x, y2#x] +- SubqueryAlias v +- Project [col1#x AS y1#x, col2#x AS y2#x] @@ -1625,7 +1625,7 @@ create or replace temporary view tt1 as select * from -- !query analysis CreateViewCommand `tt1`, select * from (values (1, 11), (2, NULL)) - as v(tt1_id, joincol), false, true, LocalTempView, true + as v(tt1_id, joincol), false, true, LocalTempView, UNSUPPORTED, true +- Project [tt1_id#x, joincol#x] +- SubqueryAlias v +- Project [col1#x AS tt1_id#x, col2#x AS joincol#x] @@ -1639,7 +1639,7 @@ create or replace temporary view tt2 as select * from -- !query analysis CreateViewCommand `tt2`, select * from (values (21, 11), (22, 11)) - as v(tt2_id, joincol), false, true, LocalTempView, true + as v(tt2_id, joincol), false, true, LocalTempView, UNSUPPORTED, true +- Project [tt2_id#x, joincol#x] +- SubqueryAlias v +- Project [col1#x AS tt2_id#x, col2#x AS joincol#x] @@ -1710,13 +1710,13 @@ create or replace temporary view tt3 as select * from -- !query analysis CreateViewCommand `tt3`, select * from (SELECT cast(x.id as int), repeat('xyzzy', 100) FROM range(1,10001) x) - as v(f1, f2), false, true, LocalTempView, true + as v(f1, f2), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x, f2#x] +- SubqueryAlias v +- Project [id#x AS f1#x, repeat(xyzzy, 100)#x AS f2#x] +- Project [cast(id#xL as int) AS id#x, repeat(xyzzy, 100) AS repeat(xyzzy, 100)#x] +- SubqueryAlias x - +- Range (1, 10001, step=1, splits=None) + +- Range (1, 10001, step=1) -- !query @@ -1726,7 +1726,7 @@ create or replace temporary view tt4 as select * from -- !query analysis CreateViewCommand `tt4`, select * from (values (0), (1), (9999)) - as v(f1), false, true, LocalTempView, true + as v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -1767,7 +1767,7 @@ Project [f1#x] : +- Project [id#x AS f1#x, repeat(xyzzy, 100)#x AS f2#x] : +- Project [cast(id#xL as int) AS id#x, repeat(xyzzy, 100) AS repeat(xyzzy, 100)#x] : +- SubqueryAlias x - : +- Range (1, 10001, 
step=1, splits=None) + : +- Range (1, 10001, step=1) +- SubqueryAlias c +- SubqueryAlias tt3 +- View (`tt3`, [f1#x, f2#x]) @@ -1777,7 +1777,7 @@ Project [f1#x] +- Project [id#x AS f1#x, repeat(xyzzy, 100)#x AS f2#x] +- Project [cast(id#xL as int) AS id#x, repeat(xyzzy, 100) AS repeat(xyzzy, 100)#x] +- SubqueryAlias x - +- Range (1, 10001, step=1, splits=None) + +- Range (1, 10001, step=1) -- !query @@ -1787,7 +1787,7 @@ create or replace temporary view tt5 as select * from -- !query analysis CreateViewCommand `tt5`, select * from (values (1, 10), (1, 11)) - as v(f1, f2), false, true, LocalTempView, true + as v(f1, f2), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x, f2#x] +- SubqueryAlias v +- Project [col1#x AS f1#x, col2#x AS f2#x] @@ -1801,7 +1801,7 @@ create or replace temporary view tt6 as select * from -- !query analysis CreateViewCommand `tt6`, select * from (values (1, 9), (1, 2), (2, 9)) - as v(f1, f2), false, true, LocalTempView, true + as v(f1, f2), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x, f2#x] +- SubqueryAlias v +- Project [col1#x AS f1#x, col2#x AS f2#x] @@ -1837,7 +1837,7 @@ create or replace temporary view xx as select * from -- !query analysis CreateViewCommand `xx`, select * from (values (1), (2), (3)) - as v(pkxx), false, true, LocalTempView, true + as v(pkxx), false, true, LocalTempView, UNSUPPORTED, true +- Project [pkxx#x] +- SubqueryAlias v +- Project [col1#x AS pkxx#x] @@ -1851,7 +1851,7 @@ create or replace temporary view yy as select * from -- !query analysis CreateViewCommand `yy`, select * from (values (101, 1), (201, 2), (301, NULL)) - as v(pkyy, pkxx), false, true, LocalTempView, true + as v(pkyy, pkxx), false, true, LocalTempView, UNSUPPORTED, true +- Project [pkyy#x, pkxx#x] +- SubqueryAlias v +- Project [col1#x AS pkyy#x, col2#x AS pkxx#x] @@ -1912,7 +1912,7 @@ create or replace temporary view zt1 as select * from -- !query analysis CreateViewCommand `zt1`, select * from (values (53)) - as v(f1), false, true, LocalTempView, true + as v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -1926,7 +1926,7 @@ create or replace temporary view zt2 as select * from -- !query analysis CreateViewCommand `zt2`, select * from (values (53)) - as v(f2), false, true, LocalTempView, true + as v(f2), false, true, LocalTempView, UNSUPPORTED, true +- Project [f2#x] +- SubqueryAlias v +- Project [col1#x AS f2#x] @@ -1971,7 +1971,7 @@ Project [f2#x, f3#x, f1#x] -- !query create temp view zv1 as select *,'dummy' AS junk from zt1 -- !query analysis -CreateViewCommand `zv1`, select *,'dummy' AS junk from zt1, false, false, LocalTempView, true +CreateViewCommand `zv1`, select *,'dummy' AS junk from zt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [f1#x, dummy AS junk#x] +- SubqueryAlias zt1 +- View (`zt1`, [f1#x]) @@ -2146,7 +2146,7 @@ create or replace temporary view a as select * from -- !query analysis CreateViewCommand `a`, select * from (values ('p'), ('q')) - as v(code), false, true, LocalTempView, true + as v(code), false, true, LocalTempView, UNSUPPORTED, true +- Project [code#x] +- SubqueryAlias v +- Project [col1#x AS code#x] @@ -2160,7 +2160,7 @@ create or replace temporary view b as select * from -- !query analysis CreateViewCommand `b`, select * from (values ('p', 1), ('p', 2)) - as v(a, num), false, true, LocalTempView, true + as v(a, num), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, num#x] +- SubqueryAlias v +- Project [col1#x AS 
a#x, col2#x AS num#x] @@ -2174,7 +2174,7 @@ create or replace temporary view c as select * from -- !query analysis CreateViewCommand `c`, select * from (values ('A', 'p'), ('B', 'q'), ('C', null)) - as v(name, a), false, true, LocalTempView, true + as v(name, a), false, true, LocalTempView, UNSUPPORTED, true +- Project [name#x, a#x] +- SubqueryAlias v +- Project [col1#x AS name#x, col2#x AS a#x] @@ -2346,7 +2346,7 @@ create or replace temporary view nt1 as select * from -- !query analysis CreateViewCommand `nt1`, select * from (values(1,true,true), (2,true,false), (3,false,false)) - as v(id, a1, a2), false, true, LocalTempView, true + as v(id, a1, a2), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, a1#x, a2#x] +- SubqueryAlias v +- Project [col1#x AS id#x, col2#x AS a1#x, col3#x AS a2#x] @@ -2360,7 +2360,7 @@ create or replace temporary view nt2 as select * from -- !query analysis CreateViewCommand `nt2`, select * from (values(1,1,true,true), (2,2,true,false), (3,3,false,false)) - as v(id, nt1_id, b1, b2), false, true, LocalTempView, true + as v(id, nt1_id, b1, b2), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, nt1_id#x, b1#x, b2#x] +- SubqueryAlias v +- Project [col1#x AS id#x, col2#x AS nt1_id#x, col3#x AS b1#x, col4#x AS b2#x] @@ -2374,7 +2374,7 @@ create or replace temporary view nt3 as select * from -- !query analysis CreateViewCommand `nt3`, select * from (values(1,1,true), (2,2,false), (3,3,true)) - as v(id, nt2_id, c1), false, true, LocalTempView, true + as v(id, nt2_id, c1), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, nt2_id#x, c1#x] +- SubqueryAlias v +- Project [col1#x AS id#x, col2#x AS nt2_id#x, col3#x AS c1#x] @@ -3050,7 +3050,7 @@ create or replace temporary view parent as select * from -- !query analysis CreateViewCommand `parent`, select * from (values (1, 10), (2, 20), (3, 30)) - as v(k, pd), false, true, LocalTempView, true + as v(k, pd), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, pd#x] +- SubqueryAlias v +- Project [col1#x AS k#x, col2#x AS pd#x] @@ -3064,7 +3064,7 @@ create or replace temporary view child as select * from -- !query analysis CreateViewCommand `child`, select * from (values (1, 100), (4, 400)) - as v(k, cd), false, true, LocalTempView, true + as v(k, cd), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, cd#x] +- SubqueryAlias v +- Project [col1#x AS k#x, col2#x AS cd#x] @@ -3189,7 +3189,7 @@ create or replace temporary view a as select * from -- !query analysis CreateViewCommand `a`, select * from (values (0), (1)) - as v(id), false, true, LocalTempView, true + as v(id), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x] +- SubqueryAlias v +- Project [col1#x AS id#x] @@ -3203,7 +3203,7 @@ create or replace temporary view b as select * from -- !query analysis CreateViewCommand `b`, select * from (values (0, 0), (1, NULL)) - as v(id, a_id), false, true, LocalTempView, true + as v(id, a_id), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, a_id#x] +- SubqueryAlias v +- Project [col1#x AS id#x, col2#x AS a_id#x] @@ -3261,7 +3261,7 @@ create or replace temporary view innertab as select * from -- !query analysis CreateViewCommand `innertab`, select * from (values (123L, 42L)) - as v(id, dat1), false, true, LocalTempView, true + as v(id, dat1), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#xL, dat1#xL] +- SubqueryAlias v +- Project [col1#xL AS id#xL, col2#xL AS dat1#xL] diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/limit.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/limit.sql.out index c25002d7a6be6..2a5062bd65b72 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/limit.sql.out @@ -129,7 +129,7 @@ CreateViewCommand `INT8_TBL`, SELECT * FROM (4567890123456789, 123), (4567890123456789, 4567890123456789), (4567890123456789, -4567890123456789)) - AS v(q1, q2), false, true, LocalTempView, true + AS v(q1, q2), false, true, LocalTempView, UNSUPPORTED, true +- Project [q1#xL, q2#xL] +- SubqueryAlias v +- Project [col1#xL AS q1#xL, col2#xL AS q2#xL] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/numeric.sql.out index 418e3b2626f89..6c2ae23291755 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/numeric.sql.out @@ -4775,21 +4775,21 @@ Project [EXP(cast(1234.5678 as double)) AS EXP(1234.5678)#x] select * from range(cast(0.0 as decimal(38, 18)), cast(4.0 as decimal(38, 18))) -- !query analysis Project [id#xL] -+- Range (0, 4, step=1, splits=None) ++- Range (0, 4, step=1) -- !query select * from range(cast(0.1 as decimal(38, 18)), cast(4.0 as decimal(38, 18)), cast(1.3 as decimal(38, 18))) -- !query analysis Project [id#xL] -+- Range (0, 4, step=1, splits=None) ++- Range (0, 4, step=1) -- !query select * from range(cast(4.0 as decimal(38, 18)), cast(-1.5 as decimal(38, 18)), cast(-2.2 as decimal(38, 18))) -- !query analysis Project [id#xL] -+- Range (4, -1, step=-2, splits=None) ++- Range (4, -1, step=-2) -- !query @@ -4894,14 +4894,14 @@ Project [LOG(cast(1.000016 as double), 8.45201E18) AS LOG(1.000016, 8.45201E18)# SELECT SUM(decimal(9999)) FROM range(1, 100001) -- !query analysis Aggregate [sum(cast(9999 as decimal(10,0))) AS sum(9999)#x] -+- Range (1, 100001, step=1, splits=None) ++- Range (1, 100001, step=1) -- !query SELECT SUM(decimal(-9999)) FROM range(1, 100001) -- !query analysis Aggregate [sum(cast(-9999 as decimal(10,0))) AS sum(-9999)#x] -+- Range (1, 100001, step=1, splits=None) ++- Range (1, 100001, step=1) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/select.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/select.sql.out index 875673dd72ec0..ed15cce62dc78 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/select.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/select.sql.out @@ -2,7 +2,7 @@ -- !query create or replace temporary view onek2 as select * from onek -- !query analysis -CreateViewCommand `onek2`, select * from onek, false, true, LocalTempView, true +CreateViewCommand `onek2`, select * from onek, false, true, LocalTempView, UNSUPPORTED, true +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x] +- SubqueryAlias spark_catalog.default.onek +- Relation spark_catalog.default.onek[unique1#x,unique2#x,two#x,four#x,ten#x,twenty#x,hundred#x,thousand#x,twothousand#x,fivethous#x,tenthous#x,odd#x,even#x,stringu1#x,stringu2#x,string4#x] parquet @@ -23,7 +23,7 @@ CreateViewCommand `INT8_TBL`, select * from values 
(cast('4567890123456789' as bigint),cast('123' as bigint)), (cast(+4567890123456789 as bigint),cast('4567890123456789' as bigint)), (cast('+4567890123456789' as bigint),cast('-4567890123456789' as bigint)) - as INT8_TBL(q1, q2), false, true, LocalTempView, true + as INT8_TBL(q1, q2), false, true, LocalTempView, UNSUPPORTED, true +- Project [q1#xL, q2#xL] +- SubqueryAlias INT8_TBL +- LocalRelation [q1#xL, q2#xL] @@ -240,7 +240,7 @@ Union false, false CREATE OR REPLACE TEMPORARY VIEW foo AS SELECT * FROM (values(42),(3),(10),(7),(null),(null),(1)) as foo (f1) -- !query analysis -CreateViewCommand `foo`, SELECT * FROM (values(42),(3),(10),(7),(null),(null),(1)) as foo (f1), false, true, LocalTempView, true +CreateViewCommand `foo`, SELECT * FROM (values(42),(3),(10),(7),(null),(null),(1)) as foo (f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias foo +- Project [col1#x AS f1#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/select_distinct.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/select_distinct.sql.out index bd653048a2ed7..632df1ed53fd4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/select_distinct.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/select_distinct.sql.out @@ -5,7 +5,7 @@ SELECT two, stringu1, ten, string4 FROM onek -- !query analysis CreateViewCommand `tmp`, SELECT two, stringu1, ten, string4 -FROM onek, false, true, LocalTempView, true +FROM onek, false, true, LocalTempView, UNSUPPORTED, true +- Project [two#x, stringu1#x, ten#x, string4#x] +- SubqueryAlias spark_catalog.default.onek +- Relation spark_catalog.default.onek[unique1#x,unique2#x,two#x,four#x,ten#x,twenty#x,hundred#x,thousand#x,twothousand#x,fivethous#x,tenthous#x,odd#x,even#x,stringu1#x,stringu2#x,string4#x] parquet @@ -88,7 +88,7 @@ CREATE OR REPLACE TEMPORARY VIEW disttable AS SELECT * FROM -- !query analysis CreateViewCommand `disttable`, SELECT * FROM (VALUES (1), (2), (3), (NULL)) - AS v(f1), false, true, LocalTempView, true + AS v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/text.sql.out index 7fb0649f5e778..474c2401f40d0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/text.sql.out @@ -137,7 +137,7 @@ Sort [i#xL ASC NULLS FIRST], true +- Project [i#xL, left(ahoj, cast(i#xL as int)) AS left(ahoj, i)#x, right(ahoj, cast(i#xL as int)) AS right(ahoj, i)#x] +- SubqueryAlias t +- Project [id#xL AS i#xL] - +- Range (-5, 6, step=1, splits=None) + +- Range (-5, 6, step=1) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/union.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/union.sql.out index 2865d35a3d6a3..05002a7c45386 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/union.sql.out @@ -6,7 +6,7 @@ CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM -- !query analysis CreateViewCommand `INT4_TBL`, SELECT * FROM (VALUES (0), (123456), (-123456), (2147483647), (-2147483647)) - AS v(f1), false, true, LocalTempView, true + AS 
v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -30,7 +30,7 @@ CreateViewCommand `INT8_TBL`, SELECT * FROM (4567890123456789, 123), (4567890123456789, 4567890123456789), (4567890123456789, -4567890123456789)) - AS v(q1, q2), false, true, LocalTempView, true + AS v(q1, q2), false, true, LocalTempView, UNSUPPORTED, true +- Project [q1#xL, q2#xL] +- SubqueryAlias v +- Project [col1#xL AS q1#xL, col2#xL AS q2#xL] @@ -46,7 +46,7 @@ CREATE OR REPLACE TEMPORARY VIEW FLOAT8_TBL AS SELECT * FROM CreateViewCommand `FLOAT8_TBL`, SELECT * FROM (VALUES (0.0), (-34.84), (-1004.30), (CAST('-1.2345678901234e+200' AS DOUBLE)), (CAST('-1.2345678901234e-200' AS DOUBLE))) - AS v(f1), false, true, LocalTempView, true + AS v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -1059,9 +1059,9 @@ select * from range(1,5) union select * from range(1,3) Distinct +- Union false, false :- Project [id#xL] - : +- Range (1, 5, step=1, splits=None) + : +- Range (1, 5, step=1) +- Project [id#xL] - +- Range (1, 3, step=1, splits=None) + +- Range (1, 3, step=1) -- !query @@ -1069,9 +1069,9 @@ select * from range(1,6) union all select * from range(1,4) -- !query analysis Union false, false :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1079,9 +1079,9 @@ select * from range(1,6) intersect select * from range(1,4) -- !query analysis Intersect false :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1089,9 +1089,9 @@ select * from range(1,6) intersect all select * from range(1,4) -- !query analysis Intersect All true :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1099,9 +1099,9 @@ select * from range(1,6) except select * from range(1,4) -- !query analysis Except false :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1109,9 +1109,9 @@ select * from range(1,6) except all select * from range(1,4) -- !query analysis Except All true :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1120,9 +1120,9 @@ select * from range(1,6) union select * from range(1,4) Distinct +- Union false, false :- Project [id#xL] - : +- Range (1, 6, step=1, splits=None) + : +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1130,9 +1130,9 @@ select * from range(1,6) union all select * from range(1,4) -- !query analysis Union false, false :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1140,9 +1140,9 @@ select * from range(1,6) intersect select * from range(1,4) -- !query analysis Intersect false :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, 
step=1) -- !query @@ -1150,9 +1150,9 @@ select * from range(1,6) intersect all select * from range(1,4) -- !query analysis Intersect All true :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1160,9 +1160,9 @@ select * from range(1,6) except select * from range(1,4) -- !query analysis Except false :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1170,9 +1170,9 @@ select * from range(1,6) except all select * from range(1,4) -- !query analysis Except All true :- Project [id#xL] -: +- Range (1, 6, step=1, splits=None) +: +- Range (1, 6, step=1) +- Project [id#xL] - +- Range (1, 4, step=1, splits=None) + +- Range (1, 4, step=1) -- !query @@ -1223,7 +1223,7 @@ Sort [x#xL ASC NULLS FIRST], true +- Distinct +- Union false, false :- Project [1 AS t#x, id#xL AS x#xL] - : +- Range (1, 11, step=1, splits=None) + : +- Range (1, 11, step=1) +- Project [t#x, cast(x#x as bigint) AS x#xL] +- Project [2 AS t#x, 4 AS x#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part1.sql.out index 40355dbd7b9b9..6cdf71e33c73f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part1.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1 -- !query analysis -CreateViewCommand `tenk2`, SELECT * FROM tenk1, false, false, LocalTempView, true +CreateViewCommand `tenk2`, SELECT * FROM tenk1, false, false, LocalTempView, UNSUPPORTED, true +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x] +- SubqueryAlias spark_catalog.default.tenk1 +- Relation spark_catalog.default.tenk1[unique1#x,unique2#x,two#x,four#x,ten#x,twenty#x,hundred#x,thousand#x,twothousand#x,fivethous#x,tenthous#x,odd#x,even#x,stringu1#x,stringu2#x,string4#x] parquet @@ -343,7 +343,7 @@ CreateViewCommand `int4_tbl`, select * from values (-123456), (2147483647), (-2147483647) - as int4_tbl(f1), false, false, LocalTempView, true + as int4_tbl(f1), false, false, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias int4_tbl +- LocalRelation [f1#x] @@ -522,13 +522,13 @@ SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 follow FROM range(1, 11) i -- !query analysis CreateViewCommand `v_window`, SELECT i.id, sum(i.id) over (order by i.id rows between 1 preceding and 1 following) as sum_rows -FROM range(1, 11) i, false, false, LocalTempView, true +FROM range(1, 11) i, false, false, LocalTempView, UNSUPPORTED, true +- Project [id#xL, sum_rows#xL] +- Project [id#xL, sum_rows#xL, sum_rows#xL] +- Window [sum(id#xL) windowspecdefinition(id#xL ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, 1)) AS sum_rows#xL], [id#xL ASC NULLS FIRST] +- Project [id#xL] +- SubqueryAlias i - +- Range (1, 11, step=1, splits=None) + +- Range (1, 11, step=1) -- !query @@ -543,7 +543,7 @@ Project [id#xL, sum_rows#xL] +- Window [sum(id#xL) windowspecdefinition(id#xL ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, 1)) AS sum_rows#xL], [id#xL ASC NULLS FIRST] +- 
Project [id#xL] +- SubqueryAlias i - +- Range (1, 11, step=1, splits=None) + +- Range (1, 11, step=1) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part2.sql.out index dfc4fdde71748..cdcd563de4f6a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part2.sql.out @@ -90,7 +90,7 @@ Project [id#xL, y#xL, first(y) OVER (ORDER BY id ASC NULLS FIRST RANGE BETWEEN ( :- Union false, false : :- Project [id#xL, id#xL AS y#xL] : : +- SubqueryAlias x - : : +- Range (1, 6, step=1, splits=None) + : : +- Range (1, 6, step=1) : +- Project [cast(NULL#x as bigint) AS NULL#xL, cast(42#x as bigint) AS 42#xL] : +- Project [null AS NULL#x, 42 AS 42#x] : +- OneRowRelation @@ -119,7 +119,7 @@ Project [id#xL, y#xL, first(y) OVER (ORDER BY id ASC NULLS LAST RANGE BETWEEN (- :- Union false, false : :- Project [id#xL, id#xL AS y#xL] : : +- SubqueryAlias x - : : +- Range (1, 6, step=1, splits=None) + : : +- Range (1, 6, step=1) : +- Project [cast(NULL#x as bigint) AS NULL#xL, cast(42#x as bigint) AS 42#xL] : +- Project [null AS NULL#x, 42 AS 42#x] : +- OneRowRelation @@ -148,7 +148,7 @@ Project [id#xL, y#xL, first(y) OVER (ORDER BY id DESC NULLS FIRST RANGE BETWEEN :- Union false, false : :- Project [id#xL, id#xL AS y#xL] : : +- SubqueryAlias x - : : +- Range (1, 6, step=1, splits=None) + : : +- Range (1, 6, step=1) : +- Project [cast(NULL#x as bigint) AS NULL#xL, cast(42#x as bigint) AS 42#xL] : +- Project [null AS NULL#x, 42 AS 42#x] : +- OneRowRelation @@ -177,7 +177,7 @@ Project [id#xL, y#xL, first(y) OVER (ORDER BY id DESC NULLS LAST RANGE BETWEEN ( :- Union false, false : :- Project [id#xL, id#xL AS y#xL] : : +- SubqueryAlias x - : : +- Range (1, 6, step=1, splits=None) + : : +- Range (1, 6, step=1) : +- Project [cast(NULL#x as bigint) AS NULL#xL, cast(42#x as bigint) AS 42#xL] : +- Project [null AS NULL#x, 42 AS 42#x] : +- OneRowRelation @@ -195,7 +195,7 @@ Project [id#xL, last(id) OVER (ORDER BY id ASC NULLS FIRST RANGE BETWEEN CURRENT +- Window [last(id#xL, false) windowspecdefinition(id#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, currentrow$(), cast(2147450884 as bigint))) AS last(id) OVER (ORDER BY id ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 2147450884 FOLLOWING)#xL], [id#xL ASC NULLS FIRST] +- Project [id#xL] +- SubqueryAlias x - +- Range (32764, 32767, step=1, splits=None) + +- Range (32764, 32767, step=1) -- !query @@ -207,7 +207,7 @@ Project [id#xL, last(id) OVER (ORDER BY id DESC NULLS LAST RANGE BETWEEN CURRENT +- Window [last(id#xL, false) windowspecdefinition(id#xL DESC NULLS LAST, specifiedwindowframe(RangeFrame, currentrow$(), cast(2147450885 as bigint))) AS last(id) OVER (ORDER BY id DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 2147450885 FOLLOWING)#xL], [id#xL DESC NULLS LAST] +- Project [id#xL] +- SubqueryAlias x - +- Range (-32766, -32765, step=1, splits=None) + +- Range (-32766, -32765, step=1) -- !query @@ -219,7 +219,7 @@ Project [id#xL, last(id) OVER (ORDER BY id ASC NULLS FIRST RANGE BETWEEN CURRENT +- Window [last(id#xL, false) windowspecdefinition(id#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, currentrow$(), cast(4 as bigint))) AS last(id) OVER (ORDER BY id ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING)#xL], [id#xL ASC NULLS FIRST] +- Project [id#xL] +- SubqueryAlias x - +- Range 
(2147483644, 2147483647, step=1, splits=None) + +- Range (2147483644, 2147483647, step=1) -- !query @@ -231,7 +231,7 @@ Project [id#xL, last(id) OVER (ORDER BY id DESC NULLS LAST RANGE BETWEEN CURRENT +- Window [last(id#xL, false) windowspecdefinition(id#xL DESC NULLS LAST, specifiedwindowframe(RangeFrame, currentrow$(), cast(5 as bigint))) AS last(id) OVER (ORDER BY id DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 5 FOLLOWING)#xL], [id#xL DESC NULLS LAST] +- Project [id#xL] +- SubqueryAlias x - +- Range (-2147483646, -2147483645, step=1, splits=None) + +- Range (-2147483646, -2147483645, step=1) -- !query @@ -243,7 +243,7 @@ Project [id#xL, last(id) OVER (ORDER BY id ASC NULLS FIRST RANGE BETWEEN CURRENT +- Window [last(id#xL, false) windowspecdefinition(id#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, currentrow$(), cast(4 as bigint))) AS last(id) OVER (ORDER BY id ASC NULLS FIRST RANGE BETWEEN CURRENT ROW AND 4 FOLLOWING)#xL], [id#xL ASC NULLS FIRST] +- Project [id#xL] +- SubqueryAlias x - +- Range (9223372036854775804, 9223372036854775807, step=1, splits=None) + +- Range (9223372036854775804, 9223372036854775807, step=1) -- !query @@ -255,7 +255,7 @@ Project [id#xL, last(id) OVER (ORDER BY id DESC NULLS LAST RANGE BETWEEN CURRENT +- Window [last(id#xL, false) windowspecdefinition(id#xL DESC NULLS LAST, specifiedwindowframe(RangeFrame, currentrow$(), cast(5 as bigint))) AS last(id) OVER (ORDER BY id DESC NULLS LAST RANGE BETWEEN CURRENT ROW AND 5 FOLLOWING)#xL], [id#xL DESC NULLS LAST] +- Project [id#xL] +- SubqueryAlias x - +- Range (-9223372036854775806, -9223372036854775805, step=1, splits=None) + +- Range (-9223372036854775806, -9223372036854775805, step=1) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out index 7609f898ebf85..9f2dd9bcb1783 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1 -- !query analysis -CreateViewCommand `tenk2`, SELECT * FROM tenk1, false, false, LocalTempView, true +CreateViewCommand `tenk2`, SELECT * FROM tenk1, false, false, LocalTempView, UNSUPPORTED, true +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x] +- SubqueryAlias spark_catalog.default.tenk1 +- Relation spark_catalog.default.tenk1[unique1#x,unique2#x,two#x,four#x,ten#x,twenty#x,hundred#x,thousand#x,twothousand#x,fivethous#x,tenthous#x,odd#x,even#x,stringu1#x,stringu2#x,string4#x] parquet @@ -93,7 +93,7 @@ WithCTE : +- SubqueryAlias cte : +- Project [id#xL AS x#xL] : +- Project [id#xL] -: +- Range (1, 36, step=2, splits=None) +: +- Range (1, 36, step=2) +- Project [x#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL] +- Project [x#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL] +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, 1)) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] @@ -115,7 +115,7 @@ WithCTE : +- SubqueryAlias cte 
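
The window_part2/window_part3 hunks around here record analyzed plans in which Range no longer carries a splits field. As a hedged aside, a minimal Scala sketch of how such an analyzed-plan string can be reproduced against a throwaway local session (nothing below is part of the patch; real output prints numeric expression IDs where these golden files show normalized #x/#xL placeholders):

  // Sketch only: print the analyzed (pre-optimization) plan for one of the
  // set-operation queries that appear in these .sql.out files.
  import org.apache.spark.sql.SparkSession

  val spark = SparkSession.builder().master("local[1]").appName("analyzed-plan").getOrCreate()
  val df = spark.sql("select * from range(1,6) intersect select * from range(1,4)")
  println(df.queryExecution.analyzed.treeString)  // an Intersect over two Range nodes
  spark.stop()
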
: +- Project [id#xL AS x#xL] : +- Project [id#xL] -: +- Range (1, 36, step=2, splits=None) +: +- Range (1, 36, step=2) +- Project [x#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL] +- Project [x#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL] +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, cast(-1 as bigint), cast(1 as bigint))) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] @@ -148,7 +148,7 @@ WithCTE : : +- Project [1 AS 1#x] : : +- OneRowRelation : +- Project [id#xL] -: +- Range (5, 50, step=2, splits=None) +: +- Range (5, 50, step=2) +- Project [x#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL] +- Project [x#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL] +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, 1)) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] @@ -181,7 +181,7 @@ WithCTE : : +- Project [1 AS 1#x] : : +- OneRowRelation : +- Project [id#xL] -: +- Range (5, 50, step=2, splits=None) +: +- Range (5, 50, step=2) +- Project [x#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL] +- Project [x#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL, sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL] +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, cast(-1 as bigint), cast(1 as bigint))) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/pred-pushdown.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/pred-pushdown.sql.out index 9e0c055db6dc7..74d1c69dcc532 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/pred-pushdown.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/pred-pushdown.sql.out @@ -2,7 +2,7 @@ -- !query CREATE OR REPLACE TEMPORARY VIEW tbl_a AS VALUES (1, 1), (2, 1), (3, 6) AS T(c1, c2) -- !query analysis -CreateViewCommand `tbl_a`, VALUES (1, 1), (2, 1), (3, 6) AS T(c1, c2), false, true, LocalTempView, true +CreateViewCommand `tbl_a`, VALUES (1, 1), (2, 1), (3, 6) AS T(c1, c2), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [c1#x, c2#x] @@ -10,7 +10,7 @@ CreateViewCommand `tbl_a`, VALUES (1, 1), (2, 1), (3, 6) AS T(c1, c2), false, tr -- !query CREATE OR REPLACE TEMPORARY VIEW tbl_b AS VALUES 1 AS T(c1) -- !query analysis -CreateViewCommand `tbl_b`, VALUES 1 AS T(c1), false, true, LocalTempView, true +CreateViewCommand `tbl_b`, VALUES 1 AS T(c1), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [c1#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/predicate-functions.sql.out index 772e643027b1e..7e720995c44b4 100644 --- 
a/sql/core/src/test/resources/sql-tests/analyzer-results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/predicate-functions.sql.out @@ -1,4 +1,151 @@ -- Automatically generated by SQLQueryTestSuite +-- !query +select not true +-- !query analysis +Project [NOT true AS (NOT true)#x] ++- OneRowRelation + + +-- !query +select ! true +-- !query analysis +Project [NOT true AS (NOT true)#x] ++- OneRowRelation + + +-- !query +select not null::boolean +-- !query analysis +Project [NOT cast(null as boolean) AS (NOT CAST(NULL AS BOOLEAN))#x] ++- OneRowRelation + + +-- !query +select true and true +-- !query analysis +Project [(true AND true) AS (true AND true)#x] ++- OneRowRelation + + +-- !query +select true and false +-- !query analysis +Project [(true AND false) AS (true AND false)#x] ++- OneRowRelation + + +-- !query +select false and true +-- !query analysis +Project [(false AND true) AS (false AND true)#x] ++- OneRowRelation + + +-- !query +select false and false +-- !query analysis +Project [(false AND false) AS (false AND false)#x] ++- OneRowRelation + + +-- !query +select true and null::boolean +-- !query analysis +Project [(true AND cast(null as boolean)) AS (true AND CAST(NULL AS BOOLEAN))#x] ++- OneRowRelation + + +-- !query +select false and null::boolean +-- !query analysis +Project [(false AND cast(null as boolean)) AS (false AND CAST(NULL AS BOOLEAN))#x] ++- OneRowRelation + + +-- !query +select null::boolean and true +-- !query analysis +Project [(cast(null as boolean) AND true) AS (CAST(NULL AS BOOLEAN) AND true)#x] ++- OneRowRelation + + +-- !query +select null::boolean and false +-- !query analysis +Project [(cast(null as boolean) AND false) AS (CAST(NULL AS BOOLEAN) AND false)#x] ++- OneRowRelation + + +-- !query +select null::boolean and null::boolean +-- !query analysis +Project [(cast(null as boolean) AND cast(null as boolean)) AS (CAST(NULL AS BOOLEAN) AND CAST(NULL AS BOOLEAN))#x] ++- OneRowRelation + + +-- !query +select true or true +-- !query analysis +Project [(true OR true) AS (true OR true)#x] ++- OneRowRelation + + +-- !query +select true or false +-- !query analysis +Project [(true OR false) AS (true OR false)#x] ++- OneRowRelation + + +-- !query +select false or true +-- !query analysis +Project [(false OR true) AS (false OR true)#x] ++- OneRowRelation + + +-- !query +select false or false +-- !query analysis +Project [(false OR false) AS (false OR false)#x] ++- OneRowRelation + + +-- !query +select true or null::boolean +-- !query analysis +Project [(true OR cast(null as boolean)) AS (true OR CAST(NULL AS BOOLEAN))#x] ++- OneRowRelation + + +-- !query +select false or null::boolean +-- !query analysis +Project [(false OR cast(null as boolean)) AS (false OR CAST(NULL AS BOOLEAN))#x] ++- OneRowRelation + + +-- !query +select null::boolean or true +-- !query analysis +Project [(cast(null as boolean) OR true) AS (CAST(NULL AS BOOLEAN) OR true)#x] ++- OneRowRelation + + +-- !query +select null::boolean or false +-- !query analysis +Project [(cast(null as boolean) OR false) AS (CAST(NULL AS BOOLEAN) OR false)#x] ++- OneRowRelation + + +-- !query +select null::boolean or null::boolean +-- !query analysis +Project [(cast(null as boolean) OR cast(null as boolean)) AS (CAST(NULL AS BOOLEAN) OR CAST(NULL AS BOOLEAN))#x] ++- OneRowRelation + + -- !query select 1 = 1 -- !query analysis @@ -450,3 +597,50 @@ Project [NOT between(to_timestamp(2022-12-26 00:00:01, None, TimestampType, Some select rand(123) not between 0.1 AND 0.2 -- !query 
analysis [Analyzer test output redacted due to nondeterminism] + + +-- !query +set spark.sql.legacy.bangEqualsNot=true +-- !query analysis +SetCommand (spark.sql.legacy.bangEqualsNot,Some(true)) + + +-- !query +select 1 ! between 0 and 2 +-- !query analysis +Project [NOT between(1, 0, 2) AS (NOT between(1, 0, 2))#x] ++- OneRowRelation + + +-- !query +select 1 ! in (3, 4) +-- !query analysis +Project [NOT 1 IN (3,4) AS (NOT (1 IN (3, 4)))#x] ++- OneRowRelation + + +-- !query +select 'hello' ! like 'world' +-- !query analysis +Project [NOT hello LIKE world AS (NOT hello LIKE world)#x] ++- OneRowRelation + + +-- !query +select 1 is ! null +-- !query analysis +Project [isnotnull(1) AS (1 IS NOT NULL)#x] ++- OneRowRelation + + +-- !query +select false is ! true +-- !query analysis +Project [NOT (false <=> true) AS (NOT (false <=> true))#x] ++- OneRowRelation + + +-- !query +set spark.sql.legacy.bangEqualsNot=false +-- !query analysis +SetCommand (spark.sql.legacy.bangEqualsNot,Some(false)) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/query_regex_column.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/query_regex_column.sql.out index 6c14323382889..d62e4b7921363 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/query_regex_column.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/query_regex_column.sql.out @@ -12,7 +12,7 @@ AS testData(key, value1, value2) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES (1, "1", "11"), (2, "2", "22"), (3, "3", "33"), (4, "4", "44"), (5, "5", "55"), (6, "6", "66") -AS testData(key, value1, value2), false, true, LocalTempView, true +AS testData(key, value1, value2), false, true, LocalTempView, UNSUPPORTED, true +- Project [key#x, value1#x, value2#x] +- SubqueryAlias testData +- LocalRelation [key#x, value1#x, value2#x] @@ -25,7 +25,7 @@ AS testData2(A, B, c, d) -- !query analysis CreateViewCommand `testData2`, SELECT * FROM VALUES (1, 1, 1, 2), (1, 2, 1, 2), (2, 1, 2, 3), (2, 2, 2, 3), (3, 1, 3, 4), (3, 2, 3, 4) -AS testData2(A, B, c, d), false, true, LocalTempView, true +AS testData2(A, B, c, d), false, true, LocalTempView, UNSUPPORTED, true +- Project [A#x, B#x, c#x, d#x] +- SubqueryAlias testData2 +- LocalRelation [A#x, B#x, c#x, d#x] @@ -349,7 +349,7 @@ AS testdata3(a, b) -- !query analysis CreateViewCommand `testdata3`, SELECT * FROM VALUES (0, 1), (1, 2), (2, 3), (3, 4) -AS testdata3(a, b), false, true, LocalTempView, true +AS testdata3(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testdata3 +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out index 24119599c532d..e02562e29835f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/selectExcept.sql.out @@ -18,7 +18,7 @@ CreateViewCommand `tbl_view`, SELECT * FROM VALUES (50, "name5", named_struct("f1", 5, "s2", named_struct("f2", 505, "f3", "e"))), (60, "name6", named_struct("f1", 6, "s2", named_struct("f2", 606, "f3", "f"))), (70, "name7", named_struct("f1", 7, "s2", named_struct("f2", 707, "f3", "g"))) -AS tbl_view(id, name, data), false, false, LocalTempView, true +AS tbl_view(id, name, data), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, name#x, data#x] +- SubqueryAlias tbl_view +- LocalRelation [id#x, name#x, 
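
The new predicate-functions cases just above exercise '!' as a prefix synonym for NOT behind a legacy flag. A hedged sketch of driving the same flag from Scala, assuming the local `spark` session from the earlier sketch (or a spark-shell session); the configuration key and the query are taken verbatim from the SET statements and queries above:

  // Toggle the legacy flag shown above, then run one of the same queries;
  // with the flag off, the '!' spelling is expected to be rejected by the parser.
  spark.conf.set("spark.sql.legacy.bangEqualsNot", "true")
  spark.sql("select 1 ! between 0 and 2").show()
  spark.conf.set("spark.sql.legacy.bangEqualsNot", "false")
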
data#x] @@ -351,7 +351,7 @@ DropTempViewCommand tbl_view -- !query CREATE TEMPORARY VIEW v1 AS VALUES (1, 2, NULL, 4, 5) AS T(c1, c2, c3, c4, c5) -- !query analysis -CreateViewCommand `v1`, VALUES (1, 2, NULL, 4, 5) AS T(c1, c2, c3, c4, c5), false, false, LocalTempView, true +CreateViewCommand `v1`, VALUES (1, 2, NULL, 4, 5) AS T(c1, c2, c3, c4, c5), false, false, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [c1#x, c2#x, c3#x, c4#x, c5#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/show-create-table.sql.out index b44edb8dc4d96..b1c3ad59e1515 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/show-create-table.sql.out @@ -217,7 +217,7 @@ CreateDataSourceTableCommand `spark_catalog`.`default`.`tbl`, false CREATE VIEW view_SPARK_30302 (aaa, bbb) AS SELECT a, b FROM tbl -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`view_SPARK_30302`, [(aaa,None), (bbb,None)], SELECT a, b FROM tbl, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`view_SPARK_30302`, [(aaa,None), (bbb,None)], SELECT a, b FROM tbl, false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x] +- SubqueryAlias spark_catalog.default.tbl +- Relation spark_catalog.default.tbl[a#x,b#x,c#x] parquet @@ -246,7 +246,7 @@ CREATE VIEW view_SPARK_30302 (aaa COMMENT 'comment with \'quoted text\' for aaa' COMMENT 'This is a comment with \'quoted text\' for view' AS SELECT a, b FROM tbl -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`view_SPARK_30302`, [(aaa,Some(comment with 'quoted text' for aaa)), (bbb,None)], This is a comment with 'quoted text' for view, SELECT a, b FROM tbl, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`view_SPARK_30302`, [(aaa,Some(comment with 'quoted text' for aaa)), (bbb,None)], This is a comment with 'quoted text' for view, SELECT a, b FROM tbl, false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x] +- SubqueryAlias spark_catalog.default.tbl +- Relation spark_catalog.default.tbl[a#x,b#x,c#x] parquet @@ -275,7 +275,7 @@ CREATE VIEW view_SPARK_30302 (aaa, bbb) TBLPROPERTIES ('a' = '1', 'b' = '2') AS SELECT a, b FROM tbl -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`view_SPARK_30302`, [(aaa,None), (bbb,None)], [a=1, b=2], SELECT a, b FROM tbl, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`view_SPARK_30302`, [(aaa,None), (bbb,None)], [a=1, b=2], SELECT a, b FROM tbl, false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x] +- SubqueryAlias spark_catalog.default.tbl +- Relation spark_catalog.default.tbl[a#x,b#x,c#x] parquet diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/show-tables.sql.out index ce5f7995f5d13..a86cc72f0863c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/show-tables.sql.out @@ -40,7 +40,7 @@ CreateTempViewUsing [tableIdent:`show_t3` StructType(StructField(e,IntegerType,t -- !query CREATE GLOBAL TEMP VIEW show_t4 AS SELECT 1 as col1 -- !query analysis -CreateViewCommand `show_t4`, SELECT 1 as col1, false, false, GlobalTempView, true +CreateViewCommand `show_t4`, SELECT 1 as col1, false, false, 
GlobalTempView, UNSUPPORTED, true +- Project [1 AS col1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/show-tblproperties.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/show-tblproperties.sql.out index 0ea52d7d1e8cf..f4b29105b58a7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/show-tblproperties.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/show-tblproperties.sql.out @@ -37,7 +37,7 @@ DropTable false, false -- !query CREATE VIEW view TBLPROPERTIES('p1'='v1', 'p2'='v2') AS SELECT 1 AS c1 -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`view`, [p1=v1, p2=v2], SELECT 1 AS c1, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`view`, [p1=v1, p2=v2], SELECT 1 AS c1, false, false, PersistedView, COMPENSATION, true +- Project [1 AS c1#x] +- OneRowRelation @@ -69,7 +69,7 @@ DropTableCommand `spark_catalog`.`default`.`view`, false, true, false -- !query CREATE TEMPORARY VIEW tv AS SELECT 1 AS c1 -- !query analysis -CreateViewCommand `tv`, SELECT 1 AS c1, false, false, LocalTempView, true +CreateViewCommand `tv`, SELECT 1 AS c1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS c1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/show-views.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/show-views.sql.out index c8f2c6f9cc029..ed3690ec5c6a3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/show-views.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/show-views.sql.out @@ -22,7 +22,7 @@ CreateDataSourceTableCommand `spark_catalog`.`showdb`.`tbl`, false -- !query CREATE VIEW view_1 AS SELECT * FROM tbl -- !query analysis -CreateViewCommand `spark_catalog`.`showdb`.`view_1`, SELECT * FROM tbl, false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`showdb`.`view_1`, SELECT * FROM tbl, false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x, c#x, d#x] +- SubqueryAlias spark_catalog.showdb.tbl +- Relation spark_catalog.showdb.tbl[a#x,b#x,c#x,d#x] parquet @@ -31,7 +31,7 @@ CreateViewCommand `spark_catalog`.`showdb`.`view_1`, SELECT * FROM tbl, false, f -- !query CREATE VIEW view_2 AS SELECT * FROM tbl WHERE c='a' -- !query analysis -CreateViewCommand `spark_catalog`.`showdb`.`view_2`, SELECT * FROM tbl WHERE c='a', false, false, PersistedView, true +CreateViewCommand `spark_catalog`.`showdb`.`view_2`, SELECT * FROM tbl WHERE c='a', false, false, PersistedView, COMPENSATION, true +- Project [a#x, b#x, c#x, d#x] +- Filter (c#x = a) +- SubqueryAlias spark_catalog.showdb.tbl @@ -41,7 +41,7 @@ CreateViewCommand `spark_catalog`.`showdb`.`view_2`, SELECT * FROM tbl WHERE c=' -- !query CREATE GLOBAL TEMP VIEW view_3 AS SELECT 1 as col1 -- !query analysis -CreateViewCommand `view_3`, SELECT 1 as col1, false, false, GlobalTempView, true +CreateViewCommand `view_3`, SELECT 1 as col1, false, false, GlobalTempView, UNSUPPORTED, true +- Project [1 AS col1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/show_columns.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/show_columns.sql.out index 55c744a8c0726..27e75187cdba7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/show_columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/show_columns.sql.out @@ -34,7 +34,7 @@ CreateTempViewUsing [tableIdent:`showColumn3` StructType(StructField(col3,Intege -- 
!query CREATE GLOBAL TEMP VIEW showColumn4 AS SELECT 1 as col1, 'abc' as `col 5` -- !query analysis -CreateViewCommand `showColumn4`, SELECT 1 as col1, 'abc' as `col 5`, false, false, GlobalTempView, true +CreateViewCommand `showColumn4`, SELECT 1 as col1, 'abc' as `col 5`, false, false, GlobalTempView, UNSUPPORTED, true +- Project [1 AS col1#x, abc AS col 5#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-compatibility-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-compatibility-functions.sql.out index f80290c5ab348..a18e4ede957cf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-compatibility-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-compatibility-functions.sql.out @@ -102,7 +102,7 @@ org.apache.spark.sql.AnalysisException -- !query CREATE TEMPORARY VIEW tempView1 AS VALUES (1, NAMED_STRUCT('col1', 'gamma', 'col2', 'delta')) AS T(id, st) -- !query analysis -CreateViewCommand `tempView1`, VALUES (1, NAMED_STRUCT('col1', 'gamma', 'col2', 'delta')) AS T(id, st), false, false, LocalTempView, true +CreateViewCommand `tempView1`, VALUES (1, NAMED_STRUCT('col1', 'gamma', 'col2', 'delta')) AS T(id, st), false, false, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [id#x, st#x] @@ -122,4 +122,4 @@ Aggregate [nvl(st#x.col1, value)], [nvl(st#x.col1, value) AS nvl(st.col1, value) SELECT nullif(SUM(id), 0) from range(5) -- !query analysis Aggregate [nullif(sum(id#xL), 0) AS nullif(sum(id), 0)#xL] -+- Range (0, 5, step=1, splits=None) ++- Range (0, 5, step=1) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out index 6a6ffe85ad592..f5ce5ed2e8b6e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/sql-session-variables.sql.out @@ -498,7 +498,7 @@ SELECT (SELECT MAX(id) FROM RANGE(10) WHERE id < title) FROM VALUES 1, 2 AS t(ti Project [scalar-subquery#x [title#x] AS scalarsubquery(title)#xL] : +- Aggregate [max(id#xL) AS max(id)#xL] : +- Filter (id#xL < cast(outer(title#x) as bigint)) -: +- Range (0, 10, step=1, splits=None) +: +- Range (0, 10, step=1) +- SubqueryAlias t +- LocalRelation [title#x] @@ -2060,7 +2060,7 @@ WithCTE -- !query CREATE OR REPLACE TEMPORARY VIEW v AS SELECT var1 AS c1 -- !query analysis -CreateViewCommand `v`, SELECT var1 AS c1, false, true, LocalTempView, true +CreateViewCommand `v`, SELECT var1 AS c1, false, true, LocalTempView, UNSUPPORTED, true +- Project [variablereference(system.session.var1=1) AS c1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out index 7ffd3cbd8bac6..98664dedf820c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out @@ -384,21 +384,21 @@ Project [btrim(xyxtrimyyx, xy) AS btrim(xyxtrimyyx, xy)#x] -- !query SELECT btrim(encode(" xyz ", 'utf-8')) -- !query analysis -Project [btrim(encode( xyz , utf-8, false)) AS btrim(encode( xyz , utf-8))#x] +Project [btrim(encode( xyz , utf-8)) AS btrim(encode( xyz , utf-8))#x] +- OneRowRelation -- !query SELECT btrim(encode('yxTomxx', 'utf-8'), encode('xyz', 
'utf-8')) -- !query analysis -Project [btrim(encode(yxTomxx, utf-8, false), encode(xyz, utf-8, false)) AS btrim(encode(yxTomxx, utf-8), encode(xyz, utf-8))#x] +Project [btrim(encode(yxTomxx, utf-8), encode(xyz, utf-8)) AS btrim(encode(yxTomxx, utf-8), encode(xyz, utf-8))#x] +- OneRowRelation -- !query SELECT btrim(encode('xxxbarxxx', 'utf-8'), encode('x', 'utf-8')) -- !query analysis -Project [btrim(encode(xxxbarxxx, utf-8, false), encode(x, utf-8, false)) AS btrim(encode(xxxbarxxx, utf-8), encode(x, utf-8))#x] +Project [btrim(encode(xxxbarxxx, utf-8), encode(x, utf-8)) AS btrim(encode(xxxbarxxx, utf-8), encode(x, utf-8))#x] +- OneRowRelation @@ -649,14 +649,14 @@ SetCommand (spark.sql.legacy.javaCharsets,Some(true)) -- !query select encode('hello', 'WINDOWS-1252') -- !query analysis -Project [encode(hello, WINDOWS-1252, true) AS encode(hello, WINDOWS-1252)#x] +Project [encode(hello, WINDOWS-1252) AS encode(hello, WINDOWS-1252)#x] +- OneRowRelation -- !query select encode(scol, ecol) from values('hello', 'WINDOWS-1252') as t(scol, ecol) -- !query analysis -Project [encode(scol#x, ecol#x, true) AS encode(scol, ecol)#x] +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] +- SubqueryAlias t +- LocalRelation [scol#x, ecol#x] @@ -670,14 +670,14 @@ SetCommand (spark.sql.legacy.javaCharsets,Some(false)) -- !query select encode('hello', 'WINDOWS-1252') -- !query analysis -Project [encode(hello, WINDOWS-1252, false) AS encode(hello, WINDOWS-1252)#x] +Project [encode(hello, WINDOWS-1252) AS encode(hello, WINDOWS-1252)#x] +- OneRowRelation -- !query select encode(scol, ecol) from values('hello', 'WINDOWS-1252') as t(scol, ecol) -- !query analysis -Project [encode(scol#x, ecol#x, false) AS encode(scol, ecol)#x] +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] +- SubqueryAlias t +- LocalRelation [scol#x, ecol#x] @@ -685,18 +685,95 @@ Project [encode(scol#x, ecol#x, false) AS encode(scol, ecol)#x] -- !query select encode('hello', 'Windows-xxx') -- !query analysis -Project [encode(hello, Windows-xxx, false) AS encode(hello, Windows-xxx)#x] +Project [encode(hello, Windows-xxx) AS encode(hello, Windows-xxx)#x] +- OneRowRelation -- !query select encode(scol, ecol) from values('hello', 'Windows-xxx') as t(scol, ecol) -- !query analysis -Project [encode(scol#x, ecol#x, false) AS encode(scol, ecol)#x] +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] +- SubqueryAlias t +- LocalRelation [scol#x, ecol#x] +-- !query +set spark.sql.legacy.codingErrorAction=true +-- !query analysis +SetCommand (spark.sql.legacy.codingErrorAction,Some(true)) + + +-- !query +select encode('渭城朝雨浥轻尘', 'US-ASCII') +-- !query analysis +Project [encode(渭城朝雨浥轻尘, US-ASCII) AS encode(渭城朝雨浥轻尘, US-ASCII)#x] ++- OneRowRelation + + +-- !query +select encode(scol, ecol) from values('渭城朝雨浥轻尘', 'US-ASCII') as t(scol, ecol) +-- !query analysis +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] ++- SubqueryAlias t + +- LocalRelation [scol#x, ecol#x] + + +-- !query +set spark.sql.legacy.codingErrorAction=false +-- !query analysis +SetCommand (spark.sql.legacy.codingErrorAction,Some(false)) + + +-- !query +select encode('客舍青青柳色新', 'US-ASCII') +-- !query analysis +Project [encode(客舍青青柳色新, US-ASCII) AS encode(客舍青青柳色新, US-ASCII)#x] ++- OneRowRelation + + +-- !query +select encode(scol, ecol) from values('客舍青青柳色新', 'US-ASCII') as t(scol, ecol) +-- !query analysis +Project [encode(scol#x, ecol#x) AS encode(scol, ecol)#x] ++- SubqueryAlias t + +- LocalRelation [scol#x, ecol#x] + + +-- !query +select 
encode(decode(encode('白日依山尽,黄河入海流。欲穷千里目,更上一层楼。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(白日依山尽,黄河入海流。欲穷千里目,更上一层楼。, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(白日依山尽,黄河入海流。欲穷千里目,更上一层楼。, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + +-- !query +select encode(decode(encode('南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + +-- !query +select encode(decode(encode('세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + +-- !query +select encode(decode(encode('το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + +-- !query +select encode(decode(encode('Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query analysis +Project [encode(decode(encode(Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。, UTF-16), UTF-16), UTF-8) AS encode(decode(encode(Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。, UTF-16), UTF-16), UTF-8)#x] ++- OneRowRelation + + -- !query select decode() -- !query analysis @@ -746,7 +823,14 @@ org.apache.spark.sql.AnalysisException -- !query select decode(encode('abc', 'utf-8'), 'utf-8') -- !query analysis -Project [decode(encode(abc, utf-8, false), utf-8) AS decode(encode(abc, utf-8), utf-8)#x] +Project [decode(encode(abc, utf-8), utf-8) AS decode(encode(abc, utf-8), utf-8)#x] ++- OneRowRelation + + +-- !query +select decode(encode('大千世界', 'utf-32'), 'utf-32') +-- !query analysis +Project [decode(encode(大千世界, utf-32), utf-32) AS decode(encode(大千世界, utf-32), utf-32)#x] +- OneRowRelation @@ -856,6 +940,48 @@ Project [decode(scol#x, ecol#x) AS decode(scol, ecol)#x] +- LocalRelation [scol#x, ecol#x] +-- !query +set spark.sql.legacy.codingErrorAction=true +-- !query analysis +SetCommand (spark.sql.legacy.codingErrorAction,Some(true)) + + +-- !query +select decode(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') +-- !query analysis +Project [decode(0xE58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592, US-ASCII) AS decode(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', US-ASCII)#x] ++- OneRowRelation + + +-- !query +select decode(scol, ecol) from values(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') as t(scol, ecol) +-- !query analysis +Project [decode(scol#x, ecol#x) AS decode(scol, ecol)#x] ++- SubqueryAlias t + +- LocalRelation [scol#x, ecol#x] + + +-- !query +set spark.sql.legacy.codingErrorAction=false +-- !query analysis +SetCommand (spark.sql.legacy.codingErrorAction,Some(false)) + + +-- !query +select decode(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') +-- !query analysis +Project [decode(0xE8A5BFE587BAE998B3E585B3E697A0E69585E4BABA, US-ASCII) AS decode(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', US-ASCII)#x] ++- OneRowRelation + + +-- !query +select decode(scol, ecol) from 
values(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') as t(scol, ecol) +-- !query analysis +Project [decode(scol#x, ecol#x) AS decode(scol, ecol)#x] ++- SubqueryAlias t + +- LocalRelation [scol#x, ecol#x] + + -- !query SELECT CONTAINS(null, 'Spark') -- !query analysis @@ -1428,7 +1554,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query CREATE TEMPORARY VIEW fmtTable(fmtField) AS SELECT * FROM VALUES ('invalidFormat') -- !query analysis -CreateViewCommand `fmtTable`, [(fmtField,None)], SELECT * FROM VALUES ('invalidFormat'), false, false, LocalTempView, true +CreateViewCommand `fmtTable`, [(fmtField,None)], SELECT * FROM VALUES ('invalidFormat'), false, false, LocalTempView, UNSUPPORTED, true +- Project [col1#x] +- LocalRelation [col1#x] @@ -1586,3 +1712,87 @@ select luhn_check(123.456) -- !query analysis Project [luhn_check(cast(123.456 as string)) AS luhn_check(123.456)#x] +- OneRowRelation + + +-- !query +select is_valid_utf8('') +-- !query analysis +Project [is_valid_utf8() AS is_valid_utf8()#x] ++- OneRowRelation + + +-- !query +select is_valid_utf8('abc') +-- !query analysis +Project [is_valid_utf8(abc) AS is_valid_utf8(abc)#x] ++- OneRowRelation + + +-- !query +select is_valid_utf8(x'80') +-- !query analysis +Project [is_valid_utf8(cast(0x80 as string)) AS is_valid_utf8(X'80')#x] ++- OneRowRelation + + +-- !query +select make_valid_utf8('') +-- !query analysis +Project [make_valid_utf8() AS make_valid_utf8()#x] ++- OneRowRelation + + +-- !query +select make_valid_utf8('abc') +-- !query analysis +Project [make_valid_utf8(abc) AS make_valid_utf8(abc)#x] ++- OneRowRelation + + +-- !query +select make_valid_utf8(x'80') +-- !query analysis +Project [make_valid_utf8(cast(0x80 as string)) AS make_valid_utf8(X'80')#x] ++- OneRowRelation + + +-- !query +select validate_utf8('') +-- !query analysis +Project [validate_utf8() AS validate_utf8()#x] ++- OneRowRelation + + +-- !query +select validate_utf8('abc') +-- !query analysis +Project [validate_utf8(abc) AS validate_utf8(abc)#x] ++- OneRowRelation + + +-- !query +select validate_utf8(x'80') +-- !query analysis +Project [validate_utf8(cast(0x80 as string)) AS validate_utf8(X'80')#x] ++- OneRowRelation + + +-- !query +select try_validate_utf8('') +-- !query analysis +Project [try_validate_utf8() AS try_validate_utf8()#x] ++- OneRowRelation + + +-- !query +select try_validate_utf8('abc') +-- !query analysis +Project [try_validate_utf8(abc) AS try_validate_utf8(abc)#x] ++- OneRowRelation + + +-- !query +select try_validate_utf8(x'80') +-- !query analysis +Project [try_validate_utf8(cast(0x80 as string)) AS try_validate_utf8(X'80')#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/struct.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/struct.sql.out index c672353ecda6c..dba912cdff72e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/struct.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/struct.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `tbl_x`, VALUES (1, NAMED_STRUCT('C', 'gamma', 'D', 'delta')), (2, NAMED_STRUCT('C', 'epsilon', 'D', 'eta')), (3, NAMED_STRUCT('C', 'theta', 'D', 'iota')) - AS T(ID, ST), false, false, LocalTempView, true + AS T(ID, ST), false, false, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [ID#x, ST#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subexp-elimination.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subexp-elimination.sql.out 
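
The string-functions.sql.out additions above cover is_valid_utf8, make_valid_utf8, validate_utf8 and try_validate_utf8. A short usage sketch, again assuming the local `spark` session from the earlier sketch; the function names and the x'80' probe value come from the queries above, while the NULL-on-failure behaviour of the try_ variant is an assumption based on Spark's usual try_ naming convention, not something these analyzer results show:

  // x'80' is not valid UTF-8 on its own, so it exercises all of the
  // validation paths added in the tests above.
  spark.sql("select is_valid_utf8(x'80'), make_valid_utf8(x'80'), try_validate_utf8(x'80')").show()
  spark.sql("select validate_utf8('abc')").show()  // valid input; invalid input presumably raises an error here
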
index 8409212f136ad..94073f2751b3e 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subexp-elimination.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subexp-elimination.sql.out @@ -6,7 +6,7 @@ AS testData(a, b) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES ('{"a":1, "b":"2"}', '[{"a": 1, "b":2}, {"a":2, "b":2}]'), ('{"a":1, "b":"2"}', null), ('{"a":2, "b":"3"}', '[{"a": 3, "b":4}, {"a":4, "b":5}]'), ('{"a":5, "b":"6"}', '[{"a": 6, "b":7}, {"a":8, "b":9}]'), (null, '[{"a": 1, "b":2}, {"a":2, "b":2}]') -AS testData(a, b), false, true, LocalTempView, true +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-aggregate.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-aggregate.sql.out index b4c9632462527..4da75ae2764f5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-aggregate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-aggregate.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -45,7 +45,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -72,7 +72,7 @@ CreateViewCommand `BONUS`, SELECT * FROM VALUES ("emp 4", 100.00D), ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true +AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, UNSUPPORTED, true +- Project [emp_name#x, bonus_amt#x] +- SubqueryAlias BONUS +- LocalRelation [emp_name#x, bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-basic.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-basic.sql.out index 9a2f65a7377f8..81c4a15dc9f4a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-basic.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-basic.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, 
dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -45,7 +45,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -72,7 +72,7 @@ CreateViewCommand `BONUS`, SELECT * FROM VALUES ("emp 4", 100.00D), ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true +AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, UNSUPPORTED, true +- Project [emp_name#x, bonus_amt#x] +- SubqueryAlias BONUS +- LocalRelation [emp_name#x, bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-count-bug.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-count-bug.sql.out index 01ab697a0dc30..e003c2624009b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-count-bug.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-count-bug.sql.out @@ -2,21 +2,21 @@ -- !query create temporary view t1(c1, c2) as values (0, 1), (1, 2) -- !query analysis -CreateViewCommand `t1`, [(c1,None), (c2,None)], values (0, 1), (1, 2), false, false, LocalTempView, true +CreateViewCommand `t1`, [(c1,None), (c2,None)], values (0, 1), (1, 2), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] -- !query create temporary view t2(c1, c2) as values (0, 2), (0, 3) -- !query analysis -CreateViewCommand `t2`, [(c1,None), (c2,None)], values (0, 2), (0, 3), false, false, LocalTempView, true +CreateViewCommand `t2`, [(c1,None), (c2,None)], values (0, 2), (0, 3), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] -- !query create temporary view t3(c1, c2) as values (0, 3), (1, 4), (2, 5) -- !query analysis -CreateViewCommand `t3`, [(c1,None), (c2,None)], values (0, 3), (1, 4), (2, 5), false, false, LocalTempView, true +CreateViewCommand `t3`, [(c1,None), (c2,None)], values (0, 3), (1, 4), (2, 5), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out index b4338b34eea13..7c3678c66c117 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -45,7 +45,7 @@ CreateViewCommand `DEPT`, SELECT * 
FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -72,7 +72,7 @@ CreateViewCommand `BONUS`, SELECT * FROM VALUES ("emp 4", 100.00D), ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true +AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, UNSUPPORTED, true +- Project [emp_name#x, bonus_amt#x] +- SubqueryAlias BONUS +- LocalRelation [emp_name#x, bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out index b053cb369adca..2409cd0559bd2 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-having.sql.out @@ -20,7 +20,7 @@ CreateViewCommand `EMP`, [(id,None), (emp_name,None), (hiredate,None), (salary,N (500, 'emp 5', date '2001-01-01', double(400.00), NULL), (600, 'emp 6 - no dept', date '2001-01-01', double(400.00), 100), (700, 'emp 7', date '2010-01-01', double(400.00), 100), - (800, 'emp 8', date '2016-01-01', double(150.00), 70), false, false, LocalTempView, true + (800, 'emp 8', date '2016-01-01', double(150.00), 70), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] @@ -39,7 +39,7 @@ CreateViewCommand `DEPT`, [(dept_id,None), (dept_name,None), (state,None)], VALU (30, 'dept 3', 'TX'), (40, 'dept 4 - unassigned', 'OR'), (50, 'dept 5 - unassigned', 'NJ'), - (70, 'dept 7', 'FL'), false, false, LocalTempView, true + (70, 'dept 7', 'FL'), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x] @@ -62,7 +62,7 @@ CreateViewCommand `BONUS`, [(emp_name,None), (bonus_amt,None)], VALUES ('emp 3', double(300.00)), ('emp 4', double(100.00)), ('emp 5', double(1000.00)), - ('emp 6 - no dept', double(500.00)), false, false, LocalTempView, true + ('emp 6 - no dept', double(500.00)), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-in-join-condition.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-in-join-condition.sql.out index da8ea257cfa70..3b55a7293bcfa 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-in-join-condition.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-in-join-condition.sql.out @@ -8,7 +8,7 @@ CREATE TEMP VIEW x(x1, x2) AS VALUES CreateViewCommand `x`, [(x1,None), (x2,None)], VALUES (2, 1), (1, 1), - (3, 4), false, false, LocalTempView, true + (3, 4), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -21,7 +21,7 @@ CREATE TEMP VIEW y(y1, y2) AS VALUES CreateViewCommand `y`, [(y1,None), (y2,None)], VALUES (0, 2), (1, 4), - (4, 11), false, false, LocalTempView, true + (4, 11), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -34,7 +34,7 @@ 
CREATE TEMP VIEW z(z1, z2) AS VALUES CreateViewCommand `z`, [(z1,None), (z2,None)], VALUES (4, 2), (3, 3), - (8, 1), false, false, LocalTempView, true + (8, 1), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -1004,3 +1004,47 @@ Sort [x1#x ASC NULLS FIRST, x2#x ASC NULLS FIRST, y1#x ASC NULLS FIRST, y2#x ASC +- View (`y`, [y1#x, y2#x]) +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] +- LocalRelation [col1#x, col2#x] + + +-- !query +select * from x join y on x1 = y1 and exists (select * from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2 +-- !query analysis +Sort [x1#x ASC NULLS FIRST, x2#x ASC NULLS FIRST, y1#x ASC NULLS FIRST, y2#x ASC NULLS FIRST], true ++- Project [x1#x, x2#x, y1#x, y2#x] + +- Join Inner, ((x1#x = y1#x) AND exists#x [x2#x && y2#x]) + : +- Project [z1#x, z2#x] + : +- Filter ((z2#x = outer(x2#x)) AND (z2#x = outer(y2#x))) + : +- SubqueryAlias z + : +- View (`z`, [z1#x, z2#x]) + : +- Project [cast(col1#x as int) AS z1#x, cast(col2#x as int) AS z2#x] + : +- LocalRelation [col1#x, col2#x] + :- SubqueryAlias x + : +- View (`x`, [x1#x, x2#x]) + : +- Project [cast(col1#x as int) AS x1#x, cast(col2#x as int) AS x2#x] + : +- LocalRelation [col1#x, col2#x] + +- SubqueryAlias y + +- View (`y`, [y1#x, y2#x]) + +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +select * from x join y on x1 = y1 and not exists (select * from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2 +-- !query analysis +Sort [x1#x ASC NULLS FIRST, x2#x ASC NULLS FIRST, y1#x ASC NULLS FIRST, y2#x ASC NULLS FIRST], true ++- Project [x1#x, x2#x, y1#x, y2#x] + +- Join Inner, ((x1#x = y1#x) AND NOT exists#x [x2#x && y2#x]) + : +- Project [z1#x, z2#x] + : +- Filter ((z2#x = outer(x2#x)) AND (z2#x = outer(y2#x))) + : +- SubqueryAlias z + : +- View (`z`, [z1#x, z2#x]) + : +- Project [cast(col1#x as int) AS z1#x, cast(col2#x as int) AS z2#x] + : +- LocalRelation [col1#x, col2#x] + :- SubqueryAlias x + : +- View (`x`, [x1#x, x2#x]) + : +- Project [cast(col1#x as int) AS x1#x, cast(col2#x as int) AS x2#x] + : +- LocalRelation [col1#x, col2#x] + +- SubqueryAlias y + +- View (`y`, [y1#x, y2#x]) + +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] + +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-joins-and-set-ops.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-joins-and-set-ops.sql.out index 99de720ac47d9..55b10125f7680 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-joins-and-set-ops.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-joins-and-set-ops.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -45,7 +45,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), 
(70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -72,7 +72,7 @@ CreateViewCommand `BONUS`, SELECT * FROM VALUES ("emp 4", 100.00D), ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true +AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, UNSUPPORTED, true +- Project [emp_name#x, bonus_amt#x] +- SubqueryAlias BONUS +- LocalRelation [emp_name#x, bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-orderby-limit.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-orderby-limit.sql.out index 4bfb6c5b843cd..3f56e346d0a53 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-orderby-limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-orderby-limit.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -45,7 +45,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -72,7 +72,7 @@ CreateViewCommand `BONUS`, SELECT * FROM VALUES ("emp 4", 100.00D), ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true +AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, UNSUPPORTED, true +- Project [emp_name#x, bonus_amt#x] +- SubqueryAlias BONUS +- LocalRelation [emp_name#x, bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-outside-filter.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-outside-filter.sql.out index a0e485b49096e..d15466f04f800 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-outside-filter.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-outside-filter.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- 
LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -45,7 +45,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -72,7 +72,7 @@ CreateViewCommand `BONUS`, SELECT * FROM VALUES ("emp 4", 100.00D), ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true +AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, UNSUPPORTED, true +- Project [emp_name#x, bonus_amt#x] +- SubqueryAlias BONUS +- LocalRelation [emp_name#x, bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-within-and-or.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-within-and-or.sql.out index c43c920971d0e..7b846e5eae9c8 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-within-and-or.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-within-and-or.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -45,7 +45,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -72,7 +72,7 @@ CreateViewCommand `BONUS`, SELECT * FROM VALUES ("emp 4", 100.00D), ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true +AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, UNSUPPORTED, true +- Project [emp_name#x, bonus_amt#x] +- SubqueryAlias BONUS +- LocalRelation [emp_name#x, bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-basic.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-basic.sql.out index ca68b169b41f8..4d59da8c393af 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-basic.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-basic.sql.out @@ -2,7 +2,7 @@ -- !query create temporary view tab_a as select * from values (1, 1) as tab_a(a1, b1) -- !query analysis -CreateViewCommand `tab_a`, select * from values (1, 1) as tab_a(a1, b1), false, false, LocalTempView, true +CreateViewCommand `tab_a`, select * from values (1, 1) as tab_a(a1, b1), false, false, LocalTempView, UNSUPPORTED, true +- Project 
[a1#x, b1#x] +- SubqueryAlias tab_a +- LocalRelation [a1#x, b1#x] @@ -11,7 +11,7 @@ CreateViewCommand `tab_a`, select * from values (1, 1) as tab_a(a1, b1), false, -- !query create temporary view tab_b as select * from values (1, 1) as tab_b(a2, b2) -- !query analysis -CreateViewCommand `tab_b`, select * from values (1, 1) as tab_b(a2, b2), false, false, LocalTempView, true +CreateViewCommand `tab_b`, select * from values (1, 1) as tab_b(a2, b2), false, false, LocalTempView, UNSUPPORTED, true +- Project [a2#x, b2#x] +- SubqueryAlias tab_b +- LocalRelation [a2#x, b2#x] @@ -22,7 +22,7 @@ create temporary view struct_tab as select struct(col1 as a, col2 as b) as recor values (1, 1), (1, 2), (2, 1), (2, 2) -- !query analysis CreateViewCommand `struct_tab`, select struct(col1 as a, col2 as b) as record from - values (1, 1), (1, 2), (2, 1), (2, 2), false, false, LocalTempView, true + values (1, 1), (1, 2), (2, 1), (2, 2), false, false, LocalTempView, UNSUPPORTED, true +- Project [struct(a, col1#x, b, col2#x) AS record#x] +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-count-bug.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-count-bug.sql.out index 390b9a886c31b..39a0a0d5997ff 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-count-bug.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-count-bug.sql.out @@ -2,21 +2,21 @@ -- !query create temporary view t1(c1, c2) as values (0, 1), (1, 2) -- !query analysis -CreateViewCommand `t1`, [(c1,None), (c2,None)], values (0, 1), (1, 2), false, false, LocalTempView, true +CreateViewCommand `t1`, [(c1,None), (c2,None)], values (0, 1), (1, 2), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] -- !query create temporary view t2(c1, c2) as values (0, 2), (0, 3) -- !query analysis -CreateViewCommand `t2`, [(c1,None), (c2,None)], values (0, 2), (0, 3), false, false, LocalTempView, true +CreateViewCommand `t2`, [(c1,None), (c2,None)], values (0, 2), (0, 3), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] -- !query create temporary view t3(c1, c2) as values (0, 3), (1, 4), (2, 5) -- !query analysis -CreateViewCommand `t3`, [(c1,None), (c2,None)], values (0, 3), (1, 4), (2, 5), false, false, LocalTempView, true +CreateViewCommand `t3`, [(c1,None), (c2,None)], values (0, 3), (1, 4), (2, 5), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-group-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-group-by.sql.out index 2c5e9eb6733d9..5a253f633bb11 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-group-by.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, 
t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("t1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("t1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), ("t3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("t3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-having.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-having.sql.out index ec2cc088bf8a9..bddc9d16d7eba 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-having.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- 
LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-joins.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-joins.sql.out index 0dfba13398228..46b4a78b9a745 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-joins.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-joins.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] @@ -113,7 +113,7 @@ create temporary view s1 as select * from values -- !query analysis 
CreateViewCommand `s1`, select * from values (1), (3), (5), (7), (9) - as s1(id), false, false, LocalTempView, true + as s1(id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x] +- SubqueryAlias s1 +- LocalRelation [id#x] @@ -126,7 +126,7 @@ create temporary view s2 as select * from values -- !query analysis CreateViewCommand `s2`, select * from values (1), (3), (4), (6), (9) - as s2(id), false, false, LocalTempView, true + as s2(id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x] +- SubqueryAlias s2 +- LocalRelation [id#x] @@ -139,7 +139,7 @@ create temporary view s3 as select * from values -- !query analysis CreateViewCommand `s3`, select * from values (3), (4), (6), (9) - as s3(id), false, false, LocalTempView, true + as s3(id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x] +- SubqueryAlias s3 +- LocalRelation [id#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-limit.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-limit.sql.out index d19155916f4ad..a828cb92e59d3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-limit.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff 
--git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out index f4cca5fd385bb..39748a324e527 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-null-semantics.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-null-semantics.sql.out index 7816958795d01..51fb2455c19f8 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-null-semantics.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-null-semantics.sql.out @@ -2,14 +2,14 @@ -- !query create temp view v (c) as values (1), (null) -- !query analysis -CreateViewCommand `v`, [(c,None)], values (1), (null), false, false, LocalTempView, true +CreateViewCommand `v`, [(c,None)], values (1), (null), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x] -- !query create temp view v_empty (e) as select 1 where false -- !query 
analysis -CreateViewCommand `v_empty`, [(e,None)], select 1 where false, false, false, LocalTempView, true +CreateViewCommand `v_empty`, [(e,None)], select 1 where false, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- Filter false +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-nullability.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-nullability.sql.out index 34a14c6f73d15..24c8273f475a8 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-nullability.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-nullability.sql.out @@ -2,7 +2,7 @@ -- !query create temp view t0 as select 1 as a_nonnullable -- !query analysis -CreateViewCommand `t0`, select 1 as a_nonnullable, false, false, LocalTempView, true +CreateViewCommand `t0`, select 1 as a_nonnullable, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS a_nonnullable#x] +- OneRowRelation @@ -10,7 +10,7 @@ CreateViewCommand `t0`, select 1 as a_nonnullable, false, false, LocalTempView, -- !query create temp view t1 as select cast(null as int) as b_nullable -- !query analysis -CreateViewCommand `t1`, select cast(null as int) as b_nullable, false, false, LocalTempView, true +CreateViewCommand `t1`, select cast(null as int) as b_nullable, false, false, LocalTempView, UNSUPPORTED, true +- Project [cast(null as int) AS b_nullable#x] +- OneRowRelation @@ -18,7 +18,7 @@ CreateViewCommand `t1`, select cast(null as int) as b_nullable, false, false, Lo -- !query create temp view t2 as select 2 as c -- !query analysis -CreateViewCommand `t2`, select 2 as c, false, false, LocalTempView, true +CreateViewCommand `t2`, select 2 as c, false, false, LocalTempView, UNSUPPORTED, true +- Project [2 AS c#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-order-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-order-by.sql.out index 3f78e0c62c03e..075e4c90d8110 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-order-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-order-by.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, 
LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-set-operations.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-set-operations.sql.out index de8864ec73bbd..9239670589bcb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-set-operations.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-set-operations.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, 
true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-subquery-in-join-condition.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-subquery-in-join-condition.sql.out index 712d94ce323d6..ce6a1a3d7ed53 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-subquery-in-join-condition.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-subquery-in-join-condition.sql.out @@ -8,7 +8,7 @@ CREATE TEMP VIEW x(x1, x2) AS VALUES CreateViewCommand `x`, [(x1,None), (x2,None)], VALUES (2, 1), (1, 1), - (3, 4), false, false, LocalTempView, true + (3, 4), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -21,7 +21,7 @@ CREATE TEMP VIEW y(y1, y2) AS VALUES CreateViewCommand `y`, [(y1,None), (y2,None)], VALUES (0, 2), (1, 4), - (4, 11), false, false, LocalTempView, true + (4, 11), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -34,7 +34,7 @@ CREATE TEMP VIEW z(z1, z2) AS VALUES CreateViewCommand `z`, [(z1,None), (z2,None)], VALUES (4, 2), (3, 3), - (8, 1), false, false, LocalTempView, true + (8, 1), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -916,3 +916,47 @@ Sort [x1#x ASC NULLS FIRST, x2#x ASC NULLS FIRST, y1#x ASC NULLS FIRST, y2#x ASC +- View (`y`, [y1#x, y2#x]) +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] +- LocalRelation [col1#x, col2#x] + + +-- !query +select * from x left join y on x1 = y1 and x2 IN (select z1 from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2 +-- !query analysis +Sort [x1#x ASC NULLS FIRST, x2#x ASC NULLS FIRST, y1#x ASC NULLS FIRST, y2#x ASC NULLS FIRST], true ++- Project [x1#x, x2#x, y1#x, y2#x] + +- Join LeftOuter, ((x1#x = y1#x) AND x2#x IN (list#x [x2#x && y2#x])) + : +- Project [z1#x] + : +- Filter ((z2#x = outer(x2#x)) AND (z2#x = outer(y2#x))) + : +- SubqueryAlias z + : +- View (`z`, [z1#x, z2#x]) + : +- Project [cast(col1#x as int) AS z1#x, cast(col2#x as int) AS z2#x] + : +- LocalRelation [col1#x, col2#x] + :- SubqueryAlias x + : +- View (`x`, [x1#x, x2#x]) + : +- Project [cast(col1#x as int) AS x1#x, cast(col2#x as int) AS x2#x] + : +- LocalRelation [col1#x, col2#x] + +- SubqueryAlias y + +- View (`y`, [y1#x, y2#x]) + +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +select * from x left join y on x1 = y1 and x2 not IN (select z1 from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2 +-- !query analysis +Sort [x1#x ASC NULLS FIRST, x2#x ASC NULLS FIRST, y1#x ASC NULLS FIRST, y2#x ASC NULLS FIRST], true ++- Project [x1#x, x2#x, y1#x, y2#x] + +- Join LeftOuter, ((x1#x = y1#x) AND NOT x2#x IN (list#x [x2#x && y2#x])) + : +- Project [z1#x] + : +- Filter ((z2#x = outer(x2#x)) AND (z2#x = outer(y2#x))) + : +- SubqueryAlias z + : +- View (`z`, [z1#x, z2#x]) + : +- Project [cast(col1#x as int) AS z1#x, cast(col2#x as int) AS z2#x] + : +- LocalRelation [col1#x, col2#x] + :- SubqueryAlias x + : +- View (`x`, [x1#x, x2#x]) + : +- Project [cast(col1#x as int) AS x1#x, cast(col2#x as int) AS x2#x] + : +- LocalRelation [col1#x, col2#x] + +- SubqueryAlias y + +- View (`y`, [y1#x, y2#x]) + +- Project [cast(col1#x as int) AS y1#x, 
cast(col2#x as int) AS y2#x] + +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out index 5c1465d4fc136..0074991b4ea6a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/nested-not-in.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/nested-not-in.sql.out index 5dd0d161aed82..a098d19da594d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/nested-not-in.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/nested-not-in.sql.out @@ -18,7 +18,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (500, "emp 5", NULL), (600, "emp 6", 100), (800, "emp 8", 70) -AS EMP(id, emp_name, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, dept_id#x] +- SubqueryAlias EMP +- LocalRelation [id#x, emp_name#x, 
dept_id#x] @@ -41,7 +41,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] @@ -68,7 +68,7 @@ CreateViewCommand `BONUS`, SELECT * FROM VALUES ("emp 4", 100.00D), ("emp 5", 1000.00D), ("emp 6 - no dept", 500.00D) -AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, true +AS BONUS(emp_name, bonus_amt), false, false, LocalTempView, UNSUPPORTED, true +- Project [emp_name#x, bonus_amt#x] +- SubqueryAlias BONUS +- LocalRelation [emp_name#x, bonus_amt#x] @@ -91,7 +91,7 @@ CreateViewCommand `ADDRESS`, SELECT * FROM VALUES (null, null, "addr4"), (600, "emp 6", "addr6"), (800, "emp 8", "addr8") -AS ADDRESS(id, emp_name, address), false, false, LocalTempView, true +AS ADDRESS(id, emp_name, address), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, address#x] +- SubqueryAlias ADDRESS +- LocalRelation [id#x, emp_name#x, address#x] @@ -102,7 +102,7 @@ CREATE TEMPORARY VIEW S1 AS SELECT * FROM VALUES (null, null), (5, 5), (8, 8), (11, 11) AS s1(a, b) -- !query analysis CreateViewCommand `S1`, SELECT * FROM VALUES - (null, null), (5, 5), (8, 8), (11, 11) AS s1(a, b), false, false, LocalTempView, true + (null, null), (5, 5), (8, 8), (11, 11) AS s1(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias s1 +- LocalRelation [a#x, b#x] @@ -113,7 +113,7 @@ CREATE TEMPORARY VIEW S2 AS SELECT * FROM VALUES (7, 7), (8, 8), (11, 11), (null, null) AS s2(c, d) -- !query analysis CreateViewCommand `S2`, SELECT * FROM VALUES - (7, 7), (8, 8), (11, 11), (null, null) AS s2(c, d), false, false, LocalTempView, true + (7, 7), (8, 8), (11, 11), (null, null) AS s2(c, d), false, false, LocalTempView, UNSUPPORTED, true +- Project [c#x, d#x] +- SubqueryAlias s2 +- LocalRelation [c#x, d#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-group-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-group-by.sql.out index b2559f63ea642..724f9f29894c1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-group-by.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2, 
timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-joins.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-joins.sql.out index 5f6a4b9d0a546..90bcb4818091f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-joins.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-joins.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("val1d", 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("val1e", 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("val1e", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("val1f", 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("val1b", null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), ("val3b", 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("val3b", 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, 
t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out index 0844e2540acc8..e8a8e46d1d122 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-multi-column-literal.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `m`, SELECT * FROM VALUES (null, 1.0), (2, 3.0), (4, 5.0) - AS m(a, b), false, false, LocalTempView, true + AS m(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias m +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out index 7578a97548e06..f7dac3e2c4675 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-multi-column.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `m`, SELECT * FROM VALUES (null, 1.0), (2, 3.0), (4, 5.0) - AS m(a, b), false, false, LocalTempView, true + AS m(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias m +- LocalRelation [a#x, b#x] @@ -31,7 +31,7 @@ CreateViewCommand `s`, SELECT * FROM VALUES (0, 1.0), (2, 3.0), (4, null) - AS s(c, d), false, false, LocalTempView, true + AS s(c, d), false, false, LocalTempView, UNSUPPORTED, true +- Project [c#x, d#x] +- SubqueryAlias s +- LocalRelation [c#x, d#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out index f1c047fddd955..7de84bb97346a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-single-column-literal.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `m`, SELECT * FROM VALUES (null, 1.0), (2, 3.0), (4, 5.0) - AS m(a, b), false, false, LocalTempView, true + AS m(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias m +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out index 0b63a774631af..8c234dd5b7d70 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out +++ 
b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/not-in-unit-tests-single-column.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `m`, SELECT * FROM VALUES (null, 1.0), (2, 3.0), (4, 5.0) - AS m(a, b), false, false, LocalTempView, true + AS m(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias m +- LocalRelation [a#x, b#x] @@ -27,7 +27,7 @@ CreateViewCommand `s`, SELECT * FROM VALUES (null, 1.0), (2, 3.0), (6, 7.0) - AS s(c, d), false, false, LocalTempView, true + AS s(c, d), false, false, LocalTempView, UNSUPPORTED, true +- Project [c#x, d#x] +- SubqueryAlias s +- LocalRelation [c#x, d#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/simple-in.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/simple-in.sql.out index e95de3f5a0f1d..f3f5aa7fcade4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/simple-in.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/simple-in.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ("t1d", 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ("t1a", 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ("t1e", 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ("t1e", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ("t1f", 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ("t1b", null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), ("t3b", 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ("t3b", 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] @@ -317,7 +317,7 @@ create temporary view a as select * from values -- !query analysis CreateViewCommand `a`, select * from values (1, 1), (2, 1), (null, 1), (1, 3), (null, 3), (1, null), (null, 2) - as a(a1, a2), false, false, 
LocalTempView, true + as a(a1, a2), false, false, LocalTempView, UNSUPPORTED, true +- Project [a1#x, a2#x] +- SubqueryAlias a +- LocalRelation [a1#x, a2#x] @@ -330,7 +330,7 @@ create temporary view b as select * from values -- !query analysis CreateViewCommand `b`, select * from values (1, 1, 2), (null, 3, 2), (1, null, 2), (1, 2, null) - as b(b1, b2, b3), false, false, LocalTempView, true + as b(b1, b2, b3), false, false, LocalTempView, UNSUPPORTED, true +- Project [b1#x, b2#x, b3#x] +- SubqueryAlias b +- LocalRelation [b1#x, b2#x, b3#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out index 62aa6be0bd118..95b38e1c7e0f5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/invalid-correlation.sql.out @@ -6,7 +6,7 @@ AS t1(t1a, t1b, t1c) -- !query analysis CreateViewCommand `t1`, SELECT * FROM VALUES (1, 2, 3) -AS t1(t1a, t1b, t1c), false, false, LocalTempView, true +AS t1(t1a, t1b, t1c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x] @@ -19,7 +19,7 @@ AS t2(t2a, t2b, t2c) -- !query analysis CreateViewCommand `t2`, SELECT * FROM VALUES (1, 0, 1) -AS t2(t2a, t2b, t2c), false, false, LocalTempView, true +AS t2(t2a, t2b, t2c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x] @@ -32,7 +32,7 @@ AS t3(t3a, t3b, t3c) -- !query analysis CreateViewCommand `t3`, SELECT * FROM VALUES (3, 1, 2) -AS t3(t3a, t3b, t3c), false, false, LocalTempView, true +AS t3(t3a, t3b, t3c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x] @@ -178,7 +178,7 @@ AS t1(t1a, t1b, t1c) -- !query analysis CreateViewCommand `t1_copy`, SELECT * FROM VALUES (1, 2, 3) -AS t1(t1a, t1b, t1c), false, false, LocalTempView, true +AS t1(t1a, t1b, t1c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/subq-input-typecheck.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/subq-input-typecheck.sql.out index e550e52bf64b0..dbbb3e2d7062b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/subq-input-typecheck.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/negative-cases/subq-input-typecheck.sql.out @@ -6,7 +6,7 @@ AS t1(t1a, t1b, t1c) -- !query analysis CreateViewCommand `t1`, SELECT * FROM VALUES (1, 2, 3) -AS t1(t1a, t1b, t1c), false, false, LocalTempView, true +AS t1(t1a, t1b, t1c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x] @@ -19,7 +19,7 @@ AS t2(t2a, t2b, t2c) -- !query analysis CreateViewCommand `t2`, SELECT * FROM VALUES (1, 0, 1) -AS t2(t2a, t2b, t2c), false, false, LocalTempView, true +AS t2(t2a, t2b, t2c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x] @@ -32,7 +32,7 
@@ AS t3(t3a, t3b, t3c) -- !query analysis CreateViewCommand `t3`, SELECT * FROM VALUES (3, 1, 2) -AS t3(t3a, t3b, t3c), false, false, LocalTempView, true +AS t3(t3a, t3b, t3c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x] @@ -45,7 +45,7 @@ AS t1(t4a, t4b, t4c) -- !query analysis CreateViewCommand `t4`, SELECT * FROM VALUES (CAST(1 AS DOUBLE), CAST(2 AS STRING), CAST(3 AS STRING)) -AS t1(t4a, t4b, t4c), false, false, LocalTempView, true +AS t1(t4a, t4b, t4c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t4a#x, t4b#x, t4c#x] +- SubqueryAlias t1 +- LocalRelation [t4a#x, t4b#x, t4c#x] @@ -58,7 +58,7 @@ AS t1(t5a, t5b, t5c) -- !query analysis CreateViewCommand `t5`, SELECT * FROM VALUES (CAST('2011-01-01 01:01:01' AS TIMESTAMP), CAST(2 AS STRING), CAST(3 AS BIGINT)) -AS t1(t5a, t5b, t5c), false, false, LocalTempView, true +AS t1(t5a, t5b, t5c), false, false, LocalTempView, UNSUPPORTED, true +- Project [t5a#x, t5b#x, t5c#xL] +- SubqueryAlias t1 +- LocalRelation [t5a#x, t5b#x, t5c#xL] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql.out index 0dc4fa9e4808f..2a1abc0d48871 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/nested-scalar-subquery-count-bug.sql.out @@ -2,21 +2,21 @@ -- !query CREATE OR REPLACE VIEW t1(a1, a2) as values (0, 1), (1, 2) -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t1`, [(a1,None), (a2,None)], values (0, 1), (1, 2), false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`t1`, [(a1,None), (a2,None)], values (0, 1), (1, 2), false, true, PersistedView, COMPENSATION, true +- LocalRelation [col1#x, col2#x] -- !query CREATE OR REPLACE VIEW t2(b1, b2) as values (0, 2), (0, 3) -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t2`, [(b1,None), (b2,None)], values (0, 2), (0, 3), false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`t2`, [(b1,None), (b2,None)], values (0, 2), (0, 3), false, true, PersistedView, COMPENSATION, true +- LocalRelation [col1#x, col2#x] -- !query CREATE OR REPLACE VIEW t3(c1, c2) as values (0, 2), (0, 3) -- !query analysis -CreateViewCommand `spark_catalog`.`default`.`t3`, [(c1,None), (c2,None)], values (0, 2), (0, 3), false, true, PersistedView, true +CreateViewCommand `spark_catalog`.`default`.`t3`, [(c1,None), (c2,None)], values (0, 2), (0, 3), false, true, PersistedView, COMPENSATION, true +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out index f40deb3ead5ab..ebe071c5261fd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-count-bug.sql.out @@ -19,7 +19,7 @@ CreateViewCommand `l`, [(a,None), (b,None)], values (3, 3.0), (null, null), (null, 5.0), - (6, null), false, false, LocalTempView, true + (6, 
null), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -41,7 +41,7 @@ CreateViewCommand `r`, [(c,None), (d,None)], values (4, 1.0), (null, null), (null, 5.0), - (6, null), false, false, LocalTempView, true + (6, null), false, false, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -171,7 +171,7 @@ Project [a#x, b#x, scalar-subquery#x [a#x] AS scalarsubquery(a)#xL] -- !query CREATE TEMPORARY VIEW null_view(a, b) AS SELECT CAST(null AS int), CAST(null as int) -- !query analysis -CreateViewCommand `null_view`, [(a,None), (b,None)], SELECT CAST(null AS int), CAST(null as int), false, false, LocalTempView, true +CreateViewCommand `null_view`, [(a,None), (b,None)], SELECT CAST(null AS int), CAST(null as int), false, false, LocalTempView, UNSUPPORTED, true +- Project [cast(null as int) AS CAST(NULL AS INT)#x, cast(null as int) AS CAST(NULL AS INT)#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-group-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-group-by.sql.out new file mode 100644 index 0000000000000..671557aa39566 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-group-by.sql.out @@ -0,0 +1,218 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +create temp view x (x1, x2) as values (1, 1), (2, 2) +-- !query analysis +CreateViewCommand `x`, [(x1,None), (x2,None)], values (1, 1), (2, 2), false, false, LocalTempView, UNSUPPORTED, true + +- LocalRelation [col1#x, col2#x] + + +-- !query +create temp view y (y1, y2) as values (2, 0), (3, -1) +-- !query analysis +CreateViewCommand `y`, [(y1,None), (y2,None)], values (2, 0), (3, -1), false, false, LocalTempView, UNSUPPORTED, true + +- LocalRelation [col1#x, col2#x] + + +-- !query +create temp view z (z1, z2) as values (1, 0), (1, 1) +-- !query analysis +CreateViewCommand `z`, [(z1,None), (z2,None)], values (1, 0), (1, 1), false, false, LocalTempView, UNSUPPORTED, true + +- LocalRelation [col1#x, col2#x] + + +-- !query +select * from x where (select count(*) from y where y1 = x1 group by y1) = 1 +-- !query analysis +Project [x1#x, x2#x] ++- Filter (scalar-subquery#x [x1#x] = cast(1 as bigint)) + : +- Aggregate [y1#x], [count(1) AS count(1)#xL] + : +- Filter (y1#x = outer(x1#x)) + : +- SubqueryAlias y + : +- View (`y`, [y1#x, y2#x]) + : +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] + : +- LocalRelation [col1#x, col2#x] + +- SubqueryAlias x + +- View (`x`, [x1#x, x2#x]) + +- Project [cast(col1#x as int) AS x1#x, cast(col2#x as int) AS x2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +select * from x where (select count(*) from y where y1 = x1 group by x1) = 1 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE", + "sqlState" : "0A000", + "messageParameters" : { + "sqlExprs" : "\"x1\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 61, + "stopIndex" : 71, + "fragment" : "group by x1" + } ] +} + + +-- !query +select * from x where (select count(*) from y where y1 > x1 group by x1) = 1 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE", + "sqlState" : "0A000", + "messageParameters" : { + 
"sqlExprs" : "\"x1\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 61, + "stopIndex" : 71, + "fragment" : "group by x1" + } ] +} + + +-- !query +select *, (select count(*) from y where x1 = y1 and y2 = 1 group by y2) from x +-- !query analysis +Project [x1#x, x2#x, scalar-subquery#x [x1#x] AS scalarsubquery(x1)#xL] +: +- Aggregate [y2#x], [count(1) AS count(1)#xL] +: +- Filter ((outer(x1#x) = y1#x) AND (y2#x = 1)) +: +- SubqueryAlias y +: +- View (`y`, [y1#x, y2#x]) +: +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] +: +- LocalRelation [col1#x, col2#x] ++- SubqueryAlias x + +- View (`x`, [x1#x, x2#x]) + +- Project [cast(col1#x as int) AS x1#x, cast(col2#x as int) AS x2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +select *, (select count(*) from y where x1 = y1 and y2 = x1 + 1 group by y2) from x +-- !query analysis +Project [x1#x, x2#x, scalar-subquery#x [x1#x && x1#x] AS scalarsubquery(x1, x1)#xL] +: +- Aggregate [y2#x], [count(1) AS count(1)#xL] +: +- Filter ((outer(x1#x) = y1#x) AND (y2#x = (outer(x1#x) + 1))) +: +- SubqueryAlias y +: +- View (`y`, [y1#x, y2#x]) +: +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] +: +- LocalRelation [col1#x, col2#x] ++- SubqueryAlias x + +- View (`x`, [x1#x, x2#x]) + +- Project [cast(col1#x as int) AS x1#x, cast(col2#x as int) AS x2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +select * from x where (select count(*) from y where y1 > x1 group by y1) = 1 +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.NON_CORRELATED_COLUMNS_IN_GROUP_BY", + "sqlState" : "0A000", + "messageParameters" : { + "value" : "y1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 23, + "stopIndex" : 72, + "fragment" : "(select count(*) from y where y1 > x1 group by y1)" + } ] +} + + +-- !query +select *, (select count(*) from y where y1 + y2 = x1 group by y1) from x +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.NON_CORRELATED_COLUMNS_IN_GROUP_BY", + "sqlState" : "0A000", + "messageParameters" : { + "value" : "y1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 11, + "stopIndex" : 65, + "fragment" : "(select count(*) from y where y1 + y2 = x1 group by y1)" + } ] +} + + +-- !query +select *, (select count(*) from (select * from y where y1 = x1 union all select * from y) sub group by y1) from x +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.NON_CORRELATED_COLUMNS_IN_GROUP_BY", + "sqlState" : "0A000", + "messageParameters" : { + "value" : "y1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 11, + "stopIndex" : 106, + "fragment" : "(select count(*) from (select * from y where y1 = x1 union all select * from y) sub group by y1)" + } ] +} + + +-- !query +select *, (select count(*) from y left join (select * from z where z1 = x1) sub on y2 = z2 group by z1) from x +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.NON_CORRELATED_COLUMNS_IN_GROUP_BY", + "sqlState" : "0A000", + "messageParameters" : { + "value" : "z1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 11, + 
"stopIndex" : 103, + "fragment" : "(select count(*) from y left join (select * from z where z1 = x1) sub on y2 = z2 group by z1)" + } ] +} + + +-- !query +set spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate = true +-- !query analysis +SetCommand (spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate,Some(true)) + + +-- !query +select * from x where (select count(*) from y where y1 > x1 group by y1) = 1 +-- !query analysis +Project [x1#x, x2#x] ++- Filter (scalar-subquery#x [x1#x] = cast(1 as bigint)) + : +- Aggregate [y1#x], [count(1) AS count(1)#xL] + : +- Filter (y1#x > outer(x1#x)) + : +- SubqueryAlias y + : +- View (`y`, [y1#x, y2#x]) + : +- Project [cast(col1#x as int) AS y1#x, cast(col2#x as int) AS y2#x] + : +- LocalRelation [col1#x, col2#x] + +- SubqueryAlias x + +- View (`x`, [x1#x, x2#x]) + +- Project [cast(col1#x as int) AS x1#x, cast(col2#x as int) AS x2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +reset spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate +-- !query analysis +ResetCommand spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out index fd1acb113e3a6..3648a97e9872a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-predicate.sql.out @@ -2,7 +2,7 @@ -- !query CREATE OR REPLACE TEMPORARY VIEW p AS VALUES (1, 1) AS T(pk, pv) -- !query analysis -CreateViewCommand `p`, VALUES (1, 1) AS T(pk, pv), false, true, LocalTempView, true +CreateViewCommand `p`, VALUES (1, 1) AS T(pk, pv), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [pk#x, pv#x] @@ -10,7 +10,7 @@ CreateViewCommand `p`, VALUES (1, 1) AS T(pk, pv), false, true, LocalTempView, t -- !query CREATE OR REPLACE TEMPORARY VIEW c AS VALUES (1, 1) AS T(ck, cv) -- !query analysis -CreateViewCommand `c`, VALUES (1, 1) AS T(ck, cv), false, true, LocalTempView, true +CreateViewCommand `c`, VALUES (1, 1) AS T(ck, cv), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [ck#x, cv#x] @@ -110,7 +110,7 @@ CreateViewCommand `t1`, select * from values ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2BD, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2BD, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2BD, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -147,7 +147,7 @@ CreateViewCommand `t2`, select * from values ('val1e', 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ('val1f', 19S, null, 19L, float(17), 25D, 26E2BD, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ('val1b', null, 16, 19L, float(17), 25D, 26E2BD, timestamp 
'2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -182,7 +182,7 @@ CreateViewCommand `t3`, select * from values ('val1b', null, 16, 19L, float(17), 25D, 26E2BD, timestamp '2014-11-04 01:02:00.000', null), ('val3b', 8S, null, 719L, float(17), 25D, 26E2BD, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ('val3b', 8S, null, 19L, float(17), 25D, 26E2BD, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] @@ -1254,21 +1254,21 @@ Project [t1a#x, t1b#x] -- !query CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0) -- !query analysis -CreateViewCommand `t0`, [(t0a,None), (t0b,None)], VALUES (1, 1), (2, 0), false, true, LocalTempView, true +CreateViewCommand `t0`, [(t0a,None), (t0b,None)], VALUES (1, 1), (2, 0), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] -- !query CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3) -- !query analysis -CreateViewCommand `t1`, [(t1a,None), (t1b,None), (t1c,None)], VALUES (1, 1, 3), false, true, LocalTempView, true +CreateViewCommand `t1`, [(t1a,None), (t1b,None), (t1c,None)], VALUES (1, 1, 3), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x] -- !query CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7) -- !query analysis -CreateViewCommand `t2`, [(t2a,None), (t2b,None), (t2c,None)], VALUES (1, 1, 5), (2, 2, 7), false, true, LocalTempView, true +CreateViewCommand `t2`, [(t2a,None), (t2b,None), (t2c,None)], VALUES (1, 1, 5), (2, 2, 7), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x] @@ -1626,9 +1626,9 @@ Project [id#xL] : +- Project [id#xL AS c#xL] : +- Filter (outer(id#xL) = id#xL) : +- SubqueryAlias t2 - : +- Range (1, 2, step=1, splits=None) + : +- Range (1, 2, step=1) +- SubqueryAlias t1 - +- Range (1, 3, step=1, splits=None) + +- Range (1, 3, step=1) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out index 1b03c3b780ca5..72e230f9bb881 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out @@ -28,7 +28,7 @@ CreateViewCommand `t1`, select * from values ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, 
t1h, t1i), false, false, LocalTempView, true + as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] +- SubqueryAlias t1 +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] @@ -65,7 +65,7 @@ CreateViewCommand `t2`, select * from values ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, true + as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] +- SubqueryAlias t2 +- LocalRelation [t2a#x, t2b#x, t2c#x, t2d#xL, t2e#x, t2f#x, t2g#x, t2h#x, t2i#x] @@ -100,7 +100,7 @@ CreateViewCommand `t3`, select * from values ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, true + as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i), false, false, LocalTempView, UNSUPPORTED, true +- Project [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] +- SubqueryAlias t3 +- LocalRelation [t3a#x, t3b#x, t3c#x, t3d#xL, t3e#x, t3f#x, t3g#x, t3h#x, t3i#x] @@ -599,7 +599,7 @@ Project [t1a#x, scalar-subquery#x [t1a#x] AS scalarsubquery(t1a)#xL] -- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (0, 1), (1, 2) t1(c1, c2) -- !query analysis -CreateViewCommand `t1`, VALUES (0, 1), (1, 2) t1(c1, c2), false, true, LocalTempView, true +CreateViewCommand `t1`, VALUES (0, 1), (1, 2) t1(c1, c2), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t1 +- LocalRelation [c1#x, c2#x] @@ -607,7 +607,7 @@ CreateViewCommand `t1`, VALUES (0, 1), (1, 2) t1(c1, c2), false, true, LocalTemp -- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (0, 2), (0, 3) t2(c1, c2) -- !query analysis -CreateViewCommand `t2`, VALUES (0, 2), (0, 3) t2(c1, c2), false, true, LocalTempView, true +CreateViewCommand `t2`, VALUES (0, 2), (0, 3) t2(c1, c2), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t2 +- LocalRelation [c1#x, c2#x] @@ -820,14 +820,14 @@ Project [scalar-subquery#x [] AS b#x] -- !query CREATE OR REPLACE TEMP VIEW t1(c1, c2) AS (VALUES (0, 1), (1, 2)) -- !query analysis -CreateViewCommand `t1`, [(c1,None), (c2,None)], (VALUES (0, 1), (1, 2)), false, true, LocalTempView, true +CreateViewCommand `t1`, [(c1,None), (c2,None)], (VALUES (0, 1), (1, 2)), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] -- !query CREATE OR REPLACE TEMP VIEW t2(c1, c2) AS (VALUES (0, 2), (0, 3)) -- !query analysis -CreateViewCommand `t2`, [(c1,None), (c2,None)], (VALUES (0, 2), (0, 3)), false, true, LocalTempView, true +CreateViewCommand `t2`, [(c1,None), (c2,None)], (VALUES (0, 2), (0, 3)), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] @@ -840,7 +840,7 @@ CREATE OR REPLACE TEMP VIEW students(id, name, major, year) AS (VALUES CreateViewCommand 
`students`, [(id,None), (name,None), (major,None), (year,None)], (VALUES (0, 'A', 'CS', 2022), (1, 'B', 'CS', 2022), - (2, 'C', 'Math', 2022)), false, true, LocalTempView, true + (2, 'C', 'Math', 2022)), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x, col4#x] @@ -855,7 +855,7 @@ CreateViewCommand `exams`, [(sid,None), (course,None), (curriculum,None), (grade (0, 'C1', 'CS', 4, 2020), (0, 'C2', 'CS', 3, 2021), (1, 'C1', 'CS', 2, 2020), - (1, 'C2', 'CS', 1, 2021)), false, true, LocalTempView, true + (1, 'C2', 'CS', 1, 2021)), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] @@ -1054,9 +1054,9 @@ Project [c1#xL, c2#xL] : +- Project [id#xL AS c#xL] : +- Filter (outer(id#xL) = id#xL) : +- SubqueryAlias t2 - : +- Range (1, 2, step=1, splits=None) + : +- Range (1, 2, step=1) +- SubqueryAlias t1 - +- Range (1, 3, step=1, splits=None) + +- Range (1, 3, step=1) -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-set-op.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-set-op.sql.out index eeea71f6cd4f2..eaeff0ba9dedb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-set-op.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-set-op.sql.out @@ -2,21 +2,21 @@ -- !query CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0) -- !query analysis -CreateViewCommand `t0`, [(t0a,None), (t0b,None)], VALUES (1, 1), (2, 0), false, true, LocalTempView, true +CreateViewCommand `t0`, [(t0a,None), (t0b,None)], VALUES (1, 1), (2, 0), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] -- !query CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3) -- !query analysis -CreateViewCommand `t1`, [(t1a,None), (t1b,None), (t1c,None)], VALUES (1, 1, 3), false, true, LocalTempView, true +CreateViewCommand `t1`, [(t1a,None), (t1b,None), (t1c,None)], VALUES (1, 1, 3), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x] -- !query CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7) -- !query analysis -CreateViewCommand `t2`, [(t2a,None), (t2b,None), (t2c,None)], VALUES (1, 1, 5), (2, 2, 7), false, true, LocalTempView, true +CreateViewCommand `t2`, [(t2a,None), (t2b,None), (t2c,None)], VALUES (1, 1, 5), (2, 2, 7), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x, col3#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/subquery-offset.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/subquery-offset.sql.out index 428df8e6adf8e..104c1b0f41a10 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/subquery-offset.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/subquery-offset.sql.out @@ -139,7 +139,7 @@ CreateViewCommand `EMP`, SELECT * FROM VALUES (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), (700, "emp 7", date "2010-01-01", 400.00D, 100), (800, "emp 8", date "2016-01-01", 150.00D, 70) -AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, true +AS EMP(id, emp_name, hiredate, salary, dept_id), false, false, LocalTempView, UNSUPPORTED, true +- Project [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] +- SubqueryAlias EMP +- 
LocalRelation [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] @@ -162,7 +162,7 @@ CreateViewCommand `DEPT`, SELECT * FROM VALUES (40, "dept 4 - unassigned", "OR"), (50, "dept 5 - unassigned", "NJ"), (70, "dept 7", "FL") -AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, true +AS DEPT(dept_id, dept_name, state), false, false, LocalTempView, UNSUPPORTED, true +- Project [dept_id#x, dept_name#x, state#x] +- SubqueryAlias DEPT +- LocalRelation [dept_id#x, dept_name#x, state#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out index a889e549d99cc..8d4bffd868753 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/table-aliases.sql.out @@ -2,7 +2,7 @@ -- !query CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b) -- !query analysis -CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b), false, true, LocalTempView, true +CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1) AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] @@ -158,7 +158,7 @@ Project [col1#x, col2#x] -- !query CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS src1(id, v1) -- !query analysis -CreateViewCommand `src1`, SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS src1(id, v1), false, true, LocalTempView, true +CreateViewCommand `src1`, SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS src1(id, v1), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, v1#x] +- SubqueryAlias src1 +- LocalRelation [id#x, v1#x] @@ -167,7 +167,7 @@ CreateViewCommand `src1`, SELECT * FROM VALUES (1, "a"), (2, "b"), (3, "c") AS s -- !query CREATE OR REPLACE TEMPORARY VIEW src2 AS SELECT * FROM VALUES (2, 1.0), (3, 3.2), (1, 8.5) AS src2(id, v2) -- !query analysis -CreateViewCommand `src2`, SELECT * FROM VALUES (2, 1.0), (3, 3.2), (1, 8.5) AS src2(id, v2), false, true, LocalTempView, true +CreateViewCommand `src2`, SELECT * FROM VALUES (2, 1.0), (3, 3.2), (1, 8.5) AS src2(id, v2), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, v2#x] +- SubqueryAlias src2 +- LocalRelation [id#x, v2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/table-valued-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/table-valued-functions.sql.out index c8698f7c7cd73..438e98f559db7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/table-valued-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/table-valued-functions.sql.out @@ -23,21 +23,21 @@ org.apache.spark.sql.AnalysisException select * from range(6 + cos(3)) -- !query analysis Project [id#xL] -+- Range (0, 5, step=1, splits=None) ++- Range (0, 5, step=1) -- !query select * from range(5, 10) -- !query analysis Project [id#xL] -+- Range (5, 10, step=1, splits=None) ++- Range (5, 10, step=1) -- !query select * from range(0, 10, 2) -- !query analysis Project [id#xL] -+- Range (0, 10, step=2, splits=None) ++- Range (0, 10, step=2) -- !query @@ -142,7 +142,7 @@ org.apache.spark.sql.AnalysisException select * from RaNgE(2) -- !query analysis Project [id#xL] -+- Range (0, 2, step=1, splits=None) ++- Range (0, 2, step=1) -- !query @@ 
-151,7 +151,7 @@ select i from range(0, 2) t(i) Project [i#xL] +- SubqueryAlias t +- Project [id#xL AS i#xL] - +- Range (0, 2, step=1, splits=None) + +- Range (0, 2, step=1) -- !query @@ -430,7 +430,7 @@ select * from range(2) join explode(array(1, 2)) -- !query analysis Project [id#xL, col#x] +- Join Inner - :- Range (0, 2, step=1, splits=None) + :- Range (0, 2, step=1) +- Generate explode(array(1, 2)), false, [col#x] +- OneRowRelation @@ -440,7 +440,7 @@ select * from range(2) join explode_outer(array()) -- !query analysis Project [id#xL, col#x] +- Join Inner - :- Range (0, 2, step=1, splits=None) + :- Range (0, 2, step=1) +- Generate explode(array()), true, [col#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp.sql.out index 4a48795e4cca5..6ca35b8b141dc 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestamp.sql.out @@ -236,7 +236,7 @@ create temporary view ttf1 as select * from values CreateViewCommand `ttf1`, select * from values (1, 2), (2, 3) - as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, true + as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, UNSUPPORTED, true +- Project [current_date#x, current_timestamp#x] +- SubqueryAlias ttf1 +- LocalRelation [current_date#x, current_timestamp#x] @@ -263,7 +263,7 @@ create temporary view ttf2 as select * from values CreateViewCommand `ttf2`, select * from values (1, 2), (2, 3) - as ttf2(a, b), false, false, LocalTempView, true + as ttf2(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias ttf2 +- LocalRelation [a#x, b#x] @@ -611,7 +611,7 @@ select null - timestamp'2011-11-11 11:11:11' -- !query create temporary view ts_view as select '2011-11-11 11:11:11' str -- !query analysis -CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, true +CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-11-11 11:11:11 AS str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out index 959eac6d5e639..e50c860270563 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp-ansi.sql.out @@ -237,7 +237,7 @@ create temporary view ttf1 as select * from values CreateViewCommand `ttf1`, select * from values (1, 2), (2, 3) - as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, true + as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, UNSUPPORTED, true +- Project [current_date#x, current_timestamp#x] +- SubqueryAlias ttf1 +- LocalRelation [current_date#x, current_timestamp#x] @@ -264,7 +264,7 @@ create temporary view ttf2 as select * from values CreateViewCommand `ttf2`, select * from values (1, 2), (2, 3) - as ttf2(a, b), false, false, LocalTempView, true + as ttf2(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias ttf2 +- LocalRelation [a#x, b#x] @@ -579,7 +579,7 @@ select null - timestamp'2011-11-11 11:11:11' -- !query create temporary view ts_view as select 
'2011-11-11 11:11:11' str -- !query analysis -CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, true +CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-11-11 11:11:11 AS str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out index 4aa1ffcbc31db..098abfb3852cf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/timestampNTZ/timestamp.sql.out @@ -237,7 +237,7 @@ create temporary view ttf1 as select * from values CreateViewCommand `ttf1`, select * from values (1, 2), (2, 3) - as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, true + as ttf1(`current_date`, `current_timestamp`), false, false, LocalTempView, UNSUPPORTED, true +- Project [current_date#x, current_timestamp#x] +- SubqueryAlias ttf1 +- LocalRelation [current_date#x, current_timestamp#x] @@ -264,7 +264,7 @@ create temporary view ttf2 as select * from values CreateViewCommand `ttf2`, select * from values (1, 2), (2, 3) - as ttf2(a, b), false, false, LocalTempView, true + as ttf2(a, b), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias ttf2 +- LocalRelation [a#x, b#x] @@ -613,7 +613,7 @@ select null - timestamp'2011-11-11 11:11:11' -- !query create temporary view ts_view as select '2011-11-11 11:11:11' str -- !query analysis -CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, true +CreateViewCommand `ts_view`, select '2011-11-11 11:11:11' str, false, false, LocalTempView, UNSUPPORTED, true +- Project [2011-11-11 11:11:11 AS str#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/to_from_avro.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/to_from_avro.sql.out new file mode 100644 index 0000000000000..951a4025d5fb2 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/to_from_avro.sql.out @@ -0,0 +1,132 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +create table t as + select named_struct('u', named_struct('member0', member0, 'member1', member1)) as s + from values (1, null), (null, 'a') tab(member0, member1) +-- !query analysis +CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`t`, ErrorIfExists, [s] + +- Project [named_struct(u, named_struct(member0, member0#x, member1, member1#x)) AS s#x] + +- SubqueryAlias tab + +- LocalRelation [member0#x, member1#x] + + +-- !query +declare avro_schema string +-- !query analysis +CreateVariable defaultvalueexpression(null, null), false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.avro_schema + + +-- !query +set variable avro_schema = + '{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] }' +-- !query analysis +SetVariable [variablereference(system.session.avro_schema=CAST(NULL AS STRING))] ++- Project [{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] } AS avro_schema#x] + +- OneRowRelation + + +-- !query +select from_avro(s, 42, map()) from t +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : 
"DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + "sqlState" : "42K09", + "messageParameters" : { + "hint" : "", + "msg" : "The second argument of the FROM_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value from AVRO format", + "sqlExpr" : "\"fromavro(s, 42, map())\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 30, + "fragment" : "from_avro(s, 42, map())" + } ] +} + + +-- !query +select from_avro(s, avro_schema, 42) from t +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + "sqlState" : "42K09", + "messageParameters" : { + "hint" : "", + "msg" : "The third argument of the FROM_AVRO SQL function must be a constant map of strings to strings containing the options to use for converting the value from AVRO format", + "sqlExpr" : "\"fromavro(s, variablereference(system.session.avro_schema='{ \"type\": \"record\", \"name\": \"struct\", \"fields\": [{ \"name\": \"u\", \"type\": [\"int\",\"string\"] }] }'), 42)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "from_avro(s, avro_schema, 42)" + } ] +} + + +-- !query +select to_avro(s, 42) from t +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + "sqlState" : "42K09", + "messageParameters" : { + "hint" : "", + "msg" : "The second argument of the TO_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value to AVRO format", + "sqlExpr" : "\"toavro(s, 42)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 21, + "fragment" : "to_avro(s, 42)" + } ] +} + + +-- !query +select to_avro(s, avro_schema) as result from t +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", + "sqlState" : "22KD3", + "messageParameters" : { + "functionName" : "TO_AVRO" + } +} + + +-- !query +select from_avro(result, avro_schema, map()).u from (select null as result) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", + "sqlState" : "22KD3", + "messageParameters" : { + "functionName" : "FROM_AVRO" + } +} + + +-- !query +drop temporary variable avro_schema +-- !query analysis +DropVariable false ++- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.avro_schema + + +-- !query +drop table t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out index 173db880eb9d4..7cf8a2886069d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `t`, SELECT * FROM VALUES ('1', true, unhex('537061726B2053514C'), tinyint(1), 1, smallint(100), bigint(1), float(1.0), 1.0, Decimal(1.0), timestamp('1997-01-02'), date('2000-04-01')), ('2', false, unhex('537061726B2053514C'), tinyint(2), 2, smallint(200), bigint(2), float(2.0), 2.0, 
Decimal(2.0), timestamp('1997-01-02 03:04:05'), date('2000-04-02')), ('3', true, unhex('537061726B2053514C'), tinyint(3), 3, smallint(300), bigint(3), float(3.0), 3.0, Decimal(3.0), timestamp('1997-02-10 17:32:01-08'), date('2000-04-03')) -AS t(a, b, c, d, e, f, g, h, i, j, k, l), false, true, LocalTempView, true +AS t(a, b, c, d, e, f, g, h, i, j, k, l), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x, c#x, d#x, e#x, f#x, g#xL, h#x, i#x, j#x, k#x, l#x] +- SubqueryAlias t +- LocalRelation [a#x, b#x, c#x, d#x, e#x, f#x, g#xL, h#x, i#x, j#x, k#x, l#x] @@ -27,7 +27,7 @@ CreateViewCommand `script_trans`, SELECT * FROM VALUES (1, 2, 3), (4, 5, 6), (7, 8, 9) -AS script_trans(a, b, c), false, true, LocalTempView, true +AS script_trans(a, b, c), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x, c#x] +- SubqueryAlias script_trans +- LocalRelation [a#x, b#x, c#x] @@ -56,7 +56,7 @@ CreateViewCommand `complex_trans`, SELECT * FROM VALUES (3, 3), (1, 1), (3, 3) -as complex_trans(a, b), false, true, LocalTempView, true +as complex_trans(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias complex_trans +- LocalRelation [a#x, b#x] @@ -1035,3 +1035,14 @@ ScriptTransformation cat, [a#x, b#x], ScriptInputOutputSchema(List(),List(),None +- Project [a#x, b#x] +- SubqueryAlias complex_trans +- LocalRelation [a#x, b#x] + + +-- !query +SELECT TRANSFORM (a, b) + USING 'cat' AS (a CHAR(10), b VARCHAR(10)) +FROM VALUES('apache', 'spark') t(a, b) +-- !query analysis +ScriptTransformation cat, [a#x, b#x], ScriptInputOutputSchema(List(),List(),None,None,List(),List(),None,None,false) ++- Project [a#x, b#x] + +- SubqueryAlias t + +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/try_arithmetic.sql.out index ef17f6b50b90a..caf997f6ccbb2 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/try_arithmetic.sql.out @@ -13,6 +13,20 @@ Project [try_add(2147483647, 1) AS try_add(2147483647, 1)#x] +- OneRowRelation +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query analysis +Project [try_add(2147483647, cast(1 as decimal(10,0))) AS try_add(2147483647, 1)#x] ++- OneRowRelation + + +-- !query +SELECT try_add(2147483647, "1") +-- !query analysis +Project [try_add(2147483647, 1) AS try_add(2147483647, 1)#x] ++- OneRowRelation + + -- !query SELECT try_add(-2147483648, -1) -- !query analysis @@ -211,6 +225,20 @@ Project [try_divide(1, (1.0 / 0.0)) AS try_divide(1, (1.0 / 0.0))#x] +- OneRowRelation +-- !query +SELECT try_divide(1, decimal(0)) +-- !query analysis +Project [try_divide(1, cast(0 as decimal(10,0))) AS try_divide(1, 0)#x] ++- OneRowRelation + + +-- !query +SELECT try_divide(1, "0") +-- !query analysis +Project [try_divide(1, 0) AS try_divide(1, 0)#x] ++- OneRowRelation + + -- !query SELECT try_divide(interval 2 year, 2) -- !query analysis @@ -267,6 +295,20 @@ Project [try_subtract(2147483647, -1) AS try_subtract(2147483647, -1)#x] +- OneRowRelation +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query analysis +Project [try_subtract(2147483647, cast(-1 as decimal(10,0))) AS try_subtract(2147483647, -1)#x] ++- OneRowRelation + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query analysis +Project [try_subtract(2147483647, -1) AS try_subtract(2147483647, -1)#x] ++- OneRowRelation + + -- !query 
SELECT try_subtract(-2147483648, 1) -- !query analysis @@ -351,6 +393,20 @@ Project [try_multiply(2147483647, -2) AS try_multiply(2147483647, -2)#x] +- OneRowRelation +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query analysis +Project [try_multiply(2147483647, cast(-2 as decimal(10,0))) AS try_multiply(2147483647, -2)#x] ++- OneRowRelation + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query analysis +Project [try_multiply(2147483647, -2) AS try_multiply(2147483647, -2)#x] ++- OneRowRelation + + -- !query SELECT try_multiply(-2147483648, 2) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/binaryComparison.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/binaryComparison.sql.out index c2dfe61b259da..d15418c17b730 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/binaryComparison.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/binaryComparison.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/booleanEquality.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/booleanEquality.sql.out index c1aa8f3c7921e..de6c0b72c1c79 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/booleanEquality.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/booleanEquality.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/caseWhenCoercion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/caseWhenCoercion.sql.out index 7662eac61e543..4124fc57996cd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/caseWhenCoercion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/caseWhenCoercion.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/concat.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/concat.sql.out index eb3d43a92896f..62e3a87473263 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/concat.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/concat.sql.out @@ -11,8 +11,8 @@ FROM ( -- !query analysis Project [concat(concat(cast(col1#xL as string), col2#x), cast(col3#x as string)) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [id#xL AS col1#xL, cast((id#xL + cast(1 as bigint)) as string) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as 
string), utf-8, false) AS col3#x] - +- Range (0, 10, step=1, splits=None) + +- Project [id#xL AS col1#xL, cast((id#xL + cast(1 as bigint)) as string) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x] + +- Range (0, 10, step=1) -- !query @@ -29,8 +29,8 @@ FROM ( -- !query analysis Project [concat(concat(concat(col1#x, cast(col2#xL as string)), concat(col3#x, cast(col4#x as string))), cast(col5#x as string)) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [prefix_ AS col1#x, id#xL AS col2#xL, cast((id#xL + cast(1 as bigint)) as string) AS col3#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8, false) AS col4#x, cast(id#xL as double) AS col5#x] - +- Range (0, 10, step=1, splits=None) + +- Project [prefix_ AS col1#x, id#xL AS col2#xL, cast((id#xL + cast(1 as bigint)) as string) AS col3#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col4#x, cast(id#xL as double) AS col5#x] + +- Range (0, 10, step=1) -- !query @@ -46,8 +46,8 @@ FROM ( -- !query analysis Project [concat(concat(col1#x, col2#x), cast(concat(col3#x, col4#x) as string)) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(id#xL as string) AS col1#x, cast((id#xL + cast(1 as bigint)) as string) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8, false) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8, false) AS col4#x] - +- Range (0, 10, step=1, splits=None) + +- Project [cast(id#xL as string) AS col1#x, cast((id#xL + cast(1 as bigint)) as string) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8) AS col4#x] + +- Range (0, 10, step=1) -- !query @@ -67,8 +67,8 @@ FROM ( -- !query analysis Project [concat(cast(col1#x as string), cast(col2#x as string)) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [encode(cast(id#xL as string), utf-8, false) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8, false) AS col2#x] - +- Range (0, 10, step=1, splits=None) + +- Project [encode(cast(id#xL as string), utf-8) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8) AS col2#x] + +- Range (0, 10, step=1) -- !query @@ -84,8 +84,8 @@ FROM ( -- !query analysis Project [concat(concat(concat(cast(col1#x as string), cast(col2#x as string)), cast(col3#x as string)), cast(col4#x as string)) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [encode(cast(id#xL as string), utf-8, false) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8, false) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8, false) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8, false) AS col4#x] - +- Range (0, 10, step=1, splits=None) + +- Project [encode(cast(id#xL as string), utf-8) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8) AS col4#x] + +- Range (0, 10, step=1) -- !query @@ -101,8 +101,8 @@ FROM ( -- !query analysis Project [concat(concat(cast(col1#x as string), cast(col2#x as string)), concat(cast(col3#x as string), cast(col4#x as string))) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [encode(cast(id#xL as string), utf-8, false) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8, false) AS col2#x, encode(cast((id#xL 
+ cast(2 as bigint)) as string), utf-8, false) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8, false) AS col4#x] - +- Range (0, 10, step=1, splits=None) + +- Project [encode(cast(id#xL as string), utf-8) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8) AS col4#x] + +- Range (0, 10, step=1) -- !query @@ -122,8 +122,8 @@ FROM ( -- !query analysis Project [concat(col1#x, col2#x) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [encode(cast(id#xL as string), utf-8, false) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8, false) AS col2#x] - +- Range (0, 10, step=1, splits=None) + +- Project [encode(cast(id#xL as string), utf-8) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8) AS col2#x] + +- Range (0, 10, step=1) -- !query @@ -139,8 +139,8 @@ FROM ( -- !query analysis Project [concat(concat(concat(col1#x, col2#x), col3#x), col4#x) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [encode(cast(id#xL as string), utf-8, false) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8, false) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8, false) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8, false) AS col4#x] - +- Range (0, 10, step=1, splits=None) + +- Project [encode(cast(id#xL as string), utf-8) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8) AS col4#x] + +- Range (0, 10, step=1) -- !query @@ -156,8 +156,8 @@ FROM ( -- !query analysis Project [concat(concat(col1#x, col2#x), concat(col3#x, col4#x)) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [encode(cast(id#xL as string), utf-8, false) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8, false) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8, false) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8, false) AS col4#x] - +- Range (0, 10, step=1, splits=None) + +- Project [encode(cast(id#xL as string), utf-8) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8) AS col4#x] + +- Range (0, 10, step=1) -- !query @@ -225,7 +225,7 @@ CreateViewCommand `various_arrays`, SELECT * FROM VALUES ( array_array1, array_array2, struct_array1, struct_array2, map_array1, map_array2 -), false, false, LocalTempView, true +), false, false, LocalTempView, UNSUPPORTED, true +- Project [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 
4 more fields] +- SubqueryAlias various_arrays +- LocalRelation [boolean_array1#x, boolean_array2#x, tinyint_array1#x, tinyint_array2#x, smallint_array1#x, smallint_array2#x, int_array1#x, int_array2#x, bigint_array1#x, bigint_array2#x, decimal_array1#x, decimal_array2#x, double_array1#x, double_array2#x, float_array1#x, float_array2#x, date_array1#x, data_array2#x, timestamp_array1#x, timestamp_array2#x, string_array1#x, string_array2#x, array_array1#x, array_array2#x, ... 4 more fields] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/dateTimeOperations.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/dateTimeOperations.sql.out index a50c797e78c20..c2787e5816883 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/dateTimeOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/dateTimeOperations.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/decimalPrecision.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/decimalPrecision.sql.out index eebe370666edb..093297f03edb7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/decimalPrecision.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/decimalPrecision.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/division.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/division.sql.out index a034c22ffcda3..22b870bc0b420 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/division.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/division.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/elt.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/elt.sql.out index 4d897a329cfe1..f4902012f0f96 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/elt.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/elt.sql.out @@ -13,8 +13,8 @@ FROM ( -- !query analysis Project [elt(2, col1#x, cast(col2#xL as string), col3#x, cast(col4#x as string), cast(col5#x as string), false) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [prefix_ AS col1#x, id#xL AS col2#xL, cast((id#xL + cast(1 as bigint)) as string) AS col3#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8, false) AS col4#x, cast(id#xL as double) AS col5#x] - +- Range (0, 10, 
step=1, splits=None) + +- Project [prefix_ AS col1#x, id#xL AS col2#xL, cast((id#xL + cast(1 as bigint)) as string) AS col3#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col4#x, cast(id#xL as double) AS col5#x] + +- Range (0, 10, step=1) -- !query @@ -30,8 +30,8 @@ FROM ( -- !query analysis Project [elt(3, col1#x, col2#x, cast(col3#x as string), cast(col4#x as string), false) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [cast(id#xL as string) AS col1#x, cast((id#xL + cast(1 as bigint)) as string) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8, false) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8, false) AS col4#x] - +- Range (0, 10, step=1, splits=None) + +- Project [cast(id#xL as string) AS col1#x, cast((id#xL + cast(1 as bigint)) as string) AS col2#x, encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x, encode(cast((id#xL + cast(3 as bigint)) as string), utf-8) AS col4#x] + +- Range (0, 10, step=1) -- !query @@ -51,8 +51,8 @@ FROM ( -- !query analysis Project [elt(1, cast(col1#x as string), cast(col2#x as string), false) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [encode(cast(id#xL as string), utf-8, false) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8, false) AS col2#x] - +- Range (0, 10, step=1, splits=None) + +- Project [encode(cast(id#xL as string), utf-8) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8) AS col2#x] + +- Range (0, 10, step=1) -- !query @@ -72,5 +72,5 @@ FROM ( -- !query analysis Project [elt(2, col1#x, col2#x, false) AS col#x] +- SubqueryAlias __auto_generated_subquery_name - +- Project [encode(cast(id#xL as string), utf-8, false) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8, false) AS col2#x] - +- Range (0, 10, step=1, splits=None) + +- Project [encode(cast(id#xL as string), utf-8) AS col1#x, encode(cast((id#xL + cast(1 as bigint)) as string), utf-8) AS col2#x] + +- Range (0, 10, step=1) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/ifCoercion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/ifCoercion.sql.out index edee343b59250..b1d07bd7be902 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/ifCoercion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/ifCoercion.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/implicitTypeCasts.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/implicitTypeCasts.sql.out index 9818d54b809a5..43aaea63fd045 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/implicitTypeCasts.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/implicitTypeCasts.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/inConversion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/inConversion.sql.out index b248ec5997297..0db96719a3fb0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/inConversion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/inConversion.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapZipWith.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapZipWith.sql.out index 2b8152f0cc703..0f72b0cf8a0e3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapZipWith.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapZipWith.sql.out @@ -66,7 +66,7 @@ CreateViewCommand `various_maps`, SELECT * FROM VALUES ( string_map1, string_map2, string_map3, string_map4, array_map1, array_map2, struct_map1, struct_map2 -), false, false, LocalTempView, true +), false, false, LocalTempView, UNSUPPORTED, true +- Project [boolean_map#x, tinyint_map#x, smallint_map#x, int_map#x, bigint_map#x, decimal_map1#x, decimal_map2#x, double_map#x, float_map#x, date_map#x, timestamp_map#x, string_map1#x, string_map2#x, string_map3#x, string_map4#x, array_map1#x, array_map2#x, struct_map1#x, struct_map2#x] +- SubqueryAlias various_maps +- LocalRelation [boolean_map#x, tinyint_map#x, smallint_map#x, int_map#x, bigint_map#x, decimal_map1#x, decimal_map2#x, double_map#x, float_map#x, date_map#x, timestamp_map#x, string_map1#x, string_map2#x, string_map3#x, string_map4#x, array_map1#x, array_map2#x, struct_map1#x, struct_map2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapconcat.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapconcat.sql.out index f4c932fa29f97..dd3e56fe9322d 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapconcat.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/mapconcat.sql.out @@ -70,7 +70,7 @@ CreateViewCommand `various_maps`, SELECT * FROM VALUES ( struct_map1, struct_map2, string_int_map1, string_int_map2, int_string_map1, int_string_map2 -), false, false, LocalTempView, true +), false, false, LocalTempView, UNSUPPORTED, true +- Project [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, ... 6 more fields] +- SubqueryAlias various_maps +- LocalRelation [boolean_map1#x, boolean_map2#x, tinyint_map1#x, tinyint_map2#x, smallint_map1#x, smallint_map2#x, int_map1#x, int_map2#x, bigint_map1#x, bigint_map2#x, decimal_map1#x, decimal_map2#x, double_map1#x, double_map2#x, float_map1#x, float_map2#x, date_map1#x, date_map2#x, timestamp_map1#x, timestamp_map2#x, string_map1#x, string_map2#x, array_map1#x, array_map2#x, ... 
6 more fields] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/promoteStrings.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/promoteStrings.sql.out index 0a32a7eaac474..ccd34cfaeb67f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/promoteStrings.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/promoteStrings.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/stringCastAndExpressions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/stringCastAndExpressions.sql.out index 448ab457d3951..009e91f7ffacf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/stringCastAndExpressions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/stringCastAndExpressions.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 'aa' as a -- !query analysis -CreateViewCommand `t`, SELECT 'aa' as a, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 'aa' as a, false, false, LocalTempView, UNSUPPORTED, true +- Project [aa AS a#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/widenSetOperationTypes.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/widenSetOperationTypes.sql.out index ff009f8bd64c0..029ec4abb6faf 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/widenSetOperationTypes.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/widenSetOperationTypes.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/windowFrameCoercion.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/windowFrameCoercion.sql.out index 76595c6cbded2..170e7dff38ac3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/windowFrameCoercion.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/typeCoercion/native/windowFrameCoercion.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t AS SELECT 1 -- !query analysis -CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, true +CreateViewCommand `t`, SELECT 1, false, false, LocalTempView, UNSUPPORTED, true +- Project [1 AS 1#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-aggregates_part1.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-aggregates_part1.sql.out index e8bdf42655498..ddfe742f7ea08 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-aggregates_part1.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-aggregates_part1.sql.out @@ -154,70 +154,70 @@ Aggregate 
[stddev_pop(cast(cast(udf(cast(cast(3.0 as decimal(38,0)) as string)) select sum(udf(CAST(null AS int))) from range(1,4) -- !query analysis Aggregate [sum(cast(udf(cast(cast(null as int) as string)) as int)) AS sum(udf(CAST(NULL AS INT)))#xL] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select sum(udf(CAST(null AS long))) from range(1,4) -- !query analysis Aggregate [sum(cast(udf(cast(cast(null as bigint) as string)) as bigint)) AS sum(udf(CAST(NULL AS BIGINT)))#xL] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select sum(udf(CAST(null AS Decimal(38,0)))) from range(1,4) -- !query analysis Aggregate [sum(cast(udf(cast(cast(null as decimal(38,0)) as string)) as decimal(38,0))) AS sum(udf(CAST(NULL AS DECIMAL(38,0))))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select sum(udf(CAST(null AS DOUBLE))) from range(1,4) -- !query analysis Aggregate [sum(cast(udf(cast(cast(null as double) as string)) as double)) AS sum(udf(CAST(NULL AS DOUBLE)))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(udf(CAST(null AS int))) from range(1,4) -- !query analysis Aggregate [avg(cast(udf(cast(cast(null as int) as string)) as int)) AS avg(udf(CAST(NULL AS INT)))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(udf(CAST(null AS long))) from range(1,4) -- !query analysis Aggregate [avg(cast(udf(cast(cast(null as bigint) as string)) as bigint)) AS avg(udf(CAST(NULL AS BIGINT)))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(udf(CAST(null AS Decimal(38,0)))) from range(1,4) -- !query analysis Aggregate [avg(cast(udf(cast(cast(null as decimal(38,0)) as string)) as decimal(38,0))) AS avg(udf(CAST(NULL AS DECIMAL(38,0))))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(udf(CAST(null AS DOUBLE))) from range(1,4) -- !query analysis Aggregate [avg(cast(udf(cast(cast(null as double) as string)) as double)) AS avg(udf(CAST(NULL AS DOUBLE)))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select sum(CAST(udf('NaN') AS DOUBLE)) from range(1,4) -- !query analysis Aggregate [sum(cast(cast(udf(cast(NaN as string)) as string) as double)) AS sum(CAST(udf(NaN) AS DOUBLE))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query select avg(CAST(udf('NaN') AS DOUBLE)) from range(1,4) -- !query analysis Aggregate [avg(cast(cast(udf(cast(NaN as string)) as string) as double)) AS avg(CAST(udf(NaN) AS DOUBLE))#x] -+- Range (1, 4, step=1, splits=None) ++- Range (1, 4, step=1) -- !query @@ -345,7 +345,7 @@ Aggregate [corr(cast(b#x as double), cast(cast(udf(cast(a#x as string)) as int) -- !query CREATE TEMPORARY VIEW regr_test AS SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y) -- !query analysis -CreateViewCommand `regr_test`, SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y), false, false, LocalTempView, true +CreateViewCommand `regr_test`, SELECT * FROM VALUES (10,150),(20,250),(30,350),(80,540),(100,200) AS regr_test (x, y), false, false, LocalTempView, UNSUPPORTED, true +- Project [x#x, y#x] +- SubqueryAlias regr_test +- LocalRelation [x#x, y#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-aggregates_part2.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-aggregates_part2.sql.out index 
bb8d0824eb5b7..99bc3cbf49b2a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-aggregates_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-aggregates_part2.sql.out @@ -14,7 +14,7 @@ CreateViewCommand `int4_tbl`, select * from values (-123456), (2147483647), (-2147483647) - as int4_tbl(f1), false, false, LocalTempView, true + as int4_tbl(f1), false, false, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias int4_tbl +- LocalRelation [f1#x] @@ -29,7 +29,7 @@ CREATE OR REPLACE TEMPORARY VIEW bitwise_test AS SELECT * FROM VALUES CreateViewCommand `bitwise_test`, SELECT * FROM VALUES (1, 1, 1, 1L), (3, 3, 3, null), - (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4), false, true, LocalTempView, true + (7, 7, 7, 3L) AS bitwise_test(b1, b2, b3, b4), false, true, LocalTempView, UNSUPPORTED, true +- Project [b1#x, b2#x, b3#x, b4#xL] +- SubqueryAlias bitwise_test +- LocalRelation [b1#x, b2#x, b3#x, b4#xL] @@ -123,7 +123,7 @@ CREATE OR REPLACE TEMPORARY VIEW bool_test AS SELECT * FROM VALUES CreateViewCommand `bool_test`, SELECT * FROM VALUES (TRUE, null, FALSE, null), (FALSE, TRUE, null, null), - (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4), false, true, LocalTempView, true + (null, TRUE, FALSE, null) AS bool_test(b1, b2, b3, b4), false, true, LocalTempView, UNSUPPORTED, true +- Project [b1#x, b2#x, b3#x, b4#x] +- SubqueryAlias bool_test +- LocalRelation [b1#x, b2#x, b3#x, b4#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out index cd743c0a7fefa..c74124402c554 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/postgreSQL/udf-join.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `INT2_TBL`, [(f1,None)], VALUES (smallint(trim(' 1234 '))), (smallint(trim(' -1234'))), (smallint('32767')), - (smallint('-32767')), false, true, LocalTempView, true + (smallint('-32767')), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x] @@ -23,7 +23,7 @@ CREATE OR REPLACE TEMPORARY VIEW INT4_TBL AS SELECT * FROM -- !query analysis CreateViewCommand `INT4_TBL`, SELECT * FROM (VALUES (0), (123456), (-123456), (2147483647), (-2147483647)) - AS v(f1), false, true, LocalTempView, true + AS v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -47,7 +47,7 @@ CreateViewCommand `INT8_TBL`, SELECT * FROM (4567890123456789, 123), (4567890123456789, 4567890123456789), (4567890123456789, -4567890123456789)) - AS v(q1, q2), false, true, LocalTempView, true + AS v(q1, q2), false, true, LocalTempView, UNSUPPORTED, true +- Project [q1#xL, q2#xL] +- SubqueryAlias v +- Project [col1#xL AS q1#xL, col2#xL AS q2#xL] @@ -63,7 +63,7 @@ CREATE OR REPLACE TEMPORARY VIEW FLOAT8_TBL AS SELECT * FROM CreateViewCommand `FLOAT8_TBL`, SELECT * FROM (VALUES (0.0), (1004.30), (-34.84), (cast('1.2345678901234e+200' as double)), (cast('1.2345678901234e-200' as double))) - AS v(f1), false, true, LocalTempView, true + AS v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -77,7 +77,7 @@ CREATE OR REPLACE TEMPORARY VIEW TEXT_TBL AS SELECT * FROM -- !query analysis CreateViewCommand `TEXT_TBL`, SELECT * FROM (VALUES ('doh!'), ('hi de ho neighbor')) - AS 
v(f1), false, true, LocalTempView, true + AS v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -87,7 +87,7 @@ CreateViewCommand `TEXT_TBL`, SELECT * FROM -- !query CREATE OR REPLACE TEMPORARY VIEW tenk2 AS SELECT * FROM tenk1 -- !query analysis -CreateViewCommand `tenk2`, SELECT * FROM tenk1, false, true, LocalTempView, true +CreateViewCommand `tenk2`, SELECT * FROM tenk1, false, true, LocalTempView, UNSUPPORTED, true +- Project [unique1#x, unique2#x, two#x, four#x, ten#x, twenty#x, hundred#x, thousand#x, twothousand#x, fivethous#x, tenthous#x, odd#x, even#x, stringu1#x, stringu2#x, string4#x] +- SubqueryAlias spark_catalog.default.tenk1 +- Relation spark_catalog.default.tenk1[unique1#x,unique2#x,two#x,four#x,ten#x,twenty#x,hundred#x,thousand#x,twothousand#x,fivethous#x,tenthous#x,odd#x,even#x,stringu1#x,stringu2#x,string4#x] parquet @@ -1005,7 +1005,7 @@ create or replace temporary view x as select * from -- !query analysis CreateViewCommand `x`, select * from (values (1,11), (2,22), (3,null), (4,44), (5,null)) - as v(x1, x2), false, true, LocalTempView, true + as v(x1, x2), false, true, LocalTempView, UNSUPPORTED, true +- Project [x1#x, x2#x] +- SubqueryAlias v +- Project [col1#x AS x1#x, col2#x AS x2#x] @@ -1019,7 +1019,7 @@ create or replace temporary view y as select * from -- !query analysis CreateViewCommand `y`, select * from (values (1,111), (2,222), (3,333), (4,null)) - as v(y1, y2), false, true, LocalTempView, true + as v(y1, y2), false, true, LocalTempView, UNSUPPORTED, true +- Project [y1#x, y2#x] +- SubqueryAlias v +- Project [col1#x AS y1#x, col2#x AS y2#x] @@ -1629,7 +1629,7 @@ create or replace temporary view tt1 as select * from -- !query analysis CreateViewCommand `tt1`, select * from (values (1, 11), (2, NULL)) - as v(tt1_id, joincol), false, true, LocalTempView, true + as v(tt1_id, joincol), false, true, LocalTempView, UNSUPPORTED, true +- Project [tt1_id#x, joincol#x] +- SubqueryAlias v +- Project [col1#x AS tt1_id#x, col2#x AS joincol#x] @@ -1643,7 +1643,7 @@ create or replace temporary view tt2 as select * from -- !query analysis CreateViewCommand `tt2`, select * from (values (21, 11), (22, 11)) - as v(tt2_id, joincol), false, true, LocalTempView, true + as v(tt2_id, joincol), false, true, LocalTempView, UNSUPPORTED, true +- Project [tt2_id#x, joincol#x] +- SubqueryAlias v +- Project [col1#x AS tt2_id#x, col2#x AS joincol#x] @@ -1727,7 +1727,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d +- Project [cast(id#xL as int) AS f1#x, cast(repeat(xyzzy, 100)#x as string) AS f2#x] +- Project [id#xL, repeat(xyzzy, 100) AS repeat(xyzzy, 100)#x] +- SubqueryAlias x - +- Range (1, 10001, step=1, splits=None) + +- Range (1, 10001, step=1) -- !query @@ -1786,7 +1786,7 @@ create or replace temporary view tt5 as select * from -- !query analysis CreateViewCommand `tt5`, select * from (values (1, 10), (1, 11)) - as v(f1, f2), false, true, LocalTempView, true + as v(f1, f2), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x, f2#x] +- SubqueryAlias v +- Project [col1#x AS f1#x, col2#x AS f2#x] @@ -1800,7 +1800,7 @@ create or replace temporary view tt6 as select * from -- !query analysis CreateViewCommand `tt6`, select * from (values (1, 9), (1, 2), (2, 9)) - as v(f1, f2), false, true, LocalTempView, true + as v(f1, f2), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x, f2#x] +- SubqueryAlias v +- Project [col1#x AS f1#x, col2#x AS f2#x] @@ -1836,7 
+1836,7 @@ create or replace temporary view xx as select * from -- !query analysis CreateViewCommand `xx`, select * from (values (1), (2), (3)) - as v(pkxx), false, true, LocalTempView, true + as v(pkxx), false, true, LocalTempView, UNSUPPORTED, true +- Project [pkxx#x] +- SubqueryAlias v +- Project [col1#x AS pkxx#x] @@ -1850,7 +1850,7 @@ create or replace temporary view yy as select * from -- !query analysis CreateViewCommand `yy`, select * from (values (101, 1), (201, 2), (301, NULL)) - as v(pkyy, pkxx), false, true, LocalTempView, true + as v(pkyy, pkxx), false, true, LocalTempView, UNSUPPORTED, true +- Project [pkyy#x, pkxx#x] +- SubqueryAlias v +- Project [col1#x AS pkyy#x, col2#x AS pkxx#x] @@ -1911,7 +1911,7 @@ create or replace temporary view zt1 as select * from -- !query analysis CreateViewCommand `zt1`, select * from (values (53)) - as v(f1), false, true, LocalTempView, true + as v(f1), false, true, LocalTempView, UNSUPPORTED, true +- Project [f1#x] +- SubqueryAlias v +- Project [col1#x AS f1#x] @@ -1925,7 +1925,7 @@ create or replace temporary view zt2 as select * from -- !query analysis CreateViewCommand `zt2`, select * from (values (53)) - as v(f2), false, true, LocalTempView, true + as v(f2), false, true, LocalTempView, UNSUPPORTED, true +- Project [f2#x] +- SubqueryAlias v +- Project [col1#x AS f2#x] @@ -1970,7 +1970,7 @@ Project [f2#x, f3#x, f1#x] -- !query create temp view zv1 as select *,'dummy' AS junk from zt1 -- !query analysis -CreateViewCommand `zv1`, select *,'dummy' AS junk from zt1, false, false, LocalTempView, true +CreateViewCommand `zv1`, select *,'dummy' AS junk from zt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [f1#x, dummy AS junk#x] +- SubqueryAlias zt1 +- View (`zt1`, [f1#x]) @@ -2145,7 +2145,7 @@ create or replace temporary view a as select * from -- !query analysis CreateViewCommand `a`, select * from (values ('p'), ('q')) - as v(code), false, true, LocalTempView, true + as v(code), false, true, LocalTempView, UNSUPPORTED, true +- Project [code#x] +- SubqueryAlias v +- Project [col1#x AS code#x] @@ -2159,7 +2159,7 @@ create or replace temporary view b as select * from -- !query analysis CreateViewCommand `b`, select * from (values ('p', 1), ('p', 2)) - as v(a, num), false, true, LocalTempView, true + as v(a, num), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, num#x] +- SubqueryAlias v +- Project [col1#x AS a#x, col2#x AS num#x] @@ -2173,7 +2173,7 @@ create or replace temporary view c as select * from -- !query analysis CreateViewCommand `c`, select * from (values ('A', 'p'), ('B', 'q'), ('C', null)) - as v(name, a), false, true, LocalTempView, true + as v(name, a), false, true, LocalTempView, UNSUPPORTED, true +- Project [name#x, a#x] +- SubqueryAlias v +- Project [col1#x AS name#x, col2#x AS a#x] @@ -2346,7 +2346,7 @@ create or replace temporary view nt1 as select * from -- !query analysis CreateViewCommand `nt1`, select * from (values(1,true,true), (2,true,false), (3,false,false)) - as v(id, a1, a2), false, true, LocalTempView, true + as v(id, a1, a2), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, a1#x, a2#x] +- SubqueryAlias v +- Project [col1#x AS id#x, col2#x AS a1#x, col3#x AS a2#x] @@ -2360,7 +2360,7 @@ create or replace temporary view nt2 as select * from -- !query analysis CreateViewCommand `nt2`, select * from (values(1,1,true,true), (2,2,true,false), (3,3,false,false)) - as v(id, nt1_id, b1, b2), false, true, LocalTempView, true + as v(id, nt1_id, b1, b2), false, true, LocalTempView, 
UNSUPPORTED, true +- Project [id#x, nt1_id#x, b1#x, b2#x] +- SubqueryAlias v +- Project [col1#x AS id#x, col2#x AS nt1_id#x, col3#x AS b1#x, col4#x AS b2#x] @@ -2374,7 +2374,7 @@ create or replace temporary view nt3 as select * from -- !query analysis CreateViewCommand `nt3`, select * from (values(1,1,true), (2,2,false), (3,3,true)) - as v(id, nt2_id, c1), false, true, LocalTempView, true + as v(id, nt2_id, c1), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, nt2_id#x, c1#x] +- SubqueryAlias v +- Project [col1#x AS id#x, col2#x AS nt2_id#x, col3#x AS c1#x] @@ -2732,7 +2732,7 @@ create or replace temporary view parent as select * from -- !query analysis CreateViewCommand `parent`, select * from (values (1, 10), (2, 20), (3, 30)) - as v(k, pd), false, true, LocalTempView, true + as v(k, pd), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, pd#x] +- SubqueryAlias v +- Project [col1#x AS k#x, col2#x AS pd#x] @@ -2746,7 +2746,7 @@ create or replace temporary view child as select * from -- !query analysis CreateViewCommand `child`, select * from (values (1, 100), (4, 400)) - as v(k, cd), false, true, LocalTempView, true + as v(k, cd), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, cd#x] +- SubqueryAlias v +- Project [col1#x AS k#x, col2#x AS cd#x] @@ -2871,7 +2871,7 @@ create or replace temporary view a as select * from -- !query analysis CreateViewCommand `a`, select * from (values (0), (1)) - as v(id), false, true, LocalTempView, true + as v(id), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x] +- SubqueryAlias v +- Project [col1#x AS id#x] @@ -2885,7 +2885,7 @@ create or replace temporary view b as select * from -- !query analysis CreateViewCommand `b`, select * from (values (0, 0), (1, NULL)) - as v(id, a_id), false, true, LocalTempView, true + as v(id, a_id), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#x, a_id#x] +- SubqueryAlias v +- Project [col1#x AS id#x, col2#x AS a_id#x] @@ -2943,7 +2943,7 @@ create or replace temporary view innertab as select * from -- !query analysis CreateViewCommand `innertab`, select * from (values (123L, 42L)) - as v(id, dat1), false, true, LocalTempView, true + as v(id, dat1), false, true, LocalTempView, UNSUPPORTED, true +- Project [id#xL, dat1#xL] +- SubqueryAlias v +- Project [col1#xL AS id#xL, col2#xL AS dat1#xL] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-count.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-count.sql.out index 57c42b338c41a..2540daef71424 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-count.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-count.sql.out @@ -6,7 +6,7 @@ AS testData(a, b) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (1, 1), (null, 2), (1, null), (null, null) -AS testData(a, b), false, true, LocalTempView, true +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-cross-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-cross-join.sql.out index c704623140d67..c5ee1742f5d7c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-cross-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-cross-join.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `nt1`, select 
* from values ("one", 1), ("two", 2), ("three", 3) - as nt1(k, v1), false, false, LocalTempView, true + as nt1(k, v1), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- LocalRelation [k#x, v1#x] @@ -27,7 +27,7 @@ CreateViewCommand `nt2`, select * from values ("one", 1), ("two", 22), ("one", 5) - as nt2(k, v2), false, false, LocalTempView, true + as nt2(k, v2), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v2#x] +- SubqueryAlias nt2 +- LocalRelation [k#x, v2#x] @@ -142,7 +142,7 @@ Project [cast(udf(cast(key#x as string)) as string) AS udf(key)#x, cast(udf(cast -- !query create temporary view A(a, va) as select * from nt1 -- !query analysis -CreateViewCommand `A`, [(a,None), (va,None)], select * from nt1, false, false, LocalTempView, true +CreateViewCommand `A`, [(a,None), (va,None)], select * from nt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- View (`nt1`, [k#x, v1#x]) @@ -155,7 +155,7 @@ CreateViewCommand `A`, [(a,None), (va,None)], select * from nt1, false, false, L -- !query create temporary view B(b, vb) as select * from nt1 -- !query analysis -CreateViewCommand `B`, [(b,None), (vb,None)], select * from nt1, false, false, LocalTempView, true +CreateViewCommand `B`, [(b,None), (vb,None)], select * from nt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- View (`nt1`, [k#x, v1#x]) @@ -168,7 +168,7 @@ CreateViewCommand `B`, [(b,None), (vb,None)], select * from nt1, false, false, L -- !query create temporary view C(c, vc) as select * from nt1 -- !query analysis -CreateViewCommand `C`, [(c,None), (vc,None)], select * from nt1, false, false, LocalTempView, true +CreateViewCommand `C`, [(c,None), (vc,None)], select * from nt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- View (`nt1`, [k#x, v1#x]) @@ -181,7 +181,7 @@ CreateViewCommand `C`, [(c,None), (vc,None)], select * from nt1, false, false, L -- !query create temporary view D(d, vd) as select * from nt1 -- !query analysis -CreateViewCommand `D`, [(d,None), (vd,None)], select * from nt1, false, false, LocalTempView, true +CreateViewCommand `D`, [(d,None), (vd,None)], select * from nt1, false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- View (`nt1`, [k#x, v1#x]) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-except-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-except-all.sql.out index 74ae6c0c584a8..37c10b6fcd03c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-except-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-except-all.sql.out @@ -4,7 +4,7 @@ CREATE TEMPORARY VIEW tab1 AS SELECT * FROM VALUES (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1) -- !query analysis CreateViewCommand `tab1`, SELECT * FROM VALUES - (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1), false, false, LocalTempView, true + (0), (1), (2), (2), (2), (2), (3), (null), (null) AS tab1(c1), false, false, LocalTempView, UNSUPPORTED, true +- Project [c1#x] +- SubqueryAlias tab1 +- LocalRelation [c1#x] @@ -15,7 +15,7 @@ CREATE TEMPORARY VIEW tab2 AS SELECT * FROM VALUES (1), (2), (2), (3), (5), (5), (null) AS tab2(c1) -- !query analysis CreateViewCommand `tab2`, SELECT * FROM VALUES - (1), (2), (2), (3), (5), (5), (null) AS tab2(c1), false, false, LocalTempView, true + (1), (2), (2), 
(3), (5), (5), (null) AS tab2(c1), false, false, LocalTempView, UNSUPPORTED, true +- Project [c1#x] +- SubqueryAlias tab2 +- LocalRelation [c1#x] @@ -36,7 +36,7 @@ CreateViewCommand `tab3`, SELECT * FROM VALUES (1, 3), (2, 3), (2, 2) - AS tab3(k, v), false, false, LocalTempView, true + AS tab3(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias tab3 +- LocalRelation [k#x, v#x] @@ -57,7 +57,7 @@ CreateViewCommand `tab4`, SELECT * FROM VALUES (2, 2), (2, 2), (2, 20) - AS tab4(k, v), false, false, LocalTempView, true + AS tab4(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias tab4 +- LocalRelation [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-except.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-except.sql.out index 07649c3e5620b..4a7a38bd88496 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-except.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-except.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `t1`, select * from values ("two", 2), ("three", 3), ("one", NULL) - as t1(k, v), false, false, LocalTempView, true + as t1(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias t1 +- LocalRelation [k#x, v#x] @@ -33,7 +33,7 @@ CreateViewCommand `t2`, select * from values ("one", 5), ("one", NULL), (NULL, 5) - as t2(k, v), false, false, LocalTempView, true + as t2(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias t2 +- LocalRelation [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-analytics.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-analytics.sql.out index 8e1eb80b4bac8..fbee3e2c8c89f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-analytics.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-analytics.sql.out @@ -6,7 +6,7 @@ AS testData(a, b) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2) -AS testData(a, b), false, true, LocalTempView, true +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] @@ -75,7 +75,7 @@ AS courseSales(course, year, earnings) -- !query analysis CreateViewCommand `courseSales`, SELECT * FROM VALUES ("dotNET", 2012, 10000), ("Java", 2012, 20000), ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) -AS courseSales(course, year, earnings), false, true, LocalTempView, true +AS courseSales(course, year, earnings), false, true, LocalTempView, UNSUPPORTED, true +- Project [course#x, year#x, earnings#x] +- SubqueryAlias courseSales +- LocalRelation [course#x, year#x, earnings#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-by.sql.out index 12b903477b6c4..5811a4ff6566c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-group-by.sql.out @@ -6,7 +6,7 @@ AS testData(a, b) -- !query analysis CreateViewCommand `testData`, SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) -AS testData(a, b), false, true, 
LocalTempView, true +AS testData(a, b), false, true, LocalTempView, UNSUPPORTED, true +- Project [a#x, b#x] +- SubqueryAlias testData +- LocalRelation [a#x, b#x] @@ -240,7 +240,7 @@ CREATE OR REPLACE TEMPORARY VIEW testDataHasSameNameWithAlias AS SELECT * FROM V (1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v) -- !query analysis CreateViewCommand `testDataHasSameNameWithAlias`, SELECT * FROM VALUES -(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v), false, true, LocalTempView, true +(1, 1, 3), (1, 2, 1) AS testDataHasSameNameWithAlias(k, a, v), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, a#x, v#x] +- SubqueryAlias testDataHasSameNameWithAlias +- LocalRelation [k#x, a#x, v#x] @@ -362,7 +362,7 @@ SELECT udf(1) FROM range(10) HAVING true -- !query analysis Filter cast(true as boolean) +- Aggregate [cast(udf(cast(1 as string)) as int) AS udf(1)#x] - +- Range (0, 10, step=1, splits=None) + +- Range (0, 10, step=1) -- !query @@ -371,7 +371,7 @@ SELECT udf(udf(1)) FROM range(10) HAVING MAX(id) > 0 Project [udf(udf(1))#x] +- Filter (max(id#xL)#xL > cast(0 as bigint)) +- Aggregate [cast(udf(cast(cast(udf(cast(1 as string)) as int) as string)) as int) AS udf(udf(1))#x, max(id#xL) AS max(id#xL)#xL] - +- Range (0, 10, step=1, splits=None) + +- Range (0, 10, step=1) -- !query @@ -404,7 +404,7 @@ CreateViewCommand `test_agg`, SELECT * FROM VALUES (2, true), (3, false), (3, null), (4, null), (4, null), - (5, null), (5, true), (5, false) AS test_agg(k, v), false, true, LocalTempView, true + (5, null), (5, true), (5, false) AS test_agg(k, v), false, true, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias test_agg +- LocalRelation [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-having.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-having.sql.out index 441fda84479e0..f5cbe4abf3538 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-having.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-having.sql.out @@ -12,7 +12,7 @@ CreateViewCommand `hav`, select * from values ("two", 2), ("three", 3), ("one", 5) - as hav(k, v), false, false, LocalTempView, true + as hav(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias hav +- LocalRelation [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-inner-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-inner-join.sql.out index 1454be010d9c9..129575c77e732 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-inner-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-inner-join.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -11,7 +11,7 @@ CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, L -- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, 
LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -20,7 +20,7 @@ CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, L -- !query CREATE TEMPORARY VIEW t3 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t3`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t3`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -29,7 +29,7 @@ CreateViewCommand `t3`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, fal -- !query CREATE TEMPORARY VIEW t4 AS SELECT * FROM VALUES (1), (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t4`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t4`, SELECT * FROM VALUES (1), (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -43,7 +43,7 @@ SELECT udf(a) AS a, udf('b') AS tag FROM t2 -- !query analysis CreateViewCommand `ta`, SELECT udf(a) AS a, udf('a') AS tag FROM t1 UNION ALL -SELECT udf(a) AS a, udf('b') AS tag FROM t2, false, false, LocalTempView, true, [udf] +SELECT udf(a) AS a, udf('b') AS tag FROM t2, false, false, LocalTempView, UNSUPPORTED, true, [udf] +- Union false, false :- Project [cast(udf(cast(a#x as string)) as int) AS a#x, cast(udf(cast(a as string)) as string) AS tag#x] : +- SubqueryAlias t1 @@ -69,7 +69,7 @@ SELECT udf(a) AS a, udf('b') AS tag FROM t4 -- !query analysis CreateViewCommand `tb`, SELECT udf(a) AS a, udf('a') AS tag FROM t3 UNION ALL -SELECT udf(a) AS a, udf('b') AS tag FROM t4, false, false, LocalTempView, true, [udf] +SELECT udf(a) AS a, udf('b') AS tag FROM t4, false, false, LocalTempView, UNSUPPORTED, true, [udf] +- Union false, false :- Project [cast(udf(cast(a#x as string)) as int) AS a#x, cast(udf(cast(a as string)) as string) AS tag#x] : +- SubqueryAlias t3 diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-intersect-all.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-intersect-all.sql.out index 323f07a7d8339..35306746932e2 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-intersect-all.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-intersect-all.sql.out @@ -18,7 +18,7 @@ CreateViewCommand `tab1`, SELECT * FROM VALUES (2, 3), (null, null), (null, null) - AS tab1(k, v), false, false, LocalTempView, true + AS tab1(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias tab1 +- LocalRelation [k#x, v#x] @@ -41,7 +41,7 @@ CreateViewCommand `tab2`, SELECT * FROM VALUES (3, 4), (null, null), (null, null) - AS tab2(k, v), false, false, LocalTempView, true + AS tab2(k, v), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v#x] +- SubqueryAlias tab2 +- LocalRelation [k#x, v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-join-empty-relation.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-join-empty-relation.sql.out index d5ffa3ebb2bb5..ebd5127112cc0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-join-empty-relation.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-join-empty-relation.sql.out @@ -2,7 +2,7 @@ -- !query CREATE TEMPORARY VIEW t1 AS SELECT * 
FROM VALUES (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -11,7 +11,7 @@ CreateViewCommand `t1`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, L -- !query CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a) -- !query analysis -CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, true +CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- SubqueryAlias GROUPING +- LocalRelation [a#x] @@ -20,7 +20,7 @@ CreateViewCommand `t2`, SELECT * FROM VALUES (1) AS GROUPING(a), false, false, L -- !query CREATE TEMPORARY VIEW empty_table as SELECT a FROM t2 WHERE false -- !query analysis -CreateViewCommand `empty_table`, SELECT a FROM t2 WHERE false, false, false, LocalTempView, true +CreateViewCommand `empty_table`, SELECT a FROM t2 WHERE false, false, false, LocalTempView, UNSUPPORTED, true +- Project [a#x] +- Filter false +- SubqueryAlias t2 diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out index 324622e615da0..5fc413c66326b 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-natural-join.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `nt1`, select * from values ("one", 1), ("two", 2), ("three", 3) - as nt1(k, v1), false, false, LocalTempView, true + as nt1(k, v1), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- LocalRelation [k#x, v1#x] @@ -27,7 +27,7 @@ CreateViewCommand `nt2`, select * from values ("one", 1), ("two", 22), ("one", 5) - as nt2(k, v2), false, false, LocalTempView, true + as nt2(k, v2), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v2#x] +- SubqueryAlias nt2 +- LocalRelation [k#x, v2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-outer-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-outer-join.sql.out index 8eee8746637cd..df2169386249a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-outer-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-outer-join.sql.out @@ -6,7 +6,7 @@ as t1(int_col1) -- !query analysis CreateViewCommand `t1`, SELECT * FROM VALUES (-234), (145), (367), (975), (298) -as t1(int_col1), false, true, LocalTempView, true +as t1(int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col1#x] +- SubqueryAlias t1 +- LocalRelation [int_col1#x] @@ -19,7 +19,7 @@ as t2(int_col0, int_col1) -- !query analysis CreateViewCommand `t2`, SELECT * FROM VALUES (-769, -244), (-800, -409), (940, 86), (-507, 304), (-367, 158) -as t2(int_col0, int_col1), false, true, LocalTempView, true +as t2(int_col0, int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col0#x, int_col1#x] +- SubqueryAlias t2 +- LocalRelation [int_col0#x, int_col1#x] @@ -58,7 +58,7 @@ Project [udf(sum(udf(coalesce(int_col1, int_col0))))#xL, (udf(coalesce(int_col1, -- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (97) as t1(int_col1) 
-- !query analysis -CreateViewCommand `t1`, SELECT * FROM VALUES (97) as t1(int_col1), false, true, LocalTempView, true +CreateViewCommand `t1`, SELECT * FROM VALUES (97) as t1(int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col1#x] +- SubqueryAlias t1 +- LocalRelation [int_col1#x] @@ -67,7 +67,7 @@ CreateViewCommand `t1`, SELECT * FROM VALUES (97) as t1(int_col1), false, true, -- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (0) as t2(int_col1) -- !query analysis -CreateViewCommand `t2`, SELECT * FROM VALUES (0) as t2(int_col1), false, true, LocalTempView, true +CreateViewCommand `t2`, SELECT * FROM VALUES (0) as t2(int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col1#x] +- SubqueryAlias t2 +- LocalRelation [int_col1#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out index e92b7003d6e58..5cfa86309f6d1 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-pivot.sql.out @@ -14,7 +14,7 @@ CreateViewCommand `courseSales`, select * from values ("dotNET", 2012, 5000), ("dotNET", 2013, 48000), ("Java", 2013, 30000) - as courseSales(course, year, earnings), false, false, LocalTempView, true + as courseSales(course, year, earnings), false, false, LocalTempView, UNSUPPORTED, true +- Project [course#x, year#x, earnings#x] +- SubqueryAlias courseSales +- LocalRelation [course#x, year#x, earnings#x] @@ -29,7 +29,7 @@ create temporary view years as select * from values CreateViewCommand `years`, select * from values (2012, 1), (2013, 2) - as years(y, s), false, false, LocalTempView, true + as years(y, s), false, false, LocalTempView, UNSUPPORTED, true +- Project [y#x, s#x] +- SubqueryAlias years +- LocalRelation [y#x, s#x] @@ -44,7 +44,7 @@ create temporary view yearsWithComplexTypes as select * from values CreateViewCommand `yearsWithComplexTypes`, select * from values (2012, array(1, 1), map('1', 1), struct(1, 'a')), (2013, array(2, 2), map('2', 2), struct(2, 'b')) - as yearsWithComplexTypes(y, a, m, s), false, false, LocalTempView, true + as yearsWithComplexTypes(y, a, m, s), false, false, LocalTempView, UNSUPPORTED, true +- Project [y#x, a#x, m#x, s#x] +- SubqueryAlias yearsWithComplexTypes +- LocalRelation [y#x, a#x, m#x, s#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-udaf.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-udaf.sql.out index b04e49da481da..248ed95df9ded 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-udaf.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-udaf.sql.out @@ -6,7 +6,7 @@ as t1(int_col1) -- !query analysis CreateViewCommand `t1`, SELECT * FROM VALUES (1), (2), (3), (4) -as t1(int_col1), false, true, LocalTempView, true +as t1(int_col1), false, true, LocalTempView, UNSUPPORTED, true +- Project [int_col1#x] +- SubqueryAlias t1 +- LocalRelation [int_col1#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-union.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-union.sql.out index 671c7a4765296..a1436d0a77c83 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-union.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-union.sql.out @@ -2,7 +2,7 @@ -- !query CREATE OR REPLACE 
TEMPORARY VIEW t1 AS VALUES (1, 'a'), (2, 'b') tbl(c1, c2) -- !query analysis -CreateViewCommand `t1`, VALUES (1, 'a'), (2, 'b') tbl(c1, c2), false, true, LocalTempView, true +CreateViewCommand `t1`, VALUES (1, 'a'), (2, 'b') tbl(c1, c2), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias tbl +- LocalRelation [c1#x, c2#x] @@ -10,7 +10,7 @@ CreateViewCommand `t1`, VALUES (1, 'a'), (2, 'b') tbl(c1, c2), false, true, Loca -- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (1.0, 1), (2.0, 4) tbl(c1, c2) -- !query analysis -CreateViewCommand `t2`, VALUES (1.0, 1), (2.0, 4) tbl(c1, c2), false, true, LocalTempView, true +CreateViewCommand `t2`, VALUES (1.0, 1), (2.0, 4) tbl(c1, c2), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias tbl +- LocalRelation [c1#x, c2#x] @@ -97,7 +97,7 @@ Project [cast(udf(cast(cast(udf(cast(a#xL as string)) as bigint) as string)) as -- !query CREATE OR REPLACE TEMPORARY VIEW p1 AS VALUES 1 T(col) -- !query analysis -CreateViewCommand `p1`, VALUES 1 T(col), false, true, LocalTempView, true +CreateViewCommand `p1`, VALUES 1 T(col), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [col#x] @@ -105,7 +105,7 @@ CreateViewCommand `p1`, VALUES 1 T(col), false, true, LocalTempView, true -- !query CREATE OR REPLACE TEMPORARY VIEW p2 AS VALUES 1 T(col) -- !query analysis -CreateViewCommand `p2`, VALUES 1 T(col), false, true, LocalTempView, true +CreateViewCommand `p2`, VALUES 1 T(col), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [col#x] @@ -113,7 +113,7 @@ CreateViewCommand `p2`, VALUES 1 T(col), false, true, LocalTempView, true -- !query CREATE OR REPLACE TEMPORARY VIEW p3 AS VALUES 1 T(col) -- !query analysis -CreateViewCommand `p3`, VALUES 1 T(col), false, true, LocalTempView, true +CreateViewCommand `p3`, VALUES 1 T(col), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [col#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-window.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-window.sql.out index e18f67055913f..c10988310c0a9 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-window.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udf/udf-window.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `testData`, SELECT * FROM VALUES (3, 2147483650L, 100.001D, date("2020-12-31"), timestamp_seconds(1609372800), "b"), (null, null, null, null, null, null), (3, 1L, 1.0D, date("2017-08-01"), timestamp_seconds(1501545600), null) -AS testData(val, val_long, val_double, val_date, val_timestamp, cate), false, true, LocalTempView, true +AS testData(val, val_long, val_double, val_date, val_timestamp, cate), false, true, LocalTempView, UNSUPPORTED, true +- Project [val#x, val_long#xL, val_double#x, val_date#x, val_timestamp#x, cate#x] +- SubqueryAlias testData +- LocalRelation [val#x, val_long#xL, val_double#x, val_date#x, val_timestamp#x, cate#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/udtf/udtf.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/udtf/udtf.sql.out index cdfa4f69f6e70..4b53f1c6f19c4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/udtf/udtf.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/udtf/udtf.sql.out @@ -14,7 +14,7 @@ DropTableCommand `spark_catalog`.`default`.`t2`, true, true, false -- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (0, 1), (1, 2) t(c1, c2) -- 
!query analysis -CreateViewCommand `t1`, VALUES (0, 1), (1, 2) t(c1, c2), false, true, LocalTempView, true +CreateViewCommand `t1`, VALUES (0, 1), (1, 2) t(c1, c2), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [c1#x, c2#x] @@ -22,7 +22,7 @@ CreateViewCommand `t1`, VALUES (0, 1), (1, 2) t(c1, c2), false, true, LocalTempV -- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (0, 1), (1, 2), (1, 3) t(partition_col, input) -- !query analysis -CreateViewCommand `t2`, VALUES (0, 1), (1, 2), (1, 3) t(partition_col, input), false, true, LocalTempView, true +CreateViewCommand `t2`, VALUES (0, 1), (1, 2), (1, 3) t(partition_col, input), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias t +- LocalRelation [partition_col#x, input#x] @@ -904,6 +904,26 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +SELECT * FROM UDTFPartitionByIndexingBug( + TABLE( + SELECT + 5 AS unused_col, + 'hi' AS partition_col, + 1.0 AS double_col + + UNION ALL + + SELECT + 4 AS unused_col, + 'hi' AS partition_col, + 1.0 AS double_col + ) +) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + -- !query DROP VIEW t1 -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/union.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/union.sql.out index 44dff64f3a72e..cafdd850e86d6 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/union.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/union.sql.out @@ -2,7 +2,7 @@ -- !query CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (1, 'a'), (2, 'b') tbl(c1, c2) -- !query analysis -CreateViewCommand `t1`, VALUES (1, 'a'), (2, 'b') tbl(c1, c2), false, true, LocalTempView, true +CreateViewCommand `t1`, VALUES (1, 'a'), (2, 'b') tbl(c1, c2), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias tbl +- LocalRelation [c1#x, c2#x] @@ -10,7 +10,7 @@ CreateViewCommand `t1`, VALUES (1, 'a'), (2, 'b') tbl(c1, c2), false, true, Loca -- !query CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (1.0, 1), (2.0, 4) tbl(c1, c2) -- !query analysis -CreateViewCommand `t2`, VALUES (1.0, 1), (2.0, 4) tbl(c1, c2), false, true, LocalTempView, true +CreateViewCommand `t2`, VALUES (1.0, 1), (2.0, 4) tbl(c1, c2), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias tbl +- LocalRelation [c1#x, c2#x] @@ -97,7 +97,7 @@ Project [a#xL] -- !query CREATE OR REPLACE TEMPORARY VIEW p1 AS VALUES 1 T(col) -- !query analysis -CreateViewCommand `p1`, VALUES 1 T(col), false, true, LocalTempView, true +CreateViewCommand `p1`, VALUES 1 T(col), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [col#x] @@ -105,7 +105,7 @@ CreateViewCommand `p1`, VALUES 1 T(col), false, true, LocalTempView, true -- !query CREATE OR REPLACE TEMPORARY VIEW p2 AS VALUES 1 T(col) -- !query analysis -CreateViewCommand `p2`, VALUES 1 T(col), false, true, LocalTempView, true +CreateViewCommand `p2`, VALUES 1 T(col), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [col#x] @@ -113,7 +113,7 @@ CreateViewCommand `p2`, VALUES 1 T(col), false, true, LocalTempView, true -- !query CREATE OR REPLACE TEMPORARY VIEW p3 AS VALUES 1 T(col) -- !query analysis -CreateViewCommand `p3`, VALUES 1 T(col), false, true, LocalTempView, true +CreateViewCommand `p3`, VALUES 1 T(col), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias T +- LocalRelation [col#x] @@ -184,7 +184,7 @@ Union false, false -- !query CREATE 
OR REPLACE TEMPORARY VIEW t3 AS VALUES (decimal(1)) tbl(v) -- !query analysis -CreateViewCommand `t3`, VALUES (decimal(1)) tbl(v), false, true, LocalTempView, true +CreateViewCommand `t3`, VALUES (decimal(1)) tbl(v), false, true, LocalTempView, UNSUPPORTED, true +- SubqueryAlias tbl +- LocalRelation [v#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/unpivot.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/unpivot.sql.out index 3b3a64073cbf8..7f4d1a5b7d467 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/unpivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/unpivot.sql.out @@ -8,7 +8,7 @@ create temporary view courseEarnings as select * from values CreateViewCommand `courseEarnings`, select * from values ("dotNET", 15000, 48000, 22500), ("Java", 20000, 30000, NULL) - as courseEarnings(course, `2012`, `2013`, `2014`), false, false, LocalTempView, true + as courseEarnings(course, `2012`, `2013`, `2014`), false, false, LocalTempView, UNSUPPORTED, true +- Project [course#x, 2012#x, 2013#x, 2014#x] +- SubqueryAlias courseEarnings +- LocalRelation [course#x, 2012#x, 2013#x, 2014#x] @@ -73,7 +73,7 @@ create temporary view courseEarningsAndSales as select * from values CreateViewCommand `courseEarningsAndSales`, select * from values ("dotNET", 15000, NULL, 48000, 1, 22500, 1), ("Java", 20000, 1, 30000, 2, NULL, NULL) - as courseEarningsAndSales(course, earnings2012, sales2012, earnings2013, sales2013, earnings2014, sales2014), false, false, LocalTempView, true + as courseEarningsAndSales(course, earnings2012, sales2012, earnings2013, sales2013, earnings2014, sales2014), false, false, LocalTempView, UNSUPPORTED, true +- Project [course#x, earnings2012#x, sales2012#x, earnings2013#x, sales2013#x, earnings2014#x, sales2014#x] +- SubqueryAlias courseEarningsAndSales +- LocalRelation [course#x, earnings2012#x, sales2012#x, earnings2013#x, sales2013#x, earnings2014#x, sales2014#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out index 3c9c6ec169afa..5a74c4be107e3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out @@ -10,7 +10,7 @@ CreateViewCommand `nt1`, select * from values ("one", 1), ("two", 2), ("three", 3) - as nt1(k, v1), false, false, LocalTempView, true + as nt1(k, v1), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v1#x] +- SubqueryAlias nt1 +- LocalRelation [k#x, v1#x] @@ -29,7 +29,7 @@ CreateViewCommand `nt2`, select * from values ("two", 22), ("one", 5), ("four", 4) - as nt2(k, v2), false, false, LocalTempView, true + as nt2(k, v2), false, false, LocalTempView, UNSUPPORTED, true +- Project [k#x, v2#x] +- SubqueryAlias nt2 +- LocalRelation [k#x, v2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out new file mode 100644 index 0000000000000..efa221400b0be --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding-config.sql.out @@ -0,0 +1,813 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SET spark.sql.legacy.viewSchemaBindingMode +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaBindingMode,None) + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = false +-- 
!query analysis +SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(false)) + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "FEATURE_NOT_ENABLED", + "sqlState" : "56038", + "messageParameters" : { + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", + "featureName" : "VIEW ... WITH SCHEMA ..." + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 44, + "fragment" : "WITH SCHEMA BINDING" + } ] +} + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "FEATURE_NOT_ENABLED", + "sqlState" : "56038", + "messageParameters" : { + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", + "featureName" : "VIEW ... WITH SCHEMA ..." + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 49, + "fragment" : "WITH SCHEMA COMPENSATION" + } ] +} + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "FEATURE_NOT_ENABLED", + "sqlState" : "56038", + "messageParameters" : { + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", + "featureName" : "VIEW ... WITH SCHEMA ..." + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 51, + "fragment" : "WITH SCHEMA TYPE EVOLUTION" + } ] +} + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT 1 +-- !query analysis +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "FEATURE_NOT_ENABLED", + "sqlState" : "56038", + "messageParameters" : { + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", + "featureName" : "VIEW ... WITH SCHEMA ..." 
+ }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 46, + "fragment" : "WITH SCHEMA EVOLUTION" + } ] +} + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT 1 +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT 1, false, true, PersistedView, UNSUPPORTED, true + +- Project [1 AS 1#x] + +- OneRowRelation + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query analysis +ShowTablesCommand default, v, [namespace#x, tableName#x, isTemporary#x, information#x], true + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v`, true, true, false + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1 +-- !query analysis +CreateViewCommand `v`, SELECT 1, false, true, LocalTempView, UNSUPPORTED, true + +- Project [1 AS 1#x] + +- OneRowRelation + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query analysis +ShowTablesCommand default, v, [namespace#x, tableName#x, isTemporary#x, information#x], true + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTempViewCommand v + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, UNSUPPORTED, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#xL as int) AS c1#x] + +- Project [c1#xL] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode 
= true +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(true)) + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = false +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(false)) + + +-- !query +SET spark.sql.ansi.enabled = false +-- !query analysis +SetCommand (spark.sql.ansi.enabled,Some(false)) + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, BINDING, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +DROP TABLE t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as bigint) AS c1#xL] + +- LocalRelation [col1#x] + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "CANNOT_UP_CAST_DATATYPE", + "sqlState" : "42846", + "messageParameters" : { + "details" : "The type path of the target object is:\n\nYou can either add an explicit cast to the input data or choose a higher precision type of the field in the target object", + "expression" : "spark_catalog.default.t.c1", + "sourceType" : "\"BIGINT\"", + "targetType" : "\"INT\"" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = true +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(true)) + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR 
REPLACE VIEW v AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, COMPENSATION, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +DROP TABLE t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as bigint) AS c1#xL] + +- LocalRelation [col1#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#xL as int) AS c1#x] + +- Project [c1#xL] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING NOT NULL, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES ('1', 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as string) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +INSERT INTO t 
VALUES ('a', 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as string) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 MAP, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION", + "sqlState" : "42K09", + "messageParameters" : { + "sqlExpr" : "\"c1\"", + "srcType" : "\"MAP\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 15, + "fragment" : "v" + } ] +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, COMPENSATION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis 
+CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c3 INT NOT NULL, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c1", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = false +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(false)) + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = false +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(false)) + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT 1 +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT 1, false, true, PersistedView, UNSUPPORTED, true + +- Project [1 AS 1#x] + +- OneRowRelation + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = true +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(true)) + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query analysis +ShowTablesCommand default, v, [namespace#x, tableName#x, isTemporary#x, information#x], true + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = true +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(true)) + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query analysis +ShowTablesCommand default, v, [namespace#x, tableName#x, isTemporary#x, information#x], true + + +-- !query +SHOW CREATE TABLE v +-- !query analysis +ShowCreateTableCommand `spark_catalog`.`default`.`v`, [createtab_stmt#x] + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v`, true, true, false + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = false +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(false)) + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = false +-- !query 
analysis +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(false)) + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1 +-- !query analysis +CreateViewCommand `v`, SELECT 1, false, true, LocalTempView, UNSUPPORTED, true + +- Project [1 AS 1#x] + +- OneRowRelation + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = true +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaBindingMode,Some(true)) + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query analysis +ShowTablesCommand default, v, [namespace#x, tableName#x, isTemporary#x, information#x], true + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = true +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(true)) + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query analysis +ShowTablesCommand default, v, [namespace#x, tableName#x, isTemporary#x, information#x], true + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTempViewCommand v + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v`, true, true, false + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out new file mode 100644 index 0000000000000..75cae1f19d46d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-binding.sql.out @@ -0,0 +1,256 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, BINDING, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "CANNOT_UP_CAST_DATATYPE", + "sqlState" : "42846", + "messageParameters" : { + "details" : "The type path of the target object is:\n\nYou can either 
add an explicit cast to the input data or choose a higher precision type of the field in the target object", + "expression" : "spark_catalog.default.t.c1", + "sourceType" : "\"BIGINT\"", + "targetType" : "\"INT\"" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, BINDING, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation=false +-- !query analysis +SetCommand (spark.sql.legacy.viewSchemaCompensation,Some(false)) + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, BINDING, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, 
data_type#x, comment#x] + + +-- !query +ALTER VIEW v WITH SCHEMA BINDING +-- !query analysis +AlterViewSchemaBindingCommand `spark_catalog`.`default`.`v`, BINDING + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "CANNOT_UP_CAST_DATATYPE", + "sqlState" : "42846", + "messageParameters" : { + "details" : "The type path of the target object is:\n\nYou can either add an explicit cast to the input data or choose a higher precision type of the field in the target object", + "expression" : "spark_catalog.default.t.c1", + "sourceType" : "\"BIGINT\"", + "targetType" : "\"INT\"" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v`, true, true, false + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-compensation.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-compensation.sql.out new file mode 100644 index 0000000000000..64295a6f9bc0c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-compensation.sql.out @@ -0,0 +1,414 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SET spark.sql.ansi.enabled = false +-- !query analysis +SetCommand (spark.sql.ansi.enabled,Some(false)) + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, COMPENSATION, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE t +-- !query analysis +DropTable false, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not 
included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as bigint) AS c1#xL] + +- LocalRelation [col1#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#xL as int) AS c1#x] + +- Project [c1#xL] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING NOT NULL, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES ('1', 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as string) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +INSERT INTO t VALUES ('a', 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as string) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 MAP, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION", + "sqlState" : "42K09", + "messageParameters" : { + "sqlExpr" : "\"c1\"", + "srcType" : "\"MAP\"", + "targetType" : "\"INT\"" + }, + 
"queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 15, + "fragment" : "v" + } ] +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, COMPENSATION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c3 INT NOT NULL, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c1", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand 
`spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES(1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as int) AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, BINDING, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES('1') +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as string) AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "CANNOT_UP_CAST_DATATYPE", + "sqlState" : "42846", + "messageParameters" : { + "details" : "The type path of the target object is:\n\nYou can either add an explicit cast to the input data or choose a higher precision type of the field in the target object", + "expression" : "spark_catalog.default.t.c1", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + } +} + + +-- !query +ALTER VIEW v WITH SCHEMA COMPENSATION +-- !query analysis +AlterViewSchemaBindingCommand `spark_catalog`.`default`.`v`, COMPENSATION + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v`, true, true, false + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-evolution.sql.out 
b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-evolution.sql.out new file mode 100644 index 0000000000000..258edf31d4c17 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-evolution.sql.out @@ -0,0 +1,781 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c4 STRING NOT NULL, c5 DOUBLE) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES ('1', 2.0) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c4, c5] ++- Project [cast(col1#x as string) AS c4#x, cast(col2#x as double) AS c5#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c4#x, c5#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c4#x, c5#x]) + +- Project [c4#x, c5#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c4#x,c5#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c4 STRING, c5 DOUBLE, c6 DATE) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01') +-- 
!query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c4, c5, c6] ++- Project [cast(col1#x as string) AS c4#x, cast(col2#x as double) AS c5#x, cast(col3#x as date) AS c6#x] + +- LocalRelation [col1#x, col2#x, col3#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c4#x, c5#x, c6#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c4#x, c5#x, c6#x]) + +- Project [c4#x, c5#x, c6#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c4#x,c5#x,c6#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand 
`spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,None), (a2,None)], SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [a1#x, a2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [a1#x, a2#x]) + +- Project [c1#x AS a1#x, c2#x AS a2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING NOT NULL, c2 DOUBLE) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES ('1', 2.0) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as string) AS c1#x, cast(col2#x as double) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [a1#x, a2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [a1#x, a2#x]) + +- Project [c1#x AS a1#x, c2#x AS a2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING, c2 DOUBLE, c3 DATE) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01') +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2, c3] ++- Project [cast(col1#x as string) AS c1#x, cast(col2#x as double) AS c2#x, cast(col3#x as date) AS c3#x] + +- 
LocalRelation [col1#x, col2#x, col3#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [a1#x, a2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [a1#x, a2#x]) + +- Project [c1#x AS a1#x, c2#x AS a2#x] + +- Project [c1#x, c2#x, c3#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x,c3#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,None), (a2,None)], SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [a1#x, a2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [a1#x, a2#x]) + +- Project [c1#x AS a1#x, c2#x AS a2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v (a1, a2) AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c3 INT, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + 
"actualCols" : "[]", + "colName" : "c1", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v (a1, a2) AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT COMMENT 'c1', c2 INT COMMENT 'c2') USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,None), (a2,None)], SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,None), (a2,None)], SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6c', c2 STRING COMMENT 'c2 6c') USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +Project [a1#xL, a2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [a1#xL, a2#x]) + +- Project [c1#xL AS a1#xL, c2#x AS a2#x] + +- Project [c1#xL, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +CREATE OR REPLACE VIEW v(a1 COMMENT 'a1', a2 COMMENT 'a2') WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,Some(a1)), (a2,Some(a2))], SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#xL, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6d', c2 STRING COMMENT 'c2 6d') USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +Project [a1#xL, a2#x] 
++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [a1#xL, a2#x]) + +- Project [c1#xL AS a1#xL, c2#x AS a2#x] + +- Project [c1#xL, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, EVOLUTION, true + +- Project [c1#xL, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6e', c2 STRING COMMENT 'c2 6e') USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#xL, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#xL, c2#x]) + +- Project [c1#xL, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t1 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1 + + +-- !query +CREATE TABLE t1(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t1`, false + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t2 + + +-- !query +CREATE TABLE t2(c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t2`, false + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t1, t2 +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t1, t2, false, true, PersistedView, EVOLUTION, true + +- Project [c1#x, c2#x] + +- Join Inner + :- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[c1#x] parquet + +- SubqueryAlias spark_catalog.default.t2 + +- Relation spark_catalog.default.t2[c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [c1#x, c2#x] + +- Join Inner + :- SubqueryAlias spark_catalog.default.t1 + : +- Relation spark_catalog.default.t1[c1#x] parquet + +- SubqueryAlias spark_catalog.default.t2 + +- Relation spark_catalog.default.t2[c2#x] parquet + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t2 + + +-- !query +CREATE TABLE t2(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t2`, false + + +-- !query +SELECT * FROM v +-- !query analysis 
+org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "COLUMN_ALREADY_EXISTS", + "sqlState" : "42711", + "messageParameters" : { + "columnName" : "`c1`" + } +} + + +-- !query +DROP TABLE IF EXISTS t1 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1 + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t2 + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES(1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as int) AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, COMPENSATION, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +ALTER VIEW v WITH SCHEMA EVOLUTION +-- !query analysis +AlterViewSchemaBindingCommand `spark_catalog`.`default`.`v`, EVOLUTION + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v`, true, true, false + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-type-evolution.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-type-evolution.sql.out new file mode 100644 index 0000000000000..95aa35d59fdc8 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/view-schema-type-evolution.sql.out @@ -0,0 +1,456 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in 
comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [c1#x AS c1#x, c2#x AS c2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING NOT NULL, c2 DOUBLE) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES ('1', 2.0) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as string) AS c1#x, cast(col2#x as double) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [c1#x AS c1#x, c2#x AS c2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING, c2 DOUBLE, c3 DATE) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01') +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2, c3] ++- Project [cast(col1#x as string) AS c1#x, cast(col2#x as double) AS c2#x, cast(col3#x as date) AS c3#x] + +- LocalRelation [col1#x, col2#x, col3#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View 
(`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [c1#x AS c1#x, c2#x AS c2#x] + +- Project [c1#x, c2#x, c3#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x,c3#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1, c2] ++- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] + +- LocalRelation [col1#x, col2#x] + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x, c2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x, c2#x]) + +- Project [c1#x AS c1#x, c2#x AS c2#x] + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c3 INT, c2 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c1", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} 
+ + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT COMMENT 'c1', c2 INT COMMENT 'c2') USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,None), (a2,None)], SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#x, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6a', c2 STRING COMMENT 'c2 6a') USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +Project [a1#xL, a2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [a1#xL, a2#x]) + +- Project [c1#xL AS a1#xL, c2#x AS a2#x] + +- Project [c1#xL, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +CREATE OR REPLACE VIEW v(a1 COMMENT 'a1', a2 COMMENT 'a2') WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, [(a1,Some(a1)), (a2,Some(a2))], SELECT * FROM t, false, true, PersistedView, TYPE EVOLUTION, true + +- Project [c1#xL, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6b', c2 STRING COMMENT 'c2 6b') USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +SELECT * FROM v +-- !query analysis +Project [a1#xL, a2#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [a1#xL, a2#x]) + +- Project [c1#xL AS a1#xL, c2#x AS a2#x] + +- Project [c1#xL, c2#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#xL,c2#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT 
INTO t VALUES(1) +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as int) AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT * FROM t +-- !query analysis +CreateViewCommand `spark_catalog`.`default`.`v`, SELECT * FROM t, false, true, PersistedView, COMPENSATION, true + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t + + +-- !query +CREATE TABLE t(c1 STRING) USING PARQUET +-- !query analysis +CreateDataSourceTableCommand `spark_catalog`.`default`.`t`, false + + +-- !query +INSERT INTO t VALUES('1') +-- !query analysis +InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t], Append, `spark_catalog`.`default`.`t`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t), [c1] ++- Project [cast(col1#x as string) AS c1#x] + +- LocalRelation [col1#x] + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [cast(c1#x as int) AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +ALTER VIEW v WITH SCHEMA TYPE EVOLUTION +-- !query analysis +AlterViewSchemaBindingCommand `spark_catalog`.`default`.`v`, TYPE EVOLUTION + + +-- !query +SELECT * FROM v +-- !query analysis +Project [c1#x] ++- SubqueryAlias spark_catalog.default.v + +- View (`spark_catalog`.`default`.`v`, [c1#x]) + +- Project [c1#x AS c1#x] + +- Project [c1#x] + +- SubqueryAlias spark_catalog.default.t + +- Relation spark_catalog.default.t[c1#x] parquet + + +-- !query +DESCRIBE EXTENDED v +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`v`, true, [col_name#x, data_type#x, comment#x] + + +-- !query +DROP VIEW IF EXISTS v +-- !query analysis +DropTableCommand `spark_catalog`.`default`.`v`, true, true, false + + +-- !query +DROP TABLE IF EXISTS t +-- !query analysis +DropTable true, false ++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out index a7168db622a66..8c129534e7d03 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/window.sql.out @@ -22,7 +22,7 @@ CreateViewCommand `testData`, SELECT * FROM VALUES (3, 2147483650L, 100.001D, date("2020-12-31"), timestamp_seconds(1609372800), "b"), (null, null, null, null, null, null), (3, 1L, 1.0D, date("2017-08-01"), timestamp_seconds(1501545600), null) -AS testData(val, val_long, val_double, val_date, val_timestamp, cate), false, 
true, LocalTempView, true +AS testData(val, val_long, val_double, val_date, val_timestamp, cate), false, true, LocalTempView, UNSUPPORTED, true +- Project [val#x, val_long#xL, val_double#x, val_date#x, val_timestamp#x, cate#x] +- SubqueryAlias testData +- LocalRelation [val#x, val_long#xL, val_double#x, val_date#x, val_timestamp#x, cate#x] @@ -67,7 +67,7 @@ CreateViewCommand `basic_pays`, SELECT * FROM VALUES ('Pamela Castillo','SCM',11303), ('Larry Bott','SCM',11798), ('Barry Jones','SCM',10586) -AS basic_pays(employee_name, department, salary), false, true, LocalTempView, true +AS basic_pays(employee_name, department, salary), false, true, LocalTempView, UNSUPPORTED, true +- Project [employee_name#x, department#x, salary#x] +- SubqueryAlias basic_pays +- LocalRelation [employee_name#x, department#x, salary#x] @@ -96,7 +96,7 @@ CreateViewCommand `test_ignore_null`, SELECT * FROM VALUES ('a', 6, 'z'), ('a', 7, 'v'), ('a', 8, null) -AS test_ignore_null(content, id, v), false, true, LocalTempView, true +AS test_ignore_null(content, id, v), false, true, LocalTempView, UNSUPPORTED, true +- Project [content#x, id#x, v#x] +- SubqueryAlias test_ignore_null +- LocalRelation [content#x, id#x, v#x] @@ -1284,7 +1284,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query create or replace temp view t1 (p, o) as values (1, 1), (1, 1), (1, 2), (2, 1), (2, 1), (2, 2) -- !query analysis -CreateViewCommand `t1`, [(p,None), (o,None)], values (1, 1), (1, 1), (1, 2), (2, 1), (2, 1), (2, 2), false, true, LocalTempView, true +CreateViewCommand `t1`, [(p,None), (o,None)], values (1, 1), (1, 1), (1, 2), (2, 1), (2, 1), (2, 2), false, true, LocalTempView, UNSUPPORTED, true +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/xml-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/xml-functions.sql.out index 05b8eed46d1d6..de9fb2f395210 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/xml-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/xml-functions.sql.out @@ -359,7 +359,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException -- !query CREATE TEMPORARY VIEW xmlTable(xmlField, a) AS SELECT * FROM VALUES ('

      1"2"

      ', 'a') -- !query analysis -CreateViewCommand `xmlTable`, [(xmlField,None), (a,None)], SELECT * FROM VALUES ('

      1"2"

      ', 'a'), false, false, LocalTempView, true +CreateViewCommand `xmlTable`, [(xmlField,None), (a,None)], SELECT * FROM VALUES ('

      1"2"

      ', 'a'), false, false, LocalTempView, UNSUPPORTED, true +- Project [col1#x, col2#x] +- LocalRelation [col1#x, col2#x] diff --git a/sql/core/src/test/resources/sql-tests/inputs/binary.sql b/sql/core/src/test/resources/sql-tests/inputs/binary.sql new file mode 100644 index 0000000000000..8cd33eccaaf07 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/binary.sql @@ -0,0 +1,7 @@ +--SET spark.sql.binaryOutputStyle=UTF8 + +SELECT X''; +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333'; +SELECT CAST('Spark' as BINARY); +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)); +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/binary_base64.sql b/sql/core/src/test/resources/sql-tests/inputs/binary_base64.sql new file mode 100644 index 0000000000000..853eedd51773f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/binary_base64.sql @@ -0,0 +1,3 @@ +--IMPORT binary.sql + +--SET spark.sql.binaryOutputStyle=BASE64 diff --git a/sql/core/src/test/resources/sql-tests/inputs/binary_basic.sql b/sql/core/src/test/resources/sql-tests/inputs/binary_basic.sql new file mode 100644 index 0000000000000..1a5b64bdf7e05 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/binary_basic.sql @@ -0,0 +1,4 @@ +--IMPORT binary.sql + +--SET spark.sql.binaryOutputStyle=BASIC + diff --git a/sql/core/src/test/resources/sql-tests/inputs/binary_hex.sql b/sql/core/src/test/resources/sql-tests/inputs/binary_hex.sql new file mode 100644 index 0000000000000..7863da737a72f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/binary_hex.sql @@ -0,0 +1,3 @@ +--IMPORT binary.sql + +--SET spark.sql.binaryOutputStyle=HEX diff --git a/sql/core/src/test/resources/sql-tests/inputs/binary_hex_discrete.sql b/sql/core/src/test/resources/sql-tests/inputs/binary_hex_discrete.sql new file mode 100644 index 0000000000000..282a7634cbc5e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/binary_hex_discrete.sql @@ -0,0 +1,3 @@ +--IMPORT binary.sql + +--SET spark.sql.binaryOutputStyle=HEX_DISCRETE diff --git a/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql b/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql index f9dfd161d0c07..e080fdd32a4aa 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/bitwise.sql @@ -75,3 +75,19 @@ select getbit(11L, 2 + 1), getbit(11L, 3 - 1), getbit(10L + 1, 1 * 1), getbit(ca select getbit(11L, 63); select getbit(11L, -1); select getbit(11L, 64); + +SELECT 20181117 >> 2; +SELECT 20181117 << 2; +SELECT 20181117 >>> 2; +SELECT 20181117 > > 2; +SELECT 20181117 < < 2; +SELECT 20181117 > >> 2; +SELECT 20181117 <<< 2; +SELECT 20181117 >>>> 2; +select cast(null as array>), 20181117 >> 2; +select cast(null as array>), 20181117 >>> 2; +select cast(null as map>), 20181117 >> 2; + +select 1 << 1 + 2 as plus_over_shift; -- if correct, the result is 8. otherwise, 4 +select 2 >> 1 << 1 as left_to_right; -- if correct, the result is 2. otherwise, 0 +select 1 & 2 >> 1 as shift_over_ampersand; -- if correct, the result is 1. 
otherwise, 0 diff --git a/sql/core/src/test/resources/sql-tests/inputs/collations.sql b/sql/core/src/test/resources/sql-tests/inputs/collations.sql index 619eb4470e9ad..c0262a0f0ad14 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/collations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/collations.sql @@ -1,7 +1,7 @@ -- test cases for collation support -- Create a test table with data -create table t1(utf8_binary string collate utf8_binary, utf8_binary_lcase string collate utf8_binary_lcase) using parquet; +create table t1(utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet; insert into t1 values('aaa', 'aaa'); insert into t1 values('AAA', 'AAA'); insert into t1 values('bbb', 'bbb'); @@ -13,67 +13,80 @@ describe table t1; -- group by and count utf8_binary select count(*) from t1 group by utf8_binary; --- group by and count utf8_binary_lcase -select count(*) from t1 group by utf8_binary_lcase; +-- group by and count utf8_lcase +select count(*) from t1 group by utf8_lcase; -- filter equal utf8_binary select * from t1 where utf8_binary = 'aaa'; --- filter equal utf8_binary_lcase -select * from t1 where utf8_binary_lcase = 'aaa' collate utf8_binary_lcase; +-- filter equal utf8_lcase +select * from t1 where utf8_lcase = 'aaa' collate utf8_lcase; -- filter less then utf8_binary select * from t1 where utf8_binary < 'bbb'; --- filter less then utf8_binary_lcase -select * from t1 where utf8_binary_lcase < 'bbb' collate utf8_binary_lcase; +-- filter less then utf8_lcase +select * from t1 where utf8_lcase < 'bbb' collate utf8_lcase; -- inner join -select l.utf8_binary, r.utf8_binary_lcase from t1 l join t1 r on l.utf8_binary_lcase = r.utf8_binary_lcase; +select l.utf8_binary, r.utf8_lcase from t1 l join t1 r on l.utf8_lcase = r.utf8_lcase; -- create second table for anti-join -create table t2(utf8_binary string collate utf8_binary, utf8_binary_lcase string collate utf8_binary_lcase) using parquet; +create table t2(utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet; insert into t2 values('aaa', 'aaa'); insert into t2 values('bbb', 'bbb'); -- anti-join on lcase -select * from t1 anti join t2 on t1.utf8_binary_lcase = t2.utf8_binary_lcase; +select * from t1 anti join t2 on t1.utf8_lcase = t2.utf8_lcase; drop table t2; drop table t1; -- set operations -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'); -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except all select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'); -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'); -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union all select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'); -select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') intersect select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'); +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except select col1 collate utf8_lcase from values ('aaa'), ('bbb'); +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except all select 
col1 collate utf8_lcase from values ('aaa'), ('bbb'); +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union select col1 collate utf8_lcase from values ('aaa'), ('bbb'); +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union all select col1 collate utf8_lcase from values ('aaa'), ('bbb'); +select col1 collate utf8_lcase from values ('aaa'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') intersect select col1 collate utf8_lcase from values ('aaa'), ('bbb'); -- create table with struct field -create table t1 (c1 struct) USING PARQUET; +create table t1 (c1 struct) USING PARQUET; -insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')); -insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_binary_lcase', 'AAA')); +insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_lcase', 'aaa')); +insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_lcase', 'AAA')); -- aggregate against nested field utf8_binary select count(*) from t1 group by c1.utf8_binary; --- aggregate against nested field utf8_binary_lcase -select count(*) from t1 group by c1.utf8_binary_lcase; +-- aggregate against nested field utf8_lcase +select count(*) from t1 group by c1.utf8_lcase; drop table t1; -- array function tests -select array_contains(ARRAY('aaa' collate utf8_binary_lcase),'AAA' collate utf8_binary_lcase); -select array_position(ARRAY('aaa' collate utf8_binary_lcase, 'bbb' collate utf8_binary_lcase),'BBB' collate utf8_binary_lcase); +select array_contains(ARRAY('aaa' collate utf8_lcase),'AAA' collate utf8_lcase); +select array_position(ARRAY('aaa' collate utf8_lcase, 'bbb' collate utf8_lcase),'BBB' collate utf8_lcase); -- utility -select nullif('aaa' COLLATE utf8_binary_lcase, 'AAA' COLLATE utf8_binary_lcase); -select least('aaa' COLLATE utf8_binary_lcase, 'AAA' collate utf8_binary_lcase, 'a' collate utf8_binary_lcase); +select nullif('aaa' COLLATE utf8_lcase, 'AAA' COLLATE utf8_lcase); +select least('aaa' COLLATE utf8_lcase, 'AAA' collate utf8_lcase, 'a' collate utf8_lcase); -- array operations -select arrays_overlap(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)); -select array_distinct(array('aaa' collate utf8_binary_lcase, 'AAA' collate utf8_binary_lcase)); -select array_union(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)); -select array_intersect(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)); -select array_except(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)); +select arrays_overlap(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)); +select array_distinct(array('aaa' collate utf8_lcase, 'AAA' collate utf8_lcase)); +select array_union(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)); +select array_intersect(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)); +select array_except(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)); + +-- ICU collations (all statements return true) +select 'a' collate unicode < 'A'; +select 'a' collate unicode_ci = 'A'; +select 'a' collate unicode_ai = 'å'; +select 'a' collate unicode_ci_ai = 'Å'; +select 'a' collate en < 'A'; +select 'a' collate en_ci = 'A'; +select 'a' collate en_ai = 'å'; +select 'a' collate en_ci_ai = 'Å'; +select 'Kypper' collate sv < 'Köpfe'; +select 'Kypper' collate de > 'Köpfe'; +select 'I' collate tr_ci = 'ı'; diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql index e5ef244341751..3b2ba1fcdd66e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql @@ -17,6 +17,18 @@ SELECT ( SELECT * FROM t ); +-- un-referenced CTE in subquery expression: outer reference in CTE relation +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1); + +-- un-referenced CTE in subquery expression: outer reference in CTE main query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1); + -- Make sure CTE in subquery is scoped to that subquery rather than global -- the 2nd half of the union should fail because the cte is scoped to the first half SELECT * FROM diff --git a/sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql index 7925a21de04cd..37081de012e98 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/higher-order-functions.sql @@ -11,6 +11,8 @@ create or replace temporary view nested as values -- Only allow lambda's in higher order functions. select upper(x -> x) as v; +-- Also test functions registered with `ExpressionBuilder`. +select ceil(x -> x) as v; -- Identity transform an array select transform(zs, z -> z) as v from nested; diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index fd53f44d3c33c..46461dcd048e3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -119,8 +119,8 @@ VALUES(IDENTIFIER(1)); VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))); SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1); -CREATE TABLE IDENTIFIER(1)(c1 INT); -CREATE TABLE IDENTIFIER('a.b.c')(c1 INT); +CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv; +CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv; CREATE VIEW IDENTIFIER('a.b.c')(c1) AS VALUES(1); DROP TABLE IDENTIFIER('a.b.c'); DROP VIEW IDENTIFIER('a.b.c'); @@ -132,6 +132,15 @@ CREATE TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg') AS 'test.org.a DROP TEMPORARY FUNCTION IDENTIFIER('default.my' || 'DoubleAvg'); CREATE TEMPORARY VIEW IDENTIFIER('default.v')(c1) AS VALUES(1); +-- SPARK-48273: Aggregation operation in statements using identifier clause for table name +create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1); +cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1); +create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1); +insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1; +drop view v1; +drop table t1; +drop table t2; + -- Not supported SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1); SELECT T1.c1 FROM VALUES(1) AS T1(c1) JOIN VALUES(1) AS T2(c1) USING (IDENTIFIER('c1')); diff --git a/sql/core/src/test/resources/sql-tests/inputs/math.sql b/sql/core/src/test/resources/sql-tests/inputs/math.sql index 96fb0eeef7ac3..14a647a610cc3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/math.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/math.sql @@ 
-77,3 +77,16 @@ SELECT conv('9223372036854775808', 10, 16); SELECT conv('92233720368547758070', 10, 16); SELECT conv('9223372036854775807', 36, 10); SELECT conv('-9223372036854775807', 36, 10); + +SELECT BIN(0); +SELECT BIN(25); +SELECT BIN(25L); +SELECT BIN(25.5); + +SELECT POSITIVE(0Y); +SELECT POSITIVE(25); +SELECT POSITIVE(-25L); +SELECT POSITIVE(25.5); +SELECT POSITIVE("25.5"); +SELECT POSITIVE("invalid"); +SELECT POSITIVE(null); diff --git a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql index 6f64b0da6502e..195db17a3a1f9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql @@ -1,3 +1,30 @@ +-- NOT +select not true; +select ! true; +select not null::boolean; + +-- AND +select true and true; +select true and false; +select false and true; +select false and false; +select true and null::boolean; +select false and null::boolean; +select null::boolean and true; +select null::boolean and false; +select null::boolean and null::boolean; + +-- OR +select true or true; +select true or false; +select false or true; +select false or false; +select true or null::boolean; +select false or null::boolean; +select null::boolean or true; +select null::boolean or false; +select null::boolean or null::boolean; + -- EqualTo select 1 = 1; select 1 = '1'; @@ -82,3 +109,12 @@ select 2.0 not between '1.0' and '3.0'; select 'b' not between 'a' and 'c'; select to_timestamp('2022-12-26 00:00:01') not between to_date('2022-03-01') and to_date('2022-12-31'); select rand(123) not between 0.1 AND 0.2; + +-- Sanity test for legacy flag equating ! with NOT +set spark.sql.legacy.bangEqualsNot=true; +select 1 ! between 0 and 2; +select 1 ! in (3, 4); +select 'hello' ! like 'world'; +select 1 is ! null; +select false is ! 
true; +set spark.sql.legacy.bangEqualsNot=false; diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 64ea6e655d0b5..c108f7c76f764 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -126,11 +126,23 @@ select encode('hello', 'WINDOWS-1252'); select encode(scol, ecol) from values('hello', 'WINDOWS-1252') as t(scol, ecol); select encode('hello', 'Windows-xxx'); select encode(scol, ecol) from values('hello', 'Windows-xxx') as t(scol, ecol); +set spark.sql.legacy.codingErrorAction=true; +select encode('渭城朝雨浥轻尘', 'US-ASCII'); +select encode(scol, ecol) from values('渭城朝雨浥轻尘', 'US-ASCII') as t(scol, ecol); +set spark.sql.legacy.codingErrorAction=false; +select encode('客舍青青柳色新', 'US-ASCII'); +select encode(scol, ecol) from values('客舍青青柳色新', 'US-ASCII') as t(scol, ecol); +select encode(decode(encode('白日依山尽,黄河入海流。欲穷千里目,更上一层楼。', 'UTF-16'), 'UTF-16'), 'UTF-8'); +select encode(decode(encode('南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。', 'UTF-16'), 'UTF-16'), 'UTF-8'); +select encode(decode(encode('세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark', 'UTF-16'), 'UTF-16'), 'UTF-8'); +select encode(decode(encode('το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως', 'UTF-16'), 'UTF-16'), 'UTF-8'); +select encode(decode(encode('Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。', 'UTF-16'), 'UTF-16'), 'UTF-8'); -- decode select decode(); select decode(encode('abc', 'utf-8')); select decode(encode('abc', 'utf-8'), 'utf-8'); +select decode(encode('大千世界', 'utf-32'), 'utf-32'); select decode(1, 1, 'Southlake'); select decode(2, 1, 'Southlake'); select decode(2, 1, 'Southlake', 2, 'San Francisco', 3, 'New Jersey', 4, 'Seattle', 'Non domestic'); @@ -146,6 +158,12 @@ select decode(scol, ecol) from values(X'68656c6c6f', 'WINDOWS-1252') as t(scol, set spark.sql.legacy.javaCharsets=false; select decode(X'68656c6c6f', 'WINDOWS-1252'); select decode(scol, ecol) from values(X'68656c6c6f', 'WINDOWS-1252') as t(scol, ecol); +set spark.sql.legacy.codingErrorAction=true; +select decode(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII'); +select decode(scol, ecol) from values(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') as t(scol, ecol); +set spark.sql.legacy.codingErrorAction=false; +select decode(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII'); +select decode(scol, ecol) from values(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') as t(scol, ecol); -- contains SELECT CONTAINS(null, 'Spark'); @@ -275,3 +293,16 @@ select luhn_check(6011111111111117); select luhn_check(6011111111111118); select luhn_check(123.456); +--utf8 string validation +select is_valid_utf8(''); +select is_valid_utf8('abc'); +select is_valid_utf8(x'80'); +select make_valid_utf8(''); +select make_valid_utf8('abc'); +select make_valid_utf8(x'80'); +select validate_utf8(''); +select validate_utf8('abc'); +select validate_utf8(x'80'); +select try_validate_utf8(''); +select try_validate_utf8('abc'); +select try_validate_utf8(x'80'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql index ad2e7ad563e08..bc732cc3d320d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql +++ 
b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-in-join-condition.sql @@ -89,3 +89,7 @@ select * from x inner join y on x1 = y1 and exists (select * from z where z1 = y select * from x inner join y on x1 = y1 and not exists (select * from z where z1 = y1) order by x1, x2, y1, y2; select * from x left join y on x1 = y1 and exists (select * from z where z1 = y1) order by x1, x2, y1, y2; select * from x left join y on x1 = y1 and not exists (select * from z where z1 = y1) order by x1, x2, y1, y2; + +-- Correlated subquery references both left and right children, errors +select * from x join y on x1 = y1 and exists (select * from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2; +select * from x join y on x1 = y1 and not exists (select * from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql index d519abdbacc05..c906390c99c32 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-subquery-in-join-condition.sql @@ -84,3 +84,7 @@ select * from x inner join y on x1 = y1 and y2 IN (select z1 from z where z1 = y select * from x inner join y on x1 = y1 and y2 not IN (select z1 from z where z1 = y1) order by x1, x2, y1, y2; select * from x left join y on x1 = y1 and y2 IN (select z1 from z where z1 = y1) order by x1, x2, y1, y2; select * from x left join y on x1 = y1 and y2 not IN (select z1 from z where z1 = y1) order by x1, x2, y1, y2; + +-- Correlated subquery references both left and right children, errors +select * from x left join y on x1 = y1 and x2 IN (select z1 from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2; +select * from x left join y on x1 = y1 and x2 not IN (select z1 from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-group-by.sql new file mode 100644 index 0000000000000..6787fac75b39a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-group-by.sql @@ -0,0 +1,30 @@ +-- Tests for scalar subquery with a group-by. Only a group-by that guarantees a single row result is allowed. 
See SPARK-48503 + +--ONLY_IF spark + +create temp view x (x1, x2) as values (1, 1), (2, 2); +create temp view y (y1, y2) as values (2, 0), (3, -1); +create temp view z (z1, z2) as values (1, 0), (1, 1); + +-- Legal queries +select * from x where (select count(*) from y where y1 = x1 group by y1) = 1; +select * from x where (select count(*) from y where y1 = x1 group by x1) = 1; +select * from x where (select count(*) from y where y1 > x1 group by x1) = 1; + +-- Group-by column equal to constant - legal +select *, (select count(*) from y where x1 = y1 and y2 = 1 group by y2) from x; +-- Group-by column equal to expression with constants and outer refs - legal +select *, (select count(*) from y where x1 = y1 and y2 = x1 + 1 group by y2) from x; + +-- Illegal queries +select * from x where (select count(*) from y where y1 > x1 group by y1) = 1; +select *, (select count(*) from y where y1 + y2 = x1 group by y1) from x; + +-- Certain other operators like OUTER JOIN or UNION between the correlating filter and the group-by also can cause the scalar subquery to return multiple values and hence make the query illegal. +select *, (select count(*) from (select * from y where y1 = x1 union all select * from y) sub group by y1) from x; +select *, (select count(*) from y left join (select * from z where z1 = x1) sub on y2 = z2 group by z1) from x; -- The correlation below the join is unsupported in Spark anyway, but when we do support it this query should still be disallowed. + +-- Test legacy behavior conf +set spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate = true; +select * from x where (select count(*) from y where y1 > x1 group by y1) = 1; +reset spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate; diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz.sql index 377b26c67a3ea..28fe4539855cd 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz.sql @@ -1,6 +1,6 @@ -- timestamp_ltz literals and constructors --CONFIG_DIM1 spark.sql.timestampType=TIMESTAMP_LTZ ---CONFIG_DIM1 spark.sql.timestampType=TIMESTAMP_NTZ +--CONFIG_DIM2 spark.sql.timestampType=TIMESTAMP_NTZ select timestamp_ltz'2016-12-31 00:12:00', timestamp_ltz'2016-12-31'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql index d744c0c19b42e..07901093cfba8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql @@ -1,6 +1,6 @@ -- timestamp_ntz literals and constructors --CONFIG_DIM1 spark.sql.timestampType=TIMESTAMP_LTZ ---CONFIG_DIM1 spark.sql.timestampType=TIMESTAMP_NTZ +--CONFIG_DIM2 spark.sql.timestampType=TIMESTAMP_NTZ select timestamp_ntz'2016-12-31 00:12:00', timestamp_ntz'2016-12-31'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/to_from_avro.sql b/sql/core/src/test/resources/sql-tests/inputs/to_from_avro.sql new file mode 100644 index 0000000000000..12541ff26e24e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/to_from_avro.sql @@ -0,0 +1,21 @@ +-- Create some temporary test data. 
+create table t as + select named_struct('u', named_struct('member0', member0, 'member1', member1)) as s + from values (1, null), (null, 'a') tab(member0, member1); +declare avro_schema string; +set variable avro_schema = + '{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] }'; + +-- Exercise invalid SQL syntax when calling the 'from_avro' and 'to_avro' functions. +select from_avro(s, 42, map()) from t; +select from_avro(s, avro_schema, 42) from t; +select to_avro(s, 42) from t; + +-- Avro is not loaded in this testing environment, so queries calling the 'from_avro' or 'to_avro' +-- SQL functions that otherwise pass analysis return appropriate "Avro not loaded" errors here. +select to_avro(s, avro_schema) as result from t; +select from_avro(result, avro_schema, map()).u from (select null as result); + +-- Clean up. +drop temporary variable avro_schema; +drop table t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/transform.sql b/sql/core/src/test/resources/sql-tests/inputs/transform.sql index 922a1d8177780..8570496d439e6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/transform.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/transform.sql @@ -415,4 +415,8 @@ FROM ( ORDER BY a ) map_output SELECT TRANSFORM(a, b) - USING 'cat' AS (a, b); \ No newline at end of file + USING 'cat' AS (a, b); + +SELECT TRANSFORM (a, b) + USING 'cat' AS (a CHAR(10), b VARCHAR(10)) +FROM VALUES('apache', 'spark') t(a, b); diff --git a/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql index 55907b6701e50..943865b68d39e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/try_arithmetic.sql @@ -1,6 +1,8 @@ -- Numeric + Numeric SELECT try_add(1, 1); SELECT try_add(2147483647, 1); +SELECT try_add(2147483647, decimal(1)); +SELECT try_add(2147483647, "1"); SELECT try_add(-2147483648, -1); SELECT try_add(9223372036854775807L, 1); SELECT try_add(-9223372036854775808L, -1); @@ -38,6 +40,8 @@ SELECT try_divide(0, 0); SELECT try_divide(1, (2147483647 + 1)); SELECT try_divide(1L, (9223372036854775807L + 1L)); SELECT try_divide(1, 1.0 / 0.0); +SELECT try_divide(1, decimal(0)); +SELECT try_divide(1, "0"); -- Interval / Numeric SELECT try_divide(interval 2 year, 2); @@ -50,6 +54,8 @@ SELECT try_divide(interval 106751991 day, 0.5); -- Numeric - Numeric SELECT try_subtract(1, 1); SELECT try_subtract(2147483647, -1); +SELECT try_subtract(2147483647, decimal(-1)); +SELECT try_subtract(2147483647, "-1"); SELECT try_subtract(-2147483648, 1); SELECT try_subtract(9223372036854775807L, -1); SELECT try_subtract(-9223372036854775808L, 1); @@ -66,6 +72,8 @@ SELECT try_subtract(interval 106751991 day, interval -3 day); -- Numeric * Numeric SELECT try_multiply(2, 3); SELECT try_multiply(2147483647, -2); +SELECT try_multiply(2147483647, decimal(-2)); +SELECT try_multiply(2147483647, "-2"); SELECT try_multiply(-2147483648, 2); SELECT try_multiply(9223372036854775807L, 2); SELECT try_multiply(-9223372036854775808L, -2); diff --git a/sql/core/src/test/resources/sql-tests/inputs/udtf/udtf.sql b/sql/core/src/test/resources/sql-tests/inputs/udtf/udtf.sql index c83481f10dca6..a437b1f93b604 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udtf/udtf.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udtf/udtf.sql @@ -143,6 +143,22 @@ SELECT * FROM UDTFWithSinglePartition(1, invalid_arg_name => 2); SELECT * FROM 
UDTFWithSinglePartition(1, initial_count => 2); SELECT * FROM UDTFWithSinglePartition(initial_count => 1, initial_count => 2); SELECT * FROM UDTFInvalidPartitionByOrderByParseError(TABLE(t2)); +-- Exercise the UDTF partitioning bug. +SELECT * FROM UDTFPartitionByIndexingBug( + TABLE( + SELECT + 5 AS unused_col, + 'hi' AS partition_col, + 1.0 AS double_col + + UNION ALL + + SELECT + 4 AS unused_col, + 'hi' AS partition_col, + 1.0 AS double_col + ) +); -- cleanup DROP VIEW t1; diff --git a/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding-config.sql b/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding-config.sql new file mode 100644 index 0000000000000..e803254ea642a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding-config.sql @@ -0,0 +1,172 @@ +-- This test suite checks the spark.sql.viewSchemaBindingMode configuration. +-- It can be DISABLED or COMPENSATION + +-- Verify the default binding is true +SET spark.sql.legacy.viewSchemaBindingMode; + +-- 1. Test DISABLED mode. +SET spark.sql.legacy.viewSchemaBindingMode = false; + +-- 1.a Attempts to use the SCHEMA BINDING clause fail with FEATURE_NOT_ENABLED +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT 1; +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT 1; +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT 1; +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT 1; + +-- 1.b Existing SHOW and DESCRIBE should behave as before Spark 4.0.0 +CREATE OR REPLACE VIEW v AS SELECT 1; +DESCRIBE EXTENDED v; +SHOW TABLE EXTENDED LIKE 'v'; +SHOW CREATE TABLE v; +DROP VIEW IF EXISTS v; + +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1; +DESCRIBE EXTENDED v; +SHOW TABLE EXTENDED LIKE 'v'; +DROP VIEW IF EXISTS v; + +-- 1.c Views get invalidated if the types change in an unsafe manner +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; +CREATE OR REPLACE VIEW v AS SELECT * FROM t; +SELECT * FROM v; +-- Baseline: v(c1 INT); +DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; + +-- Widen the column c1 in t +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET; +-- The view should be invalidated, cannot upcast from BIGINT to INT +SELECT * FROM v; + +-- The view still describes as v(c1 INT); +DESCRIBE EXTENDED v; + +-- 2. Test true mode. In this mode Spark tolerates any supported CAST, not just up cast +SET spark.sql.legacy.viewSchemaBindingMode = true; +SET spark.sql.legacy.viewSchemaCompensation = false; + +-- To verify ANSI_MODE is enforced even if ANSI_MODE is turned off. 
+SET spark.sql.ansi.enabled = false; + +-- 2.a In BINDING views get invalidated if the type can't cast +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; +CREATE OR REPLACE VIEW v AS SELECT * FROM t; +SELECT * FROM v; +-- Baseline: v(c1 INT); +DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; + +-- Widen the column c1 in t +DROP TABLE t; +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET; +INSERT INTO t VALUES (1); + +-- This fails +SELECT * FROM v; +-- The view still describes as v(c1 BIGINT) +DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; + +-- 2.b Switch to default COMPENSATION +SET spark.sql.legacy.viewSchemaCompensation = true; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; +CREATE OR REPLACE VIEW v AS SELECT * FROM t; +SELECT * FROM v; +-- Baseline: v(c1 INT); +DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; + +-- Widen the column c1 in t +DROP TABLE t; +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET; +INSERT INTO t VALUES (1); + +-- This now succeeds +SELECT * FROM v; +-- The view still describes as v(c1 BIGINT) +DESCRIBE EXTENDED v; +SHOW CREATE TABLE v; + +-- 2.c In COMPENSATION views ignore added columns and tolerate type changes +-- Expect the added column to be ignored, but the type will be tolerated, as long as it can cast +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING NOT NULL, c2 INT) USING PARQUET; +INSERT INTO t VALUES ('1', 2); +SELECT * FROM v; +-- The view still describes as v(c1 INT); +DESCRIBE EXTENDED v; + +-- Runtime error if the cast fails +INSERT INTO t VALUES ('a', 2); +SELECT * FROM v; + +-- Compile time error if the cast can't be done +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 MAP, c2 INT) USING PARQUET; + +-- The view should be invalidated, we can't cast a MAP to INT +SELECT * FROM v; + +-- The view still describes as v(c1 INT); +DESCRIBE EXTENDED v; + +-- 2.d Still can't drop a column, though +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET; +INSERT INTO t VALUES (1, 2); +CREATE OR REPLACE VIEW v AS SELECT * FROM t; +SELECT * FROM v; + +-- Describes as v(c1 INT, c2 INT) +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; + +-- The view should be invalidated, it lost a column +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- 2.e Attempt to rename a column +DROP TABLE IF EXISTS t; +CREATE TABLE t(c3 INT NOT NULL, c2 INT) USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- 3. 
Test the behavior of grandfathered views and temp views +SET spark.sql.legacy.viewSchemaBindingMode = false; +SET spark.sql.legacy.viewSchemaCompensation = false; +CREATE OR REPLACE VIEW v AS SELECT 1; +SET spark.sql.legacy.viewSchemaBindingMode = true; +DESCRIBE EXTENDED v; +SHOW TABLE EXTENDED LIKE 'v'; +SHOW CREATE TABLE v; + +SET spark.sql.legacy.viewSchemaCompensation = true; +DESCRIBE EXTENDED v; +SHOW TABLE EXTENDED LIKE 'v'; +SHOW CREATE TABLE v; + +DROP VIEW IF EXISTS v; + +SET spark.sql.legacy.viewSchemaBindingMode = false; +SET spark.sql.legacy.viewSchemaCompensation = false; +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1; +SET spark.sql.legacy.viewSchemaBindingMode = true; +DESCRIBE EXTENDED v; +SHOW TABLE EXTENDED LIKE 'v'; + +SET spark.sql.legacy.viewSchemaCompensation = true; +DESCRIBE EXTENDED v; +SHOW TABLE EXTENDED LIKE 'v'; + +DROP VIEW IF EXISTS v; + +-- 99 Cleanup +DROP VIEW IF EXISTS v; +DROP TABLE IF EXISTS t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding.sql b/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding.sql new file mode 100644 index 0000000000000..413322db10d28 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/view-schema-binding.sql @@ -0,0 +1,64 @@ +-- This test suite checks that the WITH SCHEMA BINDING clause is correctly implemented + +-- New view with schema binding +-- 1.a BINDING is persisted +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t; +SELECT * FROM v; +-- Baseline: v(c1 INT); +DESCRIBE EXTENDED v; + +-- Widen the column c1 in t +DROP TABLE t; +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET; +-- The view should be invalidated, cannot upcast from BIGINT to INT +SELECT * FROM v; + +-- The view still describes as v(c1 INT); +DESCRIBE EXTENDED v; + +-- 1.b In BINDING views get invalidated if a column is lost +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET; +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t; +SELECT * FROM v; +-- Baseline: v(c1 INT, c2 INT); +DESCRIBE EXTENDED v; + +-- Drop the column c2 from t +DROP TABLE t; +CREATE TABLE t(c1 INT) USING PARQUET; +-- The view should be invalidated, it lost a column +SELECT * FROM v; + +-- The view still describes as v(c1 INT, c2 INT); +DESCRIBE EXTENDED v; + +-- Test ALTER VIEW ... 
WITH SCHEMA BINDING +SET spark.sql.legacy.viewSchemaCompensation=false; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; +CREATE OR REPLACE VIEW v AS SELECT * FROM t; +SELECT * FROM v; +-- Baseline: v(c1 INT); +-- There is no binding recorded +DESCRIBE EXTENDED v; + +ALTER VIEW v WITH SCHEMA BINDING; +-- Baseline: v(c1 INT); +-- There is SCHEMA BINDING recorded +DESCRIBE EXTENDED v; + +DROP TABLE t; +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET; +-- The view should be invalidated, cannot upcast from BIGINT to INT +SELECT * FROM v; + +-- The view still describes as v(c1 INT); +DESCRIBE EXTENDED v; + +-- 99 Cleanup +DROP VIEW IF EXISTS v; +DROP TABLE IF EXISTS t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/view-schema-compensation.sql b/sql/core/src/test/resources/sql-tests/inputs/view-schema-compensation.sql new file mode 100644 index 0000000000000..21a3ce1e12293 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/view-schema-compensation.sql @@ -0,0 +1,90 @@ +-- This test suite checks the WITH SCHEMA COMPENSATION clause +-- Disable ANSI mode to ensure we are forcing it explicitly in the CASTS +SET spark.sql.ansi.enabled = false; + +-- In COMPENSATION views get invalidated if the type can't cast +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT * FROM t; +SELECT * FROM v; +-- Baseline: v(c1 INT); +DESCRIBE EXTENDED v; + +-- Widen the column c1 in t +DROP TABLE t; +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET; +INSERT INTO t VALUES (1); +SELECT * FROM v; +-- The view still describes as v(c1 BIGINT) +DESCRIBE EXTENDED v; + +-- In COMPENSATION views ignore an added column and tolerate a changed type +-- Expect the added column to be ignored, but the type will be tolerated, as long as it can cast +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING NOT NULL, c2 INT) USING PARQUET; +INSERT INTO t VALUES ('1', 2); +SELECT * FROM v; +-- The view still describes as v(c1 INT); +DESCRIBE EXTENDED v; + +-- Runtime error if the cast fails +INSERT INTO t VALUES ('a', 2); +SELECT * FROM v; + +-- Compile time error if the cast can't be done +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 MAP, c2 INT) USING PARQUET; + +-- The view should be invalidated, we can't cast a MAP to INT +SELECT * FROM v; + +-- The view still describes as v(c1 INT); +DESCRIBE EXTENDED v; + +-- Still can't drop a column, though +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET; +INSERT INTO t VALUES (1, 2); +CREATE OR REPLACE VIEW v AS SELECT * FROM t; +SELECT * FROM v; + +-- Describes as v(c1 INT, c2 INT) +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET; + +-- The view should be invalidated, it lost a column +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Attempt to rename a column, this fails +DROP TABLE IF EXISTS t; +CREATE TABLE t(c3 INT NOT NULL, c2 INT) USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Test ALTER VIEW ... WITH SCHEMA ... 
+DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT) USING PARQUET; +INSERT INTO t VALUES(1); +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t; +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING) USING PARQUET; +INSERT INTO t VALUES('1'); + +-- This fails, because the view uses SCHEMA BINDING +SELECT * FROM v; + +-- Now upgrade the view to schema compensation +ALTER VIEW v WITH SCHEMA COMPENSATION; +DESCRIBE EXTENDED v; + +-- Success +SELECT * FROM v; + +-- Cleanup +DROP VIEW IF EXISTS v; +DROP TABLE IF EXISTS t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/view-schema-evolution.sql b/sql/core/src/test/resources/sql-tests/inputs/view-schema-evolution.sql new file mode 100644 index 0000000000000..5ff153acef25c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/view-schema-evolution.sql @@ -0,0 +1,155 @@ +-- This test suite checks the WITH SCHEMA EVOLUTION clause + +-- In EVOLUTION mode Spark will inherit everything from the query, unless +-- a column list is given. In that case it behaves like TYPE EVOLUTION +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET; +INSERT INTO t VALUES (1, 2); +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- In EVOLUTION views inherit column type changes and name changes +DROP TABLE IF EXISTS t; +CREATE TABLE t(c4 STRING NOT NULL, c5 DOUBLE) USING PARQUET; +INSERT INTO t VALUES ('1', 2.0); +SELECT * FROM v; +-- The view now describes as v(c4 STRING, c5 DOUBLE) +DESCRIBE EXTENDED v; + +-- In EVOLUTION new columns are inherited +DROP TABLE IF EXISTS t; +CREATE TABLE t(c4 STRING, c5 DOUBLE, c6 DATE) USING PARQUET; +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01'); +SELECT * FROM v; +-- The view describes as v(c4 STRING, c5 DOUBLE, c6 DATE) +DESCRIBE EXTENDED v; + +-- We can even drop columns +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET; +INSERT INTO t VALUES (1, 2); +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t; +SELECT * FROM v; +-- Describes as v(c1 INT, c2 INT) +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT) USING PARQUET; +SELECT * FROM v; +-- The view describes as v(c1 INT) +DESCRIBE EXTENDED v; + +-- If a column list is given it behaves like TYPE EVOLUTION +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET; +INSERT INTO t VALUES (1, 2); +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- In EVOLUTION views with explicit column lists still inherit column type changes +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING NOT NULL, c2 DOUBLE) USING PARQUET; +INSERT INTO t VALUES ('1', 2.0); +SELECT * FROM v; +-- The view now describes as v(a1 STRING, a2 DOUBLE) +DESCRIBE EXTENDED v; + +-- In EVOLUTION views with explicit column lists no new columns are inherited +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING, c2 DOUBLE, c3 DATE) USING PARQUET; +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01'); +SELECT * FROM v; +-- The view still describes as v(a1 STRING, a2 DOUBLE) +DESCRIBE EXTENDED v; + +-- In EVOLUTION views with explicit column lists can't drop a column +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET; +INSERT INTO t VALUES (1, 2); +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t; +SELECT * FROM v; + +-- Describes as v(a1 INT, a2 INT) +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS 
t; +CREATE TABLE t(c1 INT) USING PARQUET; + +-- The view should be invalidated, it lost a column +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Attempt to rename a column +DROP TABLE IF EXISTS t; +CREATE TABLE t(c3 INT, c2 INT) USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Test preservation of comments +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT COMMENT 'c1', c2 INT COMMENT 'c2') USING PARQUET; + +-- EVOLUTION, column list, but no comments +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t; +DESCRIBE EXTENDED v; + +-- EVOLUTION, column list, but no comments +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t; +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6c', c2 STRING COMMENT 'c2 6c') USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- EVOLUTION, column list with comments +CREATE OR REPLACE VIEW v(a1 COMMENT 'a1', a2 COMMENT 'a2') WITH SCHEMA EVOLUTION AS SELECT * FROM t; +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6d', c2 STRING COMMENT 'c2 6d') USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- EVOLUTION, no column list +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t; +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6e', c2 STRING COMMENT 'c2 6e') USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Test error condition where a duplicate column name is produced +DROP TABLE IF EXISTS t1; +CREATE TABLE t1(c1 INT) USING PARQUET; +DROP TABLE IF EXISTS t2; +CREATE TABLE t2(c2 INT) USING PARQUET; +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t1, t2; +SELECT * FROM v; +DROP TABLE IF EXISTS t2; +CREATE TABLE t2(c1 INT) USING PARQUET; +-- This should fail with a duplicate column error +SELECT * FROM v; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; + +-- Test ALTER VIEW ... 
WITH SCHEMA EVOLUTION + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT) USING PARQUET; +INSERT INTO t VALUES(1); +CREATE OR REPLACE VIEW v AS SELECT * FROM t; + +ALTER VIEW v WITH SCHEMA EVOLUTION; +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING, c2 INT) USING PARQUET; +-- No error, extra column +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- clean up +DROP VIEW IF EXISTS v; +DROP TABLE IF EXISTS t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/view-schema-type-evolution.sql b/sql/core/src/test/resources/sql-tests/inputs/view-schema-type-evolution.sql new file mode 100644 index 0000000000000..c0278f15b6418 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/view-schema-type-evolution.sql @@ -0,0 +1,90 @@ +-- This test suite checks the WITH SCHEMA TYPE EVOLUTION clause + +-- In TYPE EVOLUTION mode Spark will inherit the view column types from the query +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET; +INSERT INTO t VALUES (1, 2); +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING NOT NULL, c2 DOUBLE) USING PARQUET; +INSERT INTO t VALUES ('1', 2.0); +SELECT * FROM v; +-- The view now describes as v(c1 STRING, c2 DOUBLE) +DESCRIBE EXTENDED v; + +-- In TYPE EVOLUTION no new columns are inherited +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING, c2 DOUBLE, c3 DATE) USING PARQUET; +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01'); +SELECT * FROM v; +-- The view still describes as v(c1 STRING, c2 DOUBLE) +DESCRIBE EXTENDED v; + +-- Still can't drop a column +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET; +INSERT INTO t VALUES (1, 2); +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t; +SELECT * FROM v; + +-- Describes as v(c1 INT, c2 INT) +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT) USING PARQUET; + +-- The view should be invalid, it lost a column +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Attempt to rename a column +DROP TABLE IF EXISTS t; +CREATE TABLE t(c3 INT, c2 INT) USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Test preservation of comments +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT COMMENT 'c1', c2 INT COMMENT 'c2') USING PARQUET; + +-- Inherit comments from the table, if none are given +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t; +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6a', c2 STRING COMMENT 'c2 6a') USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- TYPE EVOLUTION, column list with comments +CREATE OR REPLACE VIEW v(a1 COMMENT 'a1', a2 COMMENT 'a2') WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t; +DESCRIBE EXTENDED v; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6b', c2 STRING COMMENT 'c2 6b') USING PARQUET; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Test ALTER VIEW ... 
WITH SCHEMA TYPE EVOLUTION + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 INT) USING PARQUET; +INSERT INTO t VALUES(1); +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT * FROM t; + +DROP TABLE IF EXISTS t; +CREATE TABLE t(c1 STRING) USING PARQUET; +INSERT INTO t VALUES('1'); +SELECT * FROM v; +DESCRIBE EXTENDED v; + +ALTER VIEW v WITH SCHEMA TYPE EVOLUTION; +SELECT * FROM v; +DESCRIBE EXTENDED v; + +-- Cleanup +DROP VIEW IF EXISTS v; +DROP TABLE IF EXISTS t; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out index ee4525285a9be..7bfc35a61e092 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/higher-order-functions.sql.out @@ -33,6 +33,28 @@ org.apache.spark.sql.AnalysisException } +-- !query +select ceil(x -> x) as v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_LAMBDA_FUNCTION_CALL.NON_HIGHER_ORDER_FUNCTION", + "sqlState" : "42K0D", + "messageParameters" : { + "class" : "org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder$" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "ceil(x -> x)" + } ] +} + + -- !query select transform(zs, z -> z) as v from nested -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out index 8b4acd12911b4..cabbfa520d77a 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/keywords.sql.out @@ -20,9 +20,11 @@ AS true ASC false AT false AUTHORIZATION true +BEGIN false BETWEEN false BIGINT false BINARY false +BINDING false BOOLEAN false BOTH true BUCKET false @@ -30,6 +32,7 @@ BUCKETS false BY false BYTE false CACHE false +CALLED false CASCADE false CASE true CAST true @@ -52,9 +55,11 @@ COMMENT false COMMIT false COMPACT false COMPACTIONS false +COMPENSATION false COMPUTE false CONCATENATE false CONSTRAINT true +CONTAINS false COST false CREATE true CROSS true @@ -81,10 +86,12 @@ DECIMAL false DECLARE false DEFAULT false DEFINED false +DEFINER false DELETE false DELIMITED false DESC false DESCRIBE false +DETERMINISTIC false DFS false DIRECTORIES false DIRECTORY false @@ -97,6 +104,7 @@ ELSE true END true ESCAPE true ESCAPED false +EVOLUTION false EXCEPT true EXCHANGE false EXCLUDE false @@ -143,6 +151,7 @@ INDEX false INDEXES false INNER true INPATH false +INPUT false INPUTFORMAT false INSERT false INT false @@ -150,10 +159,12 @@ INTEGER false INTERSECT true INTERVAL false INTO true +INVOKER false IS true ITEMS false JOIN true KEYS false +LANGUAGE false LAST false LATERAL true LAZY false @@ -181,6 +192,7 @@ MILLISECONDS false MINUS false MINUTE false MINUTES false +MODIFIES false MONTH false MONTHS false MSCK false @@ -214,8 +226,6 @@ PARTITION false PARTITIONED false PARTITIONS false PERCENT false -PERCENTILE_CONT true -PERCENTILE_DISC true PIVOT false PLACING false POSITION false @@ -227,6 +237,7 @@ PURGE false QUARTER false QUERY false RANGE false +READS false REAL false RECORDREADER false RECORDWRITER false @@ -241,6 +252,8 @@ REPLACE false RESET false RESPECT false RESTRICT false +RETURN false +RETURNS false REVOKE false RIGHT true ROLE false @@ -253,6 +266,7 @@ SCHEMA false 
SCHEMAS false SECOND false SECONDS false +SECURITY false SELECT true SEMI false SEPARATED false @@ -270,6 +284,8 @@ SOME true SORT false SORTED false SOURCE false +SPECIFIC false +SQL true START false STATISTICS false STORED false @@ -401,14 +417,13 @@ OR ORDER OUTER OVERLAPS -PERCENTILE_CONT -PERCENTILE_DISC PRIMARY REFERENCES RIGHT SELECT SESSION_USER SOME +SQL TABLE THEN TIME diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/math.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/math.sql.out index 8cd1536d7f726..e2abcb099130a 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/math.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/math.sql.out @@ -797,3 +797,108 @@ org.apache.spark.SparkArithmeticException "fragment" : "conv('-9223372036854775807', 36, 10)" } ] } + + +-- !query +SELECT BIN(0) +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT BIN(25) +-- !query schema +struct +-- !query output +11001 + + +-- !query +SELECT BIN(25L) +-- !query schema +struct +-- !query output +11001 + + +-- !query +SELECT BIN(25.5) +-- !query schema +struct +-- !query output +11001 + + +-- !query +SELECT POSITIVE(0Y) +-- !query schema +struct<(+ 0):tinyint> +-- !query output +0 + + +-- !query +SELECT POSITIVE(25) +-- !query schema +struct<(+ 25):int> +-- !query output +25 + + +-- !query +SELECT POSITIVE(-25L) +-- !query schema +struct<(+ -25):bigint> +-- !query output +-25 + + +-- !query +SELECT POSITIVE(25.5) +-- !query schema +struct<(+ 25.5):decimal(3,1)> +-- !query output +25.5 + + +-- !query +SELECT POSITIVE("25.5") +-- !query schema +struct<(+ 25.5):double> +-- !query output +25.5 + + +-- !query +SELECT POSITIVE("invalid") +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'invalid'", + "sourceType" : "\"STRING\"", + "targetType" : "\"DOUBLE\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 26, + "fragment" : "POSITIVE(\"invalid\")" + } ] +} + + +-- !query +SELECT POSITIVE(null) +-- !query schema +struct<(+ NULL):double> +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 8096cef266ec4..da2fa9ca0c18b 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -903,6 +903,110 @@ org.apache.spark.SparkIllegalArgumentException } +-- !query +set spark.sql.legacy.codingErrorAction=true +-- !query schema +struct +-- !query output +spark.sql.legacy.codingErrorAction true + + +-- !query +select encode('渭城朝雨浥轻尘', 'US-ASCII') +-- !query schema +struct +-- !query output +??????? + + +-- !query +select encode(scol, ecol) from values('渭城朝雨浥轻尘', 'US-ASCII') as t(scol, ecol) +-- !query schema +struct +-- !query output +??????? 
+ + +-- !query +set spark.sql.legacy.codingErrorAction=false +-- !query schema +struct +-- !query output +spark.sql.legacy.codingErrorAction false + + +-- !query +select encode('客舍青青柳色新', 'US-ASCII') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "MALFORMED_CHARACTER_CODING", + "sqlState" : "22000", + "messageParameters" : { + "charset" : "US-ASCII", + "function" : "`encode`" + } +} + + +-- !query +select encode(scol, ecol) from values('客舍青青柳色新', 'US-ASCII') as t(scol, ecol) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "MALFORMED_CHARACTER_CODING", + "sqlState" : "22000", + "messageParameters" : { + "charset" : "US-ASCII", + "function" : "`encode`" + } +} + + +-- !query +select encode(decode(encode('白日依山尽,黄河入海流。欲穷千里目,更上一层楼。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +白日依山尽,黄河入海流。欲穷千里目,更上一层楼。 + + +-- !query +select encode(decode(encode('南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。 + + +-- !query +select encode(decode(encode('세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark + + +-- !query +select encode(decode(encode('το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως + + +-- !query +select encode(decode(encode('Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。 + + -- !query select decode() -- !query schema @@ -961,6 +1065,14 @@ struct abc +-- !query +select decode(encode('大千世界', 'utf-32'), 'utf-32') +-- !query schema +struct +-- !query output +大千世界 + + -- !query select decode(1, 1, 'Southlake') -- !query schema @@ -1117,6 +1229,70 @@ org.apache.spark.SparkIllegalArgumentException } +-- !query +set spark.sql.legacy.codingErrorAction=true +-- !query schema +struct +-- !query output +spark.sql.legacy.codingErrorAction true + + +-- !query +select decode(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') +-- !query schema +struct +-- !query output +��������������������� + + +-- !query +select decode(scol, ecol) from values(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') as t(scol, ecol) +-- !query schema +struct +-- !query output +��������������������� + + +-- !query +set spark.sql.legacy.codingErrorAction=false +-- !query schema +struct +-- !query output +spark.sql.legacy.codingErrorAction false + + +-- !query +select decode(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "MALFORMED_CHARACTER_CODING", + "sqlState" : "22000", + "messageParameters" : { + "charset" : "US-ASCII", + "function" : "`decode`" + } +} + + +-- !query +select decode(scol, ecol) from values(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') as t(scol, ecol) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "MALFORMED_CHARACTER_CODING", + "sqlState" : "22000", + "messageParameters" : { + "charset" : "US-ASCII", + "function" : "`decode`" + } +} + + -- !query SELECT CONTAINS(null, 'Spark') -- !query schema @@ 
-2058,3 +2234,106 @@ select luhn_check(123.456) struct -- !query output false + + +-- !query +select is_valid_utf8('') +-- !query schema +struct +-- !query output +true + + +-- !query +select is_valid_utf8('abc') +-- !query schema +struct +-- !query output +true + + +-- !query +select is_valid_utf8(x'80') +-- !query schema +struct +-- !query output +false + + +-- !query +select make_valid_utf8('') +-- !query schema +struct +-- !query output + + + +-- !query +select make_valid_utf8('abc') +-- !query schema +struct +-- !query output +abc + + +-- !query +select make_valid_utf8(x'80') +-- !query schema +struct +-- !query output +� + + +-- !query +select validate_utf8('') +-- !query schema +struct +-- !query output + + + +-- !query +select validate_utf8('abc') +-- !query schema +struct +-- !query output +abc + + +-- !query +select validate_utf8(x'80') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_UTF8_STRING", + "sqlState" : "22029", + "messageParameters" : { + "str" : "\\x80" + } +} + + +-- !query +select try_validate_utf8('') +-- !query schema +struct +-- !query output + + + +-- !query +select try_validate_utf8('abc') +-- !query schema +struct +-- !query output +abc + + +-- !query +select try_validate_utf8(x'80') +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out index adb6550e80830..acf6e70a50dea 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/try_arithmetic.sql.out @@ -15,6 +15,22 @@ struct NULL +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_add(2147483647, "1") +-- !query schema +struct +-- !query output +2147483648 + + -- !query SELECT try_add(-2147483648, -1) -- !query schema @@ -341,6 +357,22 @@ org.apache.spark.SparkArithmeticException } +-- !query +SELECT try_divide(1, decimal(0)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_divide(1, "0") +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT try_divide(interval 2 year, 2) -- !query schema @@ -405,6 +437,22 @@ struct NULL +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query schema +struct +-- !query output +2147483648 + + -- !query SELECT try_subtract(-2147483648, 1) -- !query schema @@ -547,6 +595,22 @@ struct NULL +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query schema +struct +-- !query output +-4294967294 + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query schema +struct +-- !query output +-4294967294 + + -- !query SELECT try_multiply(-2147483648, 2) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/binary.sql.out b/sql/core/src/test/resources/sql-tests/results/binary.sql.out new file mode 100644 index 0000000000000..050f05271411a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/binary.sql.out @@ -0,0 +1,39 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query schema +struct +-- !query output + + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query schema +struct +-- !query output +Eason Yao 
2018-11-17:13:33:33 + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query schema +struct +-- !query output +Spark + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query schema +struct> +-- !query output +[,Eason Yao 2018-11-17:13:33:33,Spark] + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query schema +struct +-- !query output +1,Eason Yao 2018-11-17:13:33:33 diff --git a/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out b/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out new file mode 100644 index 0000000000000..8724e8620b48f --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/binary_base64.sql.out @@ -0,0 +1,39 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query schema +struct +-- !query output + + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query schema +struct +-- !query output +RWFzb24gWWFvIDIwMTgtMTEtMTc6MTM6MzM6MzM + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query schema +struct +-- !query output +U3Bhcms + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query schema +struct> +-- !query output +[,RWFzb24gWWFvIDIwMTgtMTEtMTc6MTM6MzM6MzM,U3Bhcms] + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query schema +struct +-- !query output +1,RWFzb24gWWFvIDIwMTgtMTEtMTc6MTM6MzM6MzM diff --git a/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out b/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out new file mode 100644 index 0000000000000..0c543a7b45476 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/binary_basic.sql.out @@ -0,0 +1,39 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query schema +struct +-- !query output +[] + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query schema +struct +-- !query output +[69, 97, 115, 111, 110, 32, 89, 97, 111, 32, 50, 48, 49, 56, 45, 49, 49, 45, 49, 55, 58, 49, 51, 58, 51, 51, 58, 51, 51] + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query schema +struct +-- !query output +[83, 112, 97, 114, 107] + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query schema +struct> +-- !query output +[[],[69, 97, 115, 111, 110, 32, 89, 97, 111, 32, 50, 48, 49, 56, 45, 49, 49, 45, 49, 55, 58, 49, 51, 58, 51, 51, 58, 51, 51],[83, 112, 97, 114, 107]] + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query schema +struct +-- !query output +1,"[69, 97, 115, 111, 110, 32, 89, 97, 111, 32, 50, 48, 49, 56, 45, 49, 49, 45, 49, 55, 58, 49, 51, 58, 51, 51, 58, 51, 51]" diff --git a/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out b/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out new file mode 100644 index 0000000000000..d977301f98e00 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/binary_hex.sql.out @@ -0,0 +1,39 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query schema +struct +-- !query output + + + +-- !query +SELECT 
X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query schema +struct +-- !query output +4561736F6E2059616F20323031382D31312D31373A31333A33333A3333 + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query schema +struct +-- !query output +537061726B + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query schema +struct> +-- !query output +[,4561736F6E2059616F20323031382D31312D31373A31333A33333A3333,537061726B] + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query schema +struct +-- !query output +1,4561736F6E2059616F20323031382D31312D31373A31333A33333A3333 diff --git a/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out b/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out new file mode 100644 index 0000000000000..3fc6c0f53cc54 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/binary_hex_discrete.sql.out @@ -0,0 +1,39 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SELECT X'' +-- !query schema +struct +-- !query output +[] + + +-- !query +SELECT X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333' +-- !query schema +struct +-- !query output +[45 61 73 6F 6E 20 59 61 6F 20 32 30 31 38 2D 31 31 2D 31 37 3A 31 33 3A 33 33 3A 33 33] + + +-- !query +SELECT CAST('Spark' as BINARY) +-- !query schema +struct +-- !query output +[53 70 61 72 6B] + + +-- !query +SELECT array( X'', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333', CAST('Spark' as BINARY)) +-- !query schema +struct> +-- !query output +[[],[45 61 73 6F 6E 20 59 61 6F 20 32 30 31 38 2D 31 31 2D 31 37 3A 31 33 3A 33 33 3A 33 33],[53 70 61 72 6B]] + + +-- !query +SELECT to_csv(named_struct('n', 1, 'info', X'4561736F6E2059616F20323031382D31312D31373A31333A33333A3333')) +-- !query schema +struct +-- !query output +1,[45 61 73 6F 6E 20 59 61 6F 20 32 30 31 38 2D 31 31 2D 31 37 3A 31 33 3A 33 33 3A 33 33] diff --git a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out index 2c8b733004aac..7233b0d0ae499 100644 --- a/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/bitwise.sql.out @@ -322,3 +322,155 @@ org.apache.spark.SparkIllegalArgumentException "upper" : "64" } } + + +-- !query +SELECT 20181117 >> 2 +-- !query schema +struct<(20181117 >> 2):int> +-- !query output +5045279 + + +-- !query +SELECT 20181117 << 2 +-- !query schema +struct<(20181117 << 2):int> +-- !query output +80724468 + + +-- !query +SELECT 20181117 >>> 2 +-- !query schema +struct<(20181117 >>> 2):int> +-- !query output +5045279 + + +-- !query +SELECT 20181117 > > 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'>'", + "hint" : "" + } +} + + +-- !query +SELECT 20181117 < < 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'<'", + "hint" : "" + } +} + + +-- !query +SELECT 20181117 > >> 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + 
"messageParameters" : { + "error" : "'>>'", + "hint" : "" + } +} + + +-- !query +SELECT 20181117 <<< 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'<'", + "hint" : "" + } +} + + +-- !query +SELECT 20181117 >>>> 2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42601", + "messageParameters" : { + "error" : "'>'", + "hint" : "" + } +} + + +-- !query +select cast(null as array>), 20181117 >> 2 +-- !query schema +struct>,(20181117 >> 2):int> +-- !query output +NULL 5045279 + + +-- !query +select cast(null as array>), 20181117 >>> 2 +-- !query schema +struct>,(20181117 >>> 2):int> +-- !query output +NULL 5045279 + + +-- !query +select cast(null as map>), 20181117 >> 2 +-- !query schema +struct>,(20181117 >> 2):int> +-- !query output +NULL 5045279 + + +-- !query +select 1 << 1 + 2 as plus_over_shift +-- !query schema +struct +-- !query output +8 + + +-- !query +select 2 >> 1 << 1 as left_to_right +-- !query schema +struct +-- !query output +2 + + +-- !query +select 1 & 2 >> 1 as shift_over_ampersand +-- !query schema +struct +-- !query output +1 diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index 8ff5865168878..568c9f3b29e87 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -193,6 +193,7 @@ Created By [not included in comparison] Type VIEW View Text select * from char_tbl View Original Text select * from char_tbl +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default View Query Output Columns [c, v] @@ -222,6 +223,7 @@ struct CREATE VIEW default.char_view ( c, v) +WITH SCHEMA COMPENSATION AS select * from char_tbl @@ -362,6 +364,7 @@ Created By [not included in comparison] Type VIEW View Text select * from char_tbl2 View Original Text select * from char_tbl2 +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default View Query Output Columns [c, v] @@ -422,6 +425,7 @@ Created By [not included in comparison] Type VIEW View Text select * from char_tbl2 View Original Text select * from char_tbl2 +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default View Query Output Columns [c, v] Table Properties [yes=no] @@ -482,6 +486,7 @@ Created By [not included in comparison] Type VIEW View Text select * from char_tbl2 View Original Text select * from char_tbl2 +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default View Query Output Columns [c, v] diff --git a/sql/core/src/test/resources/sql-tests/results/collations.sql.out b/sql/core/src/test/resources/sql-tests/results/collations.sql.out index 4485191ba1f3b..89e6665df9d04 100644 --- a/sql/core/src/test/resources/sql-tests/results/collations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/collations.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by SQLQueryTestSuite -- !query -create table t1(utf8_binary string collate utf8_binary, utf8_binary_lcase string collate utf8_binary_lcase) using parquet +create table t1(utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet -- !query schema struct<> -- !query output @@ -45,7 +45,7 @@ describe table t1 
struct -- !query output utf8_binary string -utf8_binary_lcase string collate UTF8_BINARY_LCASE +utf8_lcase string collate UTF8_LCASE -- !query @@ -60,7 +60,7 @@ struct -- !query -select count(*) from t1 group by utf8_binary_lcase +select count(*) from t1 group by utf8_lcase -- !query schema struct -- !query output @@ -71,15 +71,15 @@ struct -- !query select * from t1 where utf8_binary = 'aaa' -- !query schema -struct +struct -- !query output aaa aaa -- !query -select * from t1 where utf8_binary_lcase = 'aaa' collate utf8_binary_lcase +select * from t1 where utf8_lcase = 'aaa' collate utf8_lcase -- !query schema -struct +struct -- !query output AAA AAA aaa aaa @@ -88,7 +88,7 @@ aaa aaa -- !query select * from t1 where utf8_binary < 'bbb' -- !query schema -struct +struct -- !query output AAA AAA BBB BBB @@ -96,18 +96,18 @@ aaa aaa -- !query -select * from t1 where utf8_binary_lcase < 'bbb' collate utf8_binary_lcase +select * from t1 where utf8_lcase < 'bbb' collate utf8_lcase -- !query schema -struct +struct -- !query output AAA AAA aaa aaa -- !query -select l.utf8_binary, r.utf8_binary_lcase from t1 l join t1 r on l.utf8_binary_lcase = r.utf8_binary_lcase +select l.utf8_binary, r.utf8_lcase from t1 l join t1 r on l.utf8_lcase = r.utf8_lcase -- !query schema -struct +struct -- !query output AAA AAA AAA aaa @@ -120,7 +120,7 @@ bbb bbb -- !query -create table t2(utf8_binary string collate utf8_binary, utf8_binary_lcase string collate utf8_binary_lcase) using parquet +create table t2(utf8_binary string collate utf8_binary, utf8_lcase string collate utf8_lcase) using parquet -- !query schema struct<> -- !query output @@ -144,9 +144,9 @@ struct<> -- !query -select * from t1 anti join t2 on t1.utf8_binary_lcase = t2.utf8_binary_lcase +select * from t1 anti join t2 on t1.utf8_lcase = t2.utf8_lcase -- !query schema -struct +struct -- !query output @@ -168,17 +168,17 @@ struct<> -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query schema -struct +struct -- !query output zzz -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except all select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') except all select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query schema -struct +struct -- !query output aaa bbb @@ -187,9 +187,9 @@ zzz -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query schema -struct +struct -- !query output aaa bbb @@ -197,9 +197,9 @@ zzz -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union all select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('AAA'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') union all select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query 
schema -struct +struct -- !query output AAA BBB @@ -212,16 +212,16 @@ zzz -- !query -select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') intersect select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb') +select col1 collate utf8_lcase from values ('aaa'), ('bbb'), ('BBB'), ('zzz'), ('ZZZ') intersect select col1 collate utf8_lcase from values ('aaa'), ('bbb') -- !query schema -struct +struct -- !query output aaa bbb -- !query -create table t1 (c1 struct) USING PARQUET +create table t1 (c1 struct) USING PARQUET -- !query schema struct<> -- !query output @@ -229,7 +229,7 @@ struct<> -- !query -insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')) +insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_lcase', 'aaa')) -- !query schema struct<> -- !query output @@ -237,7 +237,7 @@ struct<> -- !query -insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_binary_lcase', 'AAA')) +insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_lcase', 'AAA')) -- !query schema struct<> -- !query output @@ -254,7 +254,7 @@ struct -- !query -select count(*) from t1 group by c1.utf8_binary_lcase +select count(*) from t1 group by c1.utf8_lcase -- !query schema struct -- !query output @@ -270,7 +270,7 @@ struct<> -- !query -select array_contains(ARRAY('aaa' collate utf8_binary_lcase),'AAA' collate utf8_binary_lcase) +select array_contains(ARRAY('aaa' collate utf8_lcase),'AAA' collate utf8_lcase) -- !query schema struct -- !query output @@ -278,7 +278,7 @@ true -- !query -select array_position(ARRAY('aaa' collate utf8_binary_lcase, 'bbb' collate utf8_binary_lcase),'BBB' collate utf8_binary_lcase) +select array_position(ARRAY('aaa' collate utf8_lcase, 'bbb' collate utf8_lcase),'BBB' collate utf8_lcase) -- !query schema struct -- !query output @@ -286,23 +286,23 @@ struct -- !query -select nullif('aaa' COLLATE utf8_binary_lcase, 'AAA' COLLATE utf8_binary_lcase) +select nullif('aaa' COLLATE utf8_lcase, 'AAA' COLLATE utf8_lcase) -- !query schema -struct +struct -- !query output NULL -- !query -select least('aaa' COLLATE utf8_binary_lcase, 'AAA' collate utf8_binary_lcase, 'a' collate utf8_binary_lcase) +select least('aaa' COLLATE utf8_lcase, 'AAA' collate utf8_lcase, 'a' collate utf8_lcase) -- !query schema -struct +struct -- !query output a -- !query -select arrays_overlap(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)) +select arrays_overlap(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)) -- !query schema struct -- !query output @@ -310,32 +310,120 @@ true -- !query -select array_distinct(array('aaa' collate utf8_binary_lcase, 'AAA' collate utf8_binary_lcase)) +select array_distinct(array('aaa' collate utf8_lcase, 'AAA' collate utf8_lcase)) -- !query schema -struct> +struct> -- !query output ["aaa"] -- !query -select array_union(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)) +select array_union(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)) -- !query schema -struct> +struct> -- !query output ["aaa"] -- !query -select array_intersect(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)) +select array_intersect(array('aaa' collate utf8_lcase), array('AAA' collate utf8_lcase)) -- !query schema -struct> +struct> -- !query output ["aaa"] -- !query -select array_except(array('aaa' collate utf8_binary_lcase), array('AAA' collate utf8_binary_lcase)) +select array_except(array('aaa' 
collate utf8_lcase), array('AAA' collate utf8_lcase)) -- !query schema -struct> +struct> -- !query output [] + + +-- !query +select 'a' collate unicode < 'A' +-- !query schema +struct<(collate(a) < A):boolean> +-- !query output +true + + +-- !query +select 'a' collate unicode_ci = 'A' +-- !query schema +struct<(collate(a) = A):boolean> +-- !query output +true + + +-- !query +select 'a' collate unicode_ai = 'å' +-- !query schema +struct<(collate(a) = å):boolean> +-- !query output +true + + +-- !query +select 'a' collate unicode_ci_ai = 'Å' +-- !query schema +struct<(collate(a) = Å):boolean> +-- !query output +true + + +-- !query +select 'a' collate en < 'A' +-- !query schema +struct<(collate(a) < A):boolean> +-- !query output +true + + +-- !query +select 'a' collate en_ci = 'A' +-- !query schema +struct<(collate(a) = A):boolean> +-- !query output +true + + +-- !query +select 'a' collate en_ai = 'å' +-- !query schema +struct<(collate(a) = å):boolean> +-- !query output +true + + +-- !query +select 'a' collate en_ci_ai = 'Å' +-- !query schema +struct<(collate(a) = Å):boolean> +-- !query output +true + + +-- !query +select 'Kypper' collate sv < 'Köpfe' +-- !query schema +struct<(collate(Kypper) < Köpfe):boolean> +-- !query output +true + + +-- !query +select 'Kypper' collate de > 'Köpfe' +-- !query schema +struct<(collate(Kypper) > Köpfe):boolean> +-- !query output +true + + +-- !query +select 'I' collate tr_ci = 'ı' +-- !query schema +struct<(collate(I) = ı):boolean> +-- !query output +true diff --git a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out index b79d8b1afb0d4..1255e8b51f301 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out @@ -33,6 +33,28 @@ struct 1 +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query schema +struct +-- !query output +0 + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out index a93bcb7593768..7cf488ce8cad4 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out @@ -33,6 +33,28 @@ struct 1 +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query schema +struct +-- !query output +0 + + -- !query SELECT * FROM ( diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out index ba311c0253ab1..94ef47397eff1 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out @@ -33,6 +33,28 @@ struct 1 +-- !query +SELECT ( + WITH unreferenced AS (SELECT id) + SELECT 1 +) FROM range(1) +-- !query schema +struct +-- !query output +1 + + +-- !query +SELECT ( + WITH unreferenced AS (SELECT 1) + SELECT id +) FROM range(1) +-- !query schema +struct +-- !query output +0 + + -- !query SELECT * FROM ( diff --git 
a/sql/core/src/test/resources/sql-tests/results/describe.sql.out b/sql/core/src/test/resources/sql-tests/results/describe.sql.out index 10c27ea0cc794..004802c48d4c0 100644 --- a/sql/core/src/test/resources/sql-tests/results/describe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/describe.sql.out @@ -536,6 +536,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] @@ -560,6 +561,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t View Original Text SELECT * FROM t +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.default View Query Output Columns [a, b, c, d] diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index 97a3f6c8f2ee2..3830b47ba8a6d 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -975,7 +975,7 @@ Execute CreateViewCommand (1) Output: [] (2) CreateViewCommand -Arguments: `spark_catalog`.`default`.`explain_view`, SELECT key, val FROM explain_temp1, false, false, PersistedView, true +Arguments: `spark_catalog`.`default`.`explain_view`, SELECT key, val FROM explain_temp1, false, false, PersistedView, COMPENSATION, true (3) LogicalRelation Arguments: parquet, [key#x, val#x], `spark_catalog`.`default`.`explain_temp1`, false diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index e21d968eb252e..c0dee38e6d07a 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -870,7 +870,7 @@ Execute CreateViewCommand (1) Output: [] (2) CreateViewCommand -Arguments: `spark_catalog`.`default`.`explain_view`, SELECT key, val FROM explain_temp1, false, false, PersistedView, true +Arguments: `spark_catalog`.`default`.`explain_view`, SELECT key, val FROM explain_temp1, false, false, PersistedView, COMPENSATION, true (3) LogicalRelation Arguments: parquet, [key#x, val#x], `spark_catalog`.`default`.`explain_temp1`, false diff --git a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out index ee4525285a9be..7bfc35a61e092 100644 --- a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out @@ -33,6 +33,28 @@ org.apache.spark.sql.AnalysisException } +-- !query +select ceil(x -> x) as v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INVALID_LAMBDA_FUNCTION_CALL.NON_HIGHER_ORDER_FUNCTION", + "sqlState" : "42K0D", + "messageParameters" : { + "class" : "org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder$" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 19, + "fragment" : "ceil(x -> x)" + } ] +} + + -- !query select transform(zs, z -> z) as v from nested -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 9dfc6a66b0782..2aa809324a763 100644 
--- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -843,7 +843,7 @@ org.apache.spark.sql.AnalysisException -- !query -CREATE TABLE IDENTIFIER(1)(c1 INT) +CREATE TABLE IDENTIFIER(1)(c1 INT) USING csv -- !query schema struct<> -- !query output @@ -867,7 +867,7 @@ org.apache.spark.sql.AnalysisException -- !query -CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) +CREATE TABLE IDENTIFIER('a.b.c')(c1 INT) USING csv -- !query schema struct<> -- !query output @@ -1059,6 +1059,62 @@ org.apache.spark.sql.catalyst.parser.ParseException } +-- !query +create temporary view identifier('v1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +cache table identifier('t1') as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +create table identifier('t2') using csv as (select my_col from (values (1), (2), (1) as (my_col)) group by 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +insert into identifier('t2') select my_col from (values (3) as (my_col)) group by 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +drop view v1 +-- !query schema +struct<> +-- !query output + + + +-- !query +drop table t1 +-- !query schema +struct<> +-- !query output + + + +-- !query +drop table t2 +-- !query schema +struct<> +-- !query output + + + -- !query SELECT row_number() OVER IDENTIFIER('x.win') FROM VALUES(1) AS T(c1) WINDOW win AS (ORDER BY c1) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out index 884f17c23eb00..e304509aa6d75 100644 --- a/sql/core/src/test/resources/sql-tests/results/keywords.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/keywords.sql.out @@ -20,9 +20,11 @@ AS false ASC false AT false AUTHORIZATION false +BEGIN false BETWEEN false BIGINT false BINARY false +BINDING false BOOLEAN false BOTH false BUCKET false @@ -30,6 +32,7 @@ BUCKETS false BY false BYTE false CACHE false +CALLED false CASCADE false CASE false CAST false @@ -52,9 +55,11 @@ COMMENT false COMMIT false COMPACT false COMPACTIONS false +COMPENSATION false COMPUTE false CONCATENATE false CONSTRAINT false +CONTAINS false COST false CREATE false CROSS false @@ -81,10 +86,12 @@ DECIMAL false DECLARE false DEFAULT false DEFINED false +DEFINER false DELETE false DELIMITED false DESC false DESCRIBE false +DETERMINISTIC false DFS false DIRECTORIES false DIRECTORY false @@ -97,6 +104,7 @@ ELSE false END false ESCAPE false ESCAPED false +EVOLUTION false EXCEPT false EXCHANGE false EXCLUDE false @@ -143,6 +151,7 @@ INDEX false INDEXES false INNER false INPATH false +INPUT false INPUTFORMAT false INSERT false INT false @@ -150,10 +159,12 @@ INTEGER false INTERSECT false INTERVAL false INTO false +INVOKER false IS false ITEMS false JOIN false KEYS false +LANGUAGE false LAST false LATERAL false LAZY false @@ -181,6 +192,7 @@ MILLISECONDS false MINUS false MINUTE false MINUTES false +MODIFIES false MONTH false MONTHS false MSCK false @@ -214,8 +226,6 @@ PARTITION false PARTITIONED false PARTITIONS false PERCENT false -PERCENTILE_CONT false -PERCENTILE_DISC false PIVOT false PLACING false POSITION false @@ -227,6 +237,7 @@ PURGE false QUARTER false QUERY false RANGE false +READS false REAL false RECORDREADER false RECORDWRITER false @@ -241,6 
+252,8 @@ REPLACE false RESET false RESPECT false RESTRICT false +RETURN false +RETURNS false REVOKE false RIGHT false ROLE false @@ -253,6 +266,7 @@ SCHEMA false SCHEMAS false SECOND false SECONDS false +SECURITY false SELECT false SEMI false SEPARATED false @@ -270,6 +284,8 @@ SOME false SORT false SORTED false SOURCE false +SPECIFIC false +SQL false START false STATISTICS false STORED false diff --git a/sql/core/src/test/resources/sql-tests/results/math.sql.out b/sql/core/src/test/resources/sql-tests/results/math.sql.out index d3df5cb933574..09f4383933288 100644 --- a/sql/core/src/test/resources/sql-tests/results/math.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/math.sql.out @@ -493,3 +493,91 @@ SELECT conv('-9223372036854775807', 36, 10) struct -- !query output 18446744073709551615 + + +-- !query +SELECT BIN(0) +-- !query schema +struct +-- !query output +0 + + +-- !query +SELECT BIN(25) +-- !query schema +struct +-- !query output +11001 + + +-- !query +SELECT BIN(25L) +-- !query schema +struct +-- !query output +11001 + + +-- !query +SELECT BIN(25.5) +-- !query schema +struct +-- !query output +11001 + + +-- !query +SELECT POSITIVE(0Y) +-- !query schema +struct<(+ 0):tinyint> +-- !query output +0 + + +-- !query +SELECT POSITIVE(25) +-- !query schema +struct<(+ 25):int> +-- !query output +25 + + +-- !query +SELECT POSITIVE(-25L) +-- !query schema +struct<(+ -25):bigint> +-- !query output +-25 + + +-- !query +SELECT POSITIVE(25.5) +-- !query schema +struct<(+ 25.5):decimal(3,1)> +-- !query output +25.5 + + +-- !query +SELECT POSITIVE("25.5") +-- !query schema +struct<(+ 25.5):double> +-- !query output +25.5 + + +-- !query +SELECT POSITIVE("invalid") +-- !query schema +struct<(+ invalid):double> +-- !query output +NULL + + +-- !query +SELECT POSITIVE(null) +-- !query schema +struct<(+ NULL):double> +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out b/sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out index 64a6c4b23722a..299730fdf3787 100644 --- a/sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/named-function-arguments.sql.out @@ -188,7 +188,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_TABLE_ARGUMENT", "sqlState" : "0A000", "messageParameters" : { - "treeNode" : "'Generate explode(table-argument#x []), false\n: +- SubqueryAlias v\n: +- View (`v`, [id#xL])\n: +- Project [cast(id#xL as bigint) AS id#xL]\n: +- Project [id#xL]\n: +- Range (0, 8, step=1, splits=None)\n+- OneRowRelation\n" + "treeNode" : "'Generate explode(table-argument#x []), false\n: +- SubqueryAlias v\n: +- View (`v`, [id#xL])\n: +- Project [cast(id#xL as bigint) AS id#xL]\n: +- Project [id#xL]\n: +- Range (0, 8, step=1)\n+- OneRowRelation\n" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out index 5a36cfd2369b4..4c2fd671229a1 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/create_view.sql.out @@ -267,6 +267,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table +View Schema Mode COMPENSATION View Catalog and 
Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] @@ -332,6 +333,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM base_table View Original Text SELECT * FROM base_table +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] @@ -387,6 +389,7 @@ View Text SELECT t1.a AS t1_a, t2.a AS t2_a View Original Text SELECT t1.a AS t1_a, t2.a AS t2_a FROM base_table t1, base_table2 t2 WHERE t1.id = t2.id +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [t1_a, t2_a] @@ -459,6 +462,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) View Original Text SELECT * FROM base_table WHERE id IN (SELECT id FROM base_table2) +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] @@ -489,6 +493,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 View Original Text SELECT t1.id, t2.a FROM base_table t1, (SELECT * FROM base_table2) t2 +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [id, a] @@ -519,6 +524,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1 FROM base_table2) +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] @@ -549,6 +555,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) View Original Text SELECT * FROM base_table WHERE NOT EXISTS (SELECT 1 FROM base_table2) +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] @@ -579,6 +586,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) View Original Text SELECT * FROM base_table WHERE EXISTS (SELECT 1) +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.temp_view_test View Query Output Columns [a, id] @@ -790,6 +798,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t1 CROSS JOIN t2 View Original Text SELECT * FROM t1 CROSS JOIN t2 +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] @@ -840,6 +849,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 INNER JOIN t2 ON t1.num = t2.num2 +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] @@ -890,6 +900,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 View Original Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] @@ -940,6 +951,7 @@ Created By [not included in comparison] Type VIEW View Text SELECT * FROM t1 LEFT JOIN t2 ON t1.num = t2.num2 AND t2.value = 'xxx' View Original Text SELECT * FROM t1 LEFT JOIN t2 
ON t1.num = t2.num2 AND t2.value = 'xxx' +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [num, name, num2, value] @@ -1060,6 +1072,7 @@ AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) View Original Text SELECT * FROM tbl1 WHERE tbl1.a BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] @@ -1099,6 +1112,7 @@ View Original Text SELECT * FROM tbl1 WHERE tbl1.a BETWEEN (SELECT d FROM tbl2 WHERE c = 1) AND (SELECT e FROM tbl3 WHERE f = 2) AND EXISTS (SELECT g FROM tbl4 LEFT JOIN tbl3 ON tbl4.h = tbl3.f) AND NOT EXISTS (SELECT g FROM tbl4 LEFT JOIN tmptbl ON tbl4.h = tmptbl.j) +View Schema Mode COMPENSATION View Catalog and Namespace spark_catalog.testviewschm2 View Query Output Columns [a, b] diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out index 71c342054ae47..5b97f2a27b8ed 100644 --- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -1,4 +1,172 @@ -- Automatically generated by SQLQueryTestSuite +-- !query +select not true +-- !query schema +struct<(NOT true):boolean> +-- !query output +false + + +-- !query +select ! true +-- !query schema +struct<(NOT true):boolean> +-- !query output +false + + +-- !query +select not null::boolean +-- !query schema +struct<(NOT CAST(NULL AS BOOLEAN)):boolean> +-- !query output +NULL + + +-- !query +select true and true +-- !query schema +struct<(true AND true):boolean> +-- !query output +true + + +-- !query +select true and false +-- !query schema +struct<(true AND false):boolean> +-- !query output +false + + +-- !query +select false and true +-- !query schema +struct<(false AND true):boolean> +-- !query output +false + + +-- !query +select false and false +-- !query schema +struct<(false AND false):boolean> +-- !query output +false + + +-- !query +select true and null::boolean +-- !query schema +struct<(true AND CAST(NULL AS BOOLEAN)):boolean> +-- !query output +NULL + + +-- !query +select false and null::boolean +-- !query schema +struct<(false AND CAST(NULL AS BOOLEAN)):boolean> +-- !query output +false + + +-- !query +select null::boolean and true +-- !query schema +struct<(CAST(NULL AS BOOLEAN) AND true):boolean> +-- !query output +NULL + + +-- !query +select null::boolean and false +-- !query schema +struct<(CAST(NULL AS BOOLEAN) AND false):boolean> +-- !query output +false + + +-- !query +select null::boolean and null::boolean +-- !query schema +struct<(CAST(NULL AS BOOLEAN) AND CAST(NULL AS BOOLEAN)):boolean> +-- !query output +NULL + + +-- !query +select true or true +-- !query schema +struct<(true OR true):boolean> +-- !query output +true + + +-- !query +select true or false +-- !query schema +struct<(true OR false):boolean> +-- !query output +true + + +-- !query +select false or true +-- !query schema +struct<(false OR true):boolean> +-- !query output +true + + +-- !query +select false or false +-- !query schema +struct<(false OR false):boolean> +-- !query output +false + + +-- !query +select true or null::boolean +-- !query schema +struct<(true OR CAST(NULL AS BOOLEAN)):boolean> +-- !query output +true + + +-- !query +select false or 
null::boolean +-- !query schema +struct<(false OR CAST(NULL AS BOOLEAN)):boolean> +-- !query output +NULL + + +-- !query +select null::boolean or true +-- !query schema +struct<(CAST(NULL AS BOOLEAN) OR true):boolean> +-- !query output +true + + +-- !query +select null::boolean or false +-- !query schema +struct<(CAST(NULL AS BOOLEAN) OR false):boolean> +-- !query output +NULL + + +-- !query +select null::boolean or null::boolean +-- !query schema +struct<(CAST(NULL AS BOOLEAN) OR CAST(NULL AS BOOLEAN)):boolean> +-- !query output +NULL + + -- !query select 1 = 1 -- !query schema @@ -517,3 +685,59 @@ select rand(123) not between 0.1 AND 0.2 struct<(NOT between(rand(123), 0.1, 0.2)):boolean> -- !query output false + + +-- !query +set spark.sql.legacy.bangEqualsNot=true +-- !query schema +struct +-- !query output +spark.sql.legacy.bangEqualsNot true + + +-- !query +select 1 ! between 0 and 2 +-- !query schema +struct<(NOT between(1, 0, 2)):boolean> +-- !query output +false + + +-- !query +select 1 ! in (3, 4) +-- !query schema +struct<(NOT (1 IN (3, 4))):boolean> +-- !query output +true + + +-- !query +select 'hello' ! like 'world' +-- !query schema +struct<(NOT hello LIKE world):boolean> +-- !query output +true + + +-- !query +select 1 is ! null +-- !query schema +struct<(1 IS NOT NULL):boolean> +-- !query output +true + + +-- !query +select false is ! true +-- !query schema +struct<(NOT (false <=> true)):boolean> +-- !query output +true + + +-- !query +set spark.sql.legacy.bangEqualsNot=false +-- !query schema +struct +-- !query output +spark.sql.legacy.bangEqualsNot false diff --git a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out index dcb96b9d2dce6..ad96e7e106ad9 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-create-table.sql.out @@ -327,6 +327,7 @@ struct CREATE VIEW default.view_spark_30302 ( aaa, bbb) +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -338,6 +339,7 @@ struct CREATE VIEW default.view_spark_30302 ( aaa, bbb) +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -368,6 +370,7 @@ CREATE VIEW default.view_spark_30302 ( aaa COMMENT 'comment with \'quoted text\' for aaa', bbb) COMMENT 'This is a comment with \'quoted text\' for view' +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -380,6 +383,7 @@ CREATE VIEW default.view_spark_30302 ( aaa COMMENT 'comment with \'quoted text\' for aaa', bbb) COMMENT 'This is a comment with \'quoted text\' for view' +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -412,6 +416,7 @@ CREATE VIEW default.view_spark_30302 ( TBLPROPERTIES ( 'a' = '1', 'b' = '2') +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl @@ -426,6 +431,7 @@ CREATE VIEW default.view_spark_30302 ( TBLPROPERTIES ( 'a' = '1', 'b' = '2') +WITH SCHEMA COMPENSATION AS SELECT a, b FROM tbl diff --git a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out index 442f0fe5d5f27..cc32e2eff2551 100644 --- a/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/show-tables.sql.out @@ -124,6 +124,7 @@ Created Time [not included in comparison] Last Access [not included in comparison] Created By [not included in comparison] Type: VIEW +View Schema Mode: BINDING Schema: root |-- e: integer (nullable = true) diff --git 
a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 91ad830dd3d7a..d42c387c8057f 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -835,6 +835,110 @@ org.apache.spark.SparkIllegalArgumentException } +-- !query +set spark.sql.legacy.codingErrorAction=true +-- !query schema +struct +-- !query output +spark.sql.legacy.codingErrorAction true + + +-- !query +select encode('渭城朝雨浥轻尘', 'US-ASCII') +-- !query schema +struct +-- !query output +??????? + + +-- !query +select encode(scol, ecol) from values('渭城朝雨浥轻尘', 'US-ASCII') as t(scol, ecol) +-- !query schema +struct +-- !query output +??????? + + +-- !query +set spark.sql.legacy.codingErrorAction=false +-- !query schema +struct +-- !query output +spark.sql.legacy.codingErrorAction false + + +-- !query +select encode('客舍青青柳色新', 'US-ASCII') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "MALFORMED_CHARACTER_CODING", + "sqlState" : "22000", + "messageParameters" : { + "charset" : "US-ASCII", + "function" : "`encode`" + } +} + + +-- !query +select encode(scol, ecol) from values('客舍青青柳色新', 'US-ASCII') as t(scol, ecol) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "MALFORMED_CHARACTER_CODING", + "sqlState" : "22000", + "messageParameters" : { + "charset" : "US-ASCII", + "function" : "`encode`" + } +} + + +-- !query +select encode(decode(encode('白日依山尽,黄河入海流。欲穷千里目,更上一层楼。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +白日依山尽,黄河入海流。欲穷千里目,更上一层楼。 + + +-- !query +select encode(decode(encode('南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +南山經之首曰䧿山。其首曰招搖之山,臨於西海之上。 + + +-- !query +select encode(decode(encode('세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark + + +-- !query +select encode(decode(encode('το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως + + +-- !query +select encode(decode(encode('Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。', 'UTF-16'), 'UTF-16'), 'UTF-8') +-- !query schema +struct +-- !query output +Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。 + + -- !query select decode() -- !query schema @@ -893,6 +997,14 @@ struct abc +-- !query +select decode(encode('大千世界', 'utf-32'), 'utf-32') +-- !query schema +struct +-- !query output +大千世界 + + -- !query select decode(1, 1, 'Southlake') -- !query schema @@ -1049,6 +1161,70 @@ org.apache.spark.SparkIllegalArgumentException } +-- !query +set spark.sql.legacy.codingErrorAction=true +-- !query schema +struct +-- !query output +spark.sql.legacy.codingErrorAction true + + +-- !query +select decode(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') +-- !query schema +struct +-- !query output +��������������������� + + +-- !query +select decode(scol, ecol) from values(X'E58A9DE5909BE69BB4E5B0BDE4B880E69DAFE98592', 'US-ASCII') as t(scol, ecol) +-- !query schema +struct +-- !query output +��������������������� + + +-- !query +set spark.sql.legacy.codingErrorAction=false +-- !query schema +struct +-- !query output 
+spark.sql.legacy.codingErrorAction false + + +-- !query +select decode(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "MALFORMED_CHARACTER_CODING", + "sqlState" : "22000", + "messageParameters" : { + "charset" : "US-ASCII", + "function" : "`decode`" + } +} + + +-- !query +select decode(scol, ecol) from values(X'E8A5BFE587BAE998B3E585B3E697A0E69585E4BABA', 'US-ASCII') as t(scol, ecol) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +{ + "errorClass" : "MALFORMED_CHARACTER_CODING", + "sqlState" : "22000", + "messageParameters" : { + "charset" : "US-ASCII", + "function" : "`decode`" + } +} + + -- !query SELECT CONTAINS(null, 'Spark') -- !query schema @@ -1990,3 +2166,106 @@ select luhn_check(123.456) struct -- !query output false + + +-- !query +select is_valid_utf8('') +-- !query schema +struct +-- !query output +true + + +-- !query +select is_valid_utf8('abc') +-- !query schema +struct +-- !query output +true + + +-- !query +select is_valid_utf8(x'80') +-- !query schema +struct +-- !query output +false + + +-- !query +select make_valid_utf8('') +-- !query schema +struct +-- !query output + + + +-- !query +select make_valid_utf8('abc') +-- !query schema +struct +-- !query output +abc + + +-- !query +select make_valid_utf8(x'80') +-- !query schema +struct +-- !query output +� + + +-- !query +select validate_utf8('') +-- !query schema +struct +-- !query output + + + +-- !query +select validate_utf8('abc') +-- !query schema +struct +-- !query output +abc + + +-- !query +select validate_utf8(x'80') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkIllegalArgumentException +{ + "errorClass" : "INVALID_UTF8_STRING", + "sqlState" : "22029", + "messageParameters" : { + "str" : "\\x80" + } +} + + +-- !query +select try_validate_utf8('') +-- !query schema +struct +-- !query output + + + +-- !query +select try_validate_utf8('abc') +-- !query schema +struct +-- !query output +abc + + +-- !query +select try_validate_utf8(x'80') +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-in-join-condition.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-in-join-condition.sql.out index b490704bebc57..c9c68a5f0602b 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-in-join-condition.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-in-join-condition.sql.out @@ -472,3 +472,33 @@ struct 1 1 1 4 2 1 NULL NULL 3 4 NULL NULL + + +-- !query +select * from x join y on x1 = y1 and exists (select * from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_EXPRESSION_IN_JOIN_CONDITION", + "sqlState" : "0A000", + "messageParameters" : { + "subqueryExpression" : "exists(x.x2, y.y2, (z.z2 = x.x2), (z.z2 = y.y2))" + } +} + + +-- !query +select * from x join y on x1 = y1 and not exists (select * from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_EXPRESSION_IN_JOIN_CONDITION", + "sqlState" : "0A000", + 
"messageParameters" : { + "subqueryExpression" : "exists(x.x2, y.y2, (z.z2 = x.x2), (z.z2 = y.y2))" + } +} diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-subquery-in-join-condition.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-subquery-in-join-condition.sql.out index 9f829d522ad25..13af4c81173ae 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-subquery-in-join-condition.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-subquery-in-join-condition.sql.out @@ -434,3 +434,33 @@ struct 1 1 1 4 2 1 NULL NULL 3 4 NULL NULL + + +-- !query +select * from x left join y on x1 = y1 and x2 IN (select z1 from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_EXPRESSION_IN_JOIN_CONDITION", + "sqlState" : "0A000", + "messageParameters" : { + "subqueryExpression" : "(x.x2 IN (listquery(x.x2, y.y2, (z.z2 = x.x2), (z.z2 = y.y2))))" + } +} + + +-- !query +select * from x left join y on x1 = y1 and x2 not IN (select z1 from z where z2 = x2 AND z2 = y2) order by x1, x2, y1, y2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.UNSUPPORTED_CORRELATED_EXPRESSION_IN_JOIN_CONDITION", + "sqlState" : "0A000", + "messageParameters" : { + "subqueryExpression" : "(x.x2 IN (listquery(x.x2, y.y2, (z.z2 = x.x2), (z.z2 = y.y2))))" + } +} diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-group-by.sql.out new file mode 100644 index 0000000000000..85ebd91c28c9c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-group-by.sql.out @@ -0,0 +1,207 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +create temp view x (x1, x2) as values (1, 1), (2, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +create temp view y (y1, y2) as values (2, 0), (3, -1) +-- !query schema +struct<> +-- !query output + + + +-- !query +create temp view z (z1, z2) as values (1, 0), (1, 1) +-- !query schema +struct<> +-- !query output + + + +-- !query +select * from x where (select count(*) from y where y1 = x1 group by y1) = 1 +-- !query schema +struct +-- !query output +2 2 + + +-- !query +select * from x where (select count(*) from y where y1 = x1 group by x1) = 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE", + "sqlState" : "0A000", + "messageParameters" : { + "sqlExprs" : "\"x1\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 61, + "stopIndex" : 71, + "fragment" : "group by x1" + } ] +} + + +-- !query +select * from x where (select count(*) from y where y1 > x1 group by x1) = 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE", + "sqlState" : "0A000", + "messageParameters" : { + "sqlExprs" : "\"x1\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 61, + 
"stopIndex" : 71, + "fragment" : "group by x1" + } ] +} + + +-- !query +select *, (select count(*) from y where x1 = y1 and y2 = 1 group by y2) from x +-- !query schema +struct +-- !query output +1 1 NULL +2 2 NULL + + +-- !query +select *, (select count(*) from y where x1 = y1 and y2 = x1 + 1 group by y2) from x +-- !query schema +struct +-- !query output +1 1 NULL +2 2 NULL + + +-- !query +select * from x where (select count(*) from y where y1 > x1 group by y1) = 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.NON_CORRELATED_COLUMNS_IN_GROUP_BY", + "sqlState" : "0A000", + "messageParameters" : { + "value" : "y1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 23, + "stopIndex" : 72, + "fragment" : "(select count(*) from y where y1 > x1 group by y1)" + } ] +} + + +-- !query +select *, (select count(*) from y where y1 + y2 = x1 group by y1) from x +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.NON_CORRELATED_COLUMNS_IN_GROUP_BY", + "sqlState" : "0A000", + "messageParameters" : { + "value" : "y1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 11, + "stopIndex" : 65, + "fragment" : "(select count(*) from y where y1 + y2 = x1 group by y1)" + } ] +} + + +-- !query +select *, (select count(*) from (select * from y where y1 = x1 union all select * from y) sub group by y1) from x +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.NON_CORRELATED_COLUMNS_IN_GROUP_BY", + "sqlState" : "0A000", + "messageParameters" : { + "value" : "y1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 11, + "stopIndex" : 106, + "fragment" : "(select count(*) from (select * from y where y1 = x1 union all select * from y) sub group by y1)" + } ] +} + + +-- !query +select *, (select count(*) from y left join (select * from z where z1 = x1) sub on y2 = z2 group by z1) from x +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.NON_CORRELATED_COLUMNS_IN_GROUP_BY", + "sqlState" : "0A000", + "messageParameters" : { + "value" : "z1" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 11, + "stopIndex" : 103, + "fragment" : "(select count(*) from y left join (select * from z where z1 = x1) sub on y2 = z2 group by z1)" + } ] +} + + +-- !query +set spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate = true +-- !query schema +struct +-- !query output +spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate true + + +-- !query +select * from x where (select count(*) from y where y1 > x1 group by y1) = 1 +-- !query schema +struct +-- !query output +1 1 +1 1 +2 2 + + +-- !query +reset spark.sql.legacy.scalarSubqueryAllowGroupByNonEqualityCorrelatedPredicate +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/to_from_avro.sql.out b/sql/core/src/test/resources/sql-tests/results/to_from_avro.sql.out new file mode 100644 index 0000000000000..f9f491bd70fd1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/to_from_avro.sql.out 
@@ -0,0 +1,144 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +create table t as + select named_struct('u', named_struct('member0', member0, 'member1', member1)) as s + from values (1, null), (null, 'a') tab(member0, member1) +-- !query schema +struct<> +-- !query output + + + +-- !query +declare avro_schema string +-- !query schema +struct<> +-- !query output + + + +-- !query +set variable avro_schema = + '{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] }' +-- !query schema +struct<> +-- !query output + + + +-- !query +select from_avro(s, 42, map()) from t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + "sqlState" : "42K09", + "messageParameters" : { + "hint" : "", + "msg" : "The second argument of the FROM_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value from AVRO format", + "sqlExpr" : "\"fromavro(s, 42, map())\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 30, + "fragment" : "from_avro(s, 42, map())" + } ] +} + + +-- !query +select from_avro(s, avro_schema, 42) from t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + "sqlState" : "42K09", + "messageParameters" : { + "hint" : "", + "msg" : "The third argument of the FROM_AVRO SQL function must be a constant map of strings to strings containing the options to use for converting the value from AVRO format", + "sqlExpr" : "\"fromavro(s, variablereference(system.session.avro_schema='{ \"type\": \"record\", \"name\": \"struct\", \"fields\": [{ \"name\": \"u\", \"type\": [\"int\",\"string\"] }] }'), 42)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 36, + "fragment" : "from_avro(s, avro_schema, 42)" + } ] +} + + +-- !query +select to_avro(s, 42) from t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + "sqlState" : "42K09", + "messageParameters" : { + "hint" : "", + "msg" : "The second argument of the TO_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value to AVRO format", + "sqlExpr" : "\"toavro(s, 42)\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 21, + "fragment" : "to_avro(s, 42)" + } ] +} + + +-- !query +select to_avro(s, avro_schema) as result from t +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", + "sqlState" : "22KD3", + "messageParameters" : { + "functionName" : "TO_AVRO" + } +} + + +-- !query +select from_avro(result, avro_schema, map()).u from (select null as result) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", + "sqlState" : "22KD3", + "messageParameters" : { + "functionName" : "FROM_AVRO" + } +} + + +-- !query +drop temporary variable avro_schema +-- !query schema +struct<> +-- !query output + + + +-- !query +drop table t +-- !query schema +struct<> +-- !query output + diff 
--git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index ab726b93c07c8..7975392fd0147 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -837,3 +837,13 @@ struct 3 3 3 3 3 3 + + +-- !query +SELECT TRANSFORM (a, b) + USING 'cat' AS (a CHAR(10), b VARCHAR(10)) +FROM VALUES('apache', 'spark') t(a, b) +-- !query schema +struct +-- !query output +apache spark diff --git a/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out index fa83652da0edc..b12680c2a6751 100644 --- a/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out @@ -15,6 +15,22 @@ struct NULL +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_add(2147483647, "1") +-- !query schema +struct +-- !query output +2.147483648E9 + + -- !query SELECT try_add(-2147483648, -1) -- !query schema @@ -249,6 +265,22 @@ struct NULL +-- !query +SELECT try_divide(1, decimal(0)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_divide(1, "0") +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT try_divide(interval 2 year, 2) -- !query schema @@ -313,6 +345,22 @@ struct NULL +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query schema +struct +-- !query output +2.147483648E9 + + -- !query SELECT try_subtract(-2147483648, 1) -- !query schema @@ -409,6 +457,22 @@ struct NULL +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query schema +struct +-- !query output +-4294967294 + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query schema +struct +-- !query output +-4.294967294E9 + + -- !query SELECT try_multiply(-2147483648, 2) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out.java21 b/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out.java21 index dcdb9d0dcb194..002a0dfcf37ef 100644 --- a/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out.java21 +++ b/sql/core/src/test/resources/sql-tests/results/try_arithmetic.sql.out.java21 @@ -15,6 +15,22 @@ struct NULL +-- !query +SELECT try_add(2147483647, decimal(1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_add(2147483647, "1") +-- !query schema +struct +-- !query output +2.147483648E9 + + -- !query SELECT try_add(-2147483648, -1) -- !query schema @@ -249,6 +265,22 @@ struct NULL +-- !query +SELECT try_divide(1, decimal(0)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_divide(1, "0") +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT try_divide(interval 2 year, 2) -- !query schema @@ -313,6 +345,22 @@ struct NULL +-- !query +SELECT try_subtract(2147483647, decimal(-1)) +-- !query schema +struct +-- !query output +2147483648 + + +-- !query +SELECT try_subtract(2147483647, "-1") +-- !query schema +struct +-- !query output +2.147483648E9 + + -- !query SELECT try_subtract(-2147483648, 1) -- !query schema @@ -409,6 +457,22 @@ struct NULL +-- !query +SELECT try_multiply(2147483647, decimal(-2)) +-- !query schema +struct +-- 
!query output +-4294967294 + + +-- !query +SELECT try_multiply(2147483647, "-2") +-- !query schema +struct +-- !query output +-4.294967294E9 + + -- !query SELECT try_multiply(-2147483648, 2) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/udtf/udtf.sql.out b/sql/core/src/test/resources/sql-tests/results/udtf/udtf.sql.out index 78ad8b7c02cd5..f99c6c30c07e2 100644 --- a/sql/core/src/test/resources/sql-tests/results/udtf/udtf.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udtf/udtf.sql.out @@ -1069,6 +1069,32 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException } +-- !query +SELECT * FROM UDTFPartitionByIndexingBug( + TABLE( + SELECT + 5 AS unused_col, + 'hi' AS partition_col, + 1.0 AS double_col + + UNION ALL + + SELECT + 4 AS unused_col, + 'hi' AS partition_col, + 1.0 AS double_col + ) +) +-- !query schema +struct +-- !query output +NULL 1.0 +NULL 1.0 +NULL 1.0 +NULL 1.0 +NULL 1.0 + + -- !query DROP VIEW t1 -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out new file mode 100644 index 0000000000000..b0d497e070477 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-binding-config.sql.out @@ -0,0 +1,1229 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SET spark.sql.legacy.viewSchemaBindingMode +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaBindingMode true + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = false +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaBindingMode false + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "FEATURE_NOT_ENABLED", + "sqlState" : "56038", + "messageParameters" : { + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", + "featureName" : "VIEW ... WITH SCHEMA ..." + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 44, + "fragment" : "WITH SCHEMA BINDING" + } ] +} + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "FEATURE_NOT_ENABLED", + "sqlState" : "56038", + "messageParameters" : { + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", + "featureName" : "VIEW ... WITH SCHEMA ..." + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 49, + "fragment" : "WITH SCHEMA COMPENSATION" + } ] +} + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "FEATURE_NOT_ENABLED", + "sqlState" : "56038", + "messageParameters" : { + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", + "featureName" : "VIEW ... WITH SCHEMA ..." 
+ }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 51, + "fragment" : "WITH SCHEMA TYPE EVOLUTION" + } ] +} + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT 1 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException +{ + "errorClass" : "FEATURE_NOT_ENABLED", + "sqlState" : "56038", + "messageParameters" : { + "configKey" : "spark.sql.legacy.viewSchemaBindingMode", + "configValue" : "true", + "featureName" : "VIEW ... WITH SCHEMA ..." + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 26, + "stopIndex" : 46, + "fragment" : "WITH SCHEMA EVOLUTION" + } ] +} + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT 1 +View Original Text SELECT 1 +View Catalog and Namespace spark_catalog.default +View Query Output Columns [1] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query schema +struct +-- !query output +default v false Catalog: spark_catalog +Database: default +Table: v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type: VIEW +View Text: SELECT 1 +View Original Text: SELECT 1 +View Catalog and Namespace: spark_catalog.default +View Query Output Columns: [1] +Schema: root + |-- 1: integer (nullable = false) + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct +-- !query output +CREATE VIEW default.v ( + `1`) +AS SELECT 1 + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +1 int + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query schema +struct +-- !query output + v true Table: v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type: VIEW +View Text: SELECT 1 +View Catalog and Namespace: spark_catalog.default +View Query Output Columns: [1] +Schema: root + |-- 1: integer (nullable = false) + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct +-- !query output +CREATE 
VIEW default.v ( + c1) +AS SELECT * FROM t + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = true +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaBindingMode true + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = false +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaCompensation false + + +-- !query +SET spark.sql.ansi.enabled = false +-- !query schema +struct +-- !query output +spark.sql.ansi.enabled false + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct +-- !query output +CREATE VIEW default.v ( + c1) +WITH SCHEMA BINDING +AS SELECT * FROM t + + +-- !query +DROP TABLE t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "CANNOT_UP_CAST_DATATYPE", + "sqlState" : "42846", + "messageParameters" : { + "details" : "The type path of the target object is:\n\nYou can either add an explicit cast to the input data or choose a higher precision type of the field in the target object", + "expression" : "spark_catalog.default.t.c1", + "sourceType" : "\"BIGINT\"", + "targetType" : "\"INT\"" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct 
+-- !query output +CREATE VIEW default.v ( + c1) +WITH SCHEMA BINDING +AS SELECT * FROM t + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = true +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaCompensation true + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct +-- !query output +CREATE VIEW default.v ( + c1) +WITH SCHEMA COMPENSATION +AS SELECT * FROM t + + +-- !query +DROP TABLE t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct +-- !query output +CREATE VIEW default.v ( + c1) +WITH SCHEMA COMPENSATION +AS SELECT * FROM t + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING NOT NULL, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES ('1', 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +INSERT INTO t VALUES ('a', 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'a'", + "sourceType" : "\"STRING\"", + 
"targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 15, + "fragment" : "v" + } ] +} + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 MAP, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION", + "sqlState" : "42K09", + "messageParameters" : { + "sqlExpr" : "\"c1\"", + "srcType" : "\"MAP\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 15, + "fragment" : "v" + } ] +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c3 INT NOT NULL, c2 INT) USING 
PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c1", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = false +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaBindingMode false + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = false +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaCompensation false + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = true +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaBindingMode true + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT 1 +View Original Text SELECT 1 +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [1] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query schema +struct +-- !query output +default v false Catalog: spark_catalog +Database: default +Table: v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type: VIEW +View Text: SELECT 1 +View Original Text: SELECT 1 +View Schema Mode: BINDING +View Catalog and Namespace: spark_catalog.default +View Query Output Columns: [1] +Schema: root + |-- 1: integer (nullable = false) + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct +-- !query output +CREATE VIEW default.v ( + `1`) +WITH SCHEMA BINDING +AS SELECT 1 + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = true +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaCompensation true + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT 1 +View Original Text SELECT 1 +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [1] + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query schema +struct +-- !query output +default v false Catalog: spark_catalog +Database: default +Table: v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included 
in comparison] +Type: VIEW +View Text: SELECT 1 +View Original Text: SELECT 1 +View Schema Mode: BINDING +View Catalog and Namespace: spark_catalog.default +View Query Output Columns: [1] +Schema: root + |-- 1: integer (nullable = false) + + +-- !query +SHOW CREATE TABLE v +-- !query schema +struct +-- !query output +CREATE VIEW default.v ( + `1`) +WITH SCHEMA BINDING +AS SELECT 1 + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = false +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaBindingMode false + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = false +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaCompensation false + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW v AS SELECT 1 +-- !query schema +struct<> +-- !query output + + + +-- !query +SET spark.sql.legacy.viewSchemaBindingMode = true +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaBindingMode true + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +1 int + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query schema +struct +-- !query output + v true Table: v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type: VIEW +View Text: SELECT 1 +View Schema Mode: BINDING +View Catalog and Namespace: spark_catalog.default +View Query Output Columns: [1] +Schema: root + |-- 1: integer (nullable = false) + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation = true +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaCompensation true + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +1 int + + +-- !query +SHOW TABLE EXTENDED LIKE 'v' +-- !query schema +struct +-- !query output + v true Table: v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type: VIEW +View Text: SELECT 1 +View Schema Mode: BINDING +View Catalog and Namespace: spark_catalog.default +View Query Output Columns: [1] +Schema: root + |-- 1: integer (nullable = false) + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out new file mode 100644 index 0000000000000..a4e5820cb7ce6 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-binding.sql.out @@ -0,0 +1,386 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included 
in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "CANNOT_UP_CAST_DATATYPE", + "sqlState" : "42846", + "messageParameters" : { + "details" : "The type path of the target object is:\n\nYou can either add an explicit cast to the input data or choose a higher precision type of the field in the target object", + "expression" : "spark_catalog.default.t.c1", + "sourceType" : "\"BIGINT\"", + "targetType" : "\"INT\"" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +SET spark.sql.legacy.viewSchemaCompensation=false +-- !query schema +struct +-- !query output +spark.sql.legacy.viewSchemaCompensation false + + +-- !query +DROP TABLE IF 
EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +ALTER VIEW v WITH SCHEMA BINDING +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "CANNOT_UP_CAST_DATATYPE", + "sqlState" : "42846", + "messageParameters" : { + "details" : "The type path of the target object is:\n\nYou can either add an explicit cast to the input data or choose a higher precision type of the field in the target object", + "expression" : "spark_catalog.default.t.c1", + "sourceType" : "\"BIGINT\"", + "targetType" : "\"INT\"" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-compensation.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-compensation.sql.out new file mode 100644 index 0000000000000..ffd1fbec47bbb --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-compensation.sql.out @@ -0,0 +1,593 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +SET spark.sql.ansi.enabled = false +-- !query schema +struct +-- !query output +spark.sql.ansi.enabled false + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT * FROM t 
+-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING NOT NULL, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES ('1', 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +INSERT INTO t VALUES ('a', 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +{ + "errorClass" : "CAST_INVALID_INPUT", + "sqlState" : "22018", + "messageParameters" : { + "ansiConfig" : "\"spark.sql.ansi.enabled\"", + "expression" : "'a'", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 15, + "fragment" : "v" + } ] +} + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 MAP, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "DATATYPE_MISMATCH.CAST_WITHOUT_SUGGESTION", + "sqlState" : "42K09", + "messageParameters" : { + "sqlExpr" : "\"c1\"", + "srcType" : "\"MAP\"", + "targetType" : "\"INT\"" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 15, + "stopIndex" : 15, + "fragment" : "v" + } ] +} + 
+ +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c3 INT NOT NULL, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c1", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t 
+View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES(1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA BINDING AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode BINDING +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES('1') +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "CANNOT_UP_CAST_DATATYPE", + "sqlState" : "42846", + "messageParameters" : { + "details" : "The type path of the target object is:\n\nYou can either add an explicit cast to the input data or choose a higher precision type of the field in the target object", + "expression" : "spark_catalog.default.t.c1", + "sourceType" : "\"STRING\"", + "targetType" : "\"INT\"" + } +} + + +-- !query +ALTER VIEW v WITH SCHEMA COMPENSATION +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out new file mode 100644 index 0000000000000..46d6acc8d98e2 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-evolution.sql.out @@ -0,0 +1,1113 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2 + 
+ +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c4 STRING NOT NULL, c5 DOUBLE) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES ('1', 2.0) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2.0 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c4 string +c5 double + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c4, c5] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c4 STRING, c5 DOUBLE, c6 DATE) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01') +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2.0 2022-01-01 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c4 string +c5 double +c6 date + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c4, c5, c6] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- 
!query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 int +a2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING NOT NULL, c2 DOUBLE) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES ('1', 2.0) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2.0 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 string +a2 double + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING, c2 DOUBLE, c3 DATE) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01') +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2.0 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 string +a2 double + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1, 2) 
+-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 int +a2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v (a1, a2) AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 int +a2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c3 INT, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c1", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v (a1, a2) AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 int +a2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT COMMENT 'c1', c2 INT COMMENT 'c2') USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 int c1 +a2 int c2 + +# Detailed Table Information +Catalog spark_catalog +Database 
default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 int c1 +a2 int c2 + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6c', c2 STRING COMMENT 'c2 6c') USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 bigint c1 +a2 string c2 + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +CREATE OR REPLACE VIEW v(a1 COMMENT 'a1', a2 COMMENT 'a2') WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 bigint a1 +a2 string a2 + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6d', c2 STRING COMMENT 'c2 6d') USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 bigint a1 +a2 string a2 + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 bigint c1 6d +c2 string c2 6d + +# Detailed 
Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6e', c2 STRING COMMENT 'c2 6e') USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 bigint c1 6e +c2 string c2 6e + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t1 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t1(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t2(c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA EVOLUTION AS SELECT * FROM t1, t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t2(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.ExtendedAnalysisException +{ + "errorClass" : "COLUMN_ALREADY_EXISTS", + "sqlState" : "42711", + "messageParameters" : { + "columnName" : "`c1`" + } +} + + +-- !query +DROP TABLE IF EXISTS t1 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t2 +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES(1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +ALTER VIEW v WITH SCHEMA EVOLUTION +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 string +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text 
SELECT * FROM t +View Schema Mode EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/sql-tests/results/view-schema-type-evolution.sql.out b/sql/core/src/test/resources/sql-tests/results/view-schema-type-evolution.sql.out new file mode 100644 index 0000000000000..707715120a861 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/view-schema-type-evolution.sql.out @@ -0,0 +1,663 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT NOT NULL, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING NOT NULL, c2 DOUBLE) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES ('1', 2.0) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2.0 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 string +c2 double + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING, c2 DOUBLE, c3 DATE) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES ('1', 2.0, DATE'2022-01-01') +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2.0 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 string +c2 double + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t 
+-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES (1, 2) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 2 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c2", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c3 INT, c2 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "INCOMPATIBLE_VIEW_SCHEMA_CHANGE", + "sqlState" : "51024", + "messageParameters" : { + "actualCols" : "[]", + "colName" : "c1", + "expectedNum" : "1", + "suggestion" : "CREATE OR REPLACE VIEW spark_catalog.default.v AS SELECT * FROM t", + "viewName" : "`spark_catalog`.`default`.`v`" + } +} + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int +c2 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT COMMENT 'c1', c2 INT COMMENT 'c2') USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v(a1, a2) WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query 
output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 int c1 +a2 int c2 + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6a', c2 STRING COMMENT 'c2 6a') USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 bigint c1 +a2 string c2 + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +CREATE OR REPLACE VIEW v(a1 COMMENT 'a1', a2 COMMENT 'a2') WITH SCHEMA TYPE EVOLUTION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 bigint a1 +a2 string a2 + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 BIGINT COMMENT 'c1 6b', c2 STRING COMMENT 'c2 6b') USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output + + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +a1 bigint a1 +a2 string a2 + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1, c2] + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 INT) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES(1) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE VIEW v WITH SCHEMA COMPENSATION AS SELECT * FROM t +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE TABLE t(c1 STRING) USING PARQUET +-- !query schema +struct<> +-- !query output + + + +-- !query +INSERT INTO t VALUES('1') +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v 
+-- !query schema +struct +-- !query output +1 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 int + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode COMPENSATION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +ALTER VIEW v WITH SCHEMA TYPE EVOLUTION +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT * FROM v +-- !query schema +struct +-- !query output +1 + + +-- !query +DESCRIBE EXTENDED v +-- !query schema +struct +-- !query output +c1 string + +# Detailed Table Information +Catalog spark_catalog +Database default +Table v +Created Time [not included in comparison] +Last Access [not included in comparison] +Created By [not included in comparison] +Type VIEW +View Text SELECT * FROM t +View Original Text SELECT * FROM t +View Schema Mode TYPE EVOLUTION +View Catalog and Namespace spark_catalog.default +View Query Output Columns [c1] + + +-- !query +DROP VIEW IF EXISTS v +-- !query schema +struct<> +-- !query output + + + +-- !query +DROP TABLE IF EXISTS t +-- !query schema +struct<> +-- !query output + diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/.metadata.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/.metadata.crc new file mode 100644 index 0000000000000..6177f01d501b1 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/.metadata.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/.0.crc new file mode 100644 index 0000000000000..1aee7033161ec Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/.1.crc new file mode 100644 index 0000000000000..1aee7033161ec Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/.1.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/metadata new file mode 100644 index 0000000000000..c8acdedc074b7 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/metadata @@ -0,0 +1 @@ +{"id":"a13717f3-7485-421b-b55a-21625123b680"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/.0.crc new file mode 100644 index 0000000000000..121286161cb66 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/.1.crc new file mode 100644 index 0000000000000..89d73c77c55c0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/.1.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/0 new file mode 100644 index 0000000000000..7a7b38628a15d --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719278977158,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/1 new file mode 100644 index 0000000000000..589d400395e1c --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/offsets/1 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719278978807,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/.1.delta.crc new file mode 100644 index 0000000000000..1992982c58ff2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/.2.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/.2.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/1.delta new file mode 100644 index 0000000000000..fec40e83a5471 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/_metadata/.schema.crc new file mode 100644 index 0000000000000..97b2fbbd4cdf9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/_metadata/.schema.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/_metadata/schema new file mode 100644 index 0000000000000..c35505fa363fb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/0/_metadata/schema differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/.2.delta.crc new file mode 100644 index 0000000000000..d18b77b93aff2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/.2.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/2.delta new file mode 100644 index 0000000000000..fcbf8df80f5f9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/1/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/.2.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/.2.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files 
/dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/2/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/.2.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/.2.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/3/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/.2.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/.2.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/2.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-deduplication/state/0/4/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/.metadata.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/.metadata.crc new file mode 100644 index 0000000000000..a0afa9cbeabb7 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/.metadata.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/commits/.0.crc new file mode 100644 index 0000000000000..1aee7033161ec Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/commits/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/metadata new file mode 100644 index 0000000000000..9f8d6f4d5cf50 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/metadata @@ -0,0 +1 @@ +{"id":"e41911da-47c9-4560-a95d-e2ab97f2bc85"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/offsets/.0.crc new file mode 100644 index 0000000000000..6cd2a4731154f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/offsets/.0.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/offsets/0 new file mode 100644 index 0000000000000..a45ae3899e0b3 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/offsets/0 @@ -0,0 +1,4 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719343083746,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/_metadata/.schema.crc new file mode 100644 index 0000000000000..0b5ede7660246 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/_metadata/.schema.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/1.delta new file mode 100644 index 
0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/_metadata/.schema.crc new file mode 100644 index 0000000000000..3f303a9e7b035 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/_metadata/.schema.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..42448b3b584ce Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/left-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/_metadata/.schema.crc new file mode 100644 index 0000000000000..0b5ede7660246 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/_metadata/.schema.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/.1.delta.crc 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/_metadata/.schema.crc new file mode 100644 index 0000000000000..bcc7311689f0a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/_metadata/.schema.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..8fa8f1675bc82 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/0/right-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyWithIndexToValue/.1.delta.crc differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyWithIndexToValue/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/1/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..d73ee1ba16c2e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyToNumValues/1.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..8dee4c86270f2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyWithIndexToValue/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..7e6dce9cc108c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..d73ee1ba16c2e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..d0e2f40c18ada Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyWithIndexToValue/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..2ec494e6a636f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/2/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyToNumValues/.1.delta.crc differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyWithIndexToValue/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..cf1d68e2acee3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyWithIndexToValue/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/3/right-keyWithIndexToValue/1.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..20918a4ffe6ff Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..cb8ce356ad7f3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyWithIndexToValue/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..56b52ab974a3d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyToNumValues/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyToNumValues/.1.delta.crc new file mode 100644 index 0000000000000..20918a4ffe6ff Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyToNumValues/.1.delta.crc differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyWithIndexToValue/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyWithIndexToValue/.1.delta.crc new file mode 100644 index 0000000000000..a874ad31b7403 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyWithIndexToValue/.1.delta.crc differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..0ed4feb1bd9b6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.5.1-streaming-join/state/0/4/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/2 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/2 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/3 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/commits/3 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/metadata new file mode 100644 index 0000000000000..6fb99c5969bd9 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/metadata @@ -0,0 +1 @@ +{"id":"ab7bcd9a-4146-45d3-933d-a615b381c3be"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/0 new file mode 100644 index 0000000000000..ba13971f3848b --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/0 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719290655102,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/1 new file mode 100644 index 0000000000000..5fd58f6716944 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290659041,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/2 new file mode 100644 index 0000000000000..be839a1efa191 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/2 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290661716,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/3 new file mode 100644 index 0000000000000..c87f4b1b97bcc --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/offsets/3 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719290664278,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +3 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/_metadata/schema new file mode 100644 index 0000000000000..20b45317e0a22 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/0/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/1.delta new file mode 100644 index 0000000000000..dae5bfc800597 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/2.delta new file mode 100644 index 0000000000000..4405af3420786 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/2.snapshot new file mode 100644 index 0000000000000..4405af3420786 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/3.delta new file mode 100644 index 0000000000000..433925dab114d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/4.delta new file mode 100644 index 0000000000000..064533dcd4f30 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/4.snapshot new file mode 100644 index 0000000000000..b4e23cc0ed4e5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/1/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/2.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/2.snapshot 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/2.snapshot new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/3.delta new file mode 100644 index 0000000000000..acf7619c291f3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/4.delta new file mode 100644 index 0000000000000..27ca4dcede3dc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/4.snapshot new file mode 100644 index 0000000000000..27ca4dcede3dc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/2/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/1.delta new file mode 100644 index 0000000000000..859c2b1315a5e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/2.snapshot new file mode 100644 index 0000000000000..859c2b1315a5e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/4.snapshot new file mode 100644 index 0000000000000..859c2b1315a5e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/3/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/2.delta new file mode 100644 index 0000000000000..3465b025dfa03 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/2.snapshot new file mode 100644 index 0000000000000..3465b025dfa03 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/4.delta new file mode 100644 index 0000000000000..6070683f44e12 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/4.snapshot new file mode 100644 index 0000000000000..bf46f06c500d8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/4/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/_metadata/metadata new file mode 100644 index 0000000000000..5094b71aa3581 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/agg/state/0/_metadata/metadata @@ -0,0 +1,2 @@ +v1 
+{"operatorInfo":{"operatorId":0,"operatorName":"stateStoreSave"},"stateStoreInfo":[{"storeName":"default","numColsPrefixKey":0,"numPartitions":5}]} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/2 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/2 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/3 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/commits/3 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/metadata new file mode 100644 index 0000000000000..b151b6c27e031 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/metadata @@ -0,0 +1 @@ +{"id":"9253a36d-8c80-4cdc-9d2a-3cd9b5ceff59"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/0 new file mode 100644 index 0000000000000..acbebd05ef160 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719289915309,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +0 \ No newline at end of file diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/1 new file mode 100644 index 0000000000000..3485e7b5927c1 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719289918096,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/2 new file mode 100644 index 0000000000000..a5be1f2e758aa --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/2 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719289921002,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/3 new file mode 100644 index 0000000000000..6a238cc687bf7 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/offsets/3 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719289923707,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +3 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/_metadata/schema new file mode 100644 index 0000000000000..70ebb77e1b781 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/0/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/1.delta new file mode 100644 index 0000000000000..aac301da2c2ab Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/2.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/2.snapshot new file mode 100644 index 0000000000000..aac301da2c2ab Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/3.delta new file mode 100644 index 0000000000000..c88091a5bbc9e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/4.snapshot new file mode 100644 index 0000000000000..8cd4b5dcb1ec2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/1/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/2.delta new file mode 100644 index 0000000000000..52dacd1351c7e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/2.snapshot new file mode 100644 index 0000000000000..52dacd1351c7e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/4.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/4.snapshot new file mode 100644 index 0000000000000..52dacd1351c7e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/2/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/1.delta new file mode 100644 index 0000000000000..d86baf6d41aa2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/2.snapshot new file mode 100644 index 0000000000000..d86baf6d41aa2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/4.snapshot new file mode 100644 index 0000000000000..d86baf6d41aa2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/3/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/1.delta new file mode 100644 index 0000000000000..1a985dfde9d45 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/1.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/2.delta
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/2.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/2.snapshot
new file mode 100644
index 0000000000000..1a985dfde9d45
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/2.snapshot differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/3.delta
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/3.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/4.delta
new file mode 100644
index 0000000000000..9d22a051ebfdc
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/4.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/4.snapshot
new file mode 100644
index 0000000000000..eb35f4815bfe8
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/4/4.snapshot differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/_metadata/metadata
new file mode 100644
index 0000000000000..39ce28c9b4aa5
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/dedup/state/0/_metadata/metadata
@@ -0,0 +1,2 @@
+v1
+{"operatorInfo":{"operatorId":0,"operatorName":"dedupe"},"stateStoreInfo":[{"storeName":"default","numColsPrefixKey":0,"numPartitions":5}]}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/0
new file mode 100644
index 0000000000000..9c1e3021c3ead
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/0
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/1
new file mode 100644
index 0000000000000..1715390973f9b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/1
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":5000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/2
new file mode 100644
index 0000000000000..1715390973f9b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/2
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":5000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/3
new file mode 100644
index 0000000000000..2a2d02b0bb3b3
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/3
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":9000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/4
new file mode 100644
index 0000000000000..2a2d02b0bb3b3
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/commits/4
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":9000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/metadata
new file mode 100644
index 0000000000000..c3656bc51c886
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/metadata
@@ -0,0 +1 @@
+{"id":"fbcd445d-b716-4589-ad15-774afa5a243d"}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/0
new file mode 100644
index 0000000000000..4c4f47f8e9ec5
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/0
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":0,"batchTimestampMs":1719289926768,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+0
+0
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/1
new file mode 100644
index 0000000000000..13cd6f3002a74
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/1
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":0,"batchTimestampMs":1719289932001,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+1
+1
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/2
new file mode 100644
index 0000000000000..013735ad0d044
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/2
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":5000,"batchTimestampMs":1719289935688,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+1
+1
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/3
new file mode 100644
index 0000000000000..6802d77f1ecc8
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/3
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":5000,"batchTimestampMs":1719289939702,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+2
+2
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/4
new file mode 100644
index 0000000000000..8621a231056b2
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/offsets/4
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":9000,"batchTimestampMs":1719289942516,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+2
+2
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/1.delta
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/1.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/2.delta
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/2.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/2.snapshot
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/2.snapshot differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/3.delta
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/3.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/4.delta
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/4.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/4.snapshot
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/4.snapshot differ
diff
--git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/4.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..42448b3b584ce Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/left-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/3.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/2.snapshot differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..8fa8f1675bc82 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/0/right-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/2.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/2.snapshot 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/2.snapshot 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/2.snapshot new file mode 
100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/1/right-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..25d24f4cedf3f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..d295efee0d000 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..4460eadea0c0d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..7e6dce9cc108c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..21e2a706e8c7a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..5bfdddd0e2f14 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/2.snapshot differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..30e88f6cb35c6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..25d24f4cedf3f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..d295efee0d000 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/3.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..4d3cf654ce551 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..25d24f4cedf3f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..2ec494e6a636f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..0877f7564366f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..5bfdddd0e2f14 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/3.delta 
new file mode 100644 index 0000000000000..edcc5dd1f672a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..0877f7564366f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/2/right-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..74fa1fc58b611 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..74fa1fc58b611 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/4.delta new file mode 100644 index 0000000000000..1aeeffd771c08 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..379479c0ccc3b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..cafd9d540f8e0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..9899fb58eebe4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/3.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..69437319e872d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..aa76ff9b416a1 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..74fa1fc58b611 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..74fa1fc58b611 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/4.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..1aeeffd771c08 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..379479c0ccc3b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..47dff164d42d0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..9899fb58eebe4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..9899fb58eebe4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/4.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..105c7cc4255a9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..aa76ff9b416a1 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..d536c6fdcbce4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/3/right-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6070683f44e12 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..837a1434917ca Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/4.delta new file mode 100644 index 
0000000000000..32506a0366066 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..cf43298608153 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..2f2da77129ea4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..56b52ab974a3d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..26d97da9c610f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..2dbdd331b3e97 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..3603b3f81bc77 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..b92431e0a4df6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..71fdc6c434ca3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6070683f44e12 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..837a1434917ca Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..2f2da77129ea4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..32506a0366066 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/4.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..3b529f86101ac Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..83a5f723a34ab Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..0ed4feb1bd9b6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..31a686912dc97 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..2dbdd331b3e97 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..71fdc6c434ca3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..649772d35ffd8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/4.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/4.snapshot
new file mode 100644
index 0000000000000..50b057756915b
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/4.snapshot differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/5.delta
new file mode 100644
index 0000000000000..1c4c3974de1d5
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/4/right-keyWithIndexToValue/5.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/_metadata/metadata
new file mode 100644
index 0000000000000..b73f1e3e66ac5
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join1/state/0/_metadata/metadata
@@ -0,0 +1,2 @@
+v1
+{"operatorInfo":{"operatorId":0,"operatorName":"symmetricHashJoin"},"stateStoreInfo":[{"storeName":"left-keyToNumValues","numColsPrefixKey":0,"numPartitions":5},{"storeName":"left-keyWithIndexToValue","numColsPrefixKey":0,"numPartitions":5},{"storeName":"right-keyToNumValues","numColsPrefixKey":0,"numPartitions":5},{"storeName":"right-keyWithIndexToValue","numColsPrefixKey":0,"numPartitions":5}]}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/0
new file mode 100644
index 0000000000000..9c1e3021c3ead
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/0
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/1
new file mode 100644
index 0000000000000..1715390973f9b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/1
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":5000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/2
new file mode 100644
index 0000000000000..1715390973f9b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/2
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":5000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/3
new file mode 100644
index 0000000000000..2a2d02b0bb3b3
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/3
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":9000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/4
new file mode 100644
index 0000000000000..2a2d02b0bb3b3
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/commits/4
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":9000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/metadata
new file mode 100644
index 0000000000000..331d37d197a17
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/metadata
@@ -0,0 +1 @@
+{"id":"26806544-709f-4745-a6e4-7641361fe94a"}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/0
new file mode 100644
index 0000000000000..20e0fddaf646b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/0
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":0,"batchTimestampMs":1719289946613,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+0
+0
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/1
new file mode 100644
index 0000000000000..f71629438b9e2
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/1
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":0,"batchTimestampMs":1719289951327,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+1
+1
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/2
new file mode 100644
index 0000000000000..789561fd971a4
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/2
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":5000,"batchTimestampMs":1719289954259,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+1
+1
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/3
new file mode 100644
index 0000000000000..0d224bfc5920a
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/3
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":5000,"batchTimestampMs":1719289958068,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+2
+2
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/4
new file mode 100644
index 0000000000000..68158e5f2542e
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/offsets/4
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":9000,"batchTimestampMs":1719289960765,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+2
+2
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/1.delta
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/1.delta differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/2.delta
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/1.delta new file mode 100644 index 
0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..42448b3b584ce Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/left-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/5.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/4.snapshot differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..8fa8f1675bc82 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/0/right-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/4.snapshot 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/4.snapshot 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/4.snapshot new file mode 100644 index 
0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/1/right-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..25d24f4cedf3f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..d295efee0d000 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..4460eadea0c0d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/4.snapshot differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..7e6dce9cc108c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..21e2a706e8c7a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..5bfdddd0e2f14 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..30e88f6cb35c6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/4.snapshot differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..4e421cd377fb6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..25d24f4cedf3f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..d295efee0d000 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..4d3cf654ce551 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..25d24f4cedf3f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/5.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..2ec494e6a636f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..0877f7564366f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..5bfdddd0e2f14 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..edcc5dd1f672a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..0877f7564366f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/5.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/2/right-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..74fa1fc58b611 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..74fa1fc58b611 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/4.delta new file mode 100644 index 0000000000000..1aeeffd771c08 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..379479c0ccc3b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary 
files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..cafd9d540f8e0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..9899fb58eebe4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..69437319e872d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..aa76ff9b416a1 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..74fa1fc58b611 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..74fa1fc58b611 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..1aeeffd771c08 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..379479c0ccc3b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..47dff164d42d0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyToNumValues/5.delta differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..9899fb58eebe4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..9899fb58eebe4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..105c7cc4255a9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..aa76ff9b416a1 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..d536c6fdcbce4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/3/right-keyWithIndexToValue/5.delta differ diff --git 
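
The join2 state directories added above follow the <version>.delta plus periodic <version>.snapshot naming used by the HDFS-backed state store provider that these hdfs/ checkpoints configure (see spark.sql.streaming.stateStore.providerClass in the offset logs further below). Conceptually, a given state version is materialized from the newest snapshot at or below that version plus the deltas written after it. The short sketch below only illustrates that file-selection rule over a local copy of one of these store directories; it is not Spark's implementation, and the object and method names are hypothetical.

// Conceptual sketch (not Spark code): pick which of the checkpoint files above
// would be needed to materialize state version `target` for one store directory,
// given the <version>.delta / <version>.snapshot naming visible in the paths.
import java.io.File

object StateFilesForVersion {
  def filesToLoad(storeDir: File, target: Int): Seq[File] = {
    // storeDir is a hypothetical local copy, e.g. .../hdfs/join2/state/0/2/left-keyToNumValues
    val names = Option(storeDir.listFiles()).getOrElse(Array.empty[File]).map(_.getName).toSet

    // Newest snapshot at or below the target version, if one exists.
    val snapshotVersion = (1 to target).filter(v => names.contains(s"$v.snapshot")).lastOption
    val snapshotFile = snapshotVersion.map(v => new File(storeDir, s"$v.snapshot")).toSeq

    // Deltas written after that snapshot, up to and including the target version.
    val start = snapshotVersion.getOrElse(0)
    val deltaFiles = ((start + 1) to target)
      .filter(v => names.contains(s"$v.delta"))
      .map(v => new File(storeDir, s"$v.delta"))

    snapshotFile ++ deltaFiles
  }
}

For example, for the left-keyToNumValues directory of partition 2 above (deltas 1-5 plus 2.snapshot and 4.snapshot), asking for version 5 would select 4.snapshot and 5.delta.
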
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6070683f44e12 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..837a1434917ca Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/4.delta new file mode 100644 index 0000000000000..32506a0366066 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..cf43298608153 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/5.delta new file mode 100644 index 0000000000000..2f2da77129ea4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/1.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/1.delta new file mode 100644 index 0000000000000..56b52ab974a3d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..26d97da9c610f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..2dbdd331b3e97 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..3603b3f81bc77 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..b92431e0a4df6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..71fdc6c434ca3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/left-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/1.delta 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/1.delta new file mode 100644 index 0000000000000..0bdaf341003b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/2.delta new file mode 100644 index 0000000000000..6070683f44e12 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/2.snapshot new file mode 100644 index 0000000000000..837a1434917ca Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/3.delta new file mode 100644 index 0000000000000..2f2da77129ea4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/4.delta new file mode 100644 index 0000000000000..32506a0366066 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/4.snapshot new file mode 100644 index 0000000000000..3b529f86101ac Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/5.delta new file mode 100644 index 0000000000000..83a5f723a34ab Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyToNumValues/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/1.delta new file mode 100644 index 
0000000000000..0ed4feb1bd9b6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/2.delta new file mode 100644 index 0000000000000..31a686912dc97 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/2.snapshot new file mode 100644 index 0000000000000..2dbdd331b3e97 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/3.delta new file mode 100644 index 0000000000000..71fdc6c434ca3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/4.delta new file mode 100644 index 0000000000000..649772d35ffd8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/4.snapshot new file mode 100644 index 0000000000000..50b057756915b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/5.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/5.delta new file mode 100644 index 0000000000000..1c4c3974de1d5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/4/right-keyWithIndexToValue/5.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/_metadata/metadata new file mode 100644 index 0000000000000..b73f1e3e66ac5 --- /dev/null +++ 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/join2/state/0/_metadata/metadata @@ -0,0 +1,2 @@ +v1 +{"operatorInfo":{"operatorId":0,"operatorName":"symmetricHashJoin"},"stateStoreInfo":[{"storeName":"left-keyToNumValues","numColsPrefixKey":0,"numPartitions":5},{"storeName":"left-keyWithIndexToValue","numColsPrefixKey":0,"numPartitions":5},{"storeName":"right-keyToNumValues","numColsPrefixKey":0,"numPartitions":5},{"storeName":"right-keyWithIndexToValue","numColsPrefixKey":0,"numPartitions":5}]} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/2 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/2 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/3 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/commits/3 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/metadata new file mode 100644 index 0000000000000..1d512939c8dca --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/metadata @@ -0,0 +1 @@ +{"id":"5ed656f0-84dd-414c-abbc-851e4ce58b93"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/0 new file mode 100644 index 0000000000000..95444ab5cb96a --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/0 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719289904957,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/1 new file mode 100644 index 0000000000000..614f4bb85c018 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719289907608,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/2 new file mode 100644 index 0000000000000..b4f3aebbbe875 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/2 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719289910090,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/3 new file mode 100644 index 0000000000000..7692f0f1abead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/offsets/3 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719289912575,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +3 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/1.delta new file mode 100644 index 0000000000000..701f5bd986b86 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/1.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/2.delta new file mode 100644 index 0000000000000..1de87347fb513 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/2.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/2.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/2.snapshot new file mode 100644 index 0000000000000..1de87347fb513 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/2.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/3.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/3.delta new file mode 100644 index 0000000000000..5b1d0ce87f287 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/3.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/4.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/4.delta new file mode 100644 index 0000000000000..cc309967b185e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/4.delta differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/4.snapshot b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/4.snapshot new file mode 100644 index 0000000000000..cc309967b185e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/4.snapshot differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/_metadata/schema new file mode 100644 index 0000000000000..371b0df09d80b Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/0/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/_metadata/metadata new file mode 100644 index 0000000000000..5792421dd423e --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/hdfs/limit/state/0/_metadata/metadata @@ -0,0 +1,2 @@ +v1 +{"operatorInfo":{"operatorId":0,"operatorName":"globalLimit"},"stateStoreInfo":[{"storeName":"default","numColsPrefixKey":0,"numPartitions":5}]} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/2 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/2 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/3 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/commits/3 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/metadata new file mode 100644 index 0000000000000..24c81c5d47be5 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/metadata @@ -0,0 +1 @@ +{"id":"0da797f0-6c12-4954-89cf-cea200a87f97"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/0 new file mode 100644 index 0000000000000..add7a6458926f --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/0 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719290931555,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/1 new file mode 100644 index 0000000000000..2857388877a31 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290934440,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/2 new file mode 100644 index 0000000000000..5df970a41a729 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/2 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290937697,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/3 new file mode 100644 index 0000000000000..a8efebc1cf7ec --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/offsets/3 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719290940519,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +3 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/2.zip new file mode 100644 index 0000000000000..b52a3e04cde93 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/4.zip new file mode 100644 index 0000000000000..97e703840ba71 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/_metadata/schema new file mode 100644 index 0000000000000..20b45317e0a22 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/0/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/1.changelog new file mode 100644 index 0000000000000..497e792645825 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/2.changelog new file mode 100644 index 0000000000000..c197734e2608c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/2.zip new file mode 100644 index 0000000000000..4002b00264aac Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/3.changelog new file mode 100644 index 0000000000000..35985490c08d1 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/4.changelog new file mode 100644 index 0000000000000..bbd5a488fb858 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/4.zip new file mode 100644 index 0000000000000..00e9f17f64f12 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/SSTs/000008-e414df11-1a90-4be8-807c-8d6f970b5f56.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/SSTs/000008-e414df11-1a90-4be8-807c-8d6f970b5f56.sst new file mode 100644 index 0000000000000..c41d3cc2bafb8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/SSTs/000008-e414df11-1a90-4be8-807c-8d6f970b5f56.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/SSTs/000015-97d0e283-eed7-4af0-87d9-eded3afe15ca.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/SSTs/000015-97d0e283-eed7-4af0-87d9-eded3afe15ca.sst new file mode 
100644 index 0000000000000..186fba2d13a5c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/1/SSTs/000015-97d0e283-eed7-4af0-87d9-eded3afe15ca.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/2.changelog new file mode 100644 index 0000000000000..2daeae74a72b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/2.zip new file mode 100644 index 0000000000000..1c1ae3f894804 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/3.changelog new file mode 100644 index 0000000000000..5d9eea4ad997e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/4.changelog new file mode 100644 index 0000000000000..1ddb6499c28c4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/4.zip new file mode 100644 index 0000000000000..e574f5e25473b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/SSTs/000008-d6862a4a-f10f-4641-ba89-0ea25cd816b0.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/SSTs/000008-d6862a4a-f10f-4641-ba89-0ea25cd816b0.sst new file mode 100644 index 0000000000000..9e38658bda2de Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/SSTs/000008-d6862a4a-f10f-4641-ba89-0ea25cd816b0.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/SSTs/000015-1e8225d7-793e-47df-aec6-fb47522e2494.sst 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/SSTs/000015-1e8225d7-793e-47df-aec6-fb47522e2494.sst new file mode 100644 index 0000000000000..8f4391b954d05 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/2/SSTs/000015-1e8225d7-793e-47df-aec6-fb47522e2494.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/1.changelog new file mode 100644 index 0000000000000..46e739559ac8d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/2.zip new file mode 100644 index 0000000000000..847766f4eb425 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/4.zip new file mode 100644 index 0000000000000..a2b18fa0d827e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/SSTs/000008-637e442e-feb6-4022-a0b1-919d388073ae.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/SSTs/000008-637e442e-feb6-4022-a0b1-919d388073ae.sst new file mode 100644 index 0000000000000..66dd9dfb28c19 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/3/SSTs/000008-637e442e-feb6-4022-a0b1-919d388073ae.sst differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/1.changelog new file mode 100644 index 0000000000000..9099712c6134d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/2.changelog new file mode 100644 index 0000000000000..1152f1a527f20 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/2.zip new file mode 100644 index 0000000000000..9a806e422149f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/4.changelog new file mode 100644 index 0000000000000..c5a3af6244a01 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/4.zip new file mode 100644 index 0000000000000..eebcf56f8533c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/SSTs/000008-0f086ebd-c188-4508-97d3-9282313b0a97.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/SSTs/000008-0f086ebd-c188-4508-97d3-9282313b0a97.sst new file mode 100644 index 0000000000000..fcf1c915751ea Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/SSTs/000008-0f086ebd-c188-4508-97d3-9282313b0a97.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/SSTs/000015-074aa977-f8df-4b58-8143-c5c4691cdb9a.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/SSTs/000015-074aa977-f8df-4b58-8143-c5c4691cdb9a.sst new file mode 100644 index 0000000000000..7522d3bd7cadb Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/4/SSTs/000015-074aa977-f8df-4b58-8143-c5c4691cdb9a.sst differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/_metadata/metadata
new file mode 100644
index 0000000000000..5094b71aa3581
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/agg/state/0/_metadata/metadata
@@ -0,0 +1,2 @@
+v1
+{"operatorInfo":{"operatorId":0,"operatorName":"stateStoreSave"},"stateStoreInfo":[{"storeName":"default","numColsPrefixKey":0,"numPartitions":5}]}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/0
new file mode 100644
index 0000000000000..9c1e3021c3ead
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/0
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/1
new file mode 100644
index 0000000000000..9c1e3021c3ead
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/1
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/2
new file mode 100644
index 0000000000000..9c1e3021c3ead
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/2
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/3
new file mode 100644
index 0000000000000..9c1e3021c3ead
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/commits/3
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/metadata
new file mode 100644
index 0000000000000..46c70457ae13f
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/metadata
@@ -0,0 +1 @@
+{"id":"7715e3b8-26dd-416a-ac64-c804bb48a972"}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/0
new file mode 100644
index 0000000000000..ef98649c9fc3a
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/0
@@ -0,0 +1,3 @@
+v1
+{"batchWatermarkMs":0,"batchTimestampMs":1719290943839,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/1 new file mode 100644 index 0000000000000..46d8177c1ed43 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290946665,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/2 new file mode 100644 index 0000000000000..dc7562efc9fd2 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/2 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290949666,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/3 new file mode 100644 index 0000000000000..52f25fb6acccd --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/offsets/3 @@ -0,0 +1,3 @@ +v1 
+{"batchWatermarkMs":0,"batchTimestampMs":1719290952214,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +3 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/2.zip new file mode 100644 index 0000000000000..5910abeed315f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/4.zip new file mode 100644 index 0000000000000..ddba4b5772e50 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/_metadata/schema new file mode 100644 index 0000000000000..70ebb77e1b781 
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/0/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/1.changelog new file mode 100644 index 0000000000000..1132252c8945f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/2.zip new file mode 100644 index 0000000000000..0bf7ee845bc08 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/3.changelog new file mode 100644 index 0000000000000..c5ebbc7bbed26 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/4.zip new file mode 100644 index 0000000000000..33af823d62d1f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/SSTs/000008-3073b336-661b-4d30-984d-427402150d37.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/SSTs/000008-3073b336-661b-4d30-984d-427402150d37.sst new file mode 100644 index 0000000000000..20f8285636f8f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/SSTs/000008-3073b336-661b-4d30-984d-427402150d37.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/SSTs/000009-5669c494-2412-46c4-96ef-1bb9ab6b2710.sst 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/SSTs/000009-5669c494-2412-46c4-96ef-1bb9ab6b2710.sst new file mode 100644 index 0000000000000..cf9e952972a54 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/1/SSTs/000009-5669c494-2412-46c4-96ef-1bb9ab6b2710.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/2.changelog new file mode 100644 index 0000000000000..f96b5ad48b3f8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/2.zip new file mode 100644 index 0000000000000..bc12dd5ba4c2d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/4.zip new file mode 100644 index 0000000000000..9a720d8720d88 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/SSTs/000008-90e6688f-83a0-4917-ae32-9e107c002bcc.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/SSTs/000008-90e6688f-83a0-4917-ae32-9e107c002bcc.sst new file mode 100644 index 0000000000000..3bbb3466e6f82 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/2/SSTs/000008-90e6688f-83a0-4917-ae32-9e107c002bcc.sst differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/1.changelog new file mode 100644 index 0000000000000..b3f54eeacdd04 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/2.zip new file mode 100644 index 0000000000000..ae313425f0644 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/4.zip new file mode 100644 index 0000000000000..fa589eca47e36 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/SSTs/000008-e115799d-89cb-4f4d-8e58-fe104d382c80.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/SSTs/000008-e115799d-89cb-4f4d-8e58-fe104d382c80.sst new file mode 100644 index 0000000000000..8b786dd457f96 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/3/SSTs/000008-e115799d-89cb-4f4d-8e58-fe104d382c80.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/1.changelog new file mode 100644 index 0000000000000..9b224728c8bcb Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/2.zip new file mode 100644 index 0000000000000..faf5ea1b8158b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/4.changelog new file mode 100644 index 0000000000000..6fc1bb07e798d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/4.zip new file mode 100644 index 0000000000000..03837a7357d71 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/SSTs/000008-5f249adf-c98e-46a3-8217-e39a4360b624.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/SSTs/000008-5f249adf-c98e-46a3-8217-e39a4360b624.sst new file mode 100644 index 0000000000000..a17404703c22a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/SSTs/000008-5f249adf-c98e-46a3-8217-e39a4360b624.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/SSTs/000009-0d1befcf-847e-43a5-b354-a4d11afeaaba.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/SSTs/000009-0d1befcf-847e-43a5-b354-a4d11afeaaba.sst new file mode 100644 index 0000000000000..5d68995c5c8b0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/4/SSTs/000009-0d1befcf-847e-43a5-b354-a4d11afeaaba.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/_metadata/metadata 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/_metadata/metadata
new file mode 100644
index 0000000000000..39ce28c9b4aa5
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/dedup/state/0/_metadata/metadata
@@ -0,0 +1,2 @@
+v1
+{"operatorInfo":{"operatorId":0,"operatorName":"dedupe"},"stateStoreInfo":[{"storeName":"default","numColsPrefixKey":0,"numPartitions":5}]}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/0
new file mode 100644
index 0000000000000..9c1e3021c3ead
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/0
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/1
new file mode 100644
index 0000000000000..1715390973f9b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/1
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":5000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/2
new file mode 100644
index 0000000000000..1715390973f9b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/2
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":5000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/3
new file mode 100644
index 0000000000000..2a2d02b0bb3b3
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/3
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":9000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/4
new file mode 100644
index 0000000000000..2a2d02b0bb3b3
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/commits/4
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":9000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/metadata
new file mode 100644
index 0000000000000..e0a261d1ce0e2
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/metadata
@@ -0,0 +1 @@
+{"id":"160224de-518b-4191-b9d6-544602476824"}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/0
new file mode 100644
index 0000000000000..67dd9ed17d96c
--- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/0 @@ -0,0 +1,4 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290955223,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +0 +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/1 new file mode 100644 index 0000000000000..dd1850eb28417 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/1 @@ -0,0 +1,4 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290960126,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +1 +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/2 new file mode 100644 index 0000000000000..65fe14e65bd8d --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/2 @@ -0,0 +1,4 @@ +v1 +{"batchWatermarkMs":5000,"batchTimestampMs":1719290962748,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +1 +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/3 new file mode 100644 index 0000000000000..756be72687efd --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/3 @@ -0,0 +1,4 @@ +v1 
+{"batchWatermarkMs":5000,"batchTimestampMs":1719290966185,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +2 +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/4 new file mode 100644 index 0000000000000..d2cbfe3c08429 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/offsets/4 @@ -0,0 +1,4 @@ +v1 +{"batchWatermarkMs":9000,"batchTimestampMs":1719290968718,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +2 +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..0bedf21fe2da4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..59a6ea16dc12b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..0bedf21fe2da4 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..59a6ea16dc12b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..42448b3b584ce Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/left-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..0bedf21fe2da4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..59a6ea16dc12b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..0bedf21fe2da4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..59a6ea16dc12b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..8fa8f1675bc82 
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/0/right-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..0bedf21fe2da4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..59a6ea16dc12b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..0bedf21fe2da4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..59a6ea16dc12b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..0bedf21fe2da4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..59a6ea16dc12b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..0bedf21fe2da4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..59a6ea16dc12b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files 
/dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/1/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..2daeae74a72b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..abdd6a5fca257 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..3802a7e7b1ee8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..f5d38ea63e7b6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/SSTs/000008-97286fb3-8d19-4e91-aecb-3f9647f6103f.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/SSTs/000008-97286fb3-8d19-4e91-aecb-3f9647f6103f.sst new file mode 100644 index 0000000000000..d7494daaea026 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyToNumValues/SSTs/000008-97286fb3-8d19-4e91-aecb-3f9647f6103f.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..88a2191cae130 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..57d0436d95d79 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..1d20ea2ddd926 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/4.zip 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..171c10c2f069c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/SSTs/000008-44099692-68de-4b22-8238-3a8029f693ad.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/SSTs/000008-44099692-68de-4b22-8238-3a8029f693ad.sst new file mode 100644 index 0000000000000..865f1fef7f1f5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/left-keyWithIndexToValue/SSTs/000008-44099692-68de-4b22-8238-3a8029f693ad.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..2daeae74a72b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..abdd6a5fca257 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..c3a661fc13ade Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..e8da61cb5ddc9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/3.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..f6157326c212d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/SSTs/000008-a9a5b2ba-3bff-402f-b750-b9232ac3d1b1.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/SSTs/000008-a9a5b2ba-3bff-402f-b750-b9232ac3d1b1.sst new file mode 100644 index 0000000000000..a9160d9a66ab6 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/SSTs/000008-a9a5b2ba-3bff-402f-b750-b9232ac3d1b1.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/SSTs/000009-c5863121-41a9-4ffc-9c54-5f4c2a1139a2.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/SSTs/000009-c5863121-41a9-4ffc-9c54-5f4c2a1139a2.sst new file mode 100644 index 0000000000000..f4987ae824e82 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyToNumValues/SSTs/000009-c5863121-41a9-4ffc-9c54-5f4c2a1139a2.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..14f1066107657 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/2.changelog new file mode 100644 index 
0000000000000..e1452512bade8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..6411680f0c8fb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6b20acbc9b79b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..982f220c2cbd7 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/SSTs/000008-a55c633d-a594-4cac-b6f2-c63907cb9581.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/SSTs/000008-a55c633d-a594-4cac-b6f2-c63907cb9581.sst new file mode 100644 index 0000000000000..6464d1dbfcc7e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/SSTs/000008-a55c633d-a594-4cac-b6f2-c63907cb9581.sst differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/SSTs/000009-f8390e48-d464-49b8-8a3b-6393448f907d.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/SSTs/000009-f8390e48-d464-49b8-8a3b-6393448f907d.sst new file mode 100644 index 0000000000000..df6034ac2f3db Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/2/right-keyWithIndexToValue/SSTs/000009-f8390e48-d464-49b8-8a3b-6393448f907d.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..99210c3a3c567 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..d02b01bc35f23 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..262abc1cc08a5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..e198b32fff211 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/SSTs/000008-ad09ce4b-4be4-44ce-a620-4244a73f84f5.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/SSTs/000008-ad09ce4b-4be4-44ce-a620-4244a73f84f5.sst new file mode 100644 index 0000000000000..07de5247476c7 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/SSTs/000008-ad09ce4b-4be4-44ce-a620-4244a73f84f5.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/SSTs/000009-5561a93a-1e0b-46aa-98d5-685048b992fa.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/SSTs/000009-5561a93a-1e0b-46aa-98d5-685048b992fa.sst new file mode 100644 index 0000000000000..7619f71cd55ad Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyToNumValues/SSTs/000009-5561a93a-1e0b-46aa-98d5-685048b992fa.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..150d775115a47 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..91cea46a6cd86 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/3.changelog 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..3964e93f5a400 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..9319b07246660 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/SSTs/000008-36205897-8ada-4516-b663-122c915754f0.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/SSTs/000008-36205897-8ada-4516-b663-122c915754f0.sst new file mode 100644 index 0000000000000..cad982214ad18 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/SSTs/000008-36205897-8ada-4516-b663-122c915754f0.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/SSTs/000009-bffa46f8-b6c7-431e-a579-d8725aec338b.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/SSTs/000009-bffa46f8-b6c7-431e-a579-d8725aec338b.sst new file mode 100644 index 0000000000000..aa392eaadd1d9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/left-keyWithIndexToValue/SSTs/000009-bffa46f8-b6c7-431e-a579-d8725aec338b.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..99210c3a3c567 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..3ae503e5deff3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..262abc1cc08a5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..4ba14fb5af442 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..5074a90bc3b90 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/SSTs/000008-f6d87143-b4d6-4c30-bd43-8e90c98704e9.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/SSTs/000008-f6d87143-b4d6-4c30-bd43-8e90c98704e9.sst new file mode 100644 index 0000000000000..e3dbd832a2321 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/SSTs/000008-f6d87143-b4d6-4c30-bd43-8e90c98704e9.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/SSTs/000009-64a8ab01-cdfe-4c82-94e3-5bc4c64b6671.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/SSTs/000009-64a8ab01-cdfe-4c82-94e3-5bc4c64b6671.sst new file mode 100644 index 0000000000000..4801a5b8254f7 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyToNumValues/SSTs/000009-64a8ab01-cdfe-4c82-94e3-5bc4c64b6671.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6d01a6e79416f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..6a9f94a8b33bf Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..28785bebe081e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/4.zip 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..5b8f0d2bbf151 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..ff933a6cfd56c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/SSTs/000008-50764781-b9d2-482d-a307-b2d1cab8c639.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/SSTs/000008-50764781-b9d2-482d-a307-b2d1cab8c639.sst new file mode 100644 index 0000000000000..9a627936b57ca Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/SSTs/000008-50764781-b9d2-482d-a307-b2d1cab8c639.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/SSTs/000009-38fca8e3-893a-4d8b-8f4a-57366102a54c.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/SSTs/000009-38fca8e3-893a-4d8b-8f4a-57366102a54c.sst new file mode 100644 index 0000000000000..307b166f4e9b3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/3/right-keyWithIndexToValue/SSTs/000009-38fca8e3-893a-4d8b-8f4a-57366102a54c.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..9099712c6134d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..c5a3af6244a01 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..1ac5a3348bbe6 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..4b3cfd1f719bb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..54e0959698acf Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..298ebddcda718 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/SSTs/000008-fd11870f-bea5-4721-aedf-b14399b966bd.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/SSTs/000008-fd11870f-bea5-4721-aedf-b14399b966bd.sst new file mode 100644 index 0000000000000..4ae4e252afb2a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/SSTs/000008-fd11870f-bea5-4721-aedf-b14399b966bd.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/SSTs/000009-1ba9d77a-6c31-42e5-a18e-f66c0ef201b5.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/SSTs/000009-1ba9d77a-6c31-42e5-a18e-f66c0ef201b5.sst new file mode 100644 index 0000000000000..c3dd64fbe2805 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyToNumValues/SSTs/000009-1ba9d77a-6c31-42e5-a18e-f66c0ef201b5.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/1.changelog 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..a7735153b54cc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..f4407b5c4f83b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..ee8a442d7eaa0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..3e2ea96d857f0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..2dfce06a4435b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..9ec7dcb0d0515 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/5.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/SSTs/000008-7fbc8ecb-8e90-4de6-a334-ce789dfe3dd5.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/SSTs/000008-7fbc8ecb-8e90-4de6-a334-ce789dfe3dd5.sst new file mode 100644 index 0000000000000..6b5b6c05ae0d2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/SSTs/000008-7fbc8ecb-8e90-4de6-a334-ce789dfe3dd5.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/SSTs/000009-05c7d657-5d52-4d9d-9e5e-d28daf8d6500.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/SSTs/000009-05c7d657-5d52-4d9d-9e5e-d28daf8d6500.sst new file mode 100644 index 0000000000000..72187124c13f2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/left-keyWithIndexToValue/SSTs/000009-05c7d657-5d52-4d9d-9e5e-d28daf8d6500.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..9099712c6134d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..c5a3af6244a01 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..ed87d10aecafa Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..298ebddcda718 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/4.changelog new file mode 100644 index 
0000000000000..4b3cfd1f719bb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..b777691d26852 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..d50df0481f970 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/SSTs/000008-fb249fa1-17cf-4f28-99fa-e31932ec1caf.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/SSTs/000008-fb249fa1-17cf-4f28-99fa-e31932ec1caf.sst new file mode 100644 index 0000000000000..c8b94f7b4bc85 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/SSTs/000008-fb249fa1-17cf-4f28-99fa-e31932ec1caf.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/SSTs/000009-6dc46235-1157-4bb7-9af0-58aee4e2edf2.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/SSTs/000009-6dc46235-1157-4bb7-9af0-58aee4e2edf2.sst new file mode 100644 index 0000000000000..564acccff46f8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyToNumValues/SSTs/000009-6dc46235-1157-4bb7-9af0-58aee4e2edf2.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..32458caceea0a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..a3bc5bfd26f8e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/2.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..54fd13c914ae4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..9ec7dcb0d0515 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..e3906cea3366a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..97ba13e356c67 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..ffe4b92ca56dd Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/SSTs/000008-2695fe50-483f-4beb-a12d-770bfe63da9c.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/SSTs/000008-2695fe50-483f-4beb-a12d-770bfe63da9c.sst new file mode 100644 index 0000000000000..fa6f037ecb6ab Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/SSTs/000008-2695fe50-483f-4beb-a12d-770bfe63da9c.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/SSTs/000009-c819e3a7-826b-46d0-8667-1670b9b2d13c.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/SSTs/000009-c819e3a7-826b-46d0-8667-1670b9b2d13c.sst new file mode 
100644
index 0000000000000..9db30172f1477
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/4/right-keyWithIndexToValue/SSTs/000009-c819e3a7-826b-46d0-8667-1670b9b2d13c.sst differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/_metadata/metadata
new file mode 100644
index 0000000000000..b73f1e3e66ac5
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join1/state/0/_metadata/metadata
@@ -0,0 +1,2 @@
+v1
+{"operatorInfo":{"operatorId":0,"operatorName":"symmetricHashJoin"},"stateStoreInfo":[{"storeName":"left-keyToNumValues","numColsPrefixKey":0,"numPartitions":5},{"storeName":"left-keyWithIndexToValue","numColsPrefixKey":0,"numPartitions":5},{"storeName":"right-keyToNumValues","numColsPrefixKey":0,"numPartitions":5},{"storeName":"right-keyWithIndexToValue","numColsPrefixKey":0,"numPartitions":5}]}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/0
new file mode 100644
index 0000000000000..9c1e3021c3ead
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/0
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":0}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/1
new file mode 100644
index 0000000000000..1715390973f9b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/1
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":5000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/2
new file mode 100644
index 0000000000000..1715390973f9b
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/2
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":5000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/3
new file mode 100644
index 0000000000000..2a2d02b0bb3b3
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/3
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":9000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/4
new file mode 100644
index 0000000000000..2a2d02b0bb3b3
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/commits/4
@@ -0,0 +1,2 @@
+v1
+{"nextBatchWatermarkMs":9000}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/metadata
new file mode 100644
index 0000000000000..67f735ddd54b5
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/metadata
@@ -0,0 +1 @@
+{"id":"cb2f2800-2129-4356-bf83-e985db7d8556"}
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/0
new file mode 100644
index 0000000000000..fe391df89fb42
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/0
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":0,"batchTimestampMs":1719290972508,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+0
+0
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/1
new file mode 100644
index 0000000000000..448160d0ae047
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/1
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":0,"batchTimestampMs":1719290977372,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+1
+1
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/2
new file mode 100644
index 0000000000000..740bc6b031054
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/2
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":5000,"batchTimestampMs":1719290979966,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+1
+1
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/3
new file mode 100644
index 0000000000000..1987fabf89c19
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/3
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":5000,"batchTimestampMs":1719290983375,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+2
+2
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/4 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/4
new file mode 100644
index 0000000000000..7eeef49c1f168
--- /dev/null
+++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/offsets/4
@@ -0,0 +1,4 @@
+v1
+{"batchWatermarkMs":9000,"batchTimestampMs":1719290985871,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}}
+2
+2
\ No newline at end of file
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/1.changelog
new file mode 100644
index 0000000000000..6352978051846
Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/1.changelog differ
diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/2.changelog
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..89a73981724aa Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..85424f0d5afba Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/1.changelog 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..89a73981724aa Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..85424f0d5afba Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/_metadata/schema 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..42448b3b584ce Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/left-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..993d32e965db5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..cfcea9ae02e88 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/5.changelog 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/_metadata/schema new file mode 100644 index 0000000000000..4da637d143496 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyToNumValues/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..993d32e965db5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/4.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..cfcea9ae02e88 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/_metadata/schema new file mode 100644 index 0000000000000..8fa8f1675bc82 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/0/right-keyWithIndexToValue/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..89a73981724aa Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/3.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..cfcea9ae02e88 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..89a73981724aa Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/3.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..85424f0d5afba Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..993d32e965db5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/3.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..cfcea9ae02e88 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..993d32e965db5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/3.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..cfcea9ae02e88 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/1/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..2daeae74a72b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..abdd6a5fca257 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..23e8882b8d9f2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/3.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..218dc89456c8b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/SSTs/000008-bfd9de3b-a339-42ad-8270-343b70b3999e.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/SSTs/000008-bfd9de3b-a339-42ad-8270-343b70b3999e.sst new file mode 100644 index 0000000000000..6dcf6ef780c87 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyToNumValues/SSTs/000008-bfd9de3b-a339-42ad-8270-343b70b3999e.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..88a2191cae130 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..57d0436d95d79 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..46803957b004a Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..2d25b2447e398 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/SSTs/000008-daf5150d-98b0-4042-ac23-f6ffcb9eef2f.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/SSTs/000008-daf5150d-98b0-4042-ac23-f6ffcb9eef2f.sst new file mode 100644 index 0000000000000..10fd7fc691c70 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/left-keyWithIndexToValue/SSTs/000008-daf5150d-98b0-4042-ac23-f6ffcb9eef2f.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..2daeae74a72b9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/2.changelog 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..abdd6a5fca257 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..ad3d69d2a8789 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..e8da61cb5ddc9 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..c89c97f5641fe Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/SSTs/000008-8de57e57-02b1-47fc-8150-eb984771e1ca.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/SSTs/000008-8de57e57-02b1-47fc-8150-eb984771e1ca.sst new file mode 100644 index 0000000000000..4744bbf75039b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/SSTs/000008-8de57e57-02b1-47fc-8150-eb984771e1ca.sst differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/SSTs/000009-a8eb1b10-f5fe-4a44-94ab-545d1e33121c.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/SSTs/000009-a8eb1b10-f5fe-4a44-94ab-545d1e33121c.sst new file mode 100644 index 0000000000000..5706bad6347fe Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyToNumValues/SSTs/000009-a8eb1b10-f5fe-4a44-94ab-545d1e33121c.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..14f1066107657 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..e1452512bade8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..a600351f2674f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6b20acbc9b79b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..4d53649b5608c Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/SSTs/000008-4f4617f6-01ce-48f5-aaa9-c9d7007ac482.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/SSTs/000008-4f4617f6-01ce-48f5-aaa9-c9d7007ac482.sst new file mode 100644 index 0000000000000..650af4d3042de Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/SSTs/000008-4f4617f6-01ce-48f5-aaa9-c9d7007ac482.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/SSTs/000009-840bae7d-ff30-4aef-9672-bf70fa6943c4.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/SSTs/000009-840bae7d-ff30-4aef-9672-bf70fa6943c4.sst new file mode 100644 index 0000000000000..e1ed48d909d9c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/2/right-keyWithIndexToValue/SSTs/000009-840bae7d-ff30-4aef-9672-bf70fa6943c4.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..99210c3a3c567 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..2123cc3a99531 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/3.changelog 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..262abc1cc08a5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..83efaf04ab162 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/SSTs/000008-e1bd2941-d044-4898-a841-7d6d094b7c30.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/SSTs/000008-e1bd2941-d044-4898-a841-7d6d094b7c30.sst new file mode 100644 index 0000000000000..37af8fb0bf6b4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/SSTs/000008-e1bd2941-d044-4898-a841-7d6d094b7c30.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/SSTs/000009-e646f573-4a5b-408d-88da-85810aabe966.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/SSTs/000009-e646f573-4a5b-408d-88da-85810aabe966.sst new file mode 100644 index 0000000000000..149350b5abec4 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyToNumValues/SSTs/000009-e646f573-4a5b-408d-88da-85810aabe966.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..150d775115a47 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..4f0ff11d17441 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..3964e93f5a400 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..c4ac694c0f31b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/SSTs/000008-28cb1ac6-cf15-4f63-8c74-ea655f8ce669.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/SSTs/000008-28cb1ac6-cf15-4f63-8c74-ea655f8ce669.sst new 
file mode 100644 index 0000000000000..cbf2c438d8437 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/SSTs/000008-28cb1ac6-cf15-4f63-8c74-ea655f8ce669.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/SSTs/000009-5a4a38a3-8e5a-4436-b0b7-718fe7ed09a6.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/SSTs/000009-5a4a38a3-8e5a-4436-b0b7-718fe7ed09a6.sst new file mode 100644 index 0000000000000..f3769a666d813 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/left-keyWithIndexToValue/SSTs/000009-5a4a38a3-8e5a-4436-b0b7-718fe7ed09a6.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..99210c3a3c567 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..4a5a9bd672051 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..262abc1cc08a5 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/4.zip 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..52e86b7a99073 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..5074a90bc3b90 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/SSTs/000008-294ce63e-1e67-4bc5-98fa-67b6f40669ff.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/SSTs/000008-294ce63e-1e67-4bc5-98fa-67b6f40669ff.sst new file mode 100644 index 0000000000000..0e93b773ae5ca Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/SSTs/000008-294ce63e-1e67-4bc5-98fa-67b6f40669ff.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/SSTs/000009-ad3aeaf9-d804-4237-91e5-e2d61f7c8338.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/SSTs/000009-ad3aeaf9-d804-4237-91e5-e2d61f7c8338.sst new file mode 100644 index 0000000000000..2eb1556d44bac Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyToNumValues/SSTs/000009-ad3aeaf9-d804-4237-91e5-e2d61f7c8338.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..6d01a6e79416f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..e64d4d046de88 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..28785bebe081e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..e5fda4000120f Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..ff933a6cfd56c Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/SSTs/000008-e9c9dd20-a2b3-4eac-94e6-3336a8bf2953.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/SSTs/000008-e9c9dd20-a2b3-4eac-94e6-3336a8bf2953.sst new file mode 100644 index 0000000000000..a737c2036d013 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/SSTs/000008-e9c9dd20-a2b3-4eac-94e6-3336a8bf2953.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/SSTs/000009-7db59f7a-dabc-41f7-8878-87ca30afb09d.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/SSTs/000009-7db59f7a-dabc-41f7-8878-87ca30afb09d.sst new file mode 100644 index 0000000000000..4af5716c9c24e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/3/right-keyWithIndexToValue/SSTs/000009-7db59f7a-dabc-41f7-8878-87ca30afb09d.sst differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..9099712c6134d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..c5a3af6244a01 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/2.zip new file mode 100644 index 0000000000000..a62d4d6504f88 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..4b3cfd1f719bb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/4.zip new file mode 100644 index 0000000000000..3003df2131bf3 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..298ebddcda718 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/5.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/SSTs/000008-a9c68839-18e4-4ef3-a329-432d2810b0c3.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/SSTs/000008-a9c68839-18e4-4ef3-a329-432d2810b0c3.sst new file mode 100644 index 0000000000000..5b9827d63d689 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/SSTs/000008-a9c68839-18e4-4ef3-a329-432d2810b0c3.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/SSTs/000009-e91db969-62ec-45d7-a7da-a4301e129feb.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/SSTs/000009-e91db969-62ec-45d7-a7da-a4301e129feb.sst new file mode 100644 index 0000000000000..ad1f76fb3e750 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyToNumValues/SSTs/000009-e91db969-62ec-45d7-a7da-a4301e129feb.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..a7735153b54cc Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..f4407b5c4f83b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..b251d50f7e4ba Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..6352978051846 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/4.changelog new file 
mode 100644 index 0000000000000..3e2ea96d857f0 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..1873033aa1c9a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/5.changelog new file mode 100644 index 0000000000000..9ec7dcb0d0515 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/SSTs/000008-5e19d13c-8855-47c9-8d4b-f14964f7fdfa.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/SSTs/000008-5e19d13c-8855-47c9-8d4b-f14964f7fdfa.sst new file mode 100644 index 0000000000000..71dec68b333a2 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/SSTs/000008-5e19d13c-8855-47c9-8d4b-f14964f7fdfa.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/SSTs/000009-a97d5e47-e4eb-4878-b62d-30cc77dac3b1.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/SSTs/000009-a97d5e47-e4eb-4878-b62d-30cc77dac3b1.sst new file mode 100644 index 0000000000000..a562e80e27871 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/left-keyWithIndexToValue/SSTs/000009-a97d5e47-e4eb-4878-b62d-30cc77dac3b1.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/1.changelog new file mode 100644 index 0000000000000..9099712c6134d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/2.changelog new file mode 100644 index 0000000000000..c5a3af6244a01 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/2.changelog differ diff --git 
a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/2.zip new file mode 100644 index 0000000000000..5c32348084ebb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/3.changelog new file mode 100644 index 0000000000000..298ebddcda718 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/4.changelog new file mode 100644 index 0000000000000..4b3cfd1f719bb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/4.zip new file mode 100644 index 0000000000000..954265fe88cfa Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/5.changelog new file mode 100644 index 0000000000000..d50df0481f970 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/SSTs/000008-432ddd3b-981c-4a2b-a1e4-e06ecbc4a8a7.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/SSTs/000008-432ddd3b-981c-4a2b-a1e4-e06ecbc4a8a7.sst new file mode 100644 index 0000000000000..e752148099e3d Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/SSTs/000008-432ddd3b-981c-4a2b-a1e4-e06ecbc4a8a7.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/SSTs/000009-2e821d73-a2c6-4261-bcb6-d69e82e4c0d3.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/SSTs/000009-2e821d73-a2c6-4261-bcb6-d69e82e4c0d3.sst new file mode 100644 index 0000000000000..27f61ffe48bf8 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyToNumValues/SSTs/000009-2e821d73-a2c6-4261-bcb6-d69e82e4c0d3.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/1.changelog new file mode 100644 index 0000000000000..32458caceea0a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/2.changelog new file mode 100644 index 0000000000000..a3bc5bfd26f8e Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/2.zip new file mode 100644 index 0000000000000..13bd0a0979309 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/3.changelog new file mode 100644 index 0000000000000..9ec7dcb0d0515 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/4.changelog new file mode 100644 index 0000000000000..e3906cea3366a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/4.zip new file mode 100644 index 0000000000000..d403db9b3e121 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/5.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/5.changelog new file mode 100644 index 
0000000000000..ffe4b92ca56dd Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/5.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/SSTs/000008-4e1c24f5-bbcb-4797-95d9-cfa4de1be09d.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/SSTs/000008-4e1c24f5-bbcb-4797-95d9-cfa4de1be09d.sst new file mode 100644 index 0000000000000..2aafcbeadee17 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/SSTs/000008-4e1c24f5-bbcb-4797-95d9-cfa4de1be09d.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/SSTs/000009-366ba158-6a59-4922-9cb0-df3e2b9aa789.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/SSTs/000009-366ba158-6a59-4922-9cb0-df3e2b9aa789.sst new file mode 100644 index 0000000000000..68f1e65fa2885 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/4/right-keyWithIndexToValue/SSTs/000009-366ba158-6a59-4922-9cb0-df3e2b9aa789.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/_metadata/metadata new file mode 100644 index 0000000000000..b73f1e3e66ac5 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/join2/state/0/_metadata/metadata @@ -0,0 +1,2 @@ +v1 +{"operatorInfo":{"operatorId":0,"operatorName":"symmetricHashJoin"},"stateStoreInfo":[{"storeName":"left-keyToNumValues","numColsPrefixKey":0,"numPartitions":5},{"storeName":"left-keyWithIndexToValue","numColsPrefixKey":0,"numPartitions":5},{"storeName":"right-keyToNumValues","numColsPrefixKey":0,"numPartitions":5},{"storeName":"right-keyWithIndexToValue","numColsPrefixKey":0,"numPartitions":5}]} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/2 new file mode 100644 index 0000000000000..9c1e3021c3ead --- 
/dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/2 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/3 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/commits/3 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/metadata new file mode 100644 index 0000000000000..fa82647084453 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/metadata @@ -0,0 +1 @@ +{"id":"711e4377-ce90-456e-8530-9e0374ce4791"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/0 new file mode 100644 index 0000000000000..50bf84e3a759e --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290921178,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/1 new file mode 100644 index 0000000000000..1690ffdd2b79b --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290923799,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/2 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/2 new file mode 100644 index 0000000000000..c331cc80a3e1e --- /dev/null +++ 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/2 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290926380,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +2 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/3 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/3 new file mode 100644 index 0000000000000..2d42b90630780 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/offsets/3 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1719290928784,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4"}} +3 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/1.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/1.changelog new file mode 100644 index 0000000000000..1d9f25b472ace Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/1.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/2.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/2.changelog new file mode 100644 index 0000000000000..c335f52e07bca Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/2.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/2.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/2.zip new file mode 100644 index 0000000000000..e707c3f31376a Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/2.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/3.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/3.changelog new file mode 100644 index 0000000000000..41e61d1915140 Binary files /dev/null and 
b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/3.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/4.changelog b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/4.changelog new file mode 100644 index 0000000000000..d51b4fc081740 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/4.changelog differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/4.zip b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/4.zip new file mode 100644 index 0000000000000..35f294cf782ce Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/4.zip differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/SSTs/000008-8f6e7567-9c70-4f55-9f28-8b6b497c85ae.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/SSTs/000008-8f6e7567-9c70-4f55-9f28-8b6b497c85ae.sst new file mode 100644 index 0000000000000..e8da97359c7fb Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/SSTs/000008-8f6e7567-9c70-4f55-9f28-8b6b497c85ae.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/SSTs/000009-71eca19b-ab68-4608-bcfa-897365067239.sst b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/SSTs/000009-71eca19b-ab68-4608-bcfa-897365067239.sst new file mode 100644 index 0000000000000..fd296636929b8 Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/SSTs/000009-71eca19b-ab68-4608-bcfa-897365067239.sst differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/_metadata/schema new file mode 100644 index 0000000000000..371b0df09d80b Binary files /dev/null and b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/0/_metadata/schema differ diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/_metadata/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/_metadata/metadata new file mode 100644 index 0000000000000..5792421dd423e --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-4.0.0/rocksdb/limit/state/0/_metadata/metadata @@ -0,0 +1,2 @@ +v1 +{"operatorInfo":{"operatorId":0,"operatorName":"globalLimit"},"stateStoreInfo":[{"storeName":"default","numColsPrefixKey":0,"numPartitions":5}]} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas b/sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas index 8d6ff942610c4..f6eadd776cc6e 100644 --- a/sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas +++ 
b/sql/core/src/test/resources/structured-streaming/partition-tests/randomSchemas @@ -1 +1 @@ -col_0 STRUCT NOT NULL, col_3: FLOAT NOT NULL, col_4: INT NOT NULL>,col_1 STRUCT, col_3: ARRAY NOT NULL, col_4: ARRAY, col_5: TIMESTAMP NOT NULL, col_6: STRUCT, col_1: BIGINT NOT NULL> NOT NULL, col_7: ARRAY NOT NULL, col_8: ARRAY, col_9: BIGINT NOT NULL> NOT NULL,col_2 BIGINT NOT NULL,col_3 STRUCT NOT NULL,col_4 STRUCT NOT NULL> NOT NULL> NOT NULL,col_5 ARRAY NOT NULL +col_0 ARRAY,col_1 STRUCT NOT NULL,col_2 STRING NOT NULL,col_3 STRUCT, col_2: ARRAY NOT NULL> NOT NULL,col_4 BINARY NOT NULL,col_5 ARRAY NOT NULL,col_6 ARRAY,col_7 DOUBLE NOT NULL,col_8 ARRAY NOT NULL,col_9 ARRAY,col_10 FLOAT NOT NULL,col_11 STRUCT NOT NULL>, col_1: STRUCT NOT NULL, col_1: INT, col_2: STRUCT> NOT NULL>, col_2: BINARY NOT NULL, col_3: STRUCT NOT NULL> NOT NULL> NOT NULL,col_12 ARRAY diff --git a/sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds b/sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds index 3902d6d7d5f61..1b2eda8502e5e 100644 Binary files a/sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds and b/sql/core/src/test/resources/structured-streaming/partition-tests/rowsAndPartIds differ diff --git a/sql/core/src/test/resources/test-data/char.csv b/sql/core/src/test/resources/test-data/char.csv new file mode 100644 index 0000000000000..d2be68a15fc12 --- /dev/null +++ b/sql/core/src/test/resources/test-data/char.csv @@ -0,0 +1,4 @@ +color,name +pink,Bob +blue,Mike +grey,Tom diff --git a/sql/core/src/test/resources/test-data/xml-resources/cdata-ending-eof.xml b/sql/core/src/test/resources/test-data/xml-resources/cdata-ending-eof.xml index 15edf3186a52a..c1ec547db3391 100644 --- a/sql/core/src/test/resources/test-data/xml-resources/cdata-ending-eof.xml +++ b/sql/core/src/test/resources/test-data/xml-resources/cdata-ending-eof.xml @@ -23,10 +23,11 @@ 10 0]]> - 0]]> 11 0]]> - 0 ]]> 0]]> 12 0]]> + + + 0 ]]> 0]]> 13 - 0 diff --git a/sql/core/src/test/resources/test-data/xml-resources/cdata-no-close.xml b/sql/core/src/test/resources/test-data/xml-resources/cdata-no-close.xml index 45ee814001d58..133ef22538ec3 100644 --- a/sql/core/src/test/resources/test-data/xml-resources/cdata-no-close.xml +++ b/sql/core/src/test/resources/test-data/xml-resources/cdata-no-close.xml @@ -23,10 +23,11 @@ 10 0]]> - 0]]> 11 0]]> - 0 ]]> 0]]> 12 0]]> + + + 0 ]]> 0]]> 13 - 0 @@ -44,5 +45,4 @@ 17 + + + ignored row ]]> + 1 ]]> + + 3 ]]> + + 5 ]]> + ignored row ]]> + diff --git a/sql/core/src/test/resources/test-data/xml-resources/ignored-rows.xml b/sql/core/src/test/resources/test-data/xml-resources/ignored-rows.xml index 31822ee3e3e96..3b964c68cfcb2 100644 --- a/sql/core/src/test/resources/test-data/xml-resources/ignored-rows.xml +++ b/sql/core/src/test/resources/test-data/xml-resources/ignored-rows.xml @@ -23,10 +23,11 @@ 10 0]]> - 0]]> 11 0]]> - 0 ]]> 0]]> 12 0]]> + + + 0 ]]> 0]]> 13 - 0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala index 4edb51d271903..9b39a2295e7d6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala @@ -26,10 +26,12 @@ import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.LongType +import org.apache.spark.tags.ExtendedSQLTest /** * Query tests for the Bloom filter aggregate and filter function. */ +@ExtendedSQLTest class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { import testImplicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala index a06b50d175f90..7b608b7438c29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala @@ -703,6 +703,17 @@ abstract class CTEInlineSuiteBase checkErrorTableNotFound(e, "`tab_non_exists`", ExpectedContext("tab_non_exists", 83, 96)) } } + + test("SPARK-48307: not-inlined CTE references sibling") { + val df = sql( + """ + |WITH + |v1 AS (SELECT 1 col), + |v2 AS (SELECT col, rand() FROM v1) + |SELECT l.col FROM v2 l JOIN v2 r ON l.col = r.col + |""".stripMargin) + checkAnswer(df, Row(1)) + } } class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with DisableAdaptiveExecutionSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 7865e7f1f864c..71b420bb85eac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -1107,7 +1107,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils assert(queryStats1.map(_._1.name).isEmpty) val cacheManager = spark.sharedState.cacheManager - val cachedData = cacheManager.lookupCachedData(query().logicalPlan) + val cachedData = cacheManager.lookupCachedData(query()) assert(cachedData.isDefined) val queryAttrs = cachedData.get.plan.output assert(queryAttrs.size === 3) @@ -1436,7 +1436,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils withSQLConf(SQLConf.STORE_ANALYZED_PLAN_FOR_VIEW.key -> storeAnalyzed.toString) { withGlobalTempView("view1") { withTempView("view2") { - val db = spark.sharedState.globalTempViewManager.database + val db = spark.sharedState.globalTempDB sql("CREATE GLOBAL TEMPORARY VIEW view1 AS SELECT * FROM testData WHERE key > 1") sql(s"CACHE TABLE view2 AS SELECT * FROM ${db}.view1 WHERE value > 1") assert(spark.catalog.isCached("view2")) @@ -1487,7 +1487,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils withSQLConf(SQLConf.STORE_ANALYZED_PLAN_FOR_VIEW.key -> storeAnalyzed.toString) { withGlobalTempView("view1") { withTempView("view2") { - val db = spark.sharedState.globalTempViewManager.database + val db = spark.sharedState.globalTempDB sql("CREATE GLOBAL TEMPORARY VIEW view1 AS SELECT * FROM testData WHERE key > 1") sql(s"CACHE TABLE view2 AS SELECT * FROM $db.view1 WHERE value > 1") assert(spark.catalog.isCached("view2")) @@ -1517,7 +1517,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils Seq(true, false).foreach { storeAnalyzed => withSQLConf(SQLConf.STORE_ANALYZED_PLAN_FOR_VIEW.key -> storeAnalyzed.toString) { withGlobalTempView("global_tv") { - val db = spark.sharedState.globalTempViewManager.database + val db = spark.sharedState.globalTempDB testAlterTemporaryViewAsWithCache(TableIdentifier("global_tv", Some(db)), storeAnalyzed) } } @@ -1575,7 +1575,7 @@ class CachedTableSuite extends QueryTest with SQLTestUtils test("SPARK-34699: CREATE GLOBAL TEMP VIEW USING should uncache correctly") { withGlobalTempView("global_tv") 
{ - val db = spark.sharedState.globalTempViewManager.database + val db = spark.sharedState.globalTempDB testCreateTemporaryViewUsingWithCache(TableIdentifier("global_tv", Some(db))) } } @@ -1770,4 +1770,23 @@ class CachedTableSuite extends QueryTest with SQLTestUtils withSQLConf(SQLConf.DEFAULT_CACHE_STORAGE_LEVEL.key -> "DISK") {} } } + + test("SPARK-47633: Cache hit for lateral join with join condition") { + withTempView("t", "q1") { + sql("create or replace temp view t(c1, c2) as values (0, 1), (1, 2)") + val query = """select * + |from t + |join lateral ( + | select c1 as a, c2 as b + | from t) + |on c1 = a; + |""".stripMargin + sql(s"cache table q1 as $query") + val df = sql(query) + checkAnswer(df, + Row(0, 1, 0, 1) :: Row(1, 2, 1, 2) :: Nil) + assert(getNumInMemoryRelations(df) == 1) + } + + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 013177425da78..a93dee3bf2a61 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -942,6 +942,34 @@ class FileSourceCharVarcharTestSuite extends CharVarcharTestSuite with SharedSpa } } } + + test("SPARK-48498: always do char padding in predicates") { + import testImplicits._ + withSQLConf(SQLConf.READ_SIDE_CHAR_PADDING.key -> "false") { + withTempPath { dir => + withTable("t") { + Seq( + "12" -> "12", + "12" -> "12 ", + "12 " -> "12", + "12 " -> "12 " + ).toDF("c1", "c2").write.format(format).save(dir.toString) + sql(s"CREATE TABLE t (c1 CHAR(3), c2 STRING) USING $format LOCATION '$dir'") + // Comparing CHAR column with STRING column directly compares the stored value. + checkAnswer( + sql("SELECT c1 = c2 FROM t"), + Seq(Row(true), Row(false), Row(false), Row(true)) + ) + // No matter the CHAR type value is padded or not in the storage, we should always pad it + // before comparison with STRING literals. + checkAnswer( + sql("SELECT c1 = '12', c1 = '12 ', c1 = '12 ' FROM t WHERE c2 = '12'"), + Seq(Row(true, true, true), Row(true, true, true)) + ) + } + } + } + } } class DSV2CharVarcharTestSuite extends CharVarcharTestSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala deleted file mode 100644 index 0876425847bbb..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationRegexpExpressionsSuite.scala +++ /dev/null @@ -1,329 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.sql - -import scala.collection.immutable.Seq - -import org.apache.spark.SparkConf -import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{ArrayType, BooleanType, IntegerType, StringType} - -class CollationRegexpExpressionsSuite - extends QueryTest - with SharedSparkSession - with ExpressionEvalHelper { - - test("Support Like string expression with collation") { - // Supported collations - case class LikeTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - LikeTestCase("ABC", "%B%", "UTF8_BINARY", true) - ) - testCases.foreach(t => { - val query = s"SELECT like(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class LikeTestFail(l: String, r: String, c: String) - val failCases = Seq( - LikeTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"), - LikeTestFail("ABC", "%B%", "UNICODE"), - LikeTestFail("ABC", "%b%", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = s"SELECT like(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support ILike string expression with collation") { - // Supported collations - case class ILikeTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - ILikeTestCase("ABC", "%b%", "UTF8_BINARY", true) - ) - testCases.foreach(t => { - val query = s"SELECT ilike(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class ILikeTestFail(l: String, r: String, c: String) - val failCases = Seq( - ILikeTestFail("ABC", "%b%", "UTF8_BINARY_LCASE"), - ILikeTestFail("ABC", "%b%", "UNICODE"), - ILikeTestFail("ABC", "%b%", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = s"SELECT ilike(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support RLike string expression with collation") { - // Supported collations - case class RLikeTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - RLikeTestCase("ABC", ".B.", "UTF8_BINARY", true) - ) - testCases.foreach(t => { - val query = s"SELECT rlike(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class RLikeTestFail(l: String, r: String, c: String) - val failCases = Seq( - RLikeTestFail("ABC", ".b.", "UTF8_BINARY_LCASE"), - RLikeTestFail("ABC", ".B.", "UNICODE"), - RLikeTestFail("ABC", ".b.", "UNICODE_CI") - ) - 
failCases.foreach(t => { - val query = s"SELECT rlike(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support StringSplit string expression with collation") { - // Supported collations - case class StringSplitTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C")) - ) - testCases.foreach(t => { - val query = s"SELECT split(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(ArrayType(StringType(t.c)))) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class StringSplitTestFail(l: String, r: String, c: String) - val failCases = Seq( - StringSplitTestFail("ABC", "[b]", "UTF8_BINARY_LCASE"), - StringSplitTestFail("ABC", "[B]", "UNICODE"), - StringSplitTestFail("ABC", "[b]", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = s"SELECT split(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support RegExpReplace string expression with collation") { - // Supported collations - case class RegExpReplaceTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - RegExpReplaceTestCase("ABCDE", ".C.", "UTF8_BINARY", "AFFFE") - ) - testCases.foreach(t => { - val query = - s"SELECT regexp_replace(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'), 'FFF')" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class RegExpReplaceTestFail(l: String, r: String, c: String) - val failCases = Seq( - RegExpReplaceTestFail("ABCDE", ".c.", "UTF8_BINARY_LCASE"), - RegExpReplaceTestFail("ABCDE", ".C.", "UNICODE"), - RegExpReplaceTestFail("ABCDE", ".c.", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = - s"SELECT regexp_replace(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'), 'FFF')" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support RegExpExtract string expression with collation") { - // Supported collations - case class RegExpExtractTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - RegExpExtractTestCase("ABCDE", ".C.", "UTF8_BINARY", "BCD") - ) - testCases.foreach(t => { - val query = - s"SELECT regexp_extract(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'), 0)" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class RegExpExtractTestFail(l: String, r: String, c: String) - val failCases = Seq( - RegExpExtractTestFail("ABCDE", 
".c.", "UTF8_BINARY_LCASE"), - RegExpExtractTestFail("ABCDE", ".C.", "UNICODE"), - RegExpExtractTestFail("ABCDE", ".c.", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = - s"SELECT regexp_extract(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'), 0)" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support RegExpExtractAll string expression with collation") { - // Supported collations - case class RegExpExtractAllTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - RegExpExtractAllTestCase("ABCDE", ".C.", "UTF8_BINARY", Seq("BCD")) - ) - testCases.foreach(t => { - val query = - s"SELECT regexp_extract_all(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'), 0)" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(ArrayType(StringType(t.c)))) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class RegExpExtractAllTestFail(l: String, r: String, c: String) - val failCases = Seq( - RegExpExtractAllTestFail("ABCDE", ".c.", "UTF8_BINARY_LCASE"), - RegExpExtractAllTestFail("ABCDE", ".C.", "UNICODE"), - RegExpExtractAllTestFail("ABCDE", ".c.", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = - s"SELECT regexp_extract_all(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'), 0)" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support RegExpCount string expression with collation") { - // Supported collations - case class RegExpCountTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - RegExpCountTestCase("ABCDE", ".C.", "UTF8_BINARY", 1) - ) - testCases.foreach(t => { - val query = s"SELECT regexp_count(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(IntegerType)) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class RegExpCountTestFail(l: String, r: String, c: String) - val failCases = Seq( - RegExpCountTestFail("ABCDE", ".c.", "UTF8_BINARY_LCASE"), - RegExpCountTestFail("ABCDE", ".C.", "UNICODE"), - RegExpCountTestFail("ABCDE", ".c.", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = s"SELECT regexp_count(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support RegExpSubStr string expression with collation") { - // Supported collations - case class RegExpSubStrTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - RegExpSubStrTestCase("ABCDE", ".C.", "UTF8_BINARY", "BCD") - ) - testCases.foreach(t => { - val query = s"SELECT regexp_substr(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) - // TODO: Implicit casting (not 
currently supported) - }) - // Unsupported collations - case class RegExpSubStrTestFail(l: String, r: String, c: String) - val failCases = Seq( - RegExpSubStrTestFail("ABCDE", ".c.", "UTF8_BINARY_LCASE"), - RegExpSubStrTestFail("ABCDE", ".C.", "UNICODE"), - RegExpSubStrTestFail("ABCDE", ".c.", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = s"SELECT regexp_substr(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - val unsupportedCollation = intercept[AnalysisException] { sql(query) } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - - test("Support RegExpInStr string expression with collation") { - // Supported collations - case class RegExpInStrTestCase[R](l: String, r: String, c: String, result: R) - val testCases = Seq( - RegExpInStrTestCase("ABCDE", ".C.", "UTF8_BINARY", 2) - ) - testCases.foreach(t => { - val query = s"SELECT regexp_instr(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - // Result & data type - checkAnswer(sql(query), Row(t.result)) - assert(sql(query).schema.fields.head.dataType.sameType(IntegerType)) - // TODO: Implicit casting (not currently supported) - }) - // Unsupported collations - case class RegExpInStrTestFail(l: String, r: String, c: String) - val failCases = Seq( - RegExpInStrTestFail("ABCDE", ".c.", "UTF8_BINARY_LCASE"), - RegExpInStrTestFail("ABCDE", ".C.", "UNICODE"), - RegExpInStrTestFail("ABCDE", ".c.", "UNICODE_CI") - ) - failCases.foreach(t => { - val query = s"SELECT regexp_instr(collate('${t.l}', '${t.c}'), collate('${t.r}', '${t.c}'))" - val unsupportedCollation = intercept[AnalysisException] { - sql(query) - } - assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") - }) - // TODO: Collation mismatch (not currently supported) - } - -} - -class CollationRegexpExpressionsANSISuite extends CollationRegexpExpressionsSuite { - override protected def sparkConf: SparkConf = - super.sparkConf.set(SQLConf.ANSI_ENABLED, true) - - // TODO: If needed, add more tests for other regexp expressions (with ANSI mode enabled) - -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala new file mode 100644 index 0000000000000..7994c496cb65c --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala @@ -0,0 +1,2307 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql + +import java.sql.{Date, Timestamp} +import java.text.SimpleDateFormat + +import scala.collection.immutable.Seq + +import org.apache.spark.{SparkConf, SparkException, SparkIllegalArgumentException, SparkRuntimeException} +import org.apache.spark.sql.catalyst.ExtendedAnalysisException +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.expressions.aggregate.Mode +import org.apache.spark.sql.internal.{SqlApiConf, SQLConf} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.collection.OpenHashMap + +// scalastyle:off nonascii +class CollationSQLExpressionsSuite + extends QueryTest + with SharedSparkSession { + + private val testSuppCollations = Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI") + + test("Support Md5 hash expression with collation") { + case class Md5TestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + Md5TestCase("Spark", "UTF8_BINARY", "8cde774d6f7333752ed72cacddb05126"), + Md5TestCase("Spark", "UTF8_LCASE", "8cde774d6f7333752ed72cacddb05126"), + Md5TestCase("SQL", "UNICODE", "9778840a0100cb30c982876741b0b5a2"), + Md5TestCase("SQL", "UNICODE_CI", "9778840a0100cb30c982876741b0b5a2") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select md5('${t.input}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support Sha2 hash expression with collation") { + case class Sha2TestCase( + input: String, + collationName: String, + bitLength: Int, + result: String + ) + + val testCases = Seq( + Sha2TestCase("Spark", "UTF8_BINARY", 256, + "529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b"), + Sha2TestCase("Spark", "UTF8_LCASE", 256, + "529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b"), + Sha2TestCase("SQL", "UNICODE", 256, + "a7056a455639d1c7deec82ee787db24a0c1878e2792b4597709f0facf7cc7b35"), + Sha2TestCase("SQL", "UNICODE_CI", 256, + "a7056a455639d1c7deec82ee787db24a0c1878e2792b4597709f0facf7cc7b35") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select sha2('${t.input}', ${t.bitLength}) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support Sha1 hash expression with collation") { + case class Sha1TestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + Sha1TestCase("Spark", "UTF8_BINARY", "85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c"), + Sha1TestCase("Spark", "UTF8_LCASE", "85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c"), + Sha1TestCase("SQL", "UNICODE", "2064cb643caa8d9e1de12eea7f3e143ca9f8680d"), + Sha1TestCase("SQL", "UNICODE_CI", "2064cb643caa8d9e1de12eea7f3e143ca9f8680d") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select sha1('${t.input}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> 
t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support Crc32 hash expression with collation") { + case class Crc321TestCase( + input: String, + collationName: String, + result: Int + ) + + val testCases = Seq( + Crc321TestCase("Spark", "UTF8_BINARY", 1557323817), + Crc321TestCase("Spark", "UTF8_LCASE", 1557323817), + Crc321TestCase("SQL", "UNICODE", 1299261525), + Crc321TestCase("SQL", "UNICODE_CI", 1299261525) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select crc32('${t.input}') + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + } + }) + } + + test("Support Murmur3Hash hash expression with collation") { + case class Murmur3HashTestCase( + input: String, + collationName: String, + result: Int + ) + + val testCases = Seq( + Murmur3HashTestCase("Spark", "UTF8_BINARY", 228093765), + Murmur3HashTestCase("Spark", "UTF8_LCASE", 228093765), + Murmur3HashTestCase("SQL", "UNICODE", 17468742), + Murmur3HashTestCase("SQL", "UNICODE_CI", 17468742) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select hash('${t.input}') + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + } + }) + } + + test("Support XxHash64 hash expression with collation") { + case class XxHash64TestCase( + input: String, + collationName: String, + result: Long + ) + + val testCases = Seq( + XxHash64TestCase("Spark", "UTF8_BINARY", -4294468057691064905L), + XxHash64TestCase("Spark", "UTF8_LCASE", -4294468057691064905L), + XxHash64TestCase("SQL", "UNICODE", -2147923034195946097L), + XxHash64TestCase("SQL", "UNICODE_CI", -2147923034195946097L) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select xxhash64('${t.input}') + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + } + }) + } + + test("Support UrlEncode hash expression with collation") { + case class UrlEncodeTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + UrlEncodeTestCase("https://spark.apache.org", "UTF8_BINARY", + "https%3A%2F%2Fspark.apache.org"), + UrlEncodeTestCase("https://spark.apache.org", "UTF8_LCASE", + "https%3A%2F%2Fspark.apache.org"), + UrlEncodeTestCase("https://spark.apache.org", "UNICODE", + "https%3A%2F%2Fspark.apache.org"), + UrlEncodeTestCase("https://spark.apache.org", "UNICODE_CI", + "https%3A%2F%2Fspark.apache.org") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select url_encode('${t.input}') + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support UrlDecode hash expression with collation") { + case class UrlDecodeTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UTF8_BINARY", + 
"https://spark.apache.org"), + UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UTF8_LCASE", + "https://spark.apache.org"), + UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UNICODE", + "https://spark.apache.org"), + UrlDecodeTestCase("https%3A%2F%2Fspark.apache.org", "UNICODE_CI", + "https://spark.apache.org") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select url_decode('${t.input}') + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support ParseUrl hash expression with collation") { + case class ParseUrlTestCase( + input: String, + collationName: String, + path: String, + result: String + ) + + val testCases = Seq( + ParseUrlTestCase("http://spark.apache.org/path?query=1", "UTF8_BINARY", "HOST", + "spark.apache.org"), + ParseUrlTestCase("http://spark.apache.org/path?query=2", "UTF8_LCASE", "PATH", + "/path"), + ParseUrlTestCase("http://spark.apache.org/path?query=3", "UNICODE", "QUERY", + "query=3"), + ParseUrlTestCase("http://spark.apache.org/path?query=4", "UNICODE_CI", "PROTOCOL", + "http") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select parse_url('${t.input}', '${t.path}') + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support CsvToStructs csv expression with collation") { + case class CsvToStructsTestCase( + input: String, + collationName: String, + schema: String, + options: String, + result: Row, + structFields: Seq[StructField] + ) + + val testCases = Seq( + CsvToStructsTestCase("1", "UTF8_BINARY", "'a INT'", "", + Row(1), Seq( + StructField("a", IntegerType, nullable = true) + )), + CsvToStructsTestCase("true, 0.8", "UTF8_LCASE", "'A BOOLEAN, B DOUBLE'", "", + Row(true, 0.8), Seq( + StructField("A", BooleanType, nullable = true), + StructField("B", DoubleType, nullable = true) + )), + CsvToStructsTestCase("\"Spark\"", "UNICODE", "'a STRING'", "", + Row("Spark"), Seq( + StructField("a", StringType("UNICODE"), nullable = true) + )), + CsvToStructsTestCase("26/08/2015", "UTF8_BINARY", "'time Timestamp'", + ", map('timestampFormat', 'dd/MM/yyyy')", Row( + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S").parse("2015-08-26 00:00:00.0") + ), Seq( + StructField("time", TimestampType, nullable = true) + )) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select from_csv('${t.input}', ${t.schema} ${t.options}) + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + val queryResult = testQuery.collect().head + checkAnswer(testQuery, Row(t.result)) + val dataType = StructType(t.structFields) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support SchemaOfCsv csv expression with collation") { + case class SchemaOfCsvTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + SchemaOfCsvTestCase("1", "UTF8_BINARY", "STRUCT<_c0: INT>"), + SchemaOfCsvTestCase("true,0.8", "UTF8_LCASE", + "STRUCT<_c0: BOOLEAN, _c1: DOUBLE>"), + 
SchemaOfCsvTestCase("2015-08-26", "UNICODE", "STRUCT<_c0: DATE>"), + SchemaOfCsvTestCase("abc", "UNICODE_CI", + "STRUCT<_c0: STRING>") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select schema_of_csv('${t.input}') + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support StructsToCsv csv expression with collation") { + case class StructsToCsvTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + StructsToCsvTestCase("named_struct('a', 1, 'b', 2)", "UTF8_BINARY", "1,2"), + StructsToCsvTestCase("named_struct('A', true, 'B', 2.0)", "UTF8_LCASE", "true,2.0"), + StructsToCsvTestCase("named_struct()", "UNICODE", null), + StructsToCsvTestCase("named_struct('time', to_timestamp('2015-08-26'))", "UNICODE_CI", + "2015-08-26T00:00:00.000-07:00") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select to_csv(${t.input}) + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Conv expression with collation") { + // Supported collations + case class ConvTestCase( + num: String, + from_base: String, + to_base: String, + collationName: String, + result: String) + + val testCases = Seq( + ConvTestCase("100", "2", "10", "UTF8_BINARY", "4"), + ConvTestCase("100", "2", "10", "UTF8_LCASE", "4"), + ConvTestCase("100", "2", "10", "UNICODE", "4"), + ConvTestCase("100", "2", "10", "UNICODE_CI", "4") + ) + testCases.foreach(t => { + val query = + s""" + |select conv(collate('${t.num}', '${t.collationName}'), ${t.from_base}, ${t.to_base}) + |""".stripMargin + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.collationName))) + }) + } + + test("Bin expression with collation") { + // Supported collations + case class BinTestCase( + num: String, + collationName: String, + result: String) + + val testCases = Seq( + BinTestCase("13", "UTF8_BINARY", "1101"), + BinTestCase("13", "UTF8_LCASE", "1101"), + BinTestCase("13", "UNICODE", "1101"), + BinTestCase("13", "UNICODE_CI", "1101") + ) + testCases.foreach(t => { + val query = + s""" + |select bin(${t.num}) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.collationName))) + } + }) + } + + test("Hex with non-string input expression with collation") { + case class HexTestCase( + num: String, + collationName: String, + result: String) + + val testCases = Seq( + HexTestCase("13", "UTF8_BINARY", "D"), + HexTestCase("13", "UTF8_LCASE", "D"), + HexTestCase("13", "UNICODE", "D"), + HexTestCase("13", "UNICODE_CI", "D") + ) + testCases.foreach(t => { + val query = + s""" + |select hex(${t.num}) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.collationName))) + } + }) 
+ } + + test("Hex with string input expression with collation") { + case class HexTestCase( + num: String, + collationName: String, + result: String) + + val testCases = Seq( + HexTestCase("Spark SQL", "UTF8_BINARY", "537061726B2053514C"), + HexTestCase("Spark SQL", "UTF8_LCASE", "537061726B2053514C"), + HexTestCase("Spark SQL", "UNICODE", "537061726B2053514C"), + HexTestCase("Spark SQL", "UNICODE_CI", "537061726B2053514C") + ) + testCases.foreach(t => { + val query = + s""" + |select hex(collate('${t.num}', '${t.collationName}')) + |""".stripMargin + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.collationName))) + }) + } + + test("UnHex expression with collation") { + case class UnHexTestCase( + num: String, + collationName: String, + result: String) + + val testCases = Seq( + UnHexTestCase("537061726B2053514C", "UTF8_BINARY", "Spark SQL"), + UnHexTestCase("537061726B2053514C", "UTF8_LCASE", "Spark SQL"), + UnHexTestCase("537061726B2053514C", "UNICODE", "Spark SQL"), + UnHexTestCase("537061726B2053514C", "UNICODE_CI", "Spark SQL") + ) + testCases.foreach(t => { + val query = + s""" + |select decode(unhex(collate('${t.num}', '${t.collationName}')), 'UTF-8') + |""".stripMargin + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType("UTF8_BINARY"))) + }) + } + + test("Support XPath expressions with collation") { + case class XPathTestCase( + xml: String, + xpath: String, + functionName: String, + collationName: String, + result: Any, + resultType: DataType + ) + + val testCases = Seq( + XPathTestCase("1", "a/b", + "xpath_boolean", "UTF8_BINARY", true, BooleanType), + XPathTestCase("12", "sum(A/B)", + "xpath_short", "UTF8_BINARY", 3, ShortType), + XPathTestCase("34", "sum(a/b)", + "xpath_int", "UTF8_LCASE", 7, IntegerType), + XPathTestCase("56", "sum(A/B)", + "xpath_long", "UTF8_LCASE", 11, LongType), + XPathTestCase("78", "sum(a/b)", + "xpath_float", "UNICODE", 15.0, FloatType), + XPathTestCase("90", "sum(A/B)", + "xpath_double", "UNICODE", 9.0, DoubleType), + XPathTestCase("bcc", "a/c", + "xpath_string", "UNICODE_CI", "cc", StringType("UNICODE_CI")), + XPathTestCase("b1b2b3c1c2", "a/b/text()", + "xpath", "UNICODE_CI", Array("b1", "b2", "b3"), ArrayType(StringType("UNICODE_CI"))) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select ${t.functionName}('${t.xml}', '${t.xpath}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + assert(testQuery.schema.fields.head.dataType.sameType(t.resultType)) + } + }) + } + + test("Support StringSpace expression with collation") { + case class StringSpaceTestCase( + input: Int, + collationName: String, + result: String + ) + + val testCases = Seq( + StringSpaceTestCase(1, "UTF8_BINARY", " "), + StringSpaceTestCase(2, "UTF8_LCASE", " "), + StringSpaceTestCase(3, "UNICODE", " "), + StringSpaceTestCase(4, "UNICODE_CI", " ") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select space(${t.input}) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + 
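Each test in the new CollationSQLExpressionsSuite follows the same recipe: set the session default collation through SqlApiConf.DEFAULT_COLLATION, evaluate the expression, and verify both the returned value and that any string output carries the chosen collation in its StringType. A minimal sketch of that recipe, assuming the suite's sql, checkAnswer and withSQLConf helpers are in scope; the upper() expression is only an illustrative stand-in and is not taken from this patch:

// Sketch only: mirrors the assertion pattern used throughout this suite.
Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach { collation =>
  withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) {
    // The string literal picks up the session default collation and upper() preserves it.
    val df = sql("SELECT upper('spark')")
    checkAnswer(df, Row("SPARK"))
    // String results are expected to carry the default collation in their data type.
    assert(df.schema.fields.head.dataType.sameType(StringType(collation)))
  }
}
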
test("Support ToNumber & TryToNumber expressions with collation") { + case class ToNumberTestCase( + input: String, + collationName: String, + format: String, + result: Any, + resultType: DataType + ) + + val testCases = Seq( + ToNumberTestCase("123", "UTF8_BINARY", "999", 123, DecimalType(3, 0)), + ToNumberTestCase("1", "UTF8_LCASE", "0.00", 1.00, DecimalType(3, 2)), + ToNumberTestCase("99,999", "UNICODE", "99,999", 99999, DecimalType(5, 0)), + ToNumberTestCase("$14.99", "UNICODE_CI", "$99.99", 14.99, DecimalType(4, 2)) + ) + + // Supported collations (ToNumber) + testCases.foreach(t => { + val query = + s""" + |select to_number('${t.input}', '${t.format}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + assert(testQuery.schema.fields.head.dataType.sameType(t.resultType)) + } + }) + + // Supported collations (TryToNumber) + testCases.foreach(t => { + val query = + s""" + |select try_to_number('${t.input}', '${t.format}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + assert(testQuery.schema.fields.head.dataType.sameType(t.resultType)) + } + }) + } + + test("Handle invalid number for ToNumber variant expression with collation") { + // to_number should throw an exception if the conversion fails + val number = "xx" + val query = s"SELECT to_number('$number', '999');" + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + val e = intercept[SparkIllegalArgumentException] { + val testQuery = sql(query) + testQuery.collect() + } + assert(e.getErrorClass === "INVALID_FORMAT.MISMATCH_INPUT") + } + } + + test("Handle invalid number for TryToNumber variant expression with collation") { + // try_to_number shouldn't throw an exception if the conversion fails + val number = "xx" + val query = s"SELECT try_to_number('$number', '999');" + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + val testQuery = sql(query) + checkAnswer(testQuery, Row(null)) + } + } + + test("Support ToChar expression with collation") { + case class ToCharTestCase( + input: Int, + collationName: String, + format: String, + result: String + ) + + val testCases = Seq( + ToCharTestCase(12, "UTF8_BINARY", "999", " 12"), + ToCharTestCase(34, "UTF8_LCASE", "000D00", "034.00"), + ToCharTestCase(56, "UNICODE", "$99.99", "$56.00"), + ToCharTestCase(78, "UNICODE_CI", "99D9S", "78.0+") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select to_char(${t.input}, '${t.format}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support GetJsonObject json expression with collation") { + case class GetJsonObjectTestCase( + input: String, + path: String, + collationName: String, + result: String + ) + + val testCases = Seq( + GetJsonObjectTestCase("{\"a\":\"b\"}", "$.a", "UTF8_BINARY", "b"), + GetJsonObjectTestCase("{\"A\":\"1\"}", "$.A", "UTF8_LCASE", "1"), + GetJsonObjectTestCase("{\"x\":true}", "$.x", "UNICODE", "true"), + GetJsonObjectTestCase("{\"X\":1}", "$.X", "UNICODE_CI", "1") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT 
get_json_object('${t.input}', '${t.path}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support JsonTuple json expression with collation") { + case class JsonTupleTestCase( + input: String, + names: String, + collationName: String, + result: Row + ) + + val testCases = Seq( + JsonTupleTestCase("{\"a\":1, \"b\":2}", "'a', 'b'", "UTF8_BINARY", + Row("1", "2")), + JsonTupleTestCase("{\"A\":\"3\", \"B\":\"4\"}", "'A', 'B'", "UTF8_LCASE", + Row("3", "4")), + JsonTupleTestCase("{\"x\":true, \"y\":false}", "'x', 'y'", "UNICODE", + Row("true", "false")), + JsonTupleTestCase("{\"X\":null, \"Y\":null}", "'X', 'Y'", "UNICODE_CI", + Row(null, null)) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT json_tuple('${t.input}', ${t.names}) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, t.result) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support JsonToStructs json expression with collation") { + case class JsonToStructsTestCase( + input: String, + schema: String, + collationName: String, + result: Row + ) + + val testCases = Seq( + JsonToStructsTestCase("{\"a\":1, \"b\":2.0}", "a INT, b DOUBLE", + "UTF8_BINARY", Row(Row(1, 2.0))), + JsonToStructsTestCase("{\"A\":\"3\", \"B\":4}", "A STRING COLLATE UTF8_LCASE, B INT", + "UTF8_LCASE", Row(Row("3", 4))), + JsonToStructsTestCase("{\"x\":true, \"y\":null}", "x BOOLEAN, y VOID", + "UNICODE", Row(Row(true, null))), + JsonToStructsTestCase("{\"X\":null, \"Y\":false}", "X VOID, Y BOOLEAN", + "UNICODE_CI", Row(Row(null, false))) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT from_json('${t.input}', '${t.schema}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, t.result) + val dataType = StructType.fromDDL(t.schema) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support StructsToJson json expression with collation") { + case class StructsToJsonTestCase( + struct: String, + collationName: String, + result: Row + ) + + val testCases = Seq( + StructsToJsonTestCase("named_struct('a', 1, 'b', 2)", + "UTF8_BINARY", Row("{\"a\":1,\"b\":2}")), + StructsToJsonTestCase("array(named_struct('a', 1, 'b', 2))", + "UTF8_LCASE", Row("[{\"a\":1,\"b\":2}]")), + StructsToJsonTestCase("map('a', named_struct('b', 1))", + "UNICODE", Row("{\"a\":{\"b\":1}}")), + StructsToJsonTestCase("array(map('a', 1))", + "UNICODE_CI", Row("[{\"a\":1}]")) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT to_json(${t.struct}) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, t.result) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support LengthOfJsonArray json expression with collation") { + case class LengthOfJsonArrayTestCase( + input: String, + collationName: 
String, + result: Row + ) + + val testCases = Seq( + LengthOfJsonArrayTestCase("'[1,2,3,4]'", "UTF8_BINARY", Row(4)), + LengthOfJsonArrayTestCase("'[1,2,3,{\"f1\":1,\"f2\":[5,6]},4]'", "UTF8_LCASE", Row(5)), + LengthOfJsonArrayTestCase("'[1,2'", "UNICODE", Row(null)), + LengthOfJsonArrayTestCase("'['", "UNICODE_CI", Row(null)) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT json_array_length(${t.input}) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, t.result) + assert(testQuery.schema.fields.head.dataType.sameType(IntegerType)) + } + }) + } + + test("Support JsonObjectKeys json expression with collation") { + case class JsonObjectKeysJsonArrayTestCase( + input: String, + collationName: String, + result: Row + ) + + val testCases = Seq( + JsonObjectKeysJsonArrayTestCase("{}", "UTF8_BINARY", + Row(Seq())), + JsonObjectKeysJsonArrayTestCase("{\"k\":", "UTF8_LCASE", + Row(null)), + JsonObjectKeysJsonArrayTestCase("{\"k1\": \"v1\"}", "UNICODE", + Row(Seq("k1"))), + JsonObjectKeysJsonArrayTestCase("{\"k1\":1,\"k2\":{\"k3\":3, \"k4\":4}}", "UNICODE_CI", + Row(Seq("k1", "k2"))) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT json_object_keys('${t.input}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, t.result) + val dataType = ArrayType(StringType(t.collationName)) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support SchemaOfJson json expression with collation") { + case class SchemaOfJsonTestCase( + input: String, + collationName: String, + result: Row + ) + + val testCases = Seq( + SchemaOfJsonTestCase("'[{\"col\":0}]'", + "UTF8_BINARY", Row("ARRAY>")), + SchemaOfJsonTestCase("'[{\"col\":01}]', map('allowNumericLeadingZeros', 'true')", + "UTF8_LCASE", Row("ARRAY>")), + SchemaOfJsonTestCase("'[]'", + "UNICODE", Row("ARRAY")), + SchemaOfJsonTestCase("''", + "UNICODE_CI", Row("STRING")) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT schema_of_json(${t.input}) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, t.result) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support StringToMap expression with collation") { + // Supported collations + case class StringToMapTestCase[R](t: String, p: String, k: String, c: String, result: R) + val testCases = Seq( + StringToMapTestCase("a:1,b:2,c:3", ",", ":", "UTF8_BINARY", + Map("a" -> "1", "b" -> "2", "c" -> "3")), + StringToMapTestCase("A-1;B-2;C-3", ";", "-", "UTF8_LCASE", + Map("A" -> "1", "B" -> "2", "C" -> "3")), + StringToMapTestCase("1:a,2:b,3:c", ",", ":", "UNICODE", + Map("1" -> "a", "2" -> "b", "3" -> "c")), + StringToMapTestCase("1/A!2/B!3/C", "!", "/", "UNICODE_CI", + Map("1" -> "A", "2" -> "B", "3" -> "C")) + ) + testCases.foreach(t => { + val query = s"SELECT str_to_map(collate('${t.t}', '${t.c}'), '${t.p}', '${t.k}');" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + val dataType = MapType(StringType(t.c), StringType(t.c), true) + assert(sql(query).schema.fields.head.dataType.sameType(dataType)) + }) + } + + test("Support RaiseError 
misc expression with collation") { + // Supported collations + case class RaiseErrorTestCase(errorMessage: String, collationName: String) + val testCases = Seq( + RaiseErrorTestCase("custom error message 1", "UTF8_BINARY"), + RaiseErrorTestCase("custom error message 2", "UTF8_LCASE"), + RaiseErrorTestCase("custom error message 3", "UNICODE"), + RaiseErrorTestCase("custom error message 4", "UNICODE_CI") + ) + testCases.foreach(t => { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val query = s"SELECT raise_error('${t.errorMessage}')" + // Result & data type + val userException = intercept[SparkRuntimeException] { + sql(query).collect() + } + assert(userException.getErrorClass === "USER_RAISED_EXCEPTION") + assert(userException.getMessage.contains(t.errorMessage)) + } + }) + } + + test("Support CurrentDatabase/Catalog/User expressions with collation") { + // Supported collations + Seq("UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach(collationName => + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val queryDatabase = sql("SELECT current_schema()") + val queryCatalog = sql("SELECT current_catalog()") + val queryUser = sql("SELECT current_user()") + // Data type + val dataType = StringType(collationName) + assert(queryDatabase.schema.fields.head.dataType.sameType(dataType)) + assert(queryCatalog.schema.fields.head.dataType.sameType(dataType)) + assert(queryUser.schema.fields.head.dataType.sameType(dataType)) + } + ) + } + + test("Support Uuid misc expression with collation") { + // Supported collations + Seq("UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach(collationName => + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val query = s"SELECT uuid()" + // Result & data type + val testQuery = sql(query) + val queryResult = testQuery.collect().head.getString(0) + val uuidFormat = "^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$" + assert(queryResult.matches(uuidFormat)) + val dataType = StringType(collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + ) + } + + test("Support SparkVersion misc expression with collation") { + // Supported collations + Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach(collationName => + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val query = s"SELECT version()" + // Result & data type + val testQuery = sql(query) + val queryResult = testQuery.collect().head.getString(0) + val versionFormat = "^[0-9]\\.[0-9]\\.[0-9] [0-9a-f]{40}$" + assert(queryResult.matches(versionFormat)) + val dataType = StringType(collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + ) + } + + test("Support TypeOf misc expression with collation") { + // Supported collations + case class TypeOfTestCase(input: String, collationName: String, result: String) + val testCases = Seq( + TypeOfTestCase("1", "UTF8_BINARY", "int"), + TypeOfTestCase("\"A\"", "UTF8_LCASE", "string collate UTF8_LCASE"), + TypeOfTestCase("array(1)", "UNICODE", "array"), + TypeOfTestCase("null", "UNICODE_CI", "void") + ) + testCases.foreach(t => { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val query = s"SELECT typeof(${t.input})" + // Result & data type + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support AesEncrypt misc expression with collation") { + // Supported collations + case 
class AesEncryptTestCase( + input: String, + collationName: String, + params: String, + result: String + ) + val testCases = Seq( + AesEncryptTestCase("Spark", "UTF8_BINARY", "'1234567890abcdef', 'ECB'", + "8DE7DB79A23F3E8ED530994DDEA98913"), + AesEncryptTestCase("Spark", "UTF8_LCASE", "'1234567890abcdef', 'ECB', 'DEFAULT', ''", + "8DE7DB79A23F3E8ED530994DDEA98913"), + AesEncryptTestCase("Spark", "UNICODE", "'1234567890abcdef', 'GCM', 'DEFAULT', " + + "unhex('000000000000000000000000')", + "00000000000000000000000046596B2DE09C729FE48A0F81A00A4E7101DABEB61D"), + AesEncryptTestCase("Spark", "UNICODE_CI", "'1234567890abcdef', 'CBC', 'DEFAULT', " + + "unhex('00000000000000000000000000000000')", + "000000000000000000000000000000008DE7DB79A23F3E8ED530994DDEA98913") + ) + testCases.foreach(t => { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val query = s"SELECT hex(aes_encrypt('${t.input}', ${t.params}))" + // Result & data type + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support AesDecrypt misc expression with collation") { + // Supported collations + case class AesDecryptTestCase( + input: String, + collationName: String, + params: String, + result: String + ) + val testCases = Seq( + AesDecryptTestCase("8DE7DB79A23F3E8ED530994DDEA98913", + "UTF8_BINARY", "'1234567890abcdef', 'ECB'", "Spark"), + AesDecryptTestCase("8DE7DB79A23F3E8ED530994DDEA98913", + "UTF8_LCASE", "'1234567890abcdef', 'ECB', 'DEFAULT', ''", "Spark"), + AesDecryptTestCase("00000000000000000000000046596B2DE09C729FE48A0F81A00A4E7101DABEB61D", + "UNICODE", "'1234567890abcdef', 'GCM', 'DEFAULT'", "Spark"), + AesDecryptTestCase("000000000000000000000000000000008DE7DB79A23F3E8ED530994DDEA98913", + "UNICODE_CI", "'1234567890abcdef', 'CBC', 'DEFAULT'", "Spark") + ) + testCases.foreach(t => { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val query = s"SELECT aes_decrypt(unhex('${t.input}'), ${t.params})" + // Result & data type + val testQuery = sql(query) + checkAnswer(testQuery, sql(s"SELECT to_binary('${t.result}', 'utf-8')")) + assert(testQuery.schema.fields.head.dataType.sameType(BinaryType)) + } + }) + } + + test("Support Mask expression with collation") { + // Supported collations + case class MaskTestCase[R](i: String, u: String, l: String, d: String, o: String, c: String, + result: R) + val testCases = Seq( + MaskTestCase("ab-CD-12-@$", null, null, null, null, "UTF8_BINARY", "ab-CD-12-@$"), + MaskTestCase("ab-CD-12-@$", "X", null, null, null, "UTF8_LCASE", "ab-XX-12-@$"), + MaskTestCase("ab-CD-12-@$", "X", "x", null, null, "UNICODE", "xx-XX-12-@$"), + MaskTestCase("ab-CD-12-@$", "X", "x", "0", "#", "UNICODE_CI", "xx#XX#00###") + ) + testCases.foreach(t => { + def col(s: String): String = if (s == null) "null" else s"collate('$s', '${t.c}')" + val query = s"SELECT mask(${col(t.i)}, ${col(t.u)}, ${col(t.l)}, ${col(t.d)}, ${col(t.o)})" + // Result & data type + var result = sql(query) + checkAnswer(result, Row(t.result)) + assert(result.schema.fields.head.dataType.sameType(StringType(t.c))) + }) + // Implicit casting + val testCasting = Seq( + MaskTestCase("ab-CD-12-@$", "X", "x", "0", "#", "UNICODE_CI", "xx#XX#00###") + ) + testCasting.foreach(t => { + def col(s: String): String = if (s == null) "null" else s"collate('$s', '${t.c}')" + def str(s: String): String = if (s == null) "null" else s"'$s'" + val query1 = s"SELECT 
mask(${col(t.i)}, ${str(t.u)}, ${str(t.l)}, ${str(t.d)}, ${str(t.o)})" + val query2 = s"SELECT mask(${str(t.i)}, ${col(t.u)}, ${str(t.l)}, ${str(t.d)}, ${str(t.o)})" + val query3 = s"SELECT mask(${str(t.i)}, ${str(t.u)}, ${col(t.l)}, ${str(t.d)}, ${str(t.o)})" + val query4 = s"SELECT mask(${str(t.i)}, ${str(t.u)}, ${str(t.l)}, ${col(t.d)}, ${str(t.o)})" + val query5 = s"SELECT mask(${str(t.i)}, ${str(t.u)}, ${str(t.l)}, ${str(t.d)}, ${col(t.o)})" + for (q <- Seq(query1, query2, query3, query4, query5)) { + val result = sql(q) + checkAnswer(result, Row(t.result)) + assert(result.schema.fields.head.dataType.sameType(StringType(t.c))) + } + }) + // Collation mismatch + val collationMismatch = intercept[AnalysisException] { + sql("SELECT mask(collate('ab-CD-12-@$','UNICODE'),collate('X','UNICODE_CI'),'x','0','#')") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("Support XmlToStructs xml expression with collation") { + case class XmlToStructsTestCase( + input: String, + collationName: String, + schema: String, + options: String, + result: Row, + structFields: Seq[StructField] + ) + + val testCases = Seq( + XmlToStructsTestCase("
<p>\n  <a>1</a>\n</p>\n", "UTF8_BINARY", "'a INT'", "", + Row(1), Seq( + StructField("a", IntegerType, nullable = true) + )), + XmlToStructsTestCase("
<p>\n  <A>true</A><B>0.8</B>\n</p>\n", "UTF8_LCASE", + "'A BOOLEAN, B DOUBLE'", "", Row(true, 0.8), Seq( + StructField("A", BooleanType, nullable = true), + StructField("B", DoubleType, nullable = true) + )), + XmlToStructsTestCase("
<p>\n  <s>Spark</s>\n</p>\n", "UNICODE", "'s STRING'", "", + Row("Spark"), Seq( + StructField("s", StringType("UNICODE"), nullable = true) + )), + XmlToStructsTestCase("
<p>\n  <time>26/08/2015</time>\n</p>\n", "UNICODE_CI", "'time Timestamp'", + ", map('timestampFormat', 'dd/MM/yyyy')", Row( + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.S").parse("2015-08-26 00:00:00.0") + ), Seq( + StructField("time", TimestampType, nullable = true) + )) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select from_xml('${t.input}', ${t.schema} ${t.options}) + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StructType(t.structFields) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support SchemaOfXml xml expression with collation") { + case class SchemaOfXmlTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + SchemaOfXmlTestCase("
<p>\n  <a>1</a>\n</p>\n", "UTF8_BINARY", "STRUCT<a: BIGINT>"), + SchemaOfXmlTestCase("
<p>\n  <A>true</A><B>0.8</B>\n</p>\n", "UTF8_LCASE", + "STRUCT<A: BOOLEAN, B: DOUBLE>"), + SchemaOfXmlTestCase("
<p></p>", "UNICODE", "STRUCT<>"), + SchemaOfXmlTestCase("
<p>\n  <A>1</A><A>2</A><A>3</A>\n</p>\n
      ", "UNICODE_CI", + "STRUCT>") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select schema_of_xml('${t.input}') + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support StructsToXml xml expression with collation") { + case class StructsToXmlTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + StructsToXmlTestCase("named_struct('a', 1, 'b', 2)", "UTF8_BINARY", + s""" + | 1 + | 2 + |""".stripMargin), + StructsToXmlTestCase("named_struct('A', true, 'B', 2.0)", "UTF8_LCASE", + s""" + | true + | 2.0 + |""".stripMargin), + StructsToXmlTestCase("named_struct('A', 'aa', 'B', 'bb')", "UTF8_LCASE", + s""" + | aa + | bb + |""".stripMargin), + StructsToXmlTestCase("named_struct('A', 'aa', 'B', 'bb')", "UTF8_BINARY", + s""" + | aa + | bb + |""".stripMargin), + StructsToXmlTestCase("named_struct()", "UNICODE", + ""), + StructsToXmlTestCase("named_struct('time', to_timestamp('2015-08-26'))", "UNICODE_CI", + s""" + | + |""".stripMargin) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |select to_xml(${t.input}) + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + val dataType = StringType(t.collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("Support ParseJson & TryParseJson variant expressions with collation") { + case class ParseJsonTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + ParseJsonTestCase("{\"a\":1,\"b\":2}", "UTF8_BINARY", "{\"a\":1,\"b\":2}"), + ParseJsonTestCase("{\"A\":3,\"B\":4}", "UTF8_LCASE", "{\"A\":3,\"B\":4}"), + ParseJsonTestCase("{\"c\":5,\"d\":6}", "UNICODE", "{\"c\":5,\"d\":6}"), + ParseJsonTestCase("{\"C\":7,\"D\":8}", "UNICODE_CI", "{\"C\":7,\"D\":8}") + ) + + // Supported collations (ParseJson) + testCases.foreach(t => { + val query = + s""" + |SELECT parse_json('${t.input}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + val testResult = testQuery.collect().map(_.toString()).mkString("") + assert(testResult === "[" + t.result + "]") // can't use checkAnswer for Variant + assert(testQuery.schema.fields.head.dataType.sameType(VariantType)) + } + }) + + // Supported collations (TryParseJson) + testCases.foreach(t => { + val query = + s""" + |SELECT try_parse_json('${t.input}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + val testResult = testQuery.collect().map(_.toString()).mkString("") + assert(testResult === "[" + t.result + "]") // can't use checkAnswer for Variant + assert(testQuery.schema.fields.head.dataType.sameType(VariantType)) + } + }) + } + + test("Handle invalid JSON for ParseJson variant expression with collation") { + // parse_json should throw an exception when the string is not valid JSON value + val json = "{\"a\":1," + val query = s"SELECT parse_json('$json');" + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + val e = intercept[SparkException] { + val testQuery = sql(query) + 
testQuery.collect() + } + assert(e.getErrorClass === "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION") + } + } + + test("Handle invalid JSON for TryParseJson variant expression with collation") { + // try_parse_json shouldn't throw an exception when the string is not valid JSON value + val json = "{\"a\":1,]" + val query = s"SELECT try_parse_json('$json');" + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + val testQuery = sql(query) + val testResult = testQuery.collect().map(_.toString()).mkString("") + assert(testResult === s"[null]") + } + } + + test("Support IsVariantNull variant expressions with collation") { + case class IsVariantNullTestCase( + input: String, + collationName: String, + result: Boolean + ) + + val testCases = Seq( + IsVariantNullTestCase("'null'", "UTF8_BINARY", result = true), + IsVariantNullTestCase("'\"null\"'", "UTF8_LCASE", result = false), + IsVariantNullTestCase("'13'", "UNICODE", result = false), + IsVariantNullTestCase("null", "UNICODE_CI", result = false) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT is_variant_null(parse_json(${t.input})) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + } + }) + } + + test("Support VariantGet & TryVariantGet variant expressions with collation") { + case class VariantGetTestCase( + input: String, + path: String, + variantType: String, + collationName: String, + result: Any, + resultType: DataType + ) + + val testCases = Seq( + VariantGetTestCase("{\"a\": 1}", "$.a", "int", "UTF8_BINARY", 1, IntegerType), + VariantGetTestCase("{\"a\": 1}", "$.b", "int", "UTF8_LCASE", null, IntegerType), + VariantGetTestCase("[1, \"2\"]", "$[1]", "string", "UNICODE", "2", StringType("UNICODE")), + VariantGetTestCase("[1, \"2\"]", "$[2]", "string", "UNICODE_CI", null, + StringType("UNICODE_CI")) + ) + + // Supported collations (VariantGet) + testCases.foreach(t => { + val query = + s""" + |SELECT variant_get(parse_json('${t.input}'), '${t.path}', '${t.variantType}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + val testResult = testQuery.collect().map(_.toString()).mkString("") + assert(testResult === "[" + t.result + "]") // can't use checkAnswer for Variant + assert(testQuery.schema.fields.head.dataType.sameType(t.resultType)) + } + }) + + // Supported collations (TryVariantGet) + testCases.foreach(t => { + val query = + s""" + |SELECT try_variant_get(parse_json('${t.input}'), '${t.path}', '${t.variantType}') + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + val testResult = testQuery.collect().map(_.toString()).mkString("") + assert(testResult === "[" + t.result + "]") // can't use checkAnswer for Variant + assert(testQuery.schema.fields.head.dataType.sameType(t.resultType)) + } + }) + } + + test("Handle invalid JSON for VariantGet variant expression with collation") { + // variant_get should throw an exception if the cast fails + val json = "[1, \"Spark\"]" + val query = s"SELECT variant_get(parse_json('$json'), '$$[1]', 'int');" + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + val e = intercept[SparkRuntimeException] { + val testQuery = sql(query) + testQuery.collect() + } + assert(e.getErrorClass === "INVALID_VARIANT_CAST") + } + } + + test("Handle invalid JSON for 
TryVariantGet variant expression with collation") { + // try_variant_get shouldn't throw an exception if the cast fails + val json = "[1, \"Spark\"]" + val query = s"SELECT try_variant_get(parse_json('$json'), '$$[1]', 'int');" + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + val testQuery = sql(query) + val testResult = testQuery.collect().map(_.toString()).mkString("") + assert(testResult === s"[null]") + } + } + + test("Support VariantExplode variant expressions with collation") { + case class VariantExplodeTestCase( + input: String, + collationName: String, + result: String, + resultType: Seq[StructField] + ) + + val testCases = Seq( + VariantExplodeTestCase("[\"hello\", \"world\"]", "UTF8_BINARY", + Row(0, "null", "\"hello\"").toString() + Row(1, "null", "\"world\"").toString(), + Seq[StructField]( + StructField("pos", IntegerType, nullable = false), + StructField("key", StringType("UTF8_BINARY")), + StructField("value", VariantType, nullable = false) + ) + ), + VariantExplodeTestCase("[\"Spark\", \"SQL\"]", "UTF8_LCASE", + Row(0, "null", "\"Spark\"").toString() + Row(1, "null", "\"SQL\"").toString(), + Seq[StructField]( + StructField("pos", IntegerType, nullable = false), + StructField("key", StringType("UTF8_LCASE")), + StructField("value", VariantType, nullable = false) + ) + ), + VariantExplodeTestCase("{\"a\": true, \"b\": 3.14}", "UNICODE", + Row(0, "a", "true").toString() + Row(1, "b", "3.14").toString(), + Seq[StructField]( + StructField("pos", IntegerType, nullable = false), + StructField("key", StringType("UNICODE")), + StructField("value", VariantType, nullable = false) + ) + ), + VariantExplodeTestCase("{\"A\": 9.99, \"B\": false}", "UNICODE_CI", + Row(0, "A", "9.99").toString() + Row(1, "B", "false").toString(), + Seq[StructField]( + StructField("pos", IntegerType, nullable = false), + StructField("key", StringType("UNICODE_CI")), + StructField("value", VariantType, nullable = false) + ) + ) + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT * from variant_explode(parse_json('${t.input}')) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + val testResult = testQuery.collect().map(_.toString()).mkString("") + assert(testResult === t.result) // can't use checkAnswer for Variant + assert(testQuery.schema.fields.sameElements(t.resultType)) + } + }) + } + + test("Support SchemaOfVariant variant expressions with collation") { + case class SchemaOfVariantTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + SchemaOfVariantTestCase("null", "UTF8_BINARY", "VOID"), + SchemaOfVariantTestCase("[]", "UTF8_LCASE", "ARRAY"), + SchemaOfVariantTestCase("[{\"a\":true,\"b\":0}]", "UNICODE", + "ARRAY>"), + SchemaOfVariantTestCase("[{\"A\":\"x\",\"B\":-1.00}]", "UNICODE_CI", + "ARRAY>") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT schema_of_variant(parse_json('${t.input}')) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + assert(testQuery.schema.fields.head.dataType.sameType(StringType(t.collationName))) + } + }) + } + + test("Support SchemaOfVariantAgg variant expressions with collation") { + case class SchemaOfVariantAggTestCase( + input: String, + collationName: String, + result: String + ) + + val testCases = Seq( + 
SchemaOfVariantAggTestCase("('1'), ('2'), ('3')", "UTF8_BINARY", "BIGINT"), + SchemaOfVariantAggTestCase("('true'), ('false'), ('true')", "UTF8_LCASE", "BOOLEAN"), + SchemaOfVariantAggTestCase("('{\"a\": 1}'), ('{\"b\": true}'), ('{\"c\": 1.23}')", + "UNICODE", "STRUCT"), + SchemaOfVariantAggTestCase("('{\"A\": \"x\"}'), ('{\"B\": 9.99}'), ('{\"C\": 0}')", + "UNICODE_CI", "STRUCT") + ) + + // Supported collations + testCases.foreach(t => { + val query = + s""" + |SELECT schema_of_variant_agg(parse_json(j)) FROM VALUES ${t.input} AS tab(j) + |""".stripMargin + // Result & data type + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row(t.result)) + assert(testQuery.schema.fields.head.dataType.sameType(StringType(t.collationName))) + } + }) + } + + test("Support InputFileName expression with collation") { + // Supported collations + Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach(collationName => { + val query = + s""" + |select input_file_name() + |""".stripMargin + // Result + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val testQuery = sql(query) + checkAnswer(testQuery, Row("")) + val dataType = StringType(collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("DateFormat expression with collation") { + case class DateFormatTestCase[R](date: String, format: String, collation: String, result: R) + val testCases = Seq( + DateFormatTestCase("2021-01-01", "yyyy-MM-dd", "UTF8_BINARY", "2021-01-01"), + DateFormatTestCase("2021-01-01", "yyyy-dd", "UTF8_LCASE", "2021-01"), + DateFormatTestCase("2021-01-01", "yyyy-MM-dd", "UNICODE", "2021-01-01"), + DateFormatTestCase("2021-01-01", "yyyy", "UNICODE_CI", "2021") + ) + + for { + collateDate <- Seq(true, false) + collateFormat <- Seq(true, false) + } { + testCases.foreach(t => { + val dateArg = if (collateDate) s"collate('${t.date}', '${t.collation}')" else s"'${t.date}'" + val formatArg = + if (collateFormat) { + s"collate('${t.format}', '${t.collation}')" + } else { + s"'${t.format}'" + } + + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collation) { + val query = s"SELECT date_format(${dateArg}, ${formatArg})" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.collation))) + } + }) + } + } + + test("Support mode for string expression with collation - Basic Test") { + Seq("utf8_binary", "UTF8_LCASE", "unicode_ci", "unicode").foreach { collationId => + val query = s"SELECT mode(collate('abc', '${collationId}'))" + checkAnswer(sql(query), Row("abc")) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(collationId))) + } + } + + test("Support mode for string expression with collation - Advanced Test") { + case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) + val testCases = Seq( + ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a") + ) + testCases.foreach(t => { + val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => + (0L to numRepeats).map(_ => s"('$elt')").mkString(",") + }.mkString(",") + + val tableName = s"t_${t.collationId}_mode" + withTable(s"${tableName}") { + sql(s"CREATE TABLE ${tableName}(i STRING) USING 
parquet") + sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) + val query = s"SELECT mode(collate(i, '${t.collationId}')) FROM ${tableName}" + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.collationId))) + + } + }) + } + + test("Support Mode.eval(buffer)") { + case class UTF8StringModeTestCase[R]( + collationId: String, + bufferValues: Map[UTF8String, Long], + result: R) + + val bufferValuesUTF8String = Map( + UTF8String.fromString("a") -> 5L, + UTF8String.fromString("b") -> 4L, + UTF8String.fromString("B") -> 3L, + UTF8String.fromString("d") -> 2L, + UTF8String.fromString("e") -> 1L) + + val testCasesUTF8String = Seq( + UTF8StringModeTestCase("utf8_binary", bufferValuesUTF8String, "a"), + UTF8StringModeTestCase("UTF8_LCASE", bufferValuesUTF8String, "b"), + UTF8StringModeTestCase("unicode_ci", bufferValuesUTF8String, "b"), + UTF8StringModeTestCase("unicode", bufferValuesUTF8String, "a")) + + testCasesUTF8String.foreach(t => { + val buffer = new OpenHashMap[AnyRef, Long](5) + val myMode = Mode(child = Literal.create("some_column_name", StringType(t.collationId))) + t.bufferValues.foreach { case (k, v) => buffer.update(k, v) } + assert(myMode.eval(buffer).toString.toLowerCase() == t.result.toLowerCase()) + }) + } + + test("Support mode for string expression with collated strings in struct") { + case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) + val testCases = Seq( + ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ) + testCases.foreach(t => { + val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => + (0L to numRepeats).map(_ => s"named_struct('f1'," + + s" collate('$elt', '${t.collationId}'), 'f2', 1)").mkString(",") + }.mkString(",") + + val tableName = s"t_${t.collationId}_mode_struct" + withTable(tableName) { + sql(s"CREATE TABLE ${tableName}(i STRUCT) USING parquet") + sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) + val query = s"SELECT lower(mode(i).f1) FROM ${tableName}" + if(t.collationId == "UTF8_LCASE" || + t.collationId == "unicode_ci" || + t.collationId == "unicode") { + // Cannot resolve "mode(i)" due to data type mismatch: + // Input to function mode was a complex type with strings collated on non-binary + // collations, which is not yet supported.. 
SQLSTATE: 42K09; line 1 pos 13; + val params = Seq(("sqlExpr", "\"mode(i)\""), + ("msg", "The input to the function 'mode'" + + " was a type of binary-unstable type that is not currently supported by mode."), + ("hint", "")).toMap + checkError( + exception = intercept[AnalysisException] { + sql(query) + }, + errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + parameters = params, + queryContext = Array( + ExpectedContext(objectType = "", + objectName = "", + startIndex = 13, + stopIndex = 19, + fragment = "mode(i)") + ) + ) + } else { + checkAnswer(sql(query), Row(t.result)) + } + } + }) + } + + test("Support mode for string expression with collated strings in recursively nested struct") { + case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) + val testCases = Seq( + ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ) + testCases.foreach(t => { + val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => + (0L to numRepeats).map(_ => s"named_struct('f1', " + + s"named_struct('f2', collate('$elt', '${t.collationId}')), 'f3', 1)").mkString(",") + }.mkString(",") + + val tableName = s"t_${t.collationId}_mode_nested_struct" + withTable(tableName) { + sql(s"CREATE TABLE ${tableName}(i STRUCT, f3: INT>) USING parquet") + sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) + val query = s"SELECT lower(mode(i).f1.f2) FROM ${tableName}" + if(t.collationId == "UTF8_LCASE" || + t.collationId == "unicode_ci" || + t.collationId == "unicode") { + // Cannot resolve "mode(i)" due to data type mismatch: + // Input to function mode was a complex type with strings collated on non-binary + // collations, which is not yet supported.. 
SQLSTATE: 42K09; line 1 pos 13; + val params = Seq(("sqlExpr", "\"mode(i)\""), + ("msg", "The input to the function 'mode' " + + "was a type of binary-unstable type that is not currently supported by mode."), + ("hint", "")).toMap + checkError( + exception = intercept[AnalysisException] { + sql(query) + }, + errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + parameters = params, + queryContext = Array( + ExpectedContext(objectType = "", + objectName = "", + startIndex = 13, + stopIndex = 19, + fragment = "mode(i)") + ) + ) + } else { + checkAnswer(sql(query), Row(t.result)) + } + } + }) + } + + test("Support mode for string expression with collated strings in array complex type") { + case class ModeTestCase[R](collationId: String, bufferValues: Map[String, Long], result: R) + val testCases = Seq( + ModeTestCase("utf8_binary", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("UTF8_LCASE", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b"), + ModeTestCase("unicode", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "a"), + ModeTestCase("unicode_ci", Map("a" -> 3L, "b" -> 2L, "B" -> 2L), "b") + ) + testCases.foreach(t => { + val valuesToAdd = t.bufferValues.map { case (elt, numRepeats) => + (0L to numRepeats).map(_ => s"array(named_struct('s1', named_struct('a2', " + + s"array(collate('$elt', '${t.collationId}'))), 'f3', 1))").mkString(",") + }.mkString(",") + + val tableName = s"t_${t.collationId}_mode_nested_struct" + withTable(tableName) { + sql(s"CREATE TABLE ${tableName}(" + + s"i ARRAY>, f3: INT>>)" + + s" USING parquet") + sql(s"INSERT INTO ${tableName} VALUES " + valuesToAdd) + val query = s"SELECT lower(element_at(element_at(mode(i), 1).s1.a2, 1)) FROM ${tableName}" + if(t.collationId == "UTF8_LCASE" || + t.collationId == "unicode_ci" || t.collationId == "unicode") { + val params = Seq(("sqlExpr", "\"mode(i)\""), + ("msg", "The input to the function 'mode' was a type" + + " of binary-unstable type that is not currently supported by mode."), + ("hint", "")).toMap + checkError( + exception = intercept[AnalysisException] { + sql(query) + }, + errorClass = "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", + parameters = params, + queryContext = Array( + ExpectedContext(objectType = "", + objectName = "", + startIndex = 35, + stopIndex = 41, + fragment = "mode(i)") + ) + ) + } else { + checkAnswer(sql(query), Row(t.result)) + } + } + }) + } + + test("SPARK-48430: Map value extraction with collations") { + for { + collateKey <- Seq(true, false) + collateVal <- Seq(true, false) + defaultCollation <- Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE") + } { + val mapKey = if (collateKey) "'a' collate utf8_lcase" else "'a'" + val mapVal = if (collateVal) "'b' collate utf8_lcase" else "'b'" + val collation = if (collateVal) "UTF8_LCASE" else "UTF8_BINARY" + val queryExtractor = s"select collation(map($mapKey, $mapVal)[$mapKey])" + val queryElementAt = s"select collation(element_at(map($mapKey, $mapVal), $mapKey))" + + checkAnswer(sql(queryExtractor), Row(collation)) + checkAnswer(sql(queryElementAt), Row(collation)) + + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> defaultCollation) { + val res = if (collateVal) "UTF8_LCASE" else defaultCollation + checkAnswer(sql(queryExtractor), Row(res)) + checkAnswer(sql(queryElementAt), Row(res)) + } + } + } + + test("CurrentTimeZone expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = "select current_timezone()" + // Data type check + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + 
val testQuery = sql(query) + val dataType = StringType(collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("DayName expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = "select dayname(current_date())" + // Data type check + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val testQuery = sql(query) + val dataType = StringType(collationName) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + } + }) + } + + test("ToUnixTimestamp expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select to_unix_timestamp(collate('2021-01-01 00:00:00', '${collationName}'), + |collate('yyyy-MM-dd HH:mm:ss', '${collationName}')) + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = LongType + val expectedResult = 1609488000L + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(expectedResult)) + }) + } + + test("FromUnixTime expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select from_unixtime(1609488000, collate('yyyy-MM-dd HH:mm:ss', '${collationName}')) + |""".stripMargin + // Result & data type check + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val testQuery = sql(query) + val dataType = StringType(collationName) + val expectedResult = "2021-01-01 00:00:00" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(expectedResult)) + } + }) + } + + test("NextDay expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select next_day('2015-01-14', collate('TU', '${collationName}')) + |""".stripMargin + // Result & data type check + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val testQuery = sql(query) + val dataType = DateType + val expectedResult = "2015-01-20" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Date.valueOf(expectedResult))) + } + }) + } + + test("FromUTCTimestamp expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select from_utc_timestamp(collate('2016-08-31', '${collationName}'), + |collate('Asia/Seoul', '${collationName}')) + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = TimestampType + val expectedResult = "2016-08-31 09:00:00.0" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Timestamp.valueOf(expectedResult))) + }) + } + + test("ToUTCTimestamp expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select to_utc_timestamp(collate('2016-08-31 09:00:00', '${collationName}'), + |collate('Asia/Seoul', '${collationName}')) + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = TimestampType + val expectedResult = "2016-08-31 00:00:00.0" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Timestamp.valueOf(expectedResult))) + }) + } + + test("ParseToDate expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select 
to_date(collate('2016-12-31', '${collationName}'), + |collate('yyyy-MM-dd', '${collationName}')) + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = DateType + val expectedResult = "2016-12-31" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Date.valueOf(expectedResult))) + }) + } + + test("ParseToTimestamp expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select to_timestamp(collate('2016-12-31 23:59:59', '${collationName}'), + |collate('yyyy-MM-dd HH:mm:ss', '${collationName}')) + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = TimestampType + val expectedResult = "2016-12-31 23:59:59.0" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Timestamp.valueOf(expectedResult))) + }) + } + + test("TruncDate expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select trunc(collate('2016-12-31 23:59:59', '${collationName}'), 'MM') + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = DateType + val expectedResult = "2016-12-01" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Date.valueOf(expectedResult))) + }) + } + + test("TruncTimestamp expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select date_trunc(collate('HOUR', '${collationName}'), + |collate('2015-03-05T09:32:05.359', '${collationName}')) + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = TimestampType + val expectedResult = "2015-03-05 09:00:00.0" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Timestamp.valueOf(expectedResult))) + }) + } + + test("MakeTimestamp expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select make_timestamp(2014, 12, 28, 6, 30, 45.887, collate('CET', '${collationName}')) + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = TimestampType + val expectedResult = "2014-12-27 21:30:45.887" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Timestamp.valueOf(expectedResult))) + }) + } + + test("ExtractValue expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val query = + s""" + |select col['Field1'] + |from values (named_struct('Field1', 'Spark', 'Field2', 5)) as tab(col); + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = StringType(collationName) + val expectedResult = "Spark" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(expectedResult)) + } + }) + } + + test("Lag expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |SELECT lag(a, -1, 'default' collate $collationName) OVER (PARTITION BY b ORDER BY a) + |FROM VALUES ('A1', 2), ('A2', 1), ('A2', 3), ('A1', 1) tab(a, b); + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = StringType(collationName) 
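+ // lag(a, -1, default) effectively looks one row ahead within each partition ordered by a;
+ // rows with no following row fall back to the collated 'default' literal, hence the expected values below.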
+ val expectedResult = Seq("A2", "default", "default", "default") + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, expectedResult.map(Row(_))) + }) + } + + test("Lead expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |SELECT lead(a, -1, 'default' collate $collationName) OVER (PARTITION BY b ORDER BY a) + |FROM VALUES ('A1', 2), ('A2', 1), ('A2', 3), ('A1', 1) tab(a, b); + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = StringType(collationName) + val expectedResult = Seq("A1", "default", "default", "default") + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, expectedResult.map(Row(_))) + }) + } + + test("DatePart expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select date_part(collate('Week', '${collationName}'), + |collate('2019-08-12 01:00:00.123456', '${collationName}')) + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = IntegerType + val expectedResult = 33 + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(expectedResult)) + }) + } + + test("DateAdd expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = s"""select date_add(collate('2016-07-30', '${collationName}'), 1)""" + // Result & data type check + val testQuery = sql(query) + val dataType = DateType + val expectedResult = "2016-07-31" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Date.valueOf(expectedResult))) + }) + } + + test("DateSub expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = s"""select date_sub(collate('2016-07-30', '${collationName}'), 1)""" + // Result & data type check + val testQuery = sql(query) + val dataType = DateType + val expectedResult = "2016-07-29" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(Date.valueOf(expectedResult))) + }) + } + + test("WindowTime and TimeWindow expressions with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val query = + s"""SELECT window_time(window) + | FROM (SELECT a, window, count(*) as cnt FROM VALUES + |('A1', '2021-01-01 00:00:00'), + |('A1', '2021-01-01 00:04:30'), + |('A1', '2021-01-01 00:06:00'), + |('A2', '2021-01-01 00:01:00') AS tab(a, b) + |GROUP by a, window(b, '5 minutes') ORDER BY a, window.start); + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = TimestampType + val expectedResults = + Seq("2021-01-01 00:04:59.999999", + "2021-01-01 00:09:59.999999", + "2021-01-01 00:04:59.999999") + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, expectedResults.map(ts => Row(Timestamp.valueOf(ts)))) + } + }) + } + + test("SessionWindow expressions with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collationName) { + val query = + s"""SELECT count(*) as cnt + | FROM VALUES + |('A1', '2021-01-01 00:00:00'), + |('A1', '2021-01-01 00:04:30'), + |('A1', '2021-01-01 00:10:00'), + |('A2', '2021-01-01 00:01:00'), + 
|('A2', '2021-01-01 00:04:30') AS tab(a, b) + |GROUP BY a, + |session_window(b, CASE WHEN a = 'A1' THEN '5 minutes' ELSE '1 minutes' END) + |ORDER BY a, session_window.start; + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = LongType + val expectedResults = Seq(2, 1, 1, 1) + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, expectedResults.map(Row(_))) + } + }) + } + + test("ConvertTimezone expression with collation") { + // Supported collations + testSuppCollations.foreach(collationName => { + val query = + s""" + |select date_format(convert_timezone(collate('America/Los_Angeles', '${collationName}'), + |collate('UTC', '${collationName}'), collate('2021-12-06 00:00:00', '${collationName}')), + |'yyyy-MM-dd HH:mm:ss.S') + |""".stripMargin + // Result & data type check + val testQuery = sql(query) + val dataType = StringType + val expectedResult = "2021-12-06 08:00:00.0" + assert(testQuery.schema.fields.head.dataType.sameType(dataType)) + checkAnswer(testQuery, Row(expectedResult)) + }) + } + + test("Reflect expressions with collated strings") { + // be aware that output of java.util.UUID.fromString is always lowercase + + case class ReflectExpressions( + left: String, + leftCollation: String, + right: String, + rightCollation: String, + result: Boolean + ) + + val testCases = Seq( + ReflectExpressions("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", + "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", true), + ReflectExpressions("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", + "A5Cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_binary", false), + + ReflectExpressions("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "utf8_binary", + "a5cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_lcase", true), + ReflectExpressions("A5cf6C42-0C85-418f-af6c-3E4E5b1328f2", "utf8_binary", + "A5Cf6c42-0c85-418f-af6c-3e4e5b1328f2", "utf8_lcase", true) + ) + testCases.foreach(testCase => { + val query = + s""" + |SELECT REFLECT('java.util.UUID', 'fromString', + |collate('${testCase.left}', '${testCase.leftCollation}'))= + |collate('${testCase.right}', '${testCase.rightCollation}'); + |""".stripMargin + val testQuery = sql(query) + checkAnswer(testQuery, Row(testCase.result)) + }) + + val queryPass = + s""" + |SELECT REFLECT('java.lang.Integer', 'toHexString',2); + |""".stripMargin + val testQueryPass = sql(queryPass) + checkAnswer(testQueryPass, Row("2")) + + val queryFail = + s""" + |SELECT REFLECT('java.lang.Integer', 'toHexString',"2"); + |""".stripMargin + val typeException = intercept[ExtendedAnalysisException] { + sql(queryFail).collect() + } + assert(typeException.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_STATIC_METHOD") + } + + // TODO: Add more tests for other SQL expressions + +} +// scalastyle:on nonascii + +class CollationSQLExpressionsANSIOffSuite extends CollationSQLExpressionsSuite { + override protected def sparkConf: SparkConf = + super.sparkConf.set(SQLConf.ANSI_ENABLED, false) + +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLRegexpSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLRegexpSuite.scala new file mode 100644 index 0000000000000..885ed37098680 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLRegexpSuite.scala @@ -0,0 +1,496 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.internal.SqlApiConf +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{ArrayType, BooleanType, IntegerType, StringType} + +// scalastyle:off nonascii +class CollationSQLRegexpSuite + extends QueryTest + with SharedSparkSession + with ExpressionEvalHelper { + + test("Support Like string expression with collation") { + // Supported collations + case class LikeTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + LikeTestCase("ABC", "%B%", "UTF8_BINARY", true), + LikeTestCase("AḂC", "%ḃ%", "UTF8_LCASE", true), + LikeTestCase("ABC", "%b%", "UTF8_BINARY", false) + ) + testCases.foreach(t => { + val query = s"SELECT like(collate('${t.l}', '${t.c}'), '${t.r}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + }) + // Unsupported collations + case class LikeTestFail(l: String, r: String, c: String) + val failCases = Seq( + LikeTestFail("ABC", "%b%", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT like(collate('${t.l}', '${t.c}'), '${t.r}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Like simplification should work with collated strings") { + case class SimplifyLikeTestCase[R](collation: String, str: String, cls: Class[_], result: R) + val testCases = Seq( + SimplifyLikeTestCase("UTF8_BINARY", "ab%", classOf[StartsWith], false), + SimplifyLikeTestCase("UTF8_BINARY", "%bc", classOf[EndsWith], false), + SimplifyLikeTestCase("UTF8_BINARY", "a%c", classOf[And], false), + SimplifyLikeTestCase("UTF8_BINARY", "%b%", classOf[Contains], false), + SimplifyLikeTestCase("UTF8_BINARY", "abc", classOf[EqualTo], false), + SimplifyLikeTestCase("UTF8_LCASE", "ab%", classOf[StartsWith], true), + SimplifyLikeTestCase("UTF8_LCASE", "%bc", classOf[EndsWith], true), + SimplifyLikeTestCase("UTF8_LCASE", "a%c", classOf[And], true), + SimplifyLikeTestCase("UTF8_LCASE", "%b%", classOf[Contains], true), + SimplifyLikeTestCase("UTF8_LCASE", "abc", classOf[EqualTo], true) + ) + val tableName = "T" + withTable(tableName) { + sql(s"CREATE TABLE IF NOT EXISTS $tableName(c STRING) using PARQUET") + sql(s"INSERT INTO $tableName(c) VALUES('ABC')") + testCases.foreach { t => + val query = sql(s"select c collate ${t.collation} like '${t.str}' FROM t") + checkAnswer(query, Row(t.result)) + val optimizedPlan = query.queryExecution.optimizedPlan.asInstanceOf[Project] + assert(optimizedPlan.projectList.head.asInstanceOf[Alias].child.getClass == t.cls) + } + } + } + + 
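+  // Note on the cases above: LikeSimplification is expected to rewrite anchored LIKE patterns
+  // into simpler string predicates, roughly (a sketch based on the classes asserted above, not
+  // an exhaustive description of the optimizer rule):
+  //   c LIKE 'ab%'  ->  StartsWith(c, 'ab')
+  //   c LIKE '%bc'  ->  EndsWith(c, 'bc')
+  //   c LIKE 'a%c'  ->  And(StartsWith(c, 'a'), EndsWith(c, 'c'))
+  //   c LIKE '%b%'  ->  Contains(c, 'b')
+  //   c LIKE 'abc'  ->  EqualTo(c, 'abc')
+  // For collated strings these rewrites must keep collation-aware comparison semantics, which
+  // the UTF8_LCASE rows above (and the default-collation test below) exercise.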
test("Like simplification should work with collated strings (for default collation)") { + val tableNameBinary = "T_BINARY" + withTable(tableNameBinary) { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UTF8_BINARY") { + sql(s"CREATE TABLE IF NOT EXISTS $tableNameBinary(c STRING) using PARQUET") + sql(s"INSERT INTO $tableNameBinary(c) VALUES('ABC')") + checkAnswer(sql(s"select c like 'ab%' FROM $tableNameBinary"), Row(false)) + checkAnswer(sql(s"select c like '%bc' FROM $tableNameBinary"), Row(false)) + checkAnswer(sql(s"select c like 'a%c' FROM $tableNameBinary"), Row(false)) + checkAnswer(sql(s"select c like '%b%' FROM $tableNameBinary"), Row(false)) + checkAnswer(sql(s"select c like 'abc' FROM $tableNameBinary"), Row(false)) + } + } + val tableNameLcase = "T_LCASE" + withTable(tableNameLcase) { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UTF8_LCASE") { + sql(s"CREATE TABLE IF NOT EXISTS $tableNameLcase(c STRING) using PARQUET") + sql(s"INSERT INTO $tableNameLcase(c) VALUES('ABC')") + checkAnswer(sql(s"select c like 'ab%' FROM $tableNameLcase"), Row(true)) + checkAnswer(sql(s"select c like '%bc' FROM $tableNameLcase"), Row(true)) + checkAnswer(sql(s"select c like 'a%c' FROM $tableNameLcase"), Row(true)) + checkAnswer(sql(s"select c like '%b%' FROM $tableNameLcase"), Row(true)) + checkAnswer(sql(s"select c like 'abc' FROM $tableNameLcase"), Row(true)) + } + } + } + + test("Support ILike string expression with collation") { + // Supported collations + case class ILikeTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + ILikeTestCase("ABC", "%b%", "UTF8_BINARY", true), + ILikeTestCase("AḂC", "%ḃ%", "UTF8_LCASE", true), + ILikeTestCase("ABC", "%b%", "UTF8_BINARY", true) + ) + testCases.foreach(t => { + val query = s"SELECT ilike(collate('${t.l}', '${t.c}'), '${t.r}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + }) + // Unsupported collations + case class ILikeTestFail(l: String, r: String, c: String) + val failCases = Seq( + ILikeTestFail("ABC", "%b%", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT ilike(collate('${t.l}', '${t.c}'), '${t.r}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support LikeAll string expression with collation") { + // Supported collations + case class LikeAllTestCase[R](s: String, p: Seq[String], c: String, result: R) + val testCases = Seq( + LikeAllTestCase("foo", Seq("%foo%", "%oo"), "UTF8_BINARY", true), + LikeAllTestCase("Foo", Seq("%foo%", "%oo"), "UTF8_LCASE", true), + LikeAllTestCase("foo", Seq("%foo%", "%bar%"), "UTF8_BINARY", false) + ) + testCases.foreach(t => { + val query = s"SELECT collate('${t.s}', '${t.c}') LIKE ALL ('${t.p.mkString("','")}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + }) + // Unsupported collations + case class LikeAllTestFail(s: String, p: Seq[String], c: String) + val failCases = Seq( + LikeAllTestFail("Foo", Seq("%foo%", "%oo"), "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT collate('${t.s}', '${t.c}') LIKE ALL ('${t.p.mkString("','")}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support 
NotLikeAll string expression with collation") { + // Supported collations + case class NotLikeAllTestCase[R](s: String, p: Seq[String], c: String, result: R) + val testCases = Seq( + NotLikeAllTestCase("foo", Seq("%foo%", "%oo"), "UTF8_BINARY", false), + NotLikeAllTestCase("Foo", Seq("%foo%", "%oo"), "UTF8_LCASE", false), + NotLikeAllTestCase("foo", Seq("%goo%", "%bar%"), "UTF8_BINARY", true) + ) + testCases.foreach(t => { + val query = s"SELECT collate('${t.s}', '${t.c}') NOT LIKE ALL ('${t.p.mkString("','")}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + }) + // Unsupported collations + case class NotLikeAllTestFail(s: String, p: Seq[String], c: String) + val failCases = Seq( + NotLikeAllTestFail("Foo", Seq("%foo%", "%oo"), "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT collate('${t.s}', '${t.c}') NOT LIKE ALL ('${t.p.mkString("','")}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support LikeAny string expression with collation") { + // Supported collations + case class LikeAnyTestCase[R](s: String, p: Seq[String], c: String, result: R) + val testCases = Seq( + LikeAnyTestCase("foo", Seq("%foo%", "%bar"), "UTF8_BINARY", true), + LikeAnyTestCase("Foo", Seq("%foo%", "%bar"), "UTF8_LCASE", true), + LikeAnyTestCase("foo", Seq("%goo%", "%hoo%"), "UTF8_BINARY", false) + ) + testCases.foreach(t => { + val query = s"SELECT collate('${t.s}', '${t.c}') LIKE ANY ('${t.p.mkString("','")}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + }) + // Unsupported collations + case class LikeAnyTestFail(s: String, p: Seq[String], c: String) + val failCases = Seq( + LikeAnyTestFail("Foo", Seq("%foo%", "%oo"), "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT collate('${t.s}', '${t.c}') LIKE ANY ('${t.p.mkString("','")}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support NotLikeAny string expression with collation") { + // Supported collations + case class NotLikeAnyTestCase[R](s: String, p: Seq[String], c: String, result: R) + val testCases = Seq( + NotLikeAnyTestCase("foo", Seq("%foo%", "%hoo"), "UTF8_BINARY", true), + NotLikeAnyTestCase("Foo", Seq("%foo%", "%hoo"), "UTF8_LCASE", true), + NotLikeAnyTestCase("foo", Seq("%foo%", "%oo%"), "UTF8_BINARY", false) + ) + testCases.foreach(t => { + val query = s"SELECT collate('${t.s}', '${t.c}') NOT LIKE ANY ('${t.p.mkString("','")}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + }) + // Unsupported collations + case class NotLikeAnyTestFail(s: String, p: Seq[String], c: String) + val failCases = Seq( + NotLikeAnyTestFail("Foo", Seq("%foo%", "%oo"), "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT collate('${t.s}', '${t.c}') NOT LIKE ANY ('${t.p.mkString("','")}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support RLike string expression with collation") { + // Supported collations + case class 
RLikeTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + RLikeTestCase("ABC", ".B.", "UTF8_BINARY", true), + RLikeTestCase("AḂC", ".ḃ.", "UTF8_LCASE", true), + RLikeTestCase("ABC", ".b.", "UTF8_BINARY", false) + ) + testCases.foreach(t => { + val query = s"SELECT rlike(collate('${t.l}', '${t.c}'), '${t.r}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + }) + // Unsupported collations + case class RLikeTestFail(l: String, r: String, c: String) + val failCases = Seq( + RLikeTestFail("ABC", ".b.", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT rlike(collate('${t.l}', '${t.c}'), '${t.r}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support StringSplit string expression with collation") { + // Supported collations + case class StringSplitTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C")), + StringSplitTestCase("AḂC", "[ḃ]", "UTF8_LCASE", Seq("A", "C")), + StringSplitTestCase("ABC", "[B]", "UTF8_BINARY", Seq("A", "C")) + ) + testCases.foreach(t => { + val query = s"SELECT split(collate('${t.l}', '${t.c}'), '${t.r}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(ArrayType(StringType(t.c)))) + }) + // Unsupported collations + case class StringSplitTestFail(l: String, r: String, c: String) + val failCases = Seq( + StringSplitTestFail("ABC", "[b]", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT split(collate('${t.l}', '${t.c}'), '${t.r}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support RegExpReplace string expression with collation") { + // Supported collations + case class RegExpReplaceTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + RegExpReplaceTestCase("ABCDE", ".C.", "UTF8_BINARY", "AFFFE"), + RegExpReplaceTestCase("ABĆDE", ".ć.", "UTF8_LCASE", "AFFFE"), + RegExpReplaceTestCase("ABCDE", ".c.", "UTF8_BINARY", "ABCDE") + ) + testCases.foreach(t => { + val query = + s"SELECT regexp_replace(collate('${t.l}', '${t.c}'), '${t.r}', collate('FFF', '${t.c}'))" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) + // Implicit casting + checkAnswer(sql(s"SELECT regexp_replace(collate('${t.l}', '${t.c}'), '${t.r}', 'FFF')"), + Row(t.result)) + checkAnswer(sql(s"SELECT regexp_replace('${t.l}', '${t.r}', collate('FFF', '${t.c}'))"), + Row(t.result)) + }) + // Collation mismatch + val (c1, c2) = ("UTF8_BINARY", "UTF8_LCASE") + val collationMismatch = intercept[AnalysisException] { + sql(s"SELECT regexp_replace(collate('ABCDE','$c1'), '.c.', collate('FFF','$c2'))") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + // Unsupported collations + case class RegExpReplaceTestFail(l: String, r: String, c: String) + val failCases = Seq( + RegExpReplaceTestFail("ABCDE", ".c.", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = + s"SELECT regexp_replace(collate('${t.l}', '${t.c}'), '${t.r}', 'FFF')" + val unsupportedCollation = 
intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support RegExpExtract string expression with collation") { + // Supported collations + case class RegExpExtractTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + RegExpExtractTestCase("ABCDE", ".C.", "UTF8_BINARY", "BCD"), + RegExpExtractTestCase("ABĆDE", ".ć.", "UTF8_LCASE", "BĆD"), + RegExpExtractTestCase("ABCDE", ".c.", "UTF8_BINARY", "") + ) + testCases.foreach(t => { + val query = + s"SELECT regexp_extract(collate('${t.l}', '${t.c}'), '${t.r}', 0)" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) + }) + // Unsupported collations + case class RegExpExtractTestFail(l: String, r: String, c: String) + val failCases = Seq( + RegExpExtractTestFail("ABCDE", ".c.", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = + s"SELECT regexp_extract(collate('${t.l}', '${t.c}'), '${t.r}', 0)" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support RegExpExtractAll string expression with collation") { + // Supported collations + case class RegExpExtractAllTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + RegExpExtractAllTestCase("ABCDE", ".C.", "UTF8_BINARY", Seq("BCD")), + RegExpExtractAllTestCase("ABĆDE", ".ć.", "UTF8_LCASE", Seq("BĆD")), + RegExpExtractAllTestCase("ABCDE", ".c.", "UTF8_BINARY", Seq()) + ) + testCases.foreach(t => { + val query = + s"SELECT regexp_extract_all(collate('${t.l}', '${t.c}'), '${t.r}', 0)" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(ArrayType(StringType(t.c)))) + }) + // Unsupported collations + case class RegExpExtractAllTestFail(l: String, r: String, c: String) + val failCases = Seq( + RegExpExtractAllTestFail("ABCDE", ".c.", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = + s"SELECT regexp_extract_all(collate('${t.l}', '${t.c}'), '${t.r}', 0)" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support RegExpCount string expression with collation") { + // Supported collations + case class RegExpCountTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + RegExpCountTestCase("ABCDE", ".C.", "UTF8_BINARY", 1), + RegExpCountTestCase("ABĆDE", ".ć.", "UTF8_LCASE", 1), + RegExpCountTestCase("ABCDE", ".c.", "UTF8_BINARY", 0) + ) + testCases.foreach(t => { + val query = s"SELECT regexp_count(collate('${t.l}', '${t.c}'), '${t.r}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(IntegerType)) + }) + // Unsupported collations + case class RegExpCountTestFail(l: String, r: String, c: String) + val failCases = Seq( + RegExpCountTestFail("ABCDE", ".c.", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT regexp_count(collate('${t.l}', '${t.c}'), '${t.r}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support RegExpSubStr string expression with collation") { + // 
Supported collations + case class RegExpSubStrTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + RegExpSubStrTestCase("ABCDE", ".C.", "UTF8_BINARY", "BCD"), + RegExpSubStrTestCase("ABĆDE", ".ć.", "UTF8_LCASE", "BĆD"), + RegExpSubStrTestCase("ABCDE", ".c.", "UTF8_BINARY", null) + ) + testCases.foreach(t => { + val query = s"SELECT regexp_substr(collate('${t.l}', '${t.c}'), '${t.r}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) + }) + // Unsupported collations + case class RegExpSubStrTestFail(l: String, r: String, c: String) + val failCases = Seq( + RegExpSubStrTestFail("ABCDE", ".c.", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT regexp_substr(collate('${t.l}', '${t.c}'), '${t.r}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + + test("Support RegExpInStr string expression with collation") { + // Supported collations + case class RegExpInStrTestCase[R](l: String, r: String, c: String, result: R) + val testCases = Seq( + RegExpInStrTestCase("ABCDE", ".C.", "UTF8_BINARY", 2), + RegExpInStrTestCase("ABĆDE", ".ć.", "UTF8_LCASE", 2), + RegExpInStrTestCase("ABCDE", ".c.", "UTF8_BINARY", 0) + ) + testCases.foreach(t => { + val query = s"SELECT regexp_instr(collate('${t.l}', '${t.c}'), '${t.r}')" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(IntegerType)) + }) + // Unsupported collations + case class RegExpInStrTestFail(l: String, r: String, c: String) + val failCases = Seq( + RegExpInStrTestFail("ABCDE", ".c.", "UNICODE_CI") + ) + failCases.foreach(t => { + val query = s"SELECT regexp_instr(collate('${t.l}', '${t.c}'), '${t.r}')" + val unsupportedCollation = intercept[AnalysisException] { + sql(query) + } + assert(unsupportedCollation.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + } + +} +// scalastyle:on nonascii diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala index e815a5051ed20..78aee5b80e549 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationStringExpressionsSuite.scala @@ -17,23 +17,27 @@ package org.apache.spark.sql -import scala.collection.immutable.Seq +import scala.jdk.CollectionConverters.MapHasAsScala -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkIllegalArgumentException} +import org.apache.spark.sql.catalyst.expressions.{ExpressionEvalHelper, Literal, StringTrim, StringTrimLeft, StringTrimRight} +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, DataType, IntegerType, StringType} +// scalastyle:off nonascii class CollationStringExpressionsSuite extends QueryTest - with SharedSparkSession { + with SharedSparkSession + with ExpressionEvalHelper { test("Support ConcatWs string expression with collation") { // Supported collations case class ConcatWsTestCase[R](s: String, a: Array[String], c: String, result: R) val testCases = Seq( ConcatWsTestCase(" ", Array("Spark", "SQL"), 
"UTF8_BINARY", "Spark SQL"), - ConcatWsTestCase(" ", Array("Spark", "SQL"), "UTF8_BINARY_LCASE", "Spark SQL"), + ConcatWsTestCase(" ", Array("Spark", "SQL"), "UTF8_LCASE", "Spark SQL"), ConcatWsTestCase(" ", Array("Spark", "SQL"), "UNICODE", "Spark SQL"), ConcatWsTestCase(" ", Array("Spark", "SQL"), "UNICODE_CI", "Spark SQL") ) @@ -54,7 +58,7 @@ class CollationStringExpressionsSuite }) // Collation mismatch val collationMismatch = intercept[AnalysisException] { - sql("SELECT concat_ws(' ',collate('Spark', 'UTF8_BINARY_LCASE'),collate('SQL', 'UNICODE'))") + sql("SELECT concat_ws(' ',collate('Spark', 'UTF8_LCASE'),collate('SQL', 'UNICODE'))") } assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") } @@ -64,7 +68,7 @@ class CollationStringExpressionsSuite case class EltTestCase[R](index: Int, inputs: Array[String], c: String, result: R) val testCases = Seq( EltTestCase(1, Array("Spark", "SQL"), "UTF8_BINARY", "Spark"), - EltTestCase(1, Array("Spark", "SQL"), "UTF8_BINARY_LCASE", "Spark"), + EltTestCase(1, Array("Spark", "SQL"), "UTF8_LCASE", "Spark"), EltTestCase(2, Array("Spark", "SQL"), "UNICODE", "SQL"), EltTestCase(2, Array("Spark", "SQL"), "UNICODE_CI", "SQL") ) @@ -84,18 +88,35 @@ class CollationStringExpressionsSuite }) // Collation mismatch val collationMismatch = intercept[AnalysisException] { - sql("SELECT elt(0 ,collate('Spark', 'UTF8_BINARY_LCASE'), collate('SQL', 'UNICODE'))") + sql("SELECT elt(0 ,collate('Spark', 'UTF8_LCASE'), collate('SQL', 'UNICODE'))") } assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") } + test("Support SplitPart string expression with collation") { + // Supported collations + case class SplitPartTestCase[R](s: String, d: String, p: Int, c: String, result: R) + val testCases = Seq( + SplitPartTestCase("1a2", "a", 2, "UTF8_BINARY", "2"), + SplitPartTestCase("1a2", "a", 2, "UNICODE", "2"), + SplitPartTestCase("1a2", "A", 2, "UTF8_LCASE", "2"), + SplitPartTestCase("1a2", "A", 2, "UNICODE_CI", "2") + ) + testCases.foreach(t => { + val query = s"SELECT split_part(collate('${t.s}','${t.c}'),collate('${t.d}','${t.c}'),${t.p})" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) + }) + } + test("Support Contains string expression with collation") { // Supported collations case class ContainsTestCase[R](l: String, r: String, c: String, result: R) val testCases = Seq( ContainsTestCase("", "", "UTF8_BINARY", true), ContainsTestCase("abcde", "C", "UNICODE", false), - ContainsTestCase("abcde", "FGH", "UTF8_BINARY_LCASE", false), + ContainsTestCase("abcde", "FGH", "UTF8_LCASE", false), ContainsTestCase("abcde", "BCD", "UNICODE_CI", true) ) testCases.foreach(t => { @@ -109,7 +130,100 @@ class CollationStringExpressionsSuite }) // Collation mismatch val collationMismatch = intercept[AnalysisException] { - sql("SELECT contains(collate('abcde','UTF8_BINARY_LCASE'),collate('C','UNICODE_CI'))") + sql("SELECT contains(collate('abcde','UTF8_LCASE'),collate('C','UNICODE_CI'))") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("Support SubstringIndex expression with collation") { + case class SubstringIndexTestCase[R](string: String, delimiter: String, count: Integer, + c: String, result: R) + val testCases = Seq( + SubstringIndexTestCase("wwwgapachegorg", "g", -3, "UTF8_BINARY", "apachegorg"), + SubstringIndexTestCase("www||apache||org", "||", 2, "UTF8_BINARY", "www||apache"), + 
SubstringIndexTestCase("wwwXapacheXorg", "x", 2, "UTF8_LCASE", "wwwXapache"), + SubstringIndexTestCase("aaaaaaaaaa", "aa", 2, "UNICODE", "a"), + SubstringIndexTestCase("wwwmapacheMorg", "M", -2, "UNICODE_CI", "apacheMorg") + ) + testCases.foreach(t => { + val query = s"SELECT substring_index(collate('${t.string}','${t.c}')," + + s"collate('${t.delimiter}','${t.c}'),${t.count})" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType( + StringType(CollationFactory.collationNameToId(t.c)))) + // Implicit casting + checkAnswer(sql(s"SELECT substring_index(collate('${t.string}','${t.c}')," + + s"'${t.delimiter}',${t.count})"), Row(t.result)) + checkAnswer(sql(s"SELECT substring_index('${t.string}',collate('${t.delimiter}','${t.c}')," + + s"${t.count})"), Row(t.result)) + }) + // Collation mismatch + val collationMismatch = intercept[AnalysisException] { + sql("SELECT substring_index(collate('abcde','UTF8_LCASE')," + + "collate('C','UNICODE_CI'),1)") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("Support StringInStr string expression with collation") { + case class StringInStrTestCase[R](string: String, substring: String, c: String, result: R) + val testCases = Seq( + // scalastyle:off + StringInStrTestCase("test大千世界X大千世界", "大千", "UTF8_BINARY", 5), + StringInStrTestCase("test大千世界X大千世界", "界x", "UTF8_LCASE", 8), + StringInStrTestCase("test大千世界X大千世界", "界x", "UNICODE", 0), + StringInStrTestCase("test大千世界X大千世界", "界y", "UNICODE_CI", 0), + StringInStrTestCase("test大千世界X大千世界", "界x", "UNICODE_CI", 8), + StringInStrTestCase("abİo12", "i̇o", "UNICODE_CI", 3) + // scalastyle:on + ) + testCases.foreach(t => { + val query = s"SELECT instr(collate('${t.string}','${t.c}')," + + s"collate('${t.substring}','${t.c}'))" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(IntegerType)) + // Implicit casting + checkAnswer(sql(s"SELECT instr(collate('${t.string}','${t.c}')," + + s"'${t.substring}')"), Row(t.result)) + checkAnswer(sql(s"SELECT instr('${t.string}'," + + s"collate('${t.substring}','${t.c}'))"), Row(t.result)) + }) + // Collation mismatch + val collationMismatch = intercept[AnalysisException] { + sql(s"SELECT instr(collate('aaads','UTF8_BINARY'), collate('Aa','UTF8_LCASE'))") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("Support FindInSet string expression with collation") { + case class FindInSetTestCase[R](word: String, set: String, c: String, result: R) + val testCases = Seq( + FindInSetTestCase("AB", "abc,b,ab,c,def", "UTF8_BINARY", 0), + FindInSetTestCase("C", "abc,b,ab,c,def", "UTF8_LCASE", 4), + FindInSetTestCase("d,ef", "abc,b,ab,c,def", "UNICODE", 0), + // scalastyle:off + FindInSetTestCase("i̇o", "ab,İo,12", "UNICODE_CI", 2), + FindInSetTestCase("İo", "ab,i̇o,12", "UNICODE_CI", 2) + // scalastyle:on + ) + testCases.foreach(t => { + val query = s"SELECT find_in_set(collate('${t.word}', '${t.c}')," + + s"collate('${t.set}', '${t.c}'))" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(IntegerType)) + // Implicit casting + checkAnswer(sql(s"SELECT find_in_set(collate('${t.word}', '${t.c}')," + + s"'${t.set}')"), Row(t.result)) + checkAnswer(sql(s"SELECT find_in_set('${t.word}'," + + s"collate('${t.set}', '${t.c}'))"), Row(t.result)) + }) + // Collation mismatch + val collationMismatch = 
intercept[AnalysisException] { + sql(s"SELECT find_in_set(collate('AB','UTF8_BINARY')," + + s"collate('ab,xyz,fgh','UTF8_LCASE'))") } assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") } @@ -120,7 +234,7 @@ class CollationStringExpressionsSuite val testCases = Seq( StartsWithTestCase("", "", "UTF8_BINARY", true), StartsWithTestCase("abcde", "A", "UNICODE", false), - StartsWithTestCase("abcde", "FGH", "UTF8_BINARY_LCASE", false), + StartsWithTestCase("abcde", "FGH", "UTF8_LCASE", false), StartsWithTestCase("abcde", "ABC", "UNICODE_CI", true) ) testCases.foreach(t => { @@ -134,7 +248,116 @@ class CollationStringExpressionsSuite }) // Collation mismatch val collationMismatch = intercept[AnalysisException] { - sql("SELECT startswith(collate('abcde', 'UTF8_BINARY_LCASE'),collate('C', 'UNICODE_CI'))") + sql("SELECT startswith(collate('abcde', 'UTF8_LCASE'),collate('C', 'UNICODE_CI'))") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + test("TRANSLATE check result on explicitly collated string") { + // Supported collations + case class TranslateTestCase[R](input: String, matchExpression: String, + replaceExpression: String, collation: String, result: R) + val testCases = Seq( + TranslateTestCase("Translate", "Rnlt", "1234", "UTF8_LCASE", "41a2s3a4e"), + TranslateTestCase("Translate", "Rnlt", "1234", "UTF8_LCASE", "41a2s3a4e"), + TranslateTestCase("TRanslate", "rnlt", "XxXx", "UTF8_LCASE", "xXaxsXaxe"), + TranslateTestCase("TRanslater", "Rrnlt", "xXxXx", "UTF8_LCASE", "xxaxsXaxex"), + TranslateTestCase("TRanslater", "Rrnlt", "XxxXx", "UTF8_LCASE", "xXaxsXaxeX"), + // scalastyle:off + TranslateTestCase("test大千世界X大千世界", "界x", "AB", "UTF8_LCASE", "test大千世AB大千世A"), + TranslateTestCase("大千世界test大千世界", "TEST", "abcd", "UTF8_LCASE", "大千世界abca大千世界"), + TranslateTestCase("Test大千世界大千世界", "tT", "oO", "UTF8_LCASE", "oeso大千世界大千世界"), + TranslateTestCase("大千世界大千世界tesT", "Tt", "Oo", "UTF8_LCASE", "大千世界大千世界OesO"), + TranslateTestCase("大千世界大千世界tesT", "大千", "世世", "UTF8_LCASE", "世世世界世世世界tesT"), + // scalastyle:on + TranslateTestCase("Translate", "Rnlt", "1234", "UNICODE", "Tra2s3a4e"), + TranslateTestCase("TRanslate", "rnlt", "XxXx", "UNICODE", "TRaxsXaxe"), + TranslateTestCase("TRanslater", "Rrnlt", "xXxXx", "UNICODE", "TxaxsXaxeX"), + TranslateTestCase("TRanslater", "Rrnlt", "XxxXx", "UNICODE", "TXaxsXaxex"), + // scalastyle:off + TranslateTestCase("test大千世界X大千世界", "界x", "AB", "UNICODE", "test大千世AX大千世A"), + TranslateTestCase("Test大千世界大千世界", "tT", "oO", "UNICODE", "Oeso大千世界大千世界"), + TranslateTestCase("大千世界大千世界tesT", "Tt", "Oo", "UNICODE", "大千世界大千世界oesO"), + // scalastyle:on + TranslateTestCase("Translate", "Rnlt", "1234", "UNICODE_CI", "41a2s3a4e"), + TranslateTestCase("TRanslate", "rnlt", "XxXx", "UNICODE_CI", "xXaxsXaxe"), + TranslateTestCase("TRanslater", "Rrnlt", "xXxXx", "UNICODE_CI", "xxaxsXaxex"), + TranslateTestCase("TRanslater", "Rrnlt", "XxxXx", "UNICODE_CI", "xXaxsXaxeX"), + // scalastyle:off + TranslateTestCase("test大千世界X大千世界", "界x", "AB", "UNICODE_CI", "test大千世AB大千世A"), + TranslateTestCase("大千世界test大千世界", "TEST", "abcd", "UNICODE_CI", "大千世界abca大千世界"), + TranslateTestCase("Test大千世界大千世界", "tT", "oO", "UNICODE_CI", "oeso大千世界大千世界"), + TranslateTestCase("大千世界大千世界tesT", "Tt", "Oo", "UNICODE_CI", "大千世界大千世界OesO"), + TranslateTestCase("大千世界大千世界tesT", "大千", "世世", "UNICODE_CI", "世世世界世世世界tesT"), + // scalastyle:on + TranslateTestCase("Translate", "Rnlasdfjhgadt", "1234", "UTF8_LCASE", "14234e"), + TranslateTestCase("Translate", "Rnlasdfjhgadt", "1234", 
"UNICODE_CI", "14234e"), + TranslateTestCase("Translate", "Rnlasdfjhgadt", "1234", "UNICODE", "Tr4234e"), + TranslateTestCase("Translate", "Rnlasdfjhgadt", "1234", "UTF8_BINARY", "Tr4234e"), + TranslateTestCase("Translate", "Rnlt", "123495834634", "UTF8_LCASE", "41a2s3a4e"), + TranslateTestCase("Translate", "Rnlt", "123495834634", "UNICODE", "Tra2s3a4e"), + TranslateTestCase("Translate", "Rnlt", "123495834634", "UNICODE_CI", "41a2s3a4e"), + TranslateTestCase("Translate", "Rnlt", "123495834634", "UTF8_BINARY", "Tra2s3a4e"), + TranslateTestCase("abcdef", "abcde", "123", "UTF8_BINARY", "123f"), + TranslateTestCase("abcdef", "abcde", "123", "UTF8_LCASE", "123f"), + TranslateTestCase("abcdef", "abcde", "123", "UNICODE", "123f"), + TranslateTestCase("abcdef", "abcde", "123", "UNICODE_CI", "123f") + ) + + testCases.foreach(t => { + val query = s"SELECT translate(collate('${t.input}', '${t.collation}')," + + s"collate('${t.matchExpression}', '${t.collation}')," + + s"collate('${t.replaceExpression}', '${t.collation}'))" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType( + StringType(CollationFactory.collationNameToId(t.collation)))) + // Implicit casting + checkAnswer(sql(s"SELECT translate(collate('${t.input}', '${t.collation}')," + + s"'${t.matchExpression}', '${t.replaceExpression}')"), Row(t.result)) + checkAnswer(sql(s"SELECT translate('${t.input}', collate('${t.matchExpression}'," + + s"'${t.collation}'), '${t.replaceExpression}')"), Row(t.result)) + checkAnswer(sql(s"SELECT translate('${t.input}', '${t.matchExpression}'," + + s"collate('${t.replaceExpression}', '${t.collation}'))"), Row(t.result)) + }) + // Collation mismatch + val collationMismatch = intercept[AnalysisException] { + sql(s"SELECT translate(collate('Translate', 'UTF8_LCASE')," + + s"collate('Rnlt', 'UNICODE'), '1234')") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("Support Replace string expression with collation") { + case class ReplaceTestCase[R](source: String, search: String, replace: String, + c: String, result: R) + val testCases = Seq( + // scalastyle:off + ReplaceTestCase("r世eplace", "pl", "123", "UTF8_BINARY", "r世e123ace"), + ReplaceTestCase("repl世ace", "PL", "AB", "UTF8_LCASE", "reAB世ace"), + ReplaceTestCase("abcdabcd", "bc", "", "UNICODE", "adad"), + ReplaceTestCase("aBc世abc", "b", "12", "UNICODE_CI", "a12c世a12c"), + ReplaceTestCase("abi̇o12i̇o", "İo", "yy", "UNICODE_CI", "abyy12yy"), + ReplaceTestCase("abİo12i̇o", "i̇o", "xx", "UNICODE_CI", "abxx12xx") + // scalastyle:on + ) + testCases.foreach(t => { + val query = s"SELECT replace(collate('${t.source}','${t.c}'),collate('${t.search}'," + + s"'${t.c}'),collate('${t.replace}','${t.c}'))" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType( + StringType(CollationFactory.collationNameToId(t.c)))) + // Implicit casting + checkAnswer(sql(s"SELECT replace(collate('${t.source}','${t.c}'),'${t.search}'," + + s"'${t.replace}')"), Row(t.result)) + checkAnswer(sql(s"SELECT replace('${t.source}',collate('${t.search}','${t.c}')," + + s"'${t.replace}')"), Row(t.result)) + checkAnswer(sql(s"SELECT replace('${t.source}','${t.search}'," + + s"collate('${t.replace}','${t.c}'))"), Row(t.result)) + }) + // Collation mismatch + val collationMismatch = intercept[AnalysisException] { + sql("SELECT startswith(collate('abcde', 'UTF8_LCASE'),collate('C', 'UNICODE_CI'))") } 
assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") } @@ -145,7 +368,7 @@ class CollationStringExpressionsSuite val testCases = Seq( EndsWithTestCase("", "", "UTF8_BINARY", true), EndsWithTestCase("abcde", "E", "UNICODE", false), - EndsWithTestCase("abcde", "FGH", "UTF8_BINARY_LCASE", false), + EndsWithTestCase("abcde", "FGH", "UTF8_LCASE", false), EndsWithTestCase("abcde", "CDE", "UNICODE_CI", true) ) testCases.foreach(t => { @@ -159,7 +382,7 @@ class CollationStringExpressionsSuite }) // Collation mismatch val collationMismatch = intercept[AnalysisException] { - sql("SELECT endswith(collate('abcde', 'UTF8_BINARY_LCASE'),collate('C', 'UNICODE_CI'))") + sql("SELECT endswith(collate('abcde', 'UTF8_LCASE'),collate('C', 'UNICODE_CI'))") } assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") } @@ -170,7 +393,7 @@ class CollationStringExpressionsSuite val testCases = Seq( StringRepeatTestCase("", 1, "UTF8_BINARY", ""), StringRepeatTestCase("a", 0, "UNICODE", ""), - StringRepeatTestCase("XY", 3, "UTF8_BINARY_LCASE", "XYXYXY"), + StringRepeatTestCase("XY", 3, "UTF8_LCASE", "XYXYXY"), StringRepeatTestCase("123", 2, "UNICODE_CI", "123123") ) testCases.foreach(t => { @@ -185,16 +408,16 @@ class CollationStringExpressionsSuite case class AsciiUnBase64TestCase[R](q: String, dt: DataType, r: R) val testCases = Seq( AsciiUnBase64TestCase("select ascii('a' collate utf8_binary)", IntegerType, 97), - AsciiUnBase64TestCase("select ascii('B' collate utf8_binary_lcase)", IntegerType, 66), + AsciiUnBase64TestCase("select ascii('B' collate utf8_lcase)", IntegerType, 66), AsciiUnBase64TestCase("select ascii('#' collate unicode)", IntegerType, 35), AsciiUnBase64TestCase("select ascii('!' collate unicode_ci)", IntegerType, 33), AsciiUnBase64TestCase("select unbase64('QUJD' collate utf8_binary)", BinaryType, Seq(65, 66, 67)), - AsciiUnBase64TestCase("select unbase64('eHl6' collate utf8_binary_lcase)", BinaryType, + AsciiUnBase64TestCase("select unbase64('eHl6' collate utf8_lcase)", BinaryType, Seq(120, 121, 122)), AsciiUnBase64TestCase("select unbase64('IyMj' collate utf8_binary)", BinaryType, Seq(35, 35, 35)), - AsciiUnBase64TestCase("select unbase64('IQ==' collate utf8_binary_lcase)", BinaryType, + AsciiUnBase64TestCase("select unbase64('IQ==' collate utf8_lcase)", BinaryType, Seq(33)) ) testCases.foreach(t => { @@ -208,11 +431,11 @@ class CollationStringExpressionsSuite case class DefaultCollationTestCase[R](q: String, c: String, r: R) val testCases = Seq( DefaultCollationTestCase("select chr(97)", "UTF8_BINARY", "a"), - DefaultCollationTestCase("select chr(66)", "UTF8_BINARY_LCASE", "B"), + DefaultCollationTestCase("select chr(66)", "UTF8_LCASE", "B"), DefaultCollationTestCase("select base64('xyz')", "UNICODE", "eHl6"), DefaultCollationTestCase("select base64('!')", "UNICODE_CI", "IQ=="), DefaultCollationTestCase("select decode(encode('$', 'utf-8'), 'utf-8')", "UTF8_BINARY", "$"), - DefaultCollationTestCase("select decode(encode('X', 'utf-8'), 'utf-8')", "UTF8_BINARY_LCASE", + DefaultCollationTestCase("select decode(encode('X', 'utf-8'), 'utf-8')", "UTF8_LCASE", "X"), DefaultCollationTestCase("select format_number(123.123, '###.###')", "UNICODE", "123.123"), DefaultCollationTestCase("select format_number(99.99, '##.##')", "UNICODE_CI", "99.99") @@ -231,7 +454,7 @@ class CollationStringExpressionsSuite val testCases = Seq( EncodeToBinarySentencesTestCase("select encode('a' collate utf8_binary, 'utf-8')", BinaryType, Seq(97)), - EncodeToBinarySentencesTestCase("select 
encode('$' collate utf8_binary_lcase, 'utf-8')", + EncodeToBinarySentencesTestCase("select encode('$' collate utf8_lcase, 'utf-8')", BinaryType, Seq(36)), EncodeToBinarySentencesTestCase("select to_binary('B' collate unicode, 'utf-8')", BinaryType, Seq(66)), @@ -244,9 +467,9 @@ class CollationStringExpressionsSuite ArrayType(ArrayType(StringType)), Seq(Seq("Hello", "world"), Seq("Nice", "day"))), EncodeToBinarySentencesTestCase( """ - |select sentences('Something else. Nothing here.' collate utf8_binary_lcase) + |select sentences('Something else. Nothing here.' collate utf8_lcase) |""".stripMargin, - ArrayType(ArrayType(StringType("UTF8_BINARY_LCASE"))), + ArrayType(ArrayType(StringType("UTF8_LCASE"))), Seq(Seq("Something", "else"), Seq("Nothing", "here"))) ) testCases.foreach(t => { @@ -261,7 +484,7 @@ class CollationStringExpressionsSuite case class UpperTestCase[R](s: String, c: String, result: R) val testCases = Seq( UpperTestCase("aBc", "UTF8_BINARY", "ABC"), - UpperTestCase("aBc", "UTF8_BINARY_LCASE", "ABC"), + UpperTestCase("aBc", "UTF8_LCASE", "ABC"), UpperTestCase("aBc", "UNICODE", "ABC"), UpperTestCase("aBc", "UNICODE_CI", "ABC") ) @@ -278,7 +501,7 @@ class CollationStringExpressionsSuite case class LowerTestCase[R](s: String, c: String, result: R) val testCases = Seq( LowerTestCase("aBc", "UTF8_BINARY", "abc"), - LowerTestCase("aBc", "UTF8_BINARY_LCASE", "abc"), + LowerTestCase("aBc", "UTF8_LCASE", "abc"), LowerTestCase("aBc", "UNICODE", "abc"), LowerTestCase("aBc", "UNICODE_CI", "abc") ) @@ -295,7 +518,7 @@ class CollationStringExpressionsSuite case class InitCapTestCase[R](s: String, c: String, result: R) val testCases = Seq( InitCapTestCase("aBc ABc", "UTF8_BINARY", "Abc Abc"), - InitCapTestCase("aBc ABc", "UTF8_BINARY_LCASE", "Abc Abc"), + InitCapTestCase("aBc ABc", "UTF8_LCASE", "Abc Abc"), InitCapTestCase("aBc ABc", "UNICODE", "Abc Abc"), InitCapTestCase("aBc ABc", "UNICODE_CI", "Abc Abc") ) @@ -307,9 +530,552 @@ class CollationStringExpressionsSuite }) } + test("Overlay string expression with collation") { + // Supported collations + case class OverlayTestCase(l: String, r: String, pos: Int, c: String, result: String) + val testCases = Seq( + OverlayTestCase("hello", " world", 6, "UTF8_BINARY", "hello world"), + OverlayTestCase("nice", " day", 5, "UTF8_LCASE", "nice day"), + OverlayTestCase("A", "B", 1, "UNICODE", "B"), + OverlayTestCase("!", "!!!", 1, "UNICODE_CI", "!!!") + ) + testCases.foreach(t => { + val query = + s""" + |select overlay(collate('${t.l}', '${t.c}') placing + |collate('${t.r}', '${t.c}') from ${t.pos}) + |""".stripMargin + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) + // Implicit casting + checkAnswer(sql( + s""" + |select overlay(collate('${t.l}', '${t.c}') placing '${t.r}' from ${t.pos}) + |""".stripMargin), Row(t.result)) + checkAnswer(sql( + s""" + |select overlay('${t.l}' placing collate('${t.r}', '${t.c}') from ${t.pos}) + |""".stripMargin), Row(t.result)) + checkAnswer(sql( + s""" + |select overlay(collate('${t.l}', '${t.c}') + |placing '${t.r}' from collate('${t.pos}', '${t.c}')) + |""".stripMargin), Row(t.result)) + }) + // Collation mismatch + assert( + intercept[AnalysisException] { + sql("SELECT overlay('a' collate UNICODE PLACING 'b' collate UNICODE_CI FROM 1)") + }.getErrorClass == "COLLATION_MISMATCH.EXPLICIT" + ) + } + + test("FormatString string expression with collation") { + // Supported collations + case class FormatStringTestCase(f: 
String, a: Seq[Any], c: String, r: String) + val testCases = Seq( + FormatStringTestCase("%s%s", Seq("'a'", "'b'"), "UTF8_BINARY", "ab"), + FormatStringTestCase("%d", Seq(123), "UTF8_LCASE", "123"), + FormatStringTestCase("%s%d", Seq("'A'", 0), "UNICODE", "A0"), + FormatStringTestCase("%s%s", Seq("'Hello'", "'!!!'"), "UNICODE_CI", "Hello!!!") + ) + testCases.foreach(t => { + val query = + s""" + |select format_string(collate('${t.f}', '${t.c}'), ${t.a.mkString(", ")}) + |""".stripMargin + // Result & data type + checkAnswer(sql(query), Row(t.r)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) + }) + } + + test("SoundEx string expression with collation") { + // Supported collations + case class SoundExTestCase(q: String, c: String, r: String) + val testCases = Seq( + SoundExTestCase("select soundex('A' collate utf8_binary)", "UTF8_BINARY", "A000"), + SoundExTestCase("select soundex('!' collate utf8_lcase)", "UTF8_LCASE", "!"), + SoundExTestCase("select soundex('$' collate unicode)", "UNICODE", "$"), + SoundExTestCase("select soundex('X' collate unicode_ci)", "UNICODE_CI", "X000") + ) + testCases.foreach(t => { + withSQLConf(SQLConf.DEFAULT_COLLATION.key -> t.c) { + // Result & data type + checkAnswer(sql(t.q), Row(t.r)) + assert(sql(t.q).schema.fields.head.dataType.sameType(StringType(t.c))) + } + }) + } + + test("Length, BitLength & OctetLength string expressions with collations") { + // Supported collations + case class LenTestCase(q: String, r: Int) + val testCases = Seq( + LenTestCase("select length('hello' collate utf8_binary)", 5), + LenTestCase("select length('world' collate utf8_lcase)", 5), + LenTestCase("select length('ff' collate unicode)", 1), + LenTestCase("select bit_length('hello' collate unicode_ci)", 40), + LenTestCase("select bit_length('world' collate utf8_binary)", 40), + LenTestCase("select bit_length('ff' collate utf8_lcase)", 24), + LenTestCase("select octet_length('hello' collate unicode)", 5), + LenTestCase("select octet_length('world' collate unicode_ci)", 5), + LenTestCase("select octet_length('ff' collate utf8_binary)", 3) + ) + testCases.foreach(t => { + // Result & data type + checkAnswer(sql(t.q), Row(t.r)) + assert(sql(t.q).schema.fields.head.dataType.sameType(IntegerType)) + }) + } + + test("Luhncheck string expression with collation") { + // Supported collations + case class LuhncheckTestCase(q: String, c: String, r: Boolean) + val testCases = Seq( + LuhncheckTestCase("123", "UTF8_BINARY", r = false), + LuhncheckTestCase("000", "UTF8_LCASE", r = true), + LuhncheckTestCase("111", "UNICODE", r = false), + LuhncheckTestCase("222", "UNICODE_CI", r = false) + ) + testCases.foreach(t => { + val query = s"select luhn_check(${t.q})" + // Result & data type + checkAnswer(sql(query), Row(t.r)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + }) + } + + test("Levenshtein string expression with collation") { + // Supported collations + case class LevenshteinTestCase( + left: String, right: String, collationName: String, threshold: Option[Int], result: Int + ) + val testCases = Seq( + LevenshteinTestCase("kitten", "sitTing", "UTF8_BINARY", None, result = 4), + LevenshteinTestCase("kitten", "sitTing", "UTF8_LCASE", None, result = 4), + LevenshteinTestCase("kitten", "sitTing", "UNICODE", Some(3), result = -1), + LevenshteinTestCase("kitten", "sitTing", "UNICODE_CI", Some(3), result = -1) + ) + testCases.foreach(t => { + withSQLConf(SQLConf.DEFAULT_COLLATION.key -> t.collationName) { + val th = if (t.threshold.isDefined) 
s", ${t.threshold.get}" else "" + val query = s"select levenshtein('${t.left}', '${t.right}'$th)" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(IntegerType)) + } + }) + } + + test("Support IsValidUTF8 string expression with collation") { + // Supported collations + case class IsValidUTF8TestCase(input: String, collationName: String, result: Any) + val testCases = Seq( + IsValidUTF8TestCase("null", "UTF8_BINARY", result = null), + IsValidUTF8TestCase("''", "UTF8_LCASE", result = true), + IsValidUTF8TestCase("'abc'", "UNICODE", result = true), + IsValidUTF8TestCase("x'FF'", "UNICODE_CI", result = false) + ) + testCases.foreach { testCase => + withSQLConf(SQLConf.DEFAULT_COLLATION.key -> testCase.collationName) { + val query = s"SELECT is_valid_utf8(${testCase.input})" + // Result & data type + checkAnswer(sql(query), Row(testCase.result)) + assert(sql(query).schema.fields.head.dataType.sameType(BooleanType)) + } + } + } + + test("Support MakeValidUTF8 string expression with collation") { + // Supported collations + case class MakeValidUTF8TestCase(input: String, collationName: String, result: Any) + val testCases = Seq( + MakeValidUTF8TestCase("null", "UTF8_BINARY", result = null), + MakeValidUTF8TestCase("''", "UTF8_LCASE", result = ""), + MakeValidUTF8TestCase("'abc'", "UNICODE", result = "abc"), + MakeValidUTF8TestCase("x'FF'", "UNICODE_CI", result = "\uFFFD") + ) + testCases.foreach { testCase => + withSQLConf(SQLConf.DEFAULT_COLLATION.key -> testCase.collationName) { + val query = s"SELECT make_valid_utf8(${testCase.input})" + // Result & data type + checkAnswer(sql(query), Row(testCase.result)) + val dataType = StringType(testCase.collationName) + assert(sql(query).schema.fields.head.dataType.sameType(dataType)) + } + } + } + + test("Support ValidateUTF8 string expression with collation") { + // Supported collations + case class ValidateUTF8TestCase(input: String, collationName: String, result: Any) + val testCases = Seq( + ValidateUTF8TestCase("null", "UTF8_BINARY", result = null), + ValidateUTF8TestCase("''", "UTF8_LCASE", result = ""), + ValidateUTF8TestCase("'abc'", "UNICODE", result = "abc"), + ValidateUTF8TestCase("x'FF'", "UNICODE_CI", result = None) + ) + testCases.foreach { testCase => + withSQLConf(SQLConf.DEFAULT_COLLATION.key -> testCase.collationName) { + val query = s"SELECT validate_utf8(${testCase.input})" + if (testCase.result == None) { + // Exception thrown + val e = intercept[SparkIllegalArgumentException] { + sql(query).collect() + } + assert(e.getErrorClass == "INVALID_UTF8_STRING") + assert(e.getMessageParameters.asScala == Map("str" -> "\\xFF")) + } else { + // Result & data type + checkAnswer(sql(query), Row(testCase.result)) + val dataType = StringType(testCase.collationName) + assert(sql(query).schema.fields.head.dataType.sameType(dataType)) + } + } + } + } + + test("Support TryValidateUTF8 string expression with collation") { + // Supported collations + case class ValidateUTF8TestCase(input: String, collationName: String, result: Any) + val testCases = Seq( + ValidateUTF8TestCase("null", "UTF8_BINARY", result = null), + ValidateUTF8TestCase("''", "UTF8_LCASE", result = ""), + ValidateUTF8TestCase("'abc'", "UNICODE", result = "abc"), + ValidateUTF8TestCase("x'FF'", "UNICODE_CI", result = null) + ) + testCases.foreach { testCase => + withSQLConf(SQLConf.DEFAULT_COLLATION.key -> testCase.collationName) { + val query = s"SELECT try_validate_utf8(${testCase.input})" + // Result & data 
type + checkAnswer(sql(query), Row(testCase.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(testCase.collationName))) + } + } + } + + test("Support Left/Right/Substr with collation") { + case class SubstringTestCase( + method: String, + str: String, + len: String, + pad: Option[String], + collation: String, + result: Row) { + val strString = if (str == "null") "null" else s"'$str'" + val query = + s"SELECT $method(collate($strString, '$collation')," + + s" $len${pad.map(p => s", '$p'").getOrElse("")})" + } + + val checks = Seq( + SubstringTestCase("substr", "example", "1", Some("100"), "utf8_lcase", Row("example")), + SubstringTestCase("substr", "example", "2", Some("2"), "utf8_binary", Row("xa")), + SubstringTestCase("right", "", "1", None, "utf8_lcase", Row("")), + SubstringTestCase("substr", "example", "0", Some("0"), "unicode", Row("")), + SubstringTestCase("substr", "example", "-3", Some("2"), "unicode_ci", Row("pl")), + SubstringTestCase("substr", " a世a ", "2", Some("3"), "utf8_lcase", Row("a世a")), + SubstringTestCase("left", " a世a ", "3", None, "utf8_binary", Row(" a世")), + SubstringTestCase("right", " a世a ", "3", None, "unicode", Row("世a ")), + SubstringTestCase("left", "ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ", "3", None, "unicode_ci", Row("ÀÃÂ")), + SubstringTestCase("right", "ÀÃÂĀĂȦÄäâãȻȻȻȻȻǢǼÆ", "3", None, "utf8_lcase", Row("ǢǼÆ")), + SubstringTestCase("substr", "", "1", Some("1"), "utf8_lcase", Row("")), + SubstringTestCase("substr", "", "1", Some("1"), "unicode", Row("")), + SubstringTestCase("left", "", "1", None, "utf8_binary", Row("")), + SubstringTestCase("left", "null", "1", None, "utf8_lcase", Row(null)), + SubstringTestCase("right", "null", "1", None, "unicode", Row(null)), + SubstringTestCase("substr", "null", "1", None, "utf8_binary", Row(null)), + SubstringTestCase("substr", "null", "1", Some("1"), "unicode_ci", Row(null)), + SubstringTestCase("left", "null", "null", None, "utf8_lcase", Row(null)), + SubstringTestCase("right", "null", "null", None, "unicode", Row(null)), + SubstringTestCase("substr", "null", "null", Some("null"), "utf8_binary", Row(null)), + SubstringTestCase("substr", "null", "null", None, "unicode_ci", Row(null)), + SubstringTestCase("left", "ÀÃÂȦÄäåäáâãȻȻȻǢǼÆ", "null", None, "utf8_lcase", Row(null)), + SubstringTestCase("right", "ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ", "null", None, "unicode", Row(null)), + SubstringTestCase("substr", "ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ", "null", None, "utf8_binary", Row(null)), + SubstringTestCase("substr", "", "null", None, "unicode_ci", Row(null)) + ) + + checks.foreach { check => + // Result & data type + checkAnswer(sql(check.query), check.result) + assert(sql(check.query).schema.fields.head.dataType.sameType(StringType(check.collation))) + } + } + + test("Support StringRPad string expressions with collation") { + // Supported collations + case class StringRPadTestCase[R](s: String, len: Int, pad: String, c: String, result: R) + val testCases = Seq( + StringRPadTestCase("", 5, " ", "UTF8_BINARY", " "), + StringRPadTestCase("abc", 5, " ", "UNICODE", "abc "), + StringRPadTestCase("Hello", 7, "Wörld", "UTF8_LCASE", "HelloWö"), + StringRPadTestCase("1234567890", 5, "aaaAAa", "UNICODE_CI", "12345"), + StringRPadTestCase("aaAA", 2, " ", "UTF8_BINARY", "aa"), + StringRPadTestCase("ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ℀℃", 2, "1", "UTF8_LCASE", "ÀÃ"), + StringRPadTestCase("ĂȦÄäåäá", 20, "ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ", "UNICODE", "ĂȦÄäåäáÀÃÂĀĂȦÄäåäáâã"), + StringRPadTestCase("aȦÄä", 8, "a1", "UNICODE_CI", "aȦÄäa1a1") + ) + testCases.foreach(t => { + val 
query = s"SELECT rpad(collate('${t.s}', '${t.c}')," + + s" ${t.len}, collate('${t.pad}', '${t.c}'))" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) + // Implicit casting + checkAnswer( + sql(s"SELECT rpad(collate('${t.s}', '${t.c}'), ${t.len}, '${t.pad}')"), + Row(t.result)) + checkAnswer( + sql(s"SELECT rpad('${t.s}', ${t.len}, collate('${t.pad}', '${t.c}'))"), + Row(t.result)) + }) + // Collation mismatch + val collationMismatch = intercept[AnalysisException] { + sql("SELECT rpad(collate('abcde', 'UNICODE_CI'),1,collate('C', 'UTF8_LCASE'))") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("Support StringLPad string expressions with collation") { + // Supported collations + case class StringLPadTestCase[R](s: String, len: Int, pad: String, c: String, result: R) + val testCases = Seq( + StringLPadTestCase("", 5, " ", "UTF8_BINARY", " "), + StringLPadTestCase("abc", 5, " ", "UNICODE", " abc"), + StringLPadTestCase("Hello", 7, "Wörld", "UTF8_LCASE", "WöHello"), + StringLPadTestCase("1234567890", 5, "aaaAAa", "UNICODE_CI", "12345"), + StringLPadTestCase("aaAA", 2, " ", "UTF8_BINARY", "aa"), + StringLPadTestCase("ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ℀℃", 2, "1", "UTF8_LCASE", "ÀÃ"), + StringLPadTestCase("ĂȦÄäåäá", 20, "ÀÃÂĀĂȦÄäåäáâãȻȻȻȻȻǢǼÆ", "UNICODE", "ÀÃÂĀĂȦÄäåäáâãĂȦÄäåäá"), + StringLPadTestCase("aȦÄä", 8, "a1", "UNICODE_CI", "a1a1aȦÄä") + ) + testCases.foreach(t => { + val query = s"SELECT lpad(collate('${t.s}', '${t.c}')," + + s" ${t.len}, collate('${t.pad}', '${t.c}'))" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.c))) + // Implicit casting + checkAnswer( + sql(s"SELECT lpad(collate('${t.s}', '${t.c}'), ${t.len}, '${t.pad}')"), + Row(t.result)) + checkAnswer( + sql(s"SELECT lpad('${t.s}', ${t.len}, collate('${t.pad}', '${t.c}'))"), + Row(t.result)) + }) + // Collation mismatch + val collationMismatch = intercept[AnalysisException] { + sql("SELECT lpad(collate('abcde', 'UNICODE_CI'),1,collate('C', 'UTF8_LCASE'))") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("Support StringLPad string expressions with explicit collation on second parameter") { + val query = "SELECT lpad('abc', collate('5', 'unicode_ci'), ' ')" + checkAnswer(sql(query), Row(" abc")) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(0))) + } + + test("Support Locate string expression with collation") { + case class StringLocateTestCase[R](substring: String, string: String, start: Integer, + c: String, result: R) + val testCases = Seq( + // scalastyle:off + StringLocateTestCase("aa", "aaads", 0, "UTF8_BINARY", 0), + StringLocateTestCase("aa", "Aaads", 0, "UTF8_LCASE", 0), + StringLocateTestCase("界x", "test大千世界X大千世界", 1, "UTF8_LCASE", 8), + StringLocateTestCase("aBc", "abcabc", 4, "UTF8_LCASE", 4), + StringLocateTestCase("aa", "Aaads", 0, "UNICODE", 0), + StringLocateTestCase("abC", "abCabC", 2, "UNICODE", 4), + StringLocateTestCase("aa", "Aaads", 0, "UNICODE_CI", 0), + StringLocateTestCase("界x", "test大千世界X大千世界", 1, "UNICODE_CI", 8) + // scalastyle:on + ) + testCases.foreach(t => { + val query = s"SELECT locate(collate('${t.substring}','${t.c}')," + + s"collate('${t.string}','${t.c}'),${t.start})" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(IntegerType)) + // Implicit casting 
+ checkAnswer(sql(s"SELECT locate(collate('${t.substring}','${t.c}')," + + s"'${t.string}',${t.start})"), Row(t.result)) + checkAnswer(sql(s"SELECT locate('${t.substring}',collate('${t.string}'," + + s"'${t.c}'),${t.start})"), Row(t.result)) + }) + // Collation mismatch + val collationMismatch = intercept[AnalysisException] { + sql("SELECT locate(collate('aBc', 'UTF8_BINARY'),collate('abcabc', 'UTF8_LCASE'),4)") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("StringTrim* functions - unit tests for both paths (codegen and eval)") { + // Without trimString param. + checkEvaluation( + StringTrim(Literal.create( " asd ", StringType("UTF8_BINARY"))), "asd") + checkEvaluation( + StringTrimLeft(Literal.create(" asd ", StringType("UTF8_LCASE"))), "asd ") + checkEvaluation(StringTrimRight( + Literal.create(" asd ", StringType("UTF8_BINARY"))), " asd") + + // With trimString param. + checkEvaluation( + StringTrim( + Literal.create(" asd ", StringType("UTF8_BINARY")), + Literal.create(" ", StringType("UTF8_BINARY"))), + "asd") + checkEvaluation( + StringTrimLeft( + Literal.create(" asd ", StringType("UTF8_LCASE")), + Literal.create(" ", StringType("UTF8_LCASE"))), + "asd ") + checkEvaluation( + StringTrimRight( + Literal.create(" asd ", StringType("UTF8_BINARY")), + Literal.create(" ", StringType("UTF8_BINARY"))), + " asd") + + checkEvaluation( + StringTrim( + Literal.create("xxasdxx", StringType("UTF8_BINARY")), + Literal.create("x", StringType("UTF8_BINARY"))), + "asd") + checkEvaluation( + StringTrimLeft( + Literal.create("xxasdxx", StringType("UTF8_LCASE")), + Literal.create("x", StringType("UTF8_LCASE"))), + "asdxx") + checkEvaluation( + StringTrimRight( + Literal.create("xxasdxx", StringType("UTF8_BINARY")), + Literal.create("x", StringType("UTF8_BINARY"))), + "xxasd") + } + + test("StringTrim* functions - E2E tests") { + case class StringTrimTestCase( + collation: String, + trimFunc: String, + sourceString: String, + hasTrimString: Boolean, + trimString: String, + expectedResultString: String) + + val testCases = Seq( + StringTrimTestCase("UTF8_BINARY", "TRIM", " asd ", false, null, "asd"), + StringTrimTestCase("UTF8_BINARY", "BTRIM", " asd ", true, null, null), + StringTrimTestCase("UTF8_BINARY", "LTRIM", "xxasdxx", true, "x", "asdxx"), + StringTrimTestCase("UTF8_BINARY", "RTRIM", "xxasdxx", true, "x", "xxasd"), + + StringTrimTestCase("UTF8_LCASE", "TRIM", " asd ", true, null, null), + StringTrimTestCase("UTF8_LCASE", "BTRIM", "xxasdxx", true, "x", "asd"), + StringTrimTestCase("UTF8_LCASE", "LTRIM", "xxasdxx", true, "x", "asdxx"), + StringTrimTestCase("UTF8_LCASE", "RTRIM", " asd ", false, null, " asd"), + + StringTrimTestCase("UTF8_BINARY", "TRIM", "xxasdxx", true, "x", "asd"), + StringTrimTestCase("UTF8_BINARY", "BTRIM", "xxasdxx", true, "x", "asd"), + StringTrimTestCase("UTF8_BINARY", "LTRIM", " asd ", false, null, "asd "), + StringTrimTestCase("UTF8_BINARY", "RTRIM", " asd ", true, null, null) + + // Other more complex cases can be found in unit tests in CollationSupportSuite.java. 
+ ) + + testCases.foreach(testCase => { + var df: DataFrame = null + + if (testCase.trimFunc.equalsIgnoreCase("BTRIM")) { + // BTRIM has arguments in (srcStr, trimStr) order + df = sql(s"SELECT ${testCase.trimFunc}(" + + s"COLLATE('${testCase.sourceString}', '${testCase.collation}')" + + (if (!testCase.hasTrimString) "" + else if (testCase.trimString == null) ", null" + else s", '${testCase.trimString}'") + + ")") + } + else { + // While other functions have arguments in (trimStr, srcStr) order + df = sql(s"SELECT ${testCase.trimFunc}(" + + (if (!testCase.hasTrimString) "" + else if (testCase.trimString == null) "null, " + else s"'${testCase.trimString}', ") + + s"COLLATE('${testCase.sourceString}', '${testCase.collation}')" + + ")") + } + + checkAnswer(df = df, expectedAnswer = Row(testCase.expectedResultString)) + }) + } + + test("StringTrim* functions - implicit collations") { + checkAnswer( + df = sql("SELECT TRIM(COLLATE('x', 'UTF8_BINARY'), COLLATE('xax', 'UTF8_BINARY'))"), + expectedAnswer = Row("a")) + checkAnswer( + df = sql("SELECT BTRIM(COLLATE('xax', 'UTF8_LCASE'), " + + "COLLATE('x', 'UTF8_LCASE'))"), + expectedAnswer = Row("a")) + checkAnswer( + df = sql("SELECT LTRIM(COLLATE('x', 'UTF8_BINARY'), COLLATE('xax', 'UTF8_BINARY'))"), + expectedAnswer = Row("ax")) + + checkAnswer( + df = sql("SELECT RTRIM('x', COLLATE('xax', 'UTF8_BINARY'))"), + expectedAnswer = Row("xa")) + checkAnswer( + df = sql("SELECT TRIM('x', COLLATE('xax', 'UTF8_LCASE'))"), + expectedAnswer = Row("a")) + checkAnswer( + df = sql("SELECT BTRIM('xax', COLLATE('x', 'UTF8_BINARY'))"), + expectedAnswer = Row("a")) + + checkAnswer( + df = sql("SELECT LTRIM(COLLATE('x', 'UTF8_BINARY'), 'xax')"), + expectedAnswer = Row("ax")) + checkAnswer( + df = sql("SELECT RTRIM(COLLATE('x', 'UTF8_LCASE'), 'xax')"), + expectedAnswer = Row("xa")) + checkAnswer( + df = sql("SELECT TRIM(COLLATE('x', 'UTF8_BINARY'), 'xax')"), + expectedAnswer = Row("a")) + } + + test("StringTrim* functions - collation type mismatch") { + List("TRIM", "LTRIM", "RTRIM").foreach(func => { + val collationMismatch = intercept[AnalysisException] { + sql("SELECT " + func + "(COLLATE('x', 'UTF8_LCASE'), " + + "COLLATE('xxaaaxx', 'UTF8_BINARY'))") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + }) + + val collationMismatch = intercept[AnalysisException] { + sql("SELECT BTRIM(COLLATE('xxaaaxx', 'UTF8_BINARY'), COLLATE('x', 'UTF8_LCASE'))") + } + assert(collationMismatch.getErrorClass === "COLLATION_MISMATCH.EXPLICIT") + } + + test("StringTrim* functions - unsupported collation types") { + List("TRIM", "LTRIM", "RTRIM").foreach(func => { + val collationMismatch = intercept[AnalysisException] { + sql("SELECT " + func + "(COLLATE('x', 'UNICODE_CI'), COLLATE('xxaaaxx', 'UNICODE_CI'))") + } + assert(collationMismatch.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + }) + + val collationMismatch = intercept[AnalysisException] { + sql("SELECT BTRIM(COLLATE('xxaaaxx', 'UNICODE_CI'), COLLATE('x', 'UNICODE_CI'))") + } + assert(collationMismatch.getErrorClass === "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE") + } + // TODO: Add more tests for other string expressions } +// scalastyle:on nonascii class CollationStringExpressionsANSISuite extends CollationStringExpressionsSuite { override protected def sparkConf: SparkConf = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index c4ddd25c99b6c..f662b86eaf815 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -17,11 +17,12 @@ package org.apache.spark.sql -import scala.collection.immutable.Seq import scala.jdk.CollectionConverters.MapHasAsJava import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.ExtendedAnalysisException +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.connector.{DatasourceV2SQLBase, FakeV2ProviderWithCustomSchema} import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTable} @@ -30,8 +31,9 @@ import org.apache.spark.sql.connector.catalog.CatalogV2Util.withDefaultOwnership import org.apache.spark.sql.errors.DataTypeErrors.toSQLType import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec} -import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} -import org.apache.spark.sql.internal.SqlApiConf +import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec +import org.apache.spark.sql.execution.joins._ +import org.apache.spark.sql.internal.{SqlApiConf, SQLConf} import org.apache.spark.sql.types.{MapType, StringType, StructField, StructType} class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { @@ -42,7 +44,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { private val allFileBasedDataSources = collationPreservingSources ++ collationNonPreservingSources test("collate returns proper type") { - Seq("utf8_binary", "utf8_binary_lcase", "unicode", "unicode_ci").foreach { collationName => + Seq("utf8_binary", "utf8_lcase", "unicode", "unicode_ci").foreach { collationName => checkAnswer(sql(s"select 'aaa' collate $collationName"), Row("aaa")) val collationId = CollationFactory.collationNameToId(collationName) assert(sql(s"select 'aaa' collate $collationName").schema(0).dataType @@ -51,7 +53,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } test("collation name is case insensitive") { - Seq("uTf8_BiNaRy", "uTf8_BiNaRy_Lcase", "uNicOde", "UNICODE_ci").foreach { collationName => + Seq("uTf8_BiNaRy", "utf8_lcase", "uNicOde", "UNICODE_ci").foreach { collationName => checkAnswer(sql(s"select 'aaa' collate $collationName"), Row("aaa")) val collationId = CollationFactory.collationNameToId(collationName) assert(sql(s"select 'aaa' collate $collationName").schema(0).dataType @@ -60,15 +62,25 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } test("collation expression returns name of collation") { - Seq("utf8_binary", "utf8_binary_lcase", "unicode", "unicode_ci").foreach { collationName => + Seq("utf8_binary", "utf8_lcase", "unicode", "unicode_ci").foreach { collationName => checkAnswer( sql(s"select collation('aaa' collate $collationName)"), Row(collationName.toUpperCase())) } } test("collate function syntax") { - assert(sql(s"select collate('aaa', 'utf8_binary')").schema(0).dataType == StringType(0)) - assert(sql(s"select collate('aaa', 'utf8_binary_lcase')").schema(0).dataType == StringType(1)) + assert(sql(s"select collate('aaa', 'utf8_binary')").schema(0).dataType == + StringType("UTF8_BINARY")) + assert(sql(s"select collate('aaa', 'utf8_lcase')").schema(0).dataType == + StringType("UTF8_LCASE")) + } + 
+ test("collate function syntax with default collation set") { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UTF8_LCASE") { + assert(sql(s"select collate('aaa', 'utf8_lcase')").schema(0).dataType == + StringType("UTF8_LCASE")) + assert(sql(s"select collate('aaa', 'UNICODE')").schema(0).dataType == StringType("UNICODE")) + } } test("collate function syntax invalid arg count") { @@ -141,7 +153,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { exception = intercept[SparkException] { sql("select 'aaa' collate UTF8_BS") }, errorClass = "COLLATION_INVALID_NAME", sqlState = "42704", - parameters = Map("proposal" -> "UTF8_BINARY", "collationName" -> "UTF8_BS")) + parameters = Map("collationName" -> "UTF8_BS", "proposals" -> "UTF8_LCASE")) } test("disable bucketing on collated string column") { @@ -178,9 +190,9 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { Seq( ("utf8_binary", "aaa", "AAA", false), ("utf8_binary", "aaa", "aaa", true), - ("utf8_binary_lcase", "aaa", "aaa", true), - ("utf8_binary_lcase", "aaa", "AAA", true), - ("utf8_binary_lcase", "aaa", "bbb", false), + ("utf8_lcase", "aaa", "aaa", true), + ("utf8_lcase", "aaa", "AAA", true), + ("utf8_lcase", "aaa", "bbb", false), ("unicode", "aaa", "aaa", true), ("unicode", "aaa", "AAA", false), ("unicode_CI", "aaa", "aaa", true), @@ -202,9 +214,9 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { ("utf8_binary", "AAA", "aaa", true), ("utf8_binary", "aaa", "aaa", false), ("utf8_binary", "aaa", "BBB", false), - ("utf8_binary_lcase", "aaa", "aaa", false), - ("utf8_binary_lcase", "AAA", "aaa", false), - ("utf8_binary_lcase", "aaa", "bbb", true), + ("utf8_lcase", "aaa", "aaa", false), + ("utf8_lcase", "AAA", "aaa", false), + ("utf8_lcase", "aaa", "bbb", true), ("unicode", "aaa", "aaa", false), ("unicode", "aaa", "AAA", true), ("unicode", "aaa", "BBB", true), @@ -276,9 +288,9 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { ("utf8_binary", Seq("AAA", "aaa"), Seq(Row(1, "AAA"), Row(1, "aaa"))), ("utf8_binary", Seq("aaa", "aaa"), Seq(Row(2, "aaa"))), ("utf8_binary", Seq("aaa", "bbb"), Seq(Row(1, "aaa"), Row(1, "bbb"))), - ("utf8_binary_lcase", Seq("aaa", "aaa"), Seq(Row(2, "aaa"))), - ("utf8_binary_lcase", Seq("AAA", "aaa"), Seq(Row(2, "AAA"))), - ("utf8_binary_lcase", Seq("aaa", "bbb"), Seq(Row(1, "aaa"), Row(1, "bbb"))), + ("utf8_lcase", Seq("aaa", "aaa"), Seq(Row(2, "aaa"))), + ("utf8_lcase", Seq("AAA", "aaa"), Seq(Row(2, "AAA"))), + ("utf8_lcase", Seq("aaa", "bbb"), Seq(Row(1, "aaa"), Row(1, "bbb"))), ("unicode", Seq("AAA", "aaa"), Seq(Row(1, "AAA"), Row(1, "aaa"))), ("unicode", Seq("aaa", "aaa"), Seq(Row(2, "aaa"))), ("unicode", Seq("aaa", "bbb"), Seq(Row(1, "aaa"), Row(1, "bbb"))), @@ -304,7 +316,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { val tableNameBinary = "T_BINARY" withTable(tableNameNonBinary) { withTable(tableNameBinary) { - sql(s"CREATE TABLE $tableNameNonBinary (c STRING COLLATE UTF8_BINARY_LCASE) USING PARQUET") + sql(s"CREATE TABLE $tableNameNonBinary (c STRING COLLATE UTF8_LCASE) USING PARQUET") sql(s"INSERT INTO $tableNameNonBinary VALUES ('aaa')") sql(s"CREATE TABLE $tableNameBinary (c STRING COLLATE UTF8_BINARY) USING PARQUET") sql(s"INSERT INTO $tableNameBinary VALUES ('aaa')") @@ -334,7 +346,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { test("create table with collation") { val tableName = "dummy_tbl" - val 
collationName = "UTF8_BINARY_LCASE" + val collationName = "UTF8_LCASE" val collationId = CollationFactory.collationNameToId(collationName) allFileBasedDataSources.foreach { format => @@ -382,7 +394,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { test("add collated column with alter table") { val tableName = "alter_column_tbl" val defaultCollation = "UTF8_BINARY" - val collationName = "UTF8_BINARY_LCASE" + val collationName = "UTF8_LCASE" val collationId = CollationFactory.collationNameToId(collationName) withTable(tableName) { @@ -413,12 +425,12 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } } - test("implicit casting of collated strings") { + test("SPARK-47210: Implicit casting of collated strings") { val tableName = "parquet_dummy_implicit_cast_t22" withTable(tableName) { spark.sql( s""" - | CREATE TABLE $tableName(c1 STRING COLLATE UTF8_BINARY_LCASE, + | CREATE TABLE $tableName(c1 STRING COLLATE UTF8_LCASE, | c2 STRING COLLATE UNICODE, c3 STRING COLLATE UNICODE_CI, c4 STRING) | USING PARQUET |""".stripMargin) @@ -471,7 +483,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { // concat + in checkAnswer(sql(s"SELECT c1 FROM $tableName where c1 || 'a' " + - s"IN (COLLATE('aa', 'UTF8_BINARY_LCASE'))"), Seq(Row("a"), Row("A"))) + s"IN (COLLATE('aa', 'UTF8_LCASE'))"), Seq(Row("a"), Row("A"))) checkAnswer(sql(s"SELECT c1 FROM $tableName where (c1 || 'a') " + s"IN (COLLATE('aa', 'UTF8_BINARY'))"), Seq(Row("a"))) @@ -567,13 +579,72 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } } - test("cast of default collated strings in IN expression") { + test("SPARK-47692: Parameter marker with EXECUTE IMMEDIATE implicit casting") { + sql(s"DECLARE stmtStr1 = 'SELECT collation(:var1 || :var2)';") + sql(s"DECLARE stmtStr2 = 'SELECT collation(:var1 || (\\\'a\\\' COLLATE UNICODE))';") + + checkAnswer( + sql( + """EXECUTE IMMEDIATE stmtStr1 USING + | 'a' AS var1, + | 'b' AS var2;""".stripMargin), + Seq(Row("UTF8_BINARY")) + ) + + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + checkAnswer( + sql( + """EXECUTE IMMEDIATE stmtStr1 USING + | 'a' AS var1, + | 'b' AS var2;""".stripMargin), + Seq(Row("UNICODE")) + ) + } + + checkAnswer( + sql( + """EXECUTE IMMEDIATE stmtStr2 USING + | 'a' AS var1;""".stripMargin), + Seq(Row("UNICODE")) + ) + + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + checkAnswer( + sql( + """EXECUTE IMMEDIATE stmtStr2 USING + | 'a' AS var1;""".stripMargin), + Seq(Row("UNICODE")) + ) + } + } + + test("SPARK-47692: Parameter markers with variable mapping") { + checkAnswer( + spark.sql( + "SELECT collation(:var1 || :var2)", + Map("var1" -> Literal.create('a', StringType("UTF8_BINARY")), + "var2" -> Literal.create('b', StringType("UNICODE")))), + Seq(Row("UTF8_BINARY")) + ) + + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + checkAnswer( + spark.sql( + "SELECT collation(:var1 || :var2)", + Map("var1" -> Literal.create('a', StringType("UTF8_BINARY")), + "var2" -> Literal.create('b', StringType("UNICODE")))), + Seq(Row("UNICODE")) + ) + } + } + + test("SPARK-47210: Cast of default collated strings in IN expression") { val tableName = "t1" withTable(tableName) { spark.sql( s""" | CREATE TABLE $tableName(utf8_binary STRING COLLATE UTF8_BINARY, - | utf8_binary_lcase STRING COLLATE UTF8_BINARY_LCASE) + | utf8_lcase STRING COLLATE UTF8_LCASE) | USING PARQUET |""".stripMargin) sql(s"INSERT INTO $tableName VALUES ('aaa', 'aaa')") 
@@ -582,24 +653,24 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql(s"INSERT INTO $tableName VALUES ('BBB', 'BBB')") checkAnswer(sql(s"SELECT * FROM $tableName " + - s"WHERE utf8_binary_lcase IN " + - s"('aaa' COLLATE UTF8_BINARY_LCASE, 'bbb' COLLATE UTF8_BINARY_LCASE)"), + s"WHERE utf8_lcase IN " + + s"('aaa' COLLATE UTF8_LCASE, 'bbb' COLLATE UTF8_LCASE)"), Seq(Row("aaa", "aaa"), Row("AAA", "AAA"), Row("bbb", "bbb"), Row("BBB", "BBB"))) checkAnswer(sql(s"SELECT * FROM $tableName " + - s"WHERE utf8_binary_lcase IN ('aaa' COLLATE UTF8_BINARY_LCASE, 'bbb')"), + s"WHERE utf8_lcase IN ('aaa' COLLATE UTF8_LCASE, 'bbb')"), Seq(Row("aaa", "aaa"), Row("AAA", "AAA"), Row("bbb", "bbb"), Row("BBB", "BBB"))) } } // TODO(SPARK-47210): Add indeterminate support - test("indeterminate collation checks") { + test("SPARK-47210: Indeterminate collation checks") { val tableName = "t1" val newTableName = "t2" withTable(tableName) { spark.sql( s""" | CREATE TABLE $tableName(c1 STRING COLLATE UNICODE, - | c2 STRING COLLATE UTF8_BINARY_LCASE) + | c2 STRING COLLATE UTF8_LCASE) | USING PARQUET |""".stripMargin) sql(s"INSERT INTO $tableName VALUES ('aaa', 'aaa')") @@ -607,21 +678,21 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { sql(s"INSERT INTO $tableName VALUES ('bbb', 'bbb')") sql(s"INSERT INTO $tableName VALUES ('BBB', 'BBB')") - sql(s"SET spark.sql.legacy.createHiveTableByDefault=false") - - withTable(newTableName) { - checkError( - exception = intercept[AnalysisException] { - sql(s"CREATE TABLE $newTableName AS SELECT c1 || c2 FROM $tableName") - }, - errorClass = "COLLATION_MISMATCH.IMPLICIT") + withSQLConf("spark.sql.legacy.createHiveTableByDefault" -> "false") { + withTable(newTableName) { + checkError( + exception = intercept[AnalysisException] { + sql(s"CREATE TABLE $newTableName AS SELECT c1 || c2 FROM $tableName") + }, + errorClass = "COLLATION_MISMATCH.IMPLICIT") + } } } } test("create v2 table with collation column") { val tableName = "testcat.table_name" - val collationName = "UTF8_BINARY_LCASE" + val collationName = "UTF8_LCASE" val collationId = CollationFactory.collationNameToId(collationName) withTable(tableName) { @@ -685,7 +756,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { val schema = StructType(StructField( "col", - StringType(CollationFactory.collationNameToId("UTF8_BINARY_LCASE"))) :: Nil) + StringType(CollationFactory.collationNameToId("UTF8_LCASE"))) :: Nil) val df = spark.createDataFrame(sparkContext.parallelize(in), schema) df.repartition(10, df.col("col")).foreachPartition( @@ -699,37 +770,6 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { }) } - test("hash based joins not allowed for non-binary collated strings") { - val in = (('a' to 'z') ++ ('A' to 'Z')).map(_.toString * 3).map(e => Row.apply(e, e)) - - val schema = StructType(StructField( - "col_non_binary", - StringType(CollationFactory.collationNameToId("UTF8_BINARY_LCASE"))) :: - StructField("col_binary", StringType) :: Nil) - val df1 = spark.createDataFrame(sparkContext.parallelize(in), schema) - - // Binary collations are allowed to use hash join. - assert(collectFirst( - df1.hint("broadcast").join(df1, df1("col_binary") === df1("col_binary")) - .queryExecution.executedPlan) { - case _: BroadcastHashJoinExec => () - }.nonEmpty) - - // Even with hint broadcast, hash join is not used for non-binary collated strings. 
- assert(collectFirst( - df1.hint("broadcast").join(df1, df1("col_non_binary") === df1("col_non_binary")) - .queryExecution.executedPlan) { - case _: BroadcastHashJoinExec => () - }.isEmpty) - - // Instead they will default to sort merge join. - assert(collectFirst( - df1.hint("broadcast").join(df1, df1("col_non_binary") === df1("col_non_binary")) - .queryExecution.executedPlan) { - case _: SortMergeJoinExec => () - }.nonEmpty) - } - test("Generated column expressions using collations - errors out") { checkError( exception = intercept[AnalysisException] { @@ -747,7 +787,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { "fieldName" -> "c2", "expressionStr" -> "SUBSTRING(c1, 0, 1)", "reason" -> - "generation expression cannot contain non-binary orderable collated string type")) + "generation expression cannot contain non utf8 binary collated string type")) checkError( exception = intercept[AnalysisException] { @@ -765,7 +805,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { "fieldName" -> "c2", "expressionStr" -> "LOWER(c1)", "reason" -> - "generation expression cannot contain non-binary orderable collated string type")) + "generation expression cannot contain non utf8 binary collated string type")) checkError( exception = intercept[AnalysisException] { @@ -783,7 +823,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { "fieldName" -> "c2", "expressionStr" -> "UCASE(struct1.a)", "reason" -> - "generation expression cannot contain non-binary orderable collated string type")) + "generation expression cannot contain non utf8 binary collated string type")) } test("SPARK-47431: Default collation set to UNICODE, literal test") { @@ -792,6 +832,45 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } } + test("SPARK-47972: Cast expression limitation for collations") { + checkError( + exception = intercept[ParseException] + (sql("SELECT cast(1 as string collate unicode)")), + errorClass = "UNSUPPORTED_DATATYPE", + parameters = Map( + "typeName" -> toSQLType(StringType("UNICODE"))), + context = + ExpectedContext(fragment = s"cast(1 as string collate unicode)", start = 7, stop = 39) + ) + + checkError( + exception = intercept[ParseException] + (sql("SELECT 'A' :: string collate unicode")), + errorClass = "UNSUPPORTED_DATATYPE", + parameters = Map( + "typeName" -> toSQLType(StringType("UNICODE"))), + context = ExpectedContext(fragment = s"'A' :: string collate unicode", start = 7, stop = 35) + ) + + checkAnswer(sql(s"SELECT cast(1 as string)"), Seq(Row("1"))) + checkAnswer(sql(s"SELECT cast('A' as string)"), Seq(Row("A"))) + + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { + checkError( + exception = intercept[ParseException] + (sql("SELECT cast(1 as string collate unicode)")), + errorClass = "UNSUPPORTED_DATATYPE", + parameters = Map( + "typeName" -> toSQLType(StringType("UNICODE"))), + context = + ExpectedContext(fragment = s"cast(1 as string collate unicode)", start = 7, stop = 39) + ) + + checkAnswer(sql(s"SELECT cast(1 as string)"), Seq(Row("1"))) + checkAnswer(sql(s"SELECT collation(cast(1 as string))"), Seq(Row("UNICODE"))) + } + } + test("SPARK-47431: Default collation set to UNICODE, column type test") { withTable("t") { withSQLConf(SqlApiConf.DEFAULT_COLLATION -> "UNICODE") { @@ -830,13 +909,13 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { val table = "table_agg" // array withTable(table) { - sql(s"create table 
$table (a array) using parquet") + sql(s"create table $table (a array) using parquet") sql(s"insert into $table values (array('aaa')), (array('AAA'))") checkAnswer(sql(s"select distinct a from $table"), Seq(Row(Seq("aaa")))) } // map doesn't support aggregation withTable(table) { - sql(s"create table $table (m map) using parquet") + sql(s"create table $table (m map) using parquet") val query = s"select distinct m from $table" checkError( exception = intercept[ExtendedAnalysisException](sql(query)), @@ -844,14 +923,14 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { parameters = Map( "colName" -> "`m`", "dataType" -> toSQLType(MapType( - StringType(CollationFactory.collationNameToId("UTF8_BINARY_LCASE")), + StringType(CollationFactory.collationNameToId("UTF8_LCASE")), StringType))), context = ExpectedContext(query, 0, query.length - 1) ) } // struct withTable(table) { - sql(s"create table $table (s struct) using parquet") + sql(s"create table $table (s struct) using parquet") sql(s"insert into $table values (named_struct('fld', 'aaa')), (named_struct('fld', 'AAA'))") checkAnswer(sql(s"select s.fld from $table group by s"), Seq(Row("aaa"))) } @@ -863,7 +942,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { // array withTable(tableLeft, tableRight) { Seq(tableLeft, tableRight).map(tab => - sql(s"create table $tab (a array) using parquet")) + sql(s"create table $tab (a array) using parquet")) Seq((tableLeft, "array('aaa')"), (tableRight, "array('AAA')")).map{ case (tab, data) => sql(s"insert into $tab values ($data)") } @@ -876,7 +955,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { // map doesn't support joins withTable(tableLeft, tableRight) { Seq(tableLeft, tableRight).map(tab => - sql(s"create table $tab (m map) using parquet")) + sql(s"create table $tab (m map) using parquet")) val query = s"select $tableLeft.m from $tableLeft join $tableRight on $tableLeft.m = $tableRight.m" val ctx = s"$tableLeft.m = $tableRight.m" @@ -886,7 +965,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { parameters = Map( "functionName" -> "`=`", "dataType" -> toSQLType(MapType( - StringType(CollationFactory.collationNameToId("UTF8_BINARY_LCASE")), + StringType(CollationFactory.collationNameToId("UTF8_LCASE")), StringType )), "sqlExpr" -> "\"(m = m)\""), @@ -895,7 +974,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { // struct withTable(tableLeft, tableRight) { Seq(tableLeft, tableRight).map(tab => - sql(s"create table $tab (s struct) using parquet")) + sql(s"create table $tab (s struct) using parquet")) Seq( (tableLeft, "named_struct('fld', 'aaa')"), (tableRight, "named_struct('fld', 'AAA')") @@ -911,37 +990,37 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } test("Support operations on complex types containing collated strings") { - checkAnswer(sql("select reverse('abc' collate utf8_binary_lcase)"), Seq(Row("cba"))) + checkAnswer(sql("select reverse('abc' collate utf8_lcase)"), Seq(Row("cba"))) checkAnswer(sql( """ - |select reverse(array('a' collate utf8_binary_lcase, - |'b' collate utf8_binary_lcase)) + |select reverse(array('a' collate utf8_lcase, + |'b' collate utf8_lcase)) |""".stripMargin), Seq(Row(Seq("b", "a")))) checkAnswer(sql( """ - |select array_join(array('a' collate utf8_binary_lcase, - |'b' collate utf8_binary_lcase), ', ' collate utf8_binary_lcase) + |select array_join(array('a' collate 
utf8_lcase, + |'b' collate utf8_lcase), ', ' collate utf8_lcase) |""".stripMargin), Seq(Row("a, b"))) checkAnswer(sql( """ - |select array_join(array('a' collate utf8_binary_lcase, - |'b' collate utf8_binary_lcase, null), ', ' collate utf8_binary_lcase, - |'c' collate utf8_binary_lcase) + |select array_join(array('a' collate utf8_lcase, + |'b' collate utf8_lcase, null), ', ' collate utf8_lcase, + |'c' collate utf8_lcase) |""".stripMargin), Seq(Row("a, b, c"))) checkAnswer(sql( """ - |select concat('a' collate utf8_binary_lcase, 'b' collate utf8_binary_lcase) + |select concat('a' collate utf8_lcase, 'b' collate utf8_lcase) |""".stripMargin), Seq(Row("ab"))) checkAnswer(sql( """ - |select concat(array('a' collate utf8_binary_lcase, 'b' collate utf8_binary_lcase)) + |select concat(array('a' collate utf8_lcase, 'b' collate utf8_lcase)) |""".stripMargin), Seq(Row(Seq("a", "b")))) checkAnswer(sql( """ - |select map('a' collate utf8_binary_lcase, 1, 'b' collate utf8_binary_lcase, 2) - |['A' collate utf8_binary_lcase] + |select map('a' collate utf8_lcase, 1, 'b' collate utf8_lcase, 2) + |['A' collate utf8_lcase] |""".stripMargin), Seq(Row(1))) - val ctx = "map('aaa' collate utf8_binary_lcase, 1, 'AAA' collate utf8_binary_lcase, 2)['AaA']" + val ctx = "map('aaa' collate utf8_lcase, 1, 'AAA' collate utf8_lcase, 2)['AaA']" val query = s"select $ctx" checkError( exception = intercept[AnalysisException](sql(query)), @@ -952,7 +1031,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { "inputSql" -> "\"AaA\"", "inputType" -> toSQLType(StringType), "requiredType" -> toSQLType(StringType( - CollationFactory.collationNameToId("UTF8_BINARY_LCASE"))) + CollationFactory.collationNameToId("UTF8_LCASE"))) ), context = ExpectedContext( fragment = ctx, @@ -967,7 +1046,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { val t2 = "T_BINARY" withTable(t1, t2) { - sql(s"CREATE TABLE $t1 (c STRING COLLATE UTF8_BINARY_LCASE, i int) USING PARQUET") + sql(s"CREATE TABLE $t1 (c STRING COLLATE UTF8_LCASE, i int) USING PARQUET") sql(s"INSERT INTO $t1 VALUES ('aA', 2), ('Aa', 1), ('ab', 3), ('aa', 1)") sql(s"CREATE TABLE $t2 (c STRING, i int) USING PARQUET") @@ -981,4 +1060,409 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { checkAnswer(dfNonBinary, dfBinary) } } + + test("hash join should be used for collated strings") { + val t1 = "T_1" + val t2 = "T_2" + + case class HashJoinTestCase[R](collation: String, result: R) + val testCases = Seq( + HashJoinTestCase("UTF8_BINARY", Seq(Row("aa", 1, "aa", 2))), + HashJoinTestCase("UTF8_LCASE", Seq(Row("aa", 1, "AA", 2), Row("aa", 1, "aa", 2))), + HashJoinTestCase("UNICODE", Seq(Row("aa", 1, "aa", 2))), + HashJoinTestCase("UNICODE_CI", Seq(Row("aa", 1, "AA", 2), Row("aa", 1, "aa", 2))) + ) + + testCases.foreach(t => { + withTable(t1, t2) { + sql(s"CREATE TABLE $t1 (x STRING COLLATE ${t.collation}, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES ('aa', 1)") + + sql(s"CREATE TABLE $t2 (y STRING COLLATE ${t.collation}, j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES ('AA', 2), ('aa', 2)") + + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) + + val queryPlan = df.queryExecution.executedPlan + + // confirm that hash join is used instead of sort merge join + assert( + collectFirst(queryPlan) { + case _: HashJoin => () + }.nonEmpty + ) + assert( + collectFirst(queryPlan) { + case _: SortMergeJoinExec => () + }.isEmpty + ) + + // Only if 
collation doesn't support binary equality, collation key should be injected. + if (!CollationFactory.fetchCollation(t.collation).supportsBinaryEquality) { + assert(collectFirst(queryPlan) { + case b: HashJoin => b.leftKeys.head + }.head.isInstanceOf[CollationKey]) + } else { + assert(!collectFirst(queryPlan) { + case b: HashJoin => b.leftKeys.head + }.head.isInstanceOf[CollationKey]) + } + } + }) + } + + test("hash join should be used for arrays of collated strings") { + val t1 = "T_1" + val t2 = "T_2" + + case class HashJoinTestCase[R](collation: String, result: R) + val testCases = Seq( + HashJoinTestCase("UTF8_BINARY", + Seq(Row(Seq("aa"), 1, Seq("aa"), 2))), + HashJoinTestCase("UTF8_LCASE", + Seq(Row(Seq("aa"), 1, Seq("AA"), 2), Row(Seq("aa"), 1, Seq("aa"), 2))), + HashJoinTestCase("UNICODE", + Seq(Row(Seq("aa"), 1, Seq("aa"), 2))), + HashJoinTestCase("UNICODE_CI", + Seq(Row(Seq("aa"), 1, Seq("AA"), 2), Row(Seq("aa"), 1, Seq("aa"), 2))) + ) + + testCases.foreach(t => { + withTable(t1, t2) { + sql(s"CREATE TABLE $t1 (x ARRAY, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES (array('aa'), 1)") + + sql(s"CREATE TABLE $t2 (y ARRAY, j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES (array('AA'), 2), (array('aa'), 2)") + + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) + + val queryPlan = df.queryExecution.executedPlan + + // confirm that hash join is used instead of sort merge join + assert( + collectFirst(queryPlan) { + case _: HashJoin => () + }.nonEmpty + ) + assert( + collectFirst(queryPlan) { + case _: ShuffledJoin => () + }.isEmpty + ) + + // Only if collation doesn't support binary equality, collation key should be injected. + if (!CollationFactory.fetchCollation(t.collation).supportsBinaryEquality) { + assert(collectFirst(queryPlan) { + case b: BroadcastHashJoinExec => b.leftKeys.head + }.head.asInstanceOf[ArrayTransform].function.asInstanceOf[LambdaFunction]. 
+ function.isInstanceOf[CollationKey]) + } else { + assert(!collectFirst(queryPlan) { + case b: BroadcastHashJoinExec => b.leftKeys.head + }.head.isInstanceOf[ArrayTransform]) + } + } + }) + } + + test("hash join should be used for arrays of arrays of collated strings") { + val t1 = "T_1" + val t2 = "T_2" + + case class HashJoinTestCase[R](collation: String, result: R) + val testCases = Seq( + HashJoinTestCase("UTF8_BINARY", + Seq(Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2))), + HashJoinTestCase("UTF8_LCASE", + Seq(Row(Seq(Seq("aa")), 1, Seq(Seq("AA")), 2), Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2))), + HashJoinTestCase("UNICODE", + Seq(Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2))), + HashJoinTestCase("UNICODE_CI", + Seq(Row(Seq(Seq("aa")), 1, Seq(Seq("AA")), 2), Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2))) + ) + + testCases.foreach(t => { + withTable(t1, t2) { + sql(s"CREATE TABLE $t1 (x ARRAY>, i int) USING " + + s"PARQUET") + sql(s"INSERT INTO $t1 VALUES (array(array('aa')), 1)") + + sql(s"CREATE TABLE $t2 (y ARRAY>, j int) USING " + + s"PARQUET") + sql(s"INSERT INTO $t2 VALUES (array(array('AA')), 2), (array(array('aa')), 2)") + + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) + + val queryPlan = df.queryExecution.executedPlan + + // confirm that hash join is used instead of sort merge join + assert( + collectFirst(queryPlan) { + case _: HashJoin => () + }.nonEmpty + ) + assert( + collectFirst(queryPlan) { + case _: ShuffledJoin => () + }.isEmpty + ) + + // Only if collation doesn't support binary equality, collation key should be injected. + if (!CollationFactory.fetchCollation(t.collation).supportsBinaryEquality) { + assert(collectFirst(queryPlan) { + case b: BroadcastHashJoinExec => b.leftKeys.head + }.head.asInstanceOf[ArrayTransform].function. + asInstanceOf[LambdaFunction].function.asInstanceOf[ArrayTransform].function. + asInstanceOf[LambdaFunction].function.isInstanceOf[CollationKey]) + } else { + assert(!collectFirst(queryPlan) { + case b: BroadcastHashJoinExec => b.leftKeys.head + }.head.isInstanceOf[ArrayTransform]) + } + } + }) + } + + test("hash join should respect collation for struct of strings") { + val t1 = "T_1" + val t2 = "T_2" + + case class HashJoinTestCase[R](collation: String, result: R) + val testCases = Seq( + HashJoinTestCase("UTF8_BINARY", + Seq(Row(Row("aa"), 1, Row("aa"), 2))), + HashJoinTestCase("UTF8_LCASE", + Seq(Row(Row("aa"), 1, Row("AA"), 2), Row(Row("aa"), 1, Row("aa"), 2))), + HashJoinTestCase("UNICODE", + Seq(Row(Row("aa"), 1, Row("aa"), 2))), + HashJoinTestCase("UNICODE_CI", + Seq(Row(Row("aa"), 1, Row("AA"), 2), Row(Row("aa"), 1, Row("aa"), 2))) + ) + testCases.foreach(t => { + withTable(t1, t2) { + sql(s"CREATE TABLE $t1 (x STRUCT, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES (named_struct('f', 'aa'), 1)") + + sql(s"CREATE TABLE $t2 (y STRUCT, j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES (named_struct('f', 'AA'), 2), (named_struct('f', 'aa'), 2)") + + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) + + val queryPlan = df.queryExecution.executedPlan + + // Confirm that hash join is used instead of sort merge join. + assert( + collectFirst(queryPlan) { + case _: HashJoin => () + }.nonEmpty + ) + assert( + collectFirst(queryPlan) { + case _: ShuffledJoin => () + }.isEmpty + ) + + // Only if collation doesn't support binary equality, collation key should be injected. 
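// (Hedged aside using only APIs already exercised in this suite: fetchCollation exposes the
// flag that drives this branch. Collations that can compare strings byte-for-byte are hashed
// directly, while the remaining ones get a CollationKey injected, as the assertions below check.)
//   Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach { name =>
//     val collation = CollationFactory.fetchCollation(name)
//     println(s"$name supportsBinaryEquality=${collation.supportsBinaryEquality}")
//   }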
+ if (!CollationFactory.fetchCollation(t.collation).supportsBinaryEquality) { + assert(queryPlan.toString().contains("collationkey")) + } else { + assert(!queryPlan.toString().contains("collationkey")) + } + } + }) + } + + test("hash join should respect collation for struct of array of struct of strings") { + val t1 = "T_1" + val t2 = "T_2" + + case class HashJoinTestCase[R](collation: String, result: R) + val testCases = Seq( + HashJoinTestCase("UTF8_BINARY", + Seq(Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("aa"))), 2))), + HashJoinTestCase("UTF8_LCASE", + Seq(Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("AA"))), 2), + Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("aa"))), 2))), + HashJoinTestCase("UNICODE", + Seq(Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("aa"))), 2))), + HashJoinTestCase("UNICODE_CI", + Seq(Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("AA"))), 2), + Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("aa"))), 2))) + ) + testCases.foreach(t => { + withTable(t1, t2) { + sql(s"CREATE TABLE $t1 (x STRUCT>>, " + + s"i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES (named_struct('f', array(named_struct('f', 'aa'))), 1)") + + sql(s"CREATE TABLE $t2 (y STRUCT>>, " + + s"j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES (named_struct('f', array(named_struct('f', 'AA'))), 2), " + + s"(named_struct('f', array(named_struct('f', 'aa'))), 2)") + + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) + + val queryPlan = df.queryExecution.executedPlan + + // confirm that hash join is used instead of sort merge join + assert( + collectFirst(queryPlan) { + case _: HashJoin => () + }.nonEmpty + ) + assert( + collectFirst(queryPlan) { + case _: ShuffledJoin => () + }.isEmpty + ) + + // Only if collation doesn't support binary equality, collation key should be injected. 
+ if (!CollationFactory.fetchCollation(t.collation).supportsBinaryEquality) { + assert(queryPlan.toString().contains("collationkey")) + } else { + assert(!queryPlan.toString().contains("collationkey")) + } + } + }) + } + + test("rewrite with collationkey should be an excludable rule") { + val t1 = "T_1" + val t2 = "T_2" + val collation = "UTF8_LCASE" + val collationRewriteJoinRule = "org.apache.spark.sql.catalyst.analysis.RewriteCollationJoin" + withTable(t1, t2) { + withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> collationRewriteJoinRule) { + sql(s"CREATE TABLE $t1 (x STRING COLLATE $collation, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES ('aa', 1)") + + sql(s"CREATE TABLE $t2 (y STRING COLLATE $collation, j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES ('AA', 2), ('aa', 2)") + + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, Seq(Row("aa", 1, "AA", 2), Row("aa", 1, "aa", 2))) + + val queryPlan = df.queryExecution.executedPlan + + // confirm that sort merge join is used instead of hash join + assert( + collectFirst(queryPlan) { + case _: HashJoin => () + }.isEmpty + ) + assert( + collectFirst(queryPlan) { + case _: SortMergeJoinExec => () + }.nonEmpty + ) + } + } + } + + test("rewrite with collationkey shouldn't disrupt multiple join conditions") { + val t1 = "T_1" + val t2 = "T_2" + + case class HashMultiJoinTestCase[R]( + type1: String, + type2: String, + data1: String, + data2: String, + result: R + ) + val testCases = Seq( + HashMultiJoinTestCase("STRING COLLATE UTF8_BINARY", "INT", + "'a', 0, 1", "'a', 0, 1", Row("a", 0, 1, "a", 0, 1)), + HashMultiJoinTestCase("STRING COLLATE UTF8_BINARY", "STRING COLLATE UTF8_BINARY", + "'a', 'a', 1", "'a', 'a', 1", Row("a", "a", 1, "a", "a", 1)), + HashMultiJoinTestCase("STRING COLLATE UTF8_BINARY", "STRING COLLATE UTF8_LCASE", + "'a', 'a', 1", "'a', 'A', 1", Row("a", "a", 1, "a", "A", 1)), + HashMultiJoinTestCase("STRING COLLATE UTF8_LCASE", "STRING COLLATE UNICODE_CI", + "'a', 'a', 1", "'A', 'A', 1", Row("a", "a", 1, "A", "A", 1)) + ) + + testCases.foreach(t => { + withTable(t1, t2) { + sql(s"CREATE TABLE $t1 (x ${t.type1}, y ${t.type2}, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES (${t.data1})") + sql(s"CREATE TABLE $t2 (x ${t.type1}, y ${t.type2}, i int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES (${t.data2})") + + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.x AND $t1.y = $t2.y") + checkAnswer(df, t.result) + + val queryPlan = df.queryExecution.executedPlan + + // confirm that hash join is used instead of sort merge join + assert( + collectFirst(queryPlan) { + case _: HashJoin => () + }.nonEmpty + ) + assert( + collectFirst(queryPlan) { + case _: SortMergeJoinExec => () + }.isEmpty + ) + } + }) + } + + test("hll sketch aggregate should respect collation") { + case class HllSketchAggTestCase[R](c: String, result: R) + val testCases = Seq( + HllSketchAggTestCase("UTF8_BINARY", 4), + HllSketchAggTestCase("UTF8_LCASE", 3), + HllSketchAggTestCase("UNICODE", 4), + HllSketchAggTestCase("UNICODE_CI", 3) + ) + testCases.foreach(t => { + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.c) { + val q = "SELECT hll_sketch_estimate(hll_sketch_agg(col)) FROM " + + "VALUES ('a'), ('A'), ('b'), ('b'), ('c') tab(col)" + val df = sql(q) + checkAnswer(df, Seq(Row(t.result))) + } + }) + } + + test("cache table with collated columns") { + val collations = Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI") + val lazyOptions = Seq(false, true) + + for ( + collation <- collations; + 
lazyTable <- lazyOptions + ) { + val lazyStr = if (lazyTable) "LAZY" else "" + + def checkCacheTable(values: String): Unit = { + sql(s"CACHE $lazyStr TABLE tbl AS SELECT col FROM VALUES ($values) AS (col)") + // Checks in-memory fetching code path. + val all = sql("SELECT col FROM tbl") + assert(all.queryExecution.executedPlan.collectFirst { + case _: InMemoryTableScanExec => true + }.nonEmpty) + checkAnswer(all, Row("a")) + // Checks column stats code path. + checkAnswer(sql("SELECT col FROM tbl WHERE col = 'a'"), Row("a")) + checkAnswer(sql("SELECT col FROM tbl WHERE col = 'b'"), Seq.empty) + } + + withTable("tbl") { + checkCacheTable(s"'a' COLLATE $collation") + } + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> collation) { + withTable("tbl") { + checkCacheTable("'a'") + } + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAsOfJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAsOfJoinSuite.scala index 280eb095dc753..a03f083123558 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAsOfJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAsOfJoinSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import scala.jdk.CollectionConverters._ +import org.apache.spark.sql.catalyst.plans.AsOfJoinDirection import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSparkSession @@ -123,6 +124,24 @@ class DataFrameAsOfJoinSuite extends QueryTest parameters = Map.empty) } + test("as-of join - unsupported direction") { + val (df1, df2) = prepareForAsOfJoin() + val direction = "unknown" + checkError( + exception = intercept[AnalysisException] { + df1.joinAsOf(df2, df1.col("a"), df2.col("a"), usingColumns = Seq.empty, + joinType = "inner", tolerance = lit(-1), allowExactMatches = true, + direction = direction) + }, + errorClass = "AS_OF_JOIN.UNSUPPORTED_DIRECTION", + sqlState = "42604", + parameters = Map( + "direction" -> direction, + "supported" -> AsOfJoinDirection.supported.mkString("'", "', '", "'") + ) + ) + } + test("as-of join - allowExactMatches = false") { val (df1, df2) = prepareForAsOfJoin() checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index e42f397cbfc29..df1bb39f18744 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -82,7 +82,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { "bucket", "days", "hours", "months", "years", // Datasource v2 partition transformations "product", // Discussed in https://github.com/apache/spark/pull/30745 "unwrap_udt", - "collect_top_k" + "collect_top_k", + "timestamp_add", + "timestamp_diff" ) // We only consider functions matching this pattern, this excludes symbolic and other diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index 01905e2c05fd7..f6fd6b501d790 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -621,4 +621,14 @@ class DataFrameJoinSuite extends QueryTest checkAnswer(joined, Row("x", null, null)) checkAnswer(joined.filter($"new".isNull), Row("x", null, null)) } + + 
test("SPARK-47810: replace equivalent expression to <=> in join condition") { + val joinTypes = Seq("inner", "outer", "left", "right", "semi", "anti", "cross") + joinTypes.foreach(joinType => { + val df1 = testData3.as("x").join(testData3.as("y"), + ($"x.a" <=> $"y.b").or($"x.a".isNull.and($"y.b".isNull)), joinType) + val df2 = testData3.as("x").join(testData3.as("y"), $"x.a" <=> $"y.b", joinType) + checkAnswer(df1, df2) + }) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameShowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameShowSuite.scala index e889fe2545afa..d728cc5810a21 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameShowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameShowSuite.scala @@ -190,7 +190,9 @@ class DataFrameShowSuite extends QueryTest with SharedSparkSession { ||[33 34]|[31 32 33 34 36]| |+-------+----------------+ |""".stripMargin - assert(df.showString(10) === expectedAnswer) + withSQLConf(SQLConf.BINARY_OUTPUT_STYLE.key -> "HEX_DISCRETE") { + assert(df.showString(10) === expectedAnswer) + } } test("showString: binary, vertical = true") { @@ -204,7 +206,9 @@ class DataFrameShowSuite extends QueryTest with SharedSparkSession { "-RECORD 1---------------\n" + " _1 | [33 34] \n" + " _2 | [31 32 33 34 36] \n" - assert(df.showString(10, vertical = true) === expectedAnswer) + withSQLConf(SQLConf.BINARY_OUTPUT_STYLE.key -> "HEX_DISCRETE") { + assert(df.showString(10, vertical = true) === expectedAnswer) + } } test("showString: minimum column width") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 6b34a6412cc0f..760ee80260808 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.api.python.PythonEvalType import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Cast, EqualTo, ExpressionSet, GreaterThan, Literal, PythonUDF, ScalarSubquery, Uuid} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Cast, CreateMap, EqualTo, ExpressionSet, GreaterThan, Literal, PythonUDF, ScalarSubquery, Uuid} import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{Filter, LeafNode, LocalRelation, LogicalPlan, OneRowRelation} @@ -1043,11 +1043,46 @@ class DataFrameSuite extends QueryTest ("12".getBytes(StandardCharsets.UTF_8), "ABC.".getBytes(StandardCharsets.UTF_8)), ("34".getBytes(StandardCharsets.UTF_8), "12346".getBytes(StandardCharsets.UTF_8)) ).toDF() - val expectedAnswer = Seq( - Seq("_1", "_2"), - Seq("[31 32]", "[41 42 43 2E]"), - Seq("[33 34]", "[31 32 33 34 36]")) - assert(df.getRows(10, 20) === expectedAnswer) + + withSQLConf(SQLConf.BINARY_OUTPUT_STYLE.key -> "HEX_DISCRETE") { + val expectedAnswer = Seq( + Seq("_1", "_2"), + Seq("[31 32]", "[41 42 43 2E]"), + Seq("[33 34]", "[31 32 33 34 36]")) + assert(df.getRows(10, 20) === expectedAnswer) + } + withSQLConf(SQLConf.BINARY_OUTPUT_STYLE.key -> "HEX") { + val expectedAnswer = Seq( + Seq("_1", "_2"), + Seq("3132", "4142432E"), + Seq("3334", 
"3132333436") + ) + assert(df.getRows(10, 20) === expectedAnswer) + } + withSQLConf(SQLConf.BINARY_OUTPUT_STYLE.key -> "BASE64") { + val expectedAnswer = Seq( + Seq("_1", "_2"), + Seq("MTI", "QUJDLg"), + Seq("MzQ", "MTIzNDY") + ) + assert(df.getRows(10, 20) === expectedAnswer) + } + withSQLConf(SQLConf.BINARY_OUTPUT_STYLE.key -> "UTF8") { + val expectedAnswer = Seq( + Seq("_1", "_2"), + Seq("12", "ABC."), + Seq("34", "12346") + ) + assert(df.getRows(10, 20) === expectedAnswer) + } + withSQLConf(SQLConf.BINARY_OUTPUT_STYLE.key -> "BASIC") { + val expectedAnswer = Seq( + Seq("_1", "_2"), + Seq("[49, 50]", "[65, 66, 67, 46]"), + Seq("[51, 52]", "[49, 50, 51, 52, 54]") + ) + assert(df.getRows(10, 20) === expectedAnswer) + } } test("createDataFrame(RDD[Row], StructType) should convert UDTs (SPARK-6672)") { @@ -2469,6 +2504,14 @@ class DataFrameSuite extends QueryTest assert(row.getInt(0).toString == row.getString(2)) assert(row.getInt(0).toString == row.getString(3)) } + + val v3 = Column(CreateMap(Seq(Literal("key"), Literal("value")))) + val v4 = to_csv(struct(v3.as("a"))) // to_csv is CodegenFallback + df.select(v3, v3, v4, v4).collect().foreach { row => + assert(row.getMap(0).toString() == row.getMap(1).toString()) + assert(row.getString(2) == s"{key -> ${row.getMap(0).get("key").get}}") + assert(row.getString(3) == s"{key -> ${row.getMap(0).get("key").get}}") + } } test("SPARK-45216: Non-deterministic functions with seed") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala index fe1393af81749..95f4cc78d1564 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFramesSuite.scala @@ -32,6 +32,28 @@ import org.apache.spark.sql.types.CalendarIntervalType class DataFrameWindowFramesSuite extends QueryTest with SharedSparkSession { import testImplicits._ + test("reuse window partitionBy") { + val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + val w = Window.partitionBy("key").orderBy("value") + + checkAnswer( + df.select( + lead("key", 1).over(w), + lead("value", 1).over(w)), + Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil) + } + + test("reuse window orderBy") { + val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") + val w = Window.orderBy("value").partitionBy("key") + + checkAnswer( + df.select( + lead("key", 1).over(w), + lead("value", 1).over(w)), + Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil) + } + test("lead/lag with empty data frame") { val df = Seq.empty[(Int, String)].toDF("key", "value") val window = Window.partitionBy($"key").orderBy($"value") @@ -570,4 +592,30 @@ class DataFrameWindowFramesSuite extends QueryTest with SharedSparkSession { } } } + + test("SPARK-34227: WindowFunctionFrame should clear its states during preparation") { + // This creates a single partition dataframe with 3 records: + // "a", 0, null + // "a", 1, "x" + // "b", 0, null + val df = spark.range(0, 3, 1, 1).select( + when($"id" < 2, lit("a")).otherwise(lit("b")).as("key"), + ($"id" % 2).cast("int").as("order"), + when($"id" % 2 === 0, lit(null)).otherwise(lit("x")).as("value")) + + val window1 = Window.partitionBy($"key").orderBy($"order") + .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing) + val window2 = Window.partitionBy($"key").orderBy($"order") + 
.rowsBetween(Window.unboundedPreceding, Window.currentRow) + checkAnswer( + df.select( + $"key", + $"order", + nth_value($"value", 1, ignoreNulls = true).over(window1), + nth_value($"value", 1, ignoreNulls = true).over(window2)), + Seq( + Row("a", 0, "x", null), + Row("a", 1, "x", "x"), + Row("b", 0, null, null))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 4c852711451c7..e3aff9b36aece 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -44,28 +44,6 @@ class DataFrameWindowFunctionsSuite extends QueryTest import testImplicits._ - test("reuse window partitionBy") { - val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") - val w = Window.partitionBy("key").orderBy("value") - - checkAnswer( - df.select( - lead("key", 1).over(w), - lead("value", 1).over(w)), - Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil) - } - - test("reuse window orderBy") { - val df = Seq((1, "1"), (2, "2"), (1, "1"), (2, "2")).toDF("key", "value") - val w = Window.orderBy("value").partitionBy("key") - - checkAnswer( - df.select( - lead("key", 1).over(w), - lead("value", 1).over(w)), - Row(1, "1") :: Row(2, "2") :: Row(null, null) :: Row(null, null) :: Nil) - } - test("rank functions in unspecific window") { withTempView("window_table") { val df = Seq((1, "1"), (2, "2"), (1, "2"), (2, "2")).toDF("key", "value") @@ -1156,32 +1134,6 @@ class DataFrameWindowFunctionsSuite extends QueryTest Row(Seq(0.0f, -0.0f), Row(0.0d, Double.NaN), Seq(Row(0.0d, 0.0/0.0)), 2))) } - test("SPARK-34227: WindowFunctionFrame should clear its states during preparation") { - // This creates a single partition dataframe with 3 records: - // "a", 0, null - // "a", 1, "x" - // "b", 0, null - val df = spark.range(0, 3, 1, 1).select( - when($"id" < 2, lit("a")).otherwise(lit("b")).as("key"), - ($"id" % 2).cast("int").as("order"), - when($"id" % 2 === 0, lit(null)).otherwise(lit("x")).as("value")) - - val window1 = Window.partitionBy($"key").orderBy($"order") - .rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing) - val window2 = Window.partitionBy($"key").orderBy($"order") - .rowsBetween(Window.unboundedPreceding, Window.currentRow) - checkAnswer( - df.select( - $"key", - $"order", - nth_value($"value", 1, ignoreNulls = true).over(window1), - nth_value($"value", 1, ignoreNulls = true).over(window2)), - Seq( - Row("a", 0, "x", null), - Row("a", 1, "x", "x"), - Row("b", 0, null, null))) - } - test("SPARK-38237: require all cluster keys for child required distribution for window query") { def partitionExpressionsColumns(expressions: Seq[Expression]): Seq[String] = { expressions.flatMap { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 16a493b52909e..fdb2ec30fdd2d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -21,6 +21,7 @@ import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.sql.{Date, Timestamp} import scala.collection.immutable.HashSet +import scala.jdk.CollectionConverters._ import scala.reflect.ClassTag import scala.util.Random @@ -29,7 +30,7 @@ import org.scalatest.Assertions._ import 
org.scalatest.exceptions.TestFailedException import org.scalatest.prop.TableDrivenPropertyChecks._ -import org.apache.spark.{SparkConf, SparkException, SparkRuntimeException, SparkUnsupportedOperationException, TaskContext} +import org.apache.spark.{SparkConf, SparkRuntimeException, SparkUnsupportedOperationException, TaskContext} import org.apache.spark.TestUtils.withListener import org.apache.spark.internal.config.MAX_RESULT_SIZE import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} @@ -37,7 +38,7 @@ import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, ScroogeLikeExam import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders, ExpressionEncoder, OuterScopes} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, GenericRowWithSchema} -import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi} +import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.trees.DataFrameQueryContext import org.apache.spark.sql.catalyst.util.sideBySide import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SQLExecution} @@ -542,25 +543,20 @@ class DatasetSuite extends QueryTest val ds1 = Seq(1, 2, 3).toDS().as("a") val ds2 = Seq(1, 2).toDS().as("b") - val e1 = intercept[AnalysisException] { - ds1.joinWith(ds2, $"a.value" === $"b.value", "left_semi") - }.getMessage - assert(e1.contains("Invalid join type in joinWith: " + LeftSemi.sql)) - - val e2 = intercept[AnalysisException] { - ds1.joinWith(ds2, $"a.value" === $"b.value", "semi") - }.getMessage - assert(e2.contains("Invalid join type in joinWith: " + LeftSemi.sql)) - - val e3 = intercept[AnalysisException] { - ds1.joinWith(ds2, $"a.value" === $"b.value", "left_anti") - }.getMessage - assert(e3.contains("Invalid join type in joinWith: " + LeftAnti.sql)) + def checkJoinWithJoinType(joinType: String): Unit = { + val semiErrorParameters = Map("joinType" -> JoinType(joinType).sql) + checkError( + exception = intercept[AnalysisException]( + ds1.joinWith(ds2, $"a.value" === $"b.value", joinType) + ), + errorClass = "INVALID_JOIN_TYPE_FOR_JOINWITH", + sqlState = "42613", + parameters = semiErrorParameters + ) + } - val e4 = intercept[AnalysisException] { - ds1.joinWith(ds2, $"a.value" === $"b.value", "anti") - }.getMessage - assert(e4.contains("Invalid join type in joinWith: " + LeftAnti.sql)) + Seq("leftsemi", "left_semi", "semi", "leftanti", "left_anti", "anti") + .foreach(checkJoinWithJoinType(_)) } test("groupBy function, keys") { @@ -957,6 +953,25 @@ class DatasetSuite extends QueryTest assert(result2.length == 3) } + test("SPARK-48718: cogroup deserializer expr is resolved before dedup relation") { + val lhs = spark.createDataFrame( + List(Row(123L)).asJava, + StructType(Seq(StructField("GROUPING_KEY", LongType))) + ) + val rhs = spark.createDataFrame( + List(Row(0L, 123L)).asJava, + StructType(Seq(StructField("ID", LongType), StructField("GROUPING_KEY", LongType))) + ) + + val lhsKV = lhs.groupByKey((r: Row) => r.getAs[Long]("GROUPING_KEY")) + val rhsKV = rhs.groupByKey((r: Row) => r.getAs[Long]("GROUPING_KEY")) + val cogrouped = lhsKV.cogroup(rhsKV)( + (a: Long, b: Iterator[Row], c: Iterator[Row]) => Iterator(0L) + ) + val joined = rhs.join(cogrouped, col("ID") === col("value"), "left") + checkAnswer(joined, Row(0L, 123L, 0L) :: Nil) + } + test("SPARK-34806: observation on datasets") { val namedObservation = Observation("named") val unnamedObservation = Observation() @@ -1251,11 
+1266,10 @@ class DatasetSuite extends QueryTest // Shouldn't throw runtime exception when parent object (`ClassData`) is null assert(buildDataset(Row(null)).collect() === Array(NestedStruct(null))) - val message = intercept[RuntimeException] { + // Just check the error class here to avoid flakiness due to different parameters. + assert(intercept[SparkRuntimeException] { buildDataset(Row(Row("hello", null))).collect() - }.getCause.getMessage - - assert(message.contains("Null value appeared in non-nullable field")) + }.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } test("SPARK-12478: top level null field") { @@ -1593,9 +1607,8 @@ class DatasetSuite extends QueryTest } test("Dataset should throw RuntimeException if top-level product input object is null") { - val e = intercept[RuntimeException](Seq(ClassData("a", 1), null).toDS()) - assert(e.getCause.getMessage.contains("Null value appeared in non-nullable field")) - assert(e.getCause.getMessage.contains("top level Product or row object")) + val e = intercept[SparkRuntimeException](Seq(ClassData("a", 1), null).toDS()) + assert(e.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } test("dropDuplicates") { @@ -2038,19 +2051,34 @@ class DatasetSuite extends QueryTest test("SPARK-22472: add null check for top-level primitive values") { // If the primitive values are from Option, we need to do runtime null check. val ds = Seq(Some(1), None).toDS().as[Int] - val e1 = intercept[RuntimeException](ds.collect()) - assert(e1.getCause.isInstanceOf[NullPointerException]) - val e2 = intercept[SparkException](ds.map(_ * 2).collect()) - assert(e2.getCause.isInstanceOf[NullPointerException]) + val errorClass = "NOT_NULL_ASSERT_VIOLATION" + val sqlState = "42000" + val parameters = Map("walkedTypePath" -> "\n- root class: \"int\"\n") + checkError( + exception = intercept[SparkRuntimeException](ds.collect()), + errorClass = errorClass, + sqlState = sqlState, + parameters = parameters) + checkError( + exception = intercept[SparkRuntimeException](ds.map(_ * 2).collect()), + errorClass = errorClass, + sqlState = sqlState, + parameters = parameters) withTempPath { path => Seq(Integer.valueOf(1), null).toDF("i").write.parquet(path.getCanonicalPath) // If the primitive values are from files, we need to do runtime null check. 
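// (Hedged condensed form of the migrated assertions below: the runtime null check now surfaces
// as a SparkRuntimeException carrying an error class, rather than a RuntimeException wrapping
// a NullPointerException.)
//   val e = intercept[SparkRuntimeException] {
//     spark.read.parquet(path.getCanonicalPath).as[Int].collect()
//   }
//   assert(e.getErrorClass == "NOT_NULL_ASSERT_VIOLATION")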
val ds = spark.read.parquet(path.getCanonicalPath).as[Int] - val e1 = intercept[RuntimeException](ds.collect()) - assert(e1.getCause.isInstanceOf[NullPointerException]) - val e2 = intercept[SparkException](ds.map(_ * 2).collect()) - assert(e2.getCause.isInstanceOf[NullPointerException]) + checkError( + exception = intercept[SparkRuntimeException](ds.collect()), + errorClass = errorClass, + sqlState = sqlState, + parameters = parameters) + checkError( + exception = intercept[SparkRuntimeException](ds.map(_ * 2).collect()), + errorClass = errorClass, + sqlState = sqlState, + parameters = parameters) } } @@ -2068,8 +2096,8 @@ class DatasetSuite extends QueryTest test("SPARK-23835: null primitive data type should throw NullPointerException") { val ds = Seq[(Option[Int], Option[Int])]((Some(1), None)).toDS() - val e = intercept[RuntimeException](ds.as[(Int, Int)].collect()) - assert(e.getCause.isInstanceOf[NullPointerException]) + val exception = intercept[SparkRuntimeException](ds.as[(Int, Int)].collect()) + assert(exception.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } test("SPARK-24569: Option of primitive types are mistakenly mapped to struct type") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index da04674b99205..22fdd96ce6bad 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -151,11 +151,11 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite } test("explain table valued functions") { - checkKeywordsExistsInExplain(sql("select * from RaNgE(2)"), "Range (0, 2, step=1, splits=None)") + checkKeywordsExistsInExplain(sql("select * from RaNgE(2)"), "Range (0, 2, step=1)") checkKeywordsExistsInExplain(sql("SELECT * FROM range(3) CROSS JOIN range(3)"), "Join Cross", - ":- Range (0, 3, step=1, splits=None)", - "+- Range (0, 3, step=1, splits=None)") + ":- Range (0, 3, step=1)", + "+- Range (0, 3, step=1)") } test("explain lateral joins") { @@ -192,9 +192,11 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite |) """.stripMargin) checkKeywordsExistsInExplain(df2, - "Project [concat(cast(id#xL as string), cast((id#xL + 1) as string), " + - "cast(encode(cast((id#xL + 2) as string), utf-8, false) as string), " + - "cast(encode(cast((id#xL + 3) as string), utf-8, false) as string)) AS col#x]") + "Project [concat(concat(col1#x, col2#x), cast(concat(col3#x, col4#x) as string)) AS col#x]", + "Project [cast(id#xL as string) AS col1#x, " + + "cast((id#xL + cast(1 as bigint)) as string) AS col2#x, " + + "encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x, " + + "encode(cast((id#xL + cast(3 as bigint)) as string), utf-8) AS col4#x]") val df3 = sql( """ @@ -208,9 +210,10 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite |) """.stripMargin) checkKeywordsExistsInExplain(df3, - "Project [concat(cast(id#xL as string), " + - "cast(encode(cast((id#xL + 2) as string), utf-8, false) as string), " + - "cast(encode(cast((id#xL + 3) as string), utf-8, false) as string)) AS col#x]") + "Project [concat(col1#x, cast(concat(col3#x, col4#x) as string)) AS col#x]", + "Project [cast(id#xL as string) AS col1#x, " + + "encode(cast((id#xL + cast(2 as bigint)) as string), utf-8) AS col3#x, " + + "encode(cast((id#xL + cast(3 as bigint)) as string), utf-8) AS col4#x]") } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index 73b2eba7060d0..443597f10056b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -117,6 +117,10 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { // Note: We need to filter out the commands that set the parameters, such as: // SET spark.sql.parser.escapedStringLiterals=true example.split(" > ").tail.filterNot(_.trim.startsWith("SET")).take(1).foreach { + case _ if funcName == "from_avro" || funcName == "to_avro" => + // Skip running the example queries for the from_avro and to_avro functions because + // these functions dynamically load the AvroDataToCatalyst or CatalystDataToAvro classes + // which are not available in this test. case exampleRe(sql, _) => val df = spark.sql(sql) val escapedSql = sql.replaceAll("\\|", "|") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index 8a092ab69cf17..229677d208136 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GreaterTha import org.apache.spark.sql.catalyst.expressions.IntegralLiteralTestUtils.{negativeInt, positiveInt} import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.catalyst.types.DataTypeUtils -import org.apache.spark.sql.execution.{ExplainMode, FileSourceScanLike, SimpleMode} +import org.apache.spark.sql.execution.{FileSourceScanLike, SimpleMode} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.FilePartition import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, FileScan} @@ -474,7 +474,7 @@ class FileBasedDataSourceSuite extends QueryTest errorClass = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", parameters = Map( "columnName" -> "`vectors`", - "columnType" -> "\"ARRAY\"", + "columnType" -> "UDT(\"ARRAY\")", "format" -> "CSV") ) @@ -487,7 +487,7 @@ class FileBasedDataSourceSuite extends QueryTest errorClass = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", parameters = Map( "columnName" -> "`a`", - "columnType" -> "\"ARRAY\"", + "columnType" -> "UDT(\"ARRAY\")", "format" -> "CSV") ) } @@ -545,7 +545,7 @@ class FileBasedDataSourceSuite extends QueryTest errorClass = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", parameters = Map( "columnName" -> "`a`", - "columnType" -> "\"INTERVAL\"", + "columnType" -> "UDT(\"INTERVAL\")", "format" -> formatParameter ) ) @@ -595,7 +595,7 @@ class FileBasedDataSourceSuite extends QueryTest errorClass = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", parameters = Map( "columnName" -> "`testType()`", - "columnType" -> "\"VOID\"", + "columnType" -> "UDT(\"VOID\")", "format" -> formatParameter ) ) @@ -624,7 +624,7 @@ class FileBasedDataSourceSuite extends QueryTest errorClass = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", parameters = Map( "columnName" -> "`a`", - "columnType" -> "\"VOID\"", + "columnType" -> "UDT(\"VOID\")", "format" -> formatParameter ) ) @@ -1242,59 +1242,6 @@ class FileBasedDataSourceSuite extends QueryTest } } } - - test("disable filter pushdown for collated strings") { - 
Seq("parquet").foreach { format => - Seq(format, "").foreach { conf => - withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> conf) { - withTempPath { path => - val collation = "'UTF8_BINARY_LCASE'" - val df = sql( - s"""SELECT - | COLLATE(c, $collation) as c1, - | struct(COLLATE(c, $collation)) as str, - | named_struct('f1', named_struct('f2', - | COLLATE(c, $collation), 'f3', 1)) as namedstr, - | array(COLLATE(c, $collation)) as arr, - | map(COLLATE(c, $collation), 1) as map1, - | map(1, COLLATE(c, $collation)) as map2 - |FROM VALUES ('aaa'), ('AAA'), ('bbb') - |as data(c) - |""".stripMargin) - - df.write.format(format).save(path.getAbsolutePath) - - // filter and expected result - val filters = Seq( - ("==", Seq(Row("aaa"), Row("AAA"))), - ("!=", Seq(Row("bbb"))), - ("<", Seq()), - ("<=", Seq(Row("aaa"), Row("AAA"))), - (">", Seq(Row("bbb"))), - (">=", Seq(Row("aaa"), Row("AAA"), Row("bbb")))) - - filters.foreach { filter => - val readback = spark.read - .format(format) - .load(path.getAbsolutePath) - .where(s"c1 ${filter._1} collate('aaa', $collation)") - .where(s"str ${filter._1} struct(collate('aaa', $collation))") - .where(s"namedstr.f1.f2 ${filter._1} collate('aaa', $collation)") - .where(s"arr ${filter._1} array(collate('aaa', $collation))") - .where(s"map_keys(map1) ${filter._1} array(collate('aaa', $collation))") - .where(s"map_values(map2) ${filter._1} array(collate('aaa', $collation))") - .select("c1") - - val explain = readback.queryExecution.explainString( - ExplainMode.fromString("extended")) - assert(explain.contains("PushedFilters: []")) - checkAnswer(readback, filter._2) - } - } - } - } - } - } } object TestingUDT { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ICUCollationsMapSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ICUCollationsMapSuite.scala new file mode 100644 index 0000000000000..42d486bd75454 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/ICUCollationsMapSuite.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile, CollationFactory} + +// scalastyle:off line.size.limit +/** + * Guard against breaking changes in ICU locale names and codes supported by Collator class and provider by CollationFactory. 
+ * Map is in form of rows of pairs (locale name, locale id); locale name consists of three parts: + * - 2-letter lowercase language code + * - 4-letter script code (optional) + * - 3-letter uppercase country code + * + * To re-generate collations map golden file, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly org.apache.spark.sql.ICUCollationsMapSuite" + * }}} + */ +// scalastyle:on line.size.limit +class ICUCollationsMapSuite extends SparkFunSuite { + + private val collationsMapFile = { + getWorkspaceFilePath("sql", "core", "src", "test", "resources", + "collations", "ICU-collations-map.md").toFile + } + + if (regenerateGoldenFiles) { + val map = CollationFactory.getICULocaleNames + val mapOutput = map.zipWithIndex.map { + case (localeName, idx) => s"| $idx | $localeName |" }.mkString("\n") + val goldenOutput = { + s"\n" + + "## ICU locale ids to name map\n" + + "| Locale id | Locale name |\n" + + "| --------- | ----------- |\n" + + mapOutput + "\n" + } + val parent = collationsMapFile.getParentFile + if (!parent.exists()) { + assert(parent.mkdirs(), "Could not create directory: " + parent) + } + stringToFile(collationsMapFile, goldenOutput) + } + + test("ICU locales map breaking change") { + val goldenLines = fileToString(collationsMapFile).split('\n') + val goldenRelevantLines = goldenLines.slice(4, goldenLines.length) // skip header + val input = goldenRelevantLines.map( + s => (s.split('|')(2).strip(), s.split('|')(1).strip().toInt)) + assert(input sameElements CollationFactory.getICULocaleNames.zipWithIndex) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala index fc1524be13179..bc16a69475106 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala @@ -356,8 +356,8 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp "(bf1.c1 = bf2.c2 and bf2.a2 = 5)) as a join bf3 on bf3.c3 = a.c1", 2) // left anti join unsupported. // bf2 as creation side and inject runtime filter for bf3(by passing key). - assertRewroteWithBloomFilter("select * from (select * from bf1 left anti join bf2 on " + - "(bf1.c1 = bf2.c2 and bf2.a2 = 5)) as a join bf3 on bf3.c3 = a.c1") + assertDidNotRewriteWithBloomFilter("select * from (select * from bf1 left anti join bf2 " + + "on (bf1.c1 = bf2.c2 and bf2.a2 = 5)) as a join bf3 on bf3.c3 = a.c1") // left anti join unsupported and hasn't selective filter. assertRewroteWithBloomFilter("select * from (select * from bf1 left anti join bf2 on " + "(bf1.c1 = bf2.c2 and bf1.a1 = 5)) as a join bf3 on bf3.c3 = a.c1", 0) @@ -487,7 +487,7 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "32", SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000") { // Test that the max scan size rather than an individual scan size on the filter - // application side matters. `bf5filtered` has 14168 bytes and `bf2` has 3409 bytes. + // application side matters. `bf5filtered` has 15049 bytes and `bf2` has 3409 bytes. 
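// (Hedged sketch of the size check exercised below, using only the byte counts quoted above;
//  the exact boundary comparison of the real rule is not implied here.)
//   val maxApplicationSideScanBytes = math.max(15049L, 3409L) // bf5filtered vs bf2
//   def bloomFilterEligible(thresholdBytes: Long): Boolean =
//     maxApplicationSideScanBytes >= thresholdBytes
//   assert(bloomFilterEligible(5000L))   // threshold 5000  -> rewritten with a bloom filter
//   assert(!bloomFilterEligible(16000L)) // threshold 16000 -> rewrite skipped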
withSQLConf( SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "5000") { assertRewroteWithBloomFilter("select * from " + @@ -495,7 +495,7 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp "join bf3 on t.c5 = bf3.c3 where bf3.a3 = 5", 2) } withSQLConf( - SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "15000") { + SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "16000") { assertDidNotRewriteWithBloomFilter("select * from " + "(select * from bf5filtered union all select * from bf2) t " + "join bf3 on t.c5 = bf3.c3 where bf3.a3 = 5") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala index c1ca48162d207..957be07607b66 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/IntegratedUDFTestUtils.scala @@ -660,6 +660,49 @@ object IntegratedUDFTestUtils extends SQLHelper { orderBy = "OrderingColumn(\"input\")", select = "SelectedColumn(\"partition_col\")") + object UDTFPartitionByIndexingBug extends TestUDTF { + val pythonScript: String = + s""" + |from pyspark.sql.functions import ( + | AnalyzeArgument, + | AnalyzeResult, + | PartitioningColumn, + | SelectedColumn, + | udtf + |) + |from pyspark.sql.types import ( + | DoubleType, + | StringType, + | StructType, + |) + |class $name: + | @staticmethod + | def analyze(observed: AnalyzeArgument) -> AnalyzeResult: + | out_schema = StructType() + | out_schema.add("partition_col", StringType()) + | out_schema.add("double_col", DoubleType()) + | + | return AnalyzeResult( + | schema=out_schema, + | partitionBy=[PartitioningColumn("partition_col")], + | select=[ + | SelectedColumn("partition_col"), + | SelectedColumn("double_col"), + | ], + | ) + | + | def eval(self, *args, **kwargs): + | pass + | + | def terminate(self): + | for _ in range(5): + | yield { + | "partition_col": None, + | "double_col": 1.0, + | } + |""".stripMargin + } + object UDTFInvalidPartitionByOrderByParseError extends TestPythonUDTFPartitionByOrderByBase( partitionBy = "PartitioningColumn(\"unparsable\")", @@ -1216,6 +1259,7 @@ object IntegratedUDFTestUtils extends SQLHelper { UDTFPartitionByOrderBySelectExpr, UDTFPartitionByOrderBySelectComplexExpr, UDTFPartitionByOrderBySelectExprOnlyPartitionColumn, + UDTFPartitionByIndexingBug, InvalidAnalyzeMethodReturnsNonStructTypeSchema, InvalidAnalyzeMethodWithSinglePartitionNoInputTable, InvalidAnalyzeMethodWithPartitionByNoInputTable, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index be6862f5b96b7..fcb937d82ba42 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.internal.config.SHUFFLE_SPILL_NUM_ELEMENTS_FORCE_SPILL_T import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.expressions.{Ascending, GenericRow, SortOrder} -import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, JoinSelectionHelper} import org.apache.spark.sql.catalyst.plans.logical.{Filter, HintInfo, Join, JoinHint, NO_BROADCAST_AND_REPLICATION} import 
org.apache.spark.sql.execution.{BinaryExecNode, FilterExec, ProjectExec, SortExec, SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper @@ -40,7 +40,8 @@ import org.apache.spark.sql.types.StructType import org.apache.spark.tags.SlowSQLTest @SlowSQLTest -class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper { +class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlanHelper + with JoinSelectionHelper { import testImplicits._ setupTestData() @@ -61,6 +62,7 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan val sqlString = pair._1 val c = pair._2 val df = sql(sqlString) + val optimized = df.queryExecution.optimizedPlan val physical = df.queryExecution.sparkPlan val operators = physical.collect { case j: BroadcastHashJoinExec => j @@ -74,6 +76,10 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan if (operators.head.getClass != c) { fail(s"$sqlString expected operator: $c, but got ${operators.head}\n physical: \n$physical") } + assert( + canPlanAsBroadcastHashJoin(optimized.asInstanceOf[Join], conf) === + operators.head.isInstanceOf[BroadcastHashJoinExec], + "canPlanAsBroadcastHashJoin not in sync with join selection codepath!") operators.head } @@ -89,11 +95,13 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan val planned = spark.sessionState.planner.JoinSelection(join) assert(planned.size == 1) assert(planned.head.isInstanceOf[CartesianProductExec]) + assert(!canPlanAsBroadcastHashJoin(join, conf)) val plannedWithHint = spark.sessionState.planner.JoinSelection(joinWithHint) assert(plannedWithHint.size == 1) assert(plannedWithHint.head.isInstanceOf[BroadcastNestedLoopJoinExec]) assert(plannedWithHint.head.asInstanceOf[BroadcastNestedLoopJoinExec].buildSide == BuildLeft) + assert(!canPlanAsBroadcastHashJoin(joinWithHint, conf)) } } @@ -112,10 +120,12 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan val planned = spark.sessionState.planner.JoinSelection(join) assert(planned.size == 1) assert(planned.head.isInstanceOf[BroadcastHashJoinExec]) + assert(canPlanAsBroadcastHashJoin(join, conf)) val plannedWithHint = spark.sessionState.planner.JoinSelection(joinWithHint) assert(plannedWithHint.size == 1) assert(plannedWithHint.head.isInstanceOf[SortMergeJoinExec]) + assert(!canPlanAsBroadcastHashJoin(joinWithHint, conf)) } test("NO_BROADCAST_AND_REPLICATION controls build side in BNLJ") { @@ -131,11 +141,13 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan assert(planned.size == 1) assert(planned.head.isInstanceOf[BroadcastNestedLoopJoinExec]) assert(planned.head.asInstanceOf[BroadcastNestedLoopJoinExec].buildSide == BuildRight) + assert(!canPlanAsBroadcastHashJoin(join, conf)) val plannedWithHint = spark.sessionState.planner.JoinSelection(joinWithHint) assert(plannedWithHint.size == 1) assert(plannedWithHint.head.isInstanceOf[BroadcastNestedLoopJoinExec]) assert(plannedWithHint.head.asInstanceOf[BroadcastNestedLoopJoinExec].buildSide == BuildLeft) + assert(!canPlanAsBroadcastHashJoin(joinWithHint, conf)) } test("join operator selection") { @@ -191,6 +203,16 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan // ).foreach { case (query, joinClass) => assertJoin(query, joinClass) } // } + test("broadcastable join with shuffle join hint") { + spark.sharedState.cacheManager.clearCache() + sql("CACHE 
TABLE testData") + // Make sure it's planned as broadcast join without the hint. + assertJoin("SELECT * FROM testData JOIN testData2 ON key = a", + classOf[BroadcastHashJoinExec]) + assertJoin("SELECT /*+ SHUFFLE_HASH(testData) */ * FROM testData JOIN testData2 ON key = a", + classOf[ShuffledHashJoinExec]) + } + test("broadcasted hash join operator selection") { spark.sharedState.cacheManager.clearCache() sql("CACHE TABLE testData") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 9946815bdf516..ea00e02e232c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -1164,8 +1164,8 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { exception = intercept[SparkIllegalArgumentException] { df.select(from_json($"json", invalidJsonSchema, Map.empty[String, String])).collect() }, - errorClass = "_LEGACY_ERROR_TEMP_3250", - parameters = Map("other" -> """{"a":123}""")) + errorClass = "INVALID_JSON_DATA_TYPE", + parameters = Map("invalidType" -> """{"a":123}""")) val invalidDataType = "MAP" val invalidDataTypeReason = "Unrecognized token 'MAP': " + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala new file mode 100644 index 0000000000000..df0fbf15a98ee --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/LogQuerySuite.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import java.io.File + +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.LogUtils.LOG_SCHEMA + +/** + * Test suite for querying Spark logs using SQL. + */ +class LogQuerySuite extends QueryTest with SharedSparkSession with Logging { + + val logFile: File = { + val pwd = new File(".").getCanonicalPath + new File(pwd + "/target/LogQuerySuite.log") + } + + override def afterAll(): Unit = { + super.afterAll() + // Clear the log file + if (logFile.exists()) { + logFile.delete() + } + } + + private def createTempView(viewName: String): Unit = { + spark.read.schema(LOG_SCHEMA).json(logFile.getCanonicalPath).createOrReplaceTempView(viewName) + } + + test("Query Spark logs using SQL") { + val msg = log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, "1")}." 
+ logError(msg) + + withTempView("logs") { + createTempView("logs") + checkAnswer( + spark.sql(s"SELECT level, msg, context, exception FROM logs WHERE msg = '${msg.message}'"), + Row("ERROR", msg.message, Map(LogKeys.EXECUTOR_ID.name -> "1"), null) :: Nil) + } + } + + test("Query Spark logs with exception using SQL") { + val msg = log"Task ${MDC(LogKeys.TASK_ID, "2")} failed." + val exception = new RuntimeException("OOM") + logError(msg, exception) + + withTempView("logs") { + createTempView("logs") + val expectedMDC = Map(LogKeys.TASK_ID.name -> "2") + checkAnswer( + spark.sql("SELECT level, msg, context, exception.class, exception.msg FROM logs " + + s"WHERE msg = '${msg.message}'"), + Row("ERROR", msg.message, expectedMDC, "java.lang.RuntimeException", "OOM") :: Nil) + + val stacktrace = + spark.sql(s"SELECT exception.stacktrace FROM logs WHERE msg = '${msg.message}'").collect() + assert(stacktrace.length == 1) + val topStacktraceArray = stacktrace.head.getSeq[Row](0).head + assert(topStacktraceArray.getString(0) == this.getClass.getName) + assert(topStacktraceArray.getString(1) != "") + assert(topStacktraceArray.getString(2) == this.getClass.getSimpleName + ".scala") + assert(topStacktraceArray.getString(3) != "") + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index ba04e3b691a1b..ac14b345a7621 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -707,6 +707,17 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { df1.select(try_divide(make_interval(col("year"), col("month")), lit(0)))) } + test("try_remainder") { + val df = Seq((10, 3), (5, 5), (5, 0)).toDF("birth", "age") + checkAnswer(df.selectExpr("try_remainder(birth, age)"), Seq(Row(1), Row(0), Row(null))) + + val dfDecimal = Seq( + (BigDecimal(10), BigDecimal(3)), + (BigDecimal(5), BigDecimal(5)), + (BigDecimal(5), BigDecimal(0))).toDF("birth", "age") + checkAnswer(dfDecimal.selectExpr("try_remainder(birth, age)"), Seq(Row(1), Row(0), Row(null))) + } + test("try_element_at") { val df = Seq((Array(1, 2, 3), 2)).toDF("a", "b") checkAnswer(df.selectExpr("try_element_at(a, b)"), Seq(Row(2))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index b5b3492269415..ad424b3a7cc76 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -59,13 +59,13 @@ import org.apache.spark.tags.ExtendedSQLTest * To re-generate golden files for entire suite, run: * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability*Suite" - * SPARK_GENERATE_GOLDEN_FILES=1 SPARK_ANSI_SQL_MODE=true build/sbt "sql/testOnly *PlanStability*Suite" + * SPARK_GENERATE_GOLDEN_FILES=1 SPARK_ANSI_SQL_MODE=false build/sbt "sql/testOnly *PlanStability*Suite" * }}} * * To re-generate golden file for a single test, run: * {{{ * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability*Suite -- -z (tpcds-v1.4/q49)" - * SPARK_GENERATE_GOLDEN_FILES=1 SPARK_ANSI_SQL_MODE=true build/sbt "sql/testOnly *PlanStability*Suite -- -z (tpcds-v1.4/q49)" + * SPARK_GENERATE_GOLDEN_FILES=1 SPARK_ANSI_SQL_MODE=false build/sbt "sql/testOnly *PlanStability*Suite -- -z (tpcds-v1.4/q49)" * }}} */ // scalastyle:on 
line.size.limit @@ -256,9 +256,11 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { protected def testQuery(tpcdsGroup: String, query: String, suffix: String = ""): Unit = { val queryString = resourceToString(s"$tpcdsGroup/$query.sql", classLoader = Thread.currentThread().getContextClassLoader) - // Disable char/varchar read-side handling for better performance. - withSQLConf(SQLConf.READ_SIDE_CHAR_PADDING.key -> "false", - SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { + withSQLConf( + // Disable char/varchar read-side handling for better performance. + SQLConf.READ_SIDE_CHAR_PADDING.key -> "false", + SQLConf.LEGACY_NO_CHAR_PADDING_IN_PREDICATE.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB") { val qe = sql(queryString).queryExecution val plan = qe.executedPlan val explain = normalizeLocation(normalizeIds(qe.explainString(FormattedMode))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala index 48a9564ab8f95..bca1472799939 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala @@ -279,4 +279,12 @@ class ResolveDefaultColumnsSuite extends QueryTest with SharedSparkSession { checkAnswer(sql("select CAST(c as STRING) from t"), Row("2018-11-17 13:33:33")) } } + + test("SPARK-48033: default columns using runtime replaceable expression works") { + withTable("t") { + sql("CREATE TABLE t(v VARIANT DEFAULT parse_json('1')) USING PARQUET") + sql("INSERT INTO t VALUES(DEFAULT)") + checkAnswer(sql("select v from t"), sql("select parse_json('1')").collect()) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeNullChecksV2Writes.scala b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeNullChecksV2Writes.scala index fbdd1428ba9b8..754c46cc5cd3e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/RuntimeNullChecksV2Writes.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/RuntimeNullChecksV2Writes.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql import java.util.Collections -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.{SparkConf, SparkRuntimeException} import org.apache.spark.sql.connector.catalog.{Column => ColumnV2, Identifier, InMemoryTableCatalog} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.internal.SQLConf @@ -56,7 +56,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS withTable("t") { sql(s"CREATE TABLE t (s STRING, i INT NOT NULL) USING $FORMAT") - val e = intercept[SparkException] { + val e = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql("SELECT 'txt' AS s, null AS i") inputDF.writeTo("t").append() @@ -64,7 +64,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS sql("INSERT INTO t VALUES ('txt', null)") } } - assertNotNullException(e, Seq("i")) + assert(e.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } } @@ -88,7 +88,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS |USING $FORMAT """.stripMargin) - val e1 = intercept[SparkException] { + val e1 = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql( s"""SELECT @@ -106,7 +106,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } assertNotNullException(e1, Seq("s", "ns")) - val e2 
= intercept[SparkException] { + val e2 = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql( s"""SELECT @@ -124,7 +124,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } assertNotNullException(e2, Seq("s", "arr")) - val e3 = intercept[SparkException] { + val e3 = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql( s"""SELECT @@ -177,7 +177,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } checkAnswer(spark.table("t"), Row(1, Row(1, null))) - val e = intercept[SparkException] { + val e = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql( s"""SELECT @@ -224,7 +224,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } checkAnswer(spark.table("t"), Row(1, null)) - val e = intercept[SparkException] { + val e = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql( s"""SELECT @@ -279,7 +279,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } checkAnswer(spark.table("t"), Row(1, List(null, Row(1, 1)))) - val e = intercept[SparkException] { + val e = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql( s"""SELECT @@ -325,7 +325,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } checkAnswer(spark.table("t"), Row(1, null)) - val e = intercept[SparkException] { + val e = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql("SELECT 1 AS i, map(1, null) AS m") inputDF.writeTo("t").append() @@ -364,7 +364,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } checkAnswer(spark.table("t"), Row(1, Map(Row(1, 1) -> null))) - val e1 = intercept[SparkException] { + val e1 = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql( s"""SELECT @@ -382,7 +382,7 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } assertNotNullException(e1, Seq("m", "key", "x")) - val e2 = intercept[SparkException] { + val e2 = intercept[SparkRuntimeException] { if (byName) { val inputDF = sql( s"""SELECT @@ -402,11 +402,9 @@ class RuntimeNullChecksV2Writes extends QueryTest with SQLTestUtils with SharedS } } - private def assertNotNullException(e: SparkException, colPath: Seq[String]): Unit = { + private def assertNotNullException(e: SparkRuntimeException, colPath: Seq[String]): Unit = { e.getCause match { - case npe: NullPointerException => - assert(npe.getMessage.contains("Null value appeared in non-nullable field")) - assert(npe.getMessage.contains(colPath.mkString("\n", "\n", "\n"))) + case _ if e.getErrorClass == "NOT_NULL_ASSERT_VIOLATION" => case other => fail(s"Unexpected exception cause: $other") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f81369bbad367..56c364e20846a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -3748,22 +3748,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark test("SPARK-33084: Add jar support Ivy URI in SQL") { val sc = spark.sparkContext - val hiveVersion = "2.3.9" // transitive=false, only download specified jar - sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion?transitive=false") - assert(sc.listJars() - 
.exists(_.contains(s"org.apache.hive.hcatalog_hive-hcatalog-core-$hiveVersion.jar"))) + sql(s"ADD JAR ivy://org.springframework:spring-core:6.1.6?transitive=false") + assert(sc.listJars().exists(_.contains("org.springframework_spring-core-6.1.6.jar"))) + assert(!sc.listJars().exists(_.contains("org.springframework_spring-jcl-6.1.6.jar"))) // default transitive=true, test download ivy URL jar return multiple jars - sql("ADD JAR ivy://org.scala-js:scalajs-test-interface_2.12:1.2.0") - assert(sc.listJars().exists(_.contains("scalajs-library_2.12"))) - assert(sc.listJars().exists(_.contains("scalajs-test-interface_2.12"))) + sql("ADD JAR ivy://org.awaitility:awaitility:4.2.1") + assert(sc.listJars().exists(_.contains("org.awaitility_awaitility-4.2.1.jar"))) + assert(sc.listJars().exists(_.contains("org.hamcrest_hamcrest-2.1.jar"))) - sql(s"ADD JAR ivy://org.apache.hive:hive-contrib:$hiveVersion" + - "?exclude=org.pentaho:pentaho-aggdesigner-algorithm&transitive=true") - assert(sc.listJars().exists(_.contains(s"org.apache.hive_hive-contrib-$hiveVersion.jar"))) - assert(sc.listJars().exists(_.contains(s"org.apache.hive_hive-exec-$hiveVersion.jar"))) - assert(!sc.listJars().exists(_.contains("org.pentaho.pentaho_aggdesigner-algorithm"))) + sql("ADD JAR ivy://org.junit.jupiter:junit-jupiter:5.10.2" + + "?exclude=org.junit.jupiter:junit-jupiter-engine&transitive=true") + assert(sc.listJars().exists(_.contains("org.junit.jupiter_junit-jupiter-api-5.10.2.jar"))) + assert(sc.listJars().exists(_.contains("org.junit.jupiter_junit-jupiter-params-5.10.2.jar"))) + assert(!sc.listJars().exists(_.contains("org.junit.jupiter_junit-jupiter-engine-5.10.2.jar"))) } test("SPARK-33677: LikeSimplification should be skipped if pattern contains any escapeChar") { @@ -4400,8 +4399,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(df, Row(1, 2, 3, 4, 5, 6, 7, 8, 9, 10) :: Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20) :: Nil) - assert(df.schema.names.sameElements( - Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a"))) + assert(df.schema.names === + Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a")) checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"), Row(1, 6, 7, 8, 9) :: Row(2, 12, 14, 16, 18) :: Nil) checkAnswer(df.where("`a.b` > 10"), @@ -4419,8 +4418,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(df, Row(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11) :: Row(2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22) :: Nil) - assert(df.schema.names.sameElements( - Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a", ","))) + assert(df.schema.names === + Array("max(t)", "max(t", "=", "\n", ";", "a b", "{", ".", "a.b", "a", ",")) checkAnswer(df.select("`max(t)`", "`a b`", "`{`", "`.`", "`a.b`"), Row(1, 6, 7, 8, 9) :: Row(2, 12, 14, 16, 18) :: Nil) checkAnswer(df.where("`a.b` > 10"), @@ -4716,6 +4715,147 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark val df6 = df3.join(df2, col("df3.zaak_id") === col("df2.customer_id"), "outer") df5.crossJoin(df6) } + + test("SPARK-47939: Describe should work with parameterized queries") { + checkAnswer( + spark.sql("describe select ?", Array(1)), + Array( + Row("1", "int", null) + ) + ) + checkAnswer( + spark.sql("describe select :first", Map("first" -> 1)), + Array( + Row("1", "int", null) + ) + ) + + checkAnswer( + spark.sql("describe select * from values (?, ?) 
t(x, y)", Array(1, "a")), + Array( + Row("x", "int", null), + Row("y", "string", null) + ) + ) + checkAnswer( + spark.sql( + "describe select * from values (:first, :second) t(x, y)", + Map("first" -> 1, "second" -> "a") + ), + Array( + Row("x", "int", null), + Row("y", "string", null) + ) + ) + } + + test("SPARK-47939: Explain should work with parameterized queries") { + def checkQueryPlan(df: DataFrame, plan: String): Unit = assert( + df.collect() + .map(_.getString(0)) + .map(_.replaceAll("#[0-9]+", "#N")) + === Array(plan.stripMargin) + ) + + checkQueryPlan( + spark.sql("explain select ?", Array(1)), + """== Physical Plan == + |*(1) Project [1 AS 1#N] + |+- *(1) Scan OneRowRelation[] + + |""" + ) + checkQueryPlan( + spark.sql("explain select :first", Map("first" -> 1)), + """== Physical Plan == + |*(1) Project [1 AS 1#N] + |+- *(1) Scan OneRowRelation[] + + |""" + ) + + checkQueryPlan( + spark.sql("explain explain explain select ?", Array(1)), + """== Physical Plan == + |Execute ExplainCommand + | +- ExplainCommand ExplainCommand 'PosParameterizedQuery [1], SimpleMode, SimpleMode + + |""" + ) + checkQueryPlan( + spark.sql("explain explain explain select :first", Map("first" -> 1)), + // scalastyle:off + """== Physical Plan == + |Execute ExplainCommand + | +- ExplainCommand ExplainCommand 'NameParameterizedQuery [first], [1], SimpleMode, SimpleMode + + |""" + // scalastyle:on + ) + + checkQueryPlan( + spark.sql("explain describe select ?", Array(1)), + """== Physical Plan == + |Execute DescribeQueryCommand + | +- DescribeQueryCommand select ? + + |""" + ) + checkQueryPlan( + spark.sql("explain describe select :first", Map("first" -> 1)), + """== Physical Plan == + |Execute DescribeQueryCommand + | +- DescribeQueryCommand select :first + + |""" + ) + + checkQueryPlan( + spark.sql("explain extended select * from values (?, ?) 
t(x, y)", Array(1, "a")), + """== Parsed Logical Plan == + |'PosParameterizedQuery [1, a] + |+- 'Project [*] + | +- 'SubqueryAlias t + | +- 'UnresolvedInlineTable [x, y], [[posparameter(39), posparameter(42)]] + + |== Analyzed Logical Plan == + |x: int, y: string + |Project [x#N, y#N] + |+- SubqueryAlias t + | +- LocalRelation [x#N, y#N] + + |== Optimized Logical Plan == + |LocalRelation [x#N, y#N] + + |== Physical Plan == + |LocalTableScan [x#N, y#N] + |""" + ) + checkQueryPlan( + spark.sql( + "explain extended select * from values (:first, :second) t(x, y)", + Map("first" -> 1, "second" -> "a") + ), + """== Parsed Logical Plan == + |'NameParameterizedQuery [first, second], [1, a] + |+- 'Project [*] + | +- 'SubqueryAlias t + | +- 'UnresolvedInlineTable [x, y], [[namedparameter(first), namedparameter(second)]] + + |== Analyzed Logical Plan == + |x: int, y: string + |Project [x#N, y#N] + |+- SubqueryAlias t + | +- LocalRelation [x#N, y#N] + + |== Optimized Logical Plan == + |LocalRelation [x#N, y#N] + + |== Physical Plan == + |LocalTableScan [x#N, y#N] + |""" + ) + } } case class Foo(bar: Option[String]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index c9cb459878cfb..b031f45ddbf34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -703,9 +703,9 @@ class SQLQueryTestSuite extends QueryTest with SharedSparkSession with SQLHelper } test("Test logic for determining whether a query is semantically sorted") { - withTable("t1", "t2") { - spark.sql("CREATE TABLE t1(a int, b int) USING parquet") - spark.sql("CREATE TABLE t2(a int, b int) USING parquet") + withTempView("t1", "t2") { + spark.sql("CREATE TEMP VIEW t1 AS SELECT * FROM VALUES (1, 1) AS t1(a, b)") + spark.sql("CREATE TEMP VIEW t2 AS SELECT * FROM VALUES (1, 2) AS t2(a, b)") val unsortedSelectQuery = "select * from t1" val sortedSelectQuery = "select * from t1 order by a, b" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala index 2c803ceffe950..a8b359f308a2b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SetCommandSuite.scala @@ -83,18 +83,6 @@ class SetCommandSuite extends QueryTest with SharedSparkSession with ResetSystem spark.sessionState.conf.clear() } - test("SPARK-19218 `SET -v` should not fail with null value configuration") { - import SQLConf._ - val confEntry = buildConf("spark.test").doc("doc").stringConf.createWithDefault(null) - - try { - val result = sql("SET -v").collect() - assert(result === result.sortBy(_.getString(0))) - } finally { - SQLConf.unregister(confEntry) - } - } - test("SET commands with illegal or inappropriate argument") { spark.sessionState.conf.clear() // Set negative mapred.reduce.tasks for automatically determining diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala index 18c1f4dcc4e00..1f0033a0efcdc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionExtensionSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.catalog.BucketSpec import 
org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{Final, Partial} -import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParserInterface} +import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, CompoundBody, ParserInterface} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, Limit, LocalRelation, LogicalPlan, Statistics, UnresolvedHint} import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, SinglePartition} @@ -40,7 +40,7 @@ import org.apache.spark.sql.connector.write.WriterCommitMessage import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper, AQEShuffleReadExec, QueryStageExec, ShuffleQueryStageExec} import org.apache.spark.sql.execution.aggregate.HashAggregateExec -import org.apache.spark.sql.execution.datasources.{FileFormat, WriteFilesExec, WriteFilesSpec} +import org.apache.spark.sql.execution.datasources.{FileFormat, WriteFilesExec, WriteFilesExecBase, WriteFilesSpec} import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, BroadcastExchangeLike, ShuffleExchangeExec, ShuffleExchangeLike, ShuffleOrigin} import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector import org.apache.spark.sql.internal.SQLConf @@ -581,6 +581,9 @@ case class MyParser(spark: SparkSession, delegate: ParserInterface) extends Pars override def parseQuery(sqlText: String): LogicalPlan = delegate.parseQuery(sqlText) + + override def parseScript(sqlScriptText: String): CompoundBody = + delegate.parseScript(sqlScriptText) } object MyExtensions { @@ -842,14 +845,13 @@ class ColumnarProjectExec(projectList: Seq[NamedExpression], child: SparkPlan) new ColumnarProjectExec(projectList, newChild) } -class ColumnarWriteExec( +case class ColumnarWriteExec( child: SparkPlan, fileFormat: FileFormat, partitionColumns: Seq[Attribute], bucketSpec: Option[BucketSpec], options: Map[String, String], - staticPartitions: TablePartitionSpec) extends WriteFilesExec( - child, fileFormat, partitionColumns, bucketSpec, options, staticPartitions) { + staticPartitions: TablePartitionSpec) extends WriteFilesExecBase { override def supportsColumnar: Boolean = true @@ -858,8 +860,8 @@ class ColumnarWriteExec( throw new Exception("columnar write") } - override protected def withNewChildInternal(newChild: SparkPlan): WriteFilesExec = - new ColumnarWriteExec( + override protected def withNewChildInternal(newChild: SparkPlan): ColumnarWriteExec = + ColumnarWriteExec( newChild, fileFormat, partitionColumns, bucketSpec, options, staticPartitions) } @@ -971,7 +973,7 @@ case class PreRuleReplaceAddWithBrokenVersion() extends Rule[SparkPlan] { replaceWithColumnarExpression(exp).asInstanceOf[NamedExpression]), replaceWithColumnarPlan(plan.child)) case write: WriteFilesExec => - new ColumnarWriteExec( + ColumnarWriteExec( replaceWithColumnarPlan(write.child), write.fileFormat, write.partitionColumns, @@ -1014,6 +1016,7 @@ case class MyShuffleExchangeExec(delegate: ShuffleExchangeExec) extends ShuffleE val attributeStats = AttributeMap(Seq((child.output.head, columnStats))) Statistics(stats.sizeInBytes, stats.rowCount, attributeStats) } + override def shuffleId: Int = delegate.shuffleId override def child: SparkPlan = delegate.child override protected def doExecute(): RDD[InternalRow] = delegate.execute() override def 
outputPartitioning: Partitioning = delegate.outputPartitioning diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index 393ecc95b66b2..5f1fa2904e341 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -644,7 +644,7 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared test("analyzes column statistics in cached global temporary view") { withGlobalTempView("gTempView") { - val globalTempDB = spark.sharedState.globalTempViewManager.database + val globalTempDB = spark.sharedState.globalTempDB val e1 = intercept[AnalysisException] { sql(s"ANALYZE TABLE $globalTempDB.gTempView COMPUTE STATISTICS FOR COLUMNS id") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 75b4415db6b54..31c1cac9fb718 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -332,6 +332,11 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { // scalastyle:on } + test("string substring function using columns") { + val df = Seq(("Spark", 2, 3)).toDF("a", "b", "c") + checkAnswer(df.select(substring($"a", $"b", $"c")), Row("par")) + } + test("string encode/decode function") { val bytes = Array[Byte](-27, -92, -89, -27, -115, -125, -28, -72, -106, -25, -107, -116) // scalastyle:off @@ -525,6 +530,33 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { Row(Seq("aa", "bb", "cc", ""))) } + test("SPARK-47845: string split function with column types") { + val df = Seq( + ("aa2bb3cc4", "[1-9]+", 0), + ("aa2bb3cc4", "[1-9]+", 2), + ("aa2bb3cc4", "[1-9]+", -2)).toDF("a", "b", "c") + + // without limit + val expectedNoLimit = Seq( + Row(Seq("aa", "bb", "cc", "")), + Row(Seq("aa", "bb", "cc", "")), + Row(Seq("aa", "bb", "cc", ""))) + + checkAnswer(df.select(split($"a", $"b")), expectedNoLimit) + + checkAnswer(df.selectExpr("split(a, b)"), expectedNoLimit) + + // with limit + val expectedWithLimit = Seq( + Row(Seq("aa", "bb", "cc", "")), + Row(Seq("aa", "bb3cc4")), + Row(Seq("aa", "bb", "cc", ""))) + + checkAnswer(df.select(split($"a", $"b", $"c")), expectedWithLimit) + + checkAnswer(df.selectExpr("split(a, b, c)"), expectedWithLimit) + } + test("string / binary length function") { val df = Seq(("123", Array[Byte](1, 2, 3, 4), 123, 2.0f, 3.015)) .toDF("a", "b", "c", "d", "e") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSCollationQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSCollationQueryTestSuite.scala index a84dd9645bcc4..46a24acb475c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSCollationQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSCollationQueryTestSuite.scala @@ -123,7 +123,7 @@ class TPCDSCollationQueryTestSuite extends QueryTest with TPCDSBase with SQLQuer val checks: Seq[Seq[CollationCheck]] = Seq( Seq( CaseSensitiveCollationCheck("tpcds_utf8", "UTF8_BINARY", "lower"), - CaseInsensitiveCollationCheck("tpcds_utf8_random", "UTF8_BINARY_LCASE", randomizeCase) + CaseInsensitiveCollationCheck("tpcds_utf8_random", "UTF8_LCASE", randomizeCase) ), Seq( CaseSensitiveCollationCheck("tpcds_unicode", "UNICODE", "lower"), 
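
The StringFunctionsSuite hunks above pin down new Column-typed overloads of substring and split (SPARK-47845): position, length, regex pattern, and limit can now be passed as Columns rather than literals. The following is a minimal standalone sketch of that behavior, not part of the patch itself; it assumes a Spark build that already contains these overloads, and the object/session names are illustrative only.

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.functions.{split, substring}

    // Hypothetical driver, not part of the patch; shown only to illustrate the
    // Column-typed overloads exercised by the tests above.
    object ColumnOverloadSketch {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[*]")
          .appName("column-overload-sketch")
          .getOrCreate()
        import spark.implicits._

        // substring with Column position and length (position is 1-based).
        val sub = Seq(("Spark", 2, 3)).toDF("a", "b", "c")
        sub.select(substring($"a", $"b", $"c")).show()   // "par"

        // split with Column regex, with and without a Column limit.
        val sp = Seq(("aa2bb3cc4", "[1-9]+", 2)).toDF("a", "b", "c")
        sp.select(split($"a", $"b", $"c")).show()        // ["aa", "bb3cc4"]
        sp.select(split($"a", $"b")).show()              // ["aa", "bb", "cc", ""]

        spark.stop()
      }
    }

The expected values in the comments mirror the assertions in the suite: a limit of 2 stops splitting after the first match, while omitting the limit splits on every match.
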
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index 87ca3a07c4d56..7e940252430f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -1183,4 +1183,39 @@ class UDFSuite extends QueryTest with SharedSparkSession { df10.select(zip_with(col("array1"), col("array2"), (b1, b2) => reverseThenConcat2(b1, b2))) checkAnswer(test10, Row(Array(Row("cbaihg"), Row("fedlkj"))) :: Nil) } + + test("SPARK-47927: Correctly pass null values derived from join to UDF") { + val f = udf[Tuple1[Option[Int]], Tuple1[Option[Int]]](identity) + val ds1 = Seq(1).toDS() + val ds2 = Seq[Int]().toDS() + + checkAnswer( + ds1.join(ds2, ds1("value") === ds2("value"), "left_outer") + .select(f(struct(ds2("value").as("_1")))), + Row(Row(null))) + } + + test("char/varchar as UDF return type") { + Seq(CharType(5), VarcharType(5)).foreach { dt => + val f = udf( + new UDF0[String] { + override def call(): String = "a" + }, + dt + ) + checkError( + intercept[AnalysisException](spark.range(1).select(f())), + errorClass = "UNSUPPORTED_DATA_TYPE_FOR_ENCODER", + sqlState = "0A000", + parameters = Map("dataType" -> s"\"${dt.sql}\"") + ) + } + } + + test("SPARK-47927: ScalaUDF null handling") { + val f = udf[Int, Int](_ + 1) + val df = Seq(Some(1), None).toDF("c") + .select(f($"c").as("f"), f($"f")) + checkAnswer(df, Seq(Row(2, 3), Row(null, null))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala index 9daa69ce9f155..18a6c538e0a80 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql import java.io.ByteArrayOutputStream -import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.{SparkConf, SparkFunSuite, SparkIllegalArgumentException} import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{UnsafeProjection, UnsafeRow} @@ -188,4 +188,40 @@ class UnsafeRowSuite extends SparkFunSuite { unsafeRow.setDecimal(0, d2, 38) assert(unsafeRow.getDecimal(0, 38, 18) === null) } + + test("SPARK-48713: throw SparkIllegalArgumentException for illegal UnsafeRow.pointTo") { + val emptyRow = UnsafeRow.createFromByteArray(64, 2) + val byteArray = new Array[Byte](64) + + // Out of bounds + var errorMsg = intercept[SparkIllegalArgumentException] { + emptyRow.pointTo(byteArray, Platform.BYTE_ARRAY_OFFSET + 50, 32) + }.getMessage + assert( + errorMsg.contains( + "Invalid byte array backed UnsafeRow: byte array length=64, offset=50, byte size=32" + ) + ) + + // Negative size + errorMsg = intercept[SparkIllegalArgumentException] { + emptyRow.pointTo(byteArray, Platform.BYTE_ARRAY_OFFSET + 50, -32) + }.getMessage + assert( + errorMsg.contains( + "Invalid byte array backed UnsafeRow: byte array length=64, offset=50, byte size=-32" + ) + ) + + // Negative offset + errorMsg = intercept[SparkIllegalArgumentException] { + emptyRow.pointTo(byteArray, -5, 32) + }.getMessage + assert( + errorMsg.contains( + s"Invalid byte array backed UnsafeRow: byte array length=64, " + + s"offset=${-5 - Platform.BYTE_ARRAY_OFFSET}, byte size=32" + ) + ) + } } diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala index 58528b9186736..c4dba850cf777 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantEndToEndSuite.scala @@ -14,13 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.spark.sql -import org.apache.spark.sql.catalyst.expressions.{CreateArray, CreateNamedStruct, Literal, StructsToJson} +import org.apache.spark.sql.catalyst.expressions.{Cast, CreateArray, CreateNamedStruct, JsonToStructs, Literal, StructsToJson} import org.apache.spark.sql.catalyst.expressions.variant.ParseJson import org.apache.spark.sql.execution.WholeStageCodegenExec +import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types._ +import org.apache.spark.sql.vectorized.ColumnarArray import org.apache.spark.types.variant.VariantBuilder import org.apache.spark.unsafe.types.VariantVal @@ -57,6 +59,72 @@ class VariantEndToEndSuite extends QueryTest with SharedSparkSession { check("[0.0, 1.00, 1.10, 1.23]", "[0,1,1.1,1.23]") } + test("from_json/to_json round-trip") { + def check(input: String, output: String = null): Unit = { + val df = Seq(input).toDF("v") + val variantDF = df.select(Column(StructsToJson(Map.empty, + JsonToStructs(VariantType, Map.empty, Column("v").expr)))) + val expected = if (output != null) output else input + checkAnswer(variantDF, Seq(Row(expected))) + } + + check("null") + check("true") + check("false") + check("-1") + check("1.0E10") + check("\"\"") + check("\"" + ("a" * 63) + "\"") + check("\"" + ("b" * 64) + "\"") + // scalastyle:off nonascii + check("\"" + ("你好,世界" * 20) + "\"") + // scalastyle:on nonascii + check("[]") + check("{}") + // scalastyle:off nonascii + check( + "[null, true, false,-1, 1e10, \"\\uD83D\\uDE05\", [ ], { } ]", + "[null,true,false,-1,1.0E10,\"😅\",[],{}]" + ) + // scalastyle:on nonascii + check("[0.0, 1.00, 1.10, 1.23]", "[0,1,1.1,1.23]") + } + + test("try_parse_json/to_json round-trip") { + def check(input: String, output: String = "INPUT IS OUTPUT"): Unit = { + val df = Seq(input).toDF("v") + val variantDF = df.selectExpr("to_json(try_parse_json(v)) as v").select(Column("v")) + val expected = if (output != "INPUT IS OUTPUT") output else input + checkAnswer(variantDF, Seq(Row(expected))) + } + + check("null") + check("true") + check("false") + check("-1") + check("1.0E10") + check("\"\"") + check("\"" + ("a" * 63) + "\"") + check("\"" + ("b" * 64) + "\"") + // scalastyle:off nonascii + check("\"" + ("你好,世界" * 20) + "\"") + // scalastyle:on nonascii + check("[]") + check("{}") + // scalastyle:off nonascii + check( + "[null, true, false,-1, 1e10, \"\\uD83D\\uDE05\", [ ], { } ]", + "[null,true,false,-1,1.0E10,\"😅\",[],{}]" + ) + // scalastyle:on nonascii + check("[0.0, 1.00, 1.10, 1.23]", "[0,1,1.1,1.23]") + // Places where parse_json should fail and therefore, try_parse_json should return null + check("{1:2}", null) + check("{\"a\":1", null) + check("{\"a\":[a,b,c]}", null) + check("\"" + "a" * (16 * 1024 * 1024) + "\"", null) + } + test("to_json with nested variant") { val df = Seq(1).toDF("v") val variantDF1 = df.select( @@ -91,6 +159,17 @@ class VariantEndToEndSuite extends QueryTest with SharedSparkSession { check("null", "VOID") check("1", "BIGINT") check("1.0", 
"DECIMAL(1,0)") + check("0.01", "DECIMAL(2,2)") + check("1.00", "DECIMAL(1,0)") + check("10.00", "DECIMAL(2,0)") + check("10.10", "DECIMAL(3,1)") + check("0.0", "DECIMAL(1,0)") + check("-0.0", "DECIMAL(1,0)") + check("2147483647.999", "DECIMAL(13,3)") + check("9223372036854775808", "DECIMAL(19,0)") + check("-9223372036854775808.0", "DECIMAL(19,0)") + check("9999999999999999999.9999999999999999999", "DECIMAL(38,19)") + check("9999999999999999999.99999999999999999999", "DOUBLE") check("1E0", "DOUBLE") check("true", "BOOLEAN") check("\"2000-01-01\"", "STRING") @@ -113,6 +192,35 @@ class VariantEndToEndSuite extends QueryTest with SharedSparkSession { ) } + test("from_json variant data type parsing") { + def check(variantTypeString: String): Unit = { + val df = Seq("{\"a\": 1, \"b\": [2, 3.1]}").toDF("j").selectExpr("variant_get(from_json(j,\"" + + variantTypeString + "\"),\"$.b[0]\")::int") + checkAnswer(df, Seq(Row(2))) + } + + check("variant") + check(" \t variant ") + check(" \n VaRiaNt ") + } + + test("is_variant_null with parse_json and variant_get") { + def check(json: String, path: String, expected: Boolean): Unit = { + val df = Seq(json).toDF("j").selectExpr(s"is_variant_null(variant_get(parse_json(j)," + + s"\"${path}\"))") + checkAnswer(df, Seq(Row(expected))) + } + + check("{ \"a\": null }", "$.a", expected = true) + check("{ \"a\": null }", "$.b", expected = false) + check("{ \"a\": null, \"b\": \"null\" }", "$.b", expected = false) + check("{ \"a\": null, \"b\": {\"c\": null} }", "$.b.c", expected = true) + check("{ \"a\": null, \"b\": {\"c\": null, \"d\": [13, null]} }", "$.b.d", expected = false) + check("{ \"a\": null, \"b\": {\"c\": null, \"d\": [13, null]} }", "$.b.d[0]", expected = false) + check("{ \"a\": null, \"b\": {\"c\": null, \"d\": [13, null]} }", "$.b.d[1]", expected = true) + check("{ \"a\": null, \"b\": {\"c\": null, \"d\": [13, null]} }", "$.b.d[2]", expected = false) + } + test("schema_of_variant_agg") { // Literal input. 
checkAnswer( @@ -154,4 +262,31 @@ class VariantEndToEndSuite extends QueryTest with SharedSparkSession { Seq.fill(3)(Row("STRUCT>")) ++ Seq(Row("STRUCT>"))) } } + + test("cast to variant with ColumnarArray input") { + val dataVector = new OnHeapColumnVector(4, LongType) + dataVector.appendNull() + dataVector.appendLong(123) + dataVector.appendNull() + dataVector.appendLong(456) + val array = new ColumnarArray(dataVector, 0, 4) + val variant = Cast(Literal(array, ArrayType(LongType)), VariantType).eval() + assert(variant.toString == "[null,123,null,456]") + dataVector.close() + } + + test("cast to variant with scan input") { + withTempPath { dir => + val path = dir.getAbsolutePath + val input = Seq(Row(Array(1, null), Map("k1" -> null, "k2" -> false), Row(null, "str"))) + val schema = StructType.fromDDL( + "a array, m map, s struct") + spark.createDataFrame(spark.sparkContext.parallelize(input), schema).write.parquet(path) + val df = spark.read.parquet(path).selectExpr( + s"cast(cast(a as variant) as ${schema(0).dataType.sql})", + s"cast(cast(m as variant) as ${schema(1).dataType.sql})", + s"cast(cast(s as variant) as ${schema(2).dataType.sql})") + checkAnswer(df, input) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala index d276ec4428b9f..0c00676607dd4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala @@ -18,20 +18,25 @@ package org.apache.spark.sql import java.io.File +import java.nio.charset.StandardCharsets +import java.nio.file.Files import scala.collection.mutable import scala.jdk.CollectionConverters._ import scala.util.Random -import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode +import org.apache.spark.SparkRuntimeException +import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, ExpressionEvalHelper, Literal} +import org.apache.spark.sql.catalyst.expressions.variant.{VariantExpressionEvalUtils, VariantGet} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{StringType, StructField, StructType, VariantType} -import org.apache.spark.unsafe.types.VariantVal +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.{UTF8String, VariantVal} import org.apache.spark.util.ArrayImplicits._ -class VariantSuite extends QueryTest with SharedSparkSession { +class VariantSuite extends QueryTest with SharedSparkSession with ExpressionEvalHelper { import testImplicits._ test("basic tests") { @@ -55,6 +60,15 @@ class VariantSuite extends QueryTest with SharedSparkSession { } } + test("basic try_parse_json alias") { + val df = spark.createDataFrame(Seq(Row("""{ "a" : 1 }"""), Row("""{ a : 1 }""")).asJava, + new StructType().add("json", StringType)) + val actual = df.select(to_json(try_parse_json(col("json")))).collect() + + assert(actual(0)(0) == """{"a":1}""") + assert(actual(1)(0) == null) + } + test("basic parse_json alias") { val df = spark.createDataFrame(Seq(Row("""{ "a" : 1 }""")).asJava, new StructType().add("json", StringType)) @@ -66,6 +80,33 @@ class VariantSuite extends QueryTest with SharedSparkSession { assert(actual.getString(1) == """{"b":[{"c":"str2"}]}""") } + test("expression alias") { + val df = Seq("""{ "a" : 1 
}""", """{ "b" : 2 }""").toDF("json") + val v = parse_json(col("json")) + + def rows(results: Any*): Seq[Row] = results.map(Row(_)) + + checkAnswer(df.select(is_variant_null(v)), rows(false, false)) + checkAnswer(df.select(schema_of_variant(v)), rows("STRUCT", "STRUCT")) + checkAnswer(df.select(schema_of_variant_agg(v)), rows("STRUCT")) + + checkAnswer(df.select(variant_get(v, "$.a", "int")), rows(1, null)) + checkAnswer(df.select(variant_get(v, "$.b", "int")), rows(null, 2)) + checkAnswer(df.select(variant_get(v, "$.a", "double")), rows(1.0, null)) + checkError( + exception = intercept[SparkRuntimeException] { + df.select(variant_get(v, "$.a", "binary")).collect() + }, + errorClass = "INVALID_VARIANT_CAST", + parameters = Map("value" -> "1", "dataType" -> "\"BINARY\"") + ) + + checkAnswer(df.select(try_variant_get(v, "$.a", "int")), rows(1, null)) + checkAnswer(df.select(try_variant_get(v, "$.b", "int")), rows(null, 2)) + checkAnswer(df.select(try_variant_get(v, "$.a", "double")), rows(1.0, null)) + checkAnswer(df.select(try_variant_get(v, "$.a", "binary")), rows(null, null)) + } + test("round trip tests") { val rand = new Random(42) val input = Seq.fill(50) { @@ -272,6 +313,88 @@ class VariantSuite extends QueryTest with SharedSparkSession { } } + test("json option constraints") { + withTempDir { dir => + val file = new File(dir, "file.json") + Files.write(file.toPath, "0".getBytes(StandardCharsets.UTF_8)) + + // Ensure that we get an error when setting the singleVariantColumn JSON option while also + // specifying a schema. + checkError( + exception = intercept[AnalysisException] { + spark.read.format("json").option("singleVariantColumn", "var").schema("var variant") + }, + errorClass = "INVALID_SINGLE_VARIANT_COLUMN", + parameters = Map.empty + ) + checkError( + exception = intercept[AnalysisException] { + spark.read.format("json").option("singleVariantColumn", "another_name") + .schema("var variant").json(file.getAbsolutePath).collect() + }, + errorClass = "INVALID_SINGLE_VARIANT_COLUMN", + parameters = Map.empty + ) + } + } + + test("json scan") { + val content = Seq( + "true", + """{"a": [], "b": null}""", + """{"a": 1}""", + "[1, 2, 3]" + ).mkString("\n").getBytes(StandardCharsets.UTF_8) + + withTempDir { dir => + val file = new File(dir, "file.json") + Files.write(file.toPath, content) + + checkAnswer( + spark.read.format("json").option("singleVariantColumn", "var") + .load(file.getAbsolutePath) + .selectExpr("to_json(var)"), + Seq(Row("true"), Row("""{"a":[],"b":null}"""), Row("""{"a":1}"""), Row("[1,2,3]")) + ) + + checkAnswer( + spark.read.format("json").schema("a variant, b variant") + .load(file.getAbsolutePath).selectExpr("to_json(a)", "to_json(b)"), + Seq(Row(null, null), Row("[]", "null"), Row("1", null), Row(null, null)) + ) + } + + // Test scan with partitions. 
+ withTempDir { dir => + new File(dir, "a=1/b=2/").mkdirs() + Files.write(new File(dir, "a=1/b=2/file.json").toPath, content) + checkAnswer( + spark.read.format("json").option("singleVariantColumn", "var") + .load(dir.getAbsolutePath).selectExpr("a", "b", "to_json(var)"), + Seq(Row(1, 2, "true"), Row(1, 2, """{"a":[],"b":null}"""), Row(1, 2, """{"a":1}"""), + Row(1, 2, "[1,2,3]")) + ) + } + } + + test("json scan with map schema") { + withTempDir { dir => + val file = new File(dir, "file.json") + val content = Seq( + "true", + """{"v": null}""", + """{"v": {"a": 1, "b": null}}""" + ).mkString("\n").getBytes(StandardCharsets.UTF_8) + Files.write(file.toPath, content) + checkAnswer( + spark.read.format("json").schema("v map") + .load(file.getAbsolutePath) + .selectExpr("to_json(v)"), + Seq(Row(null), Row(null), Row("""{"a":1,"b":null}""")) + ) + } + } + test("group/order/join variant are disabled") { var ex = intercept[AnalysisException] { spark.sql("select parse_json('') group by 1") @@ -324,4 +447,184 @@ class VariantSuite extends QueryTest with SharedSparkSession { } } } + + test("SPARK-48067: default variant columns works") { + withTable("t") { + sql("""create table t( + v1 variant default null, + v2 variant default parse_json(null), + v3 variant default cast(null as variant), + v4 variant default parse_json('1'), + v5 variant default parse_json('1'), + v6 variant default parse_json('{\"k\": \"v\"}'), + v7 variant default cast(5 as int), + v8 variant default cast('hello' as string), + v9 variant default parse_json(to_json(parse_json('{\"k\": \"v\"}'))) + ) using parquet""") + sql("""insert into t values(DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, DEFAULT, + DEFAULT, DEFAULT)""") + + val expected = sql("""select + cast(null as variant) as v1, + parse_json(null) as v2, + cast(null as variant) as v3, + parse_json('1') as v4, + parse_json('1') as v5, + parse_json('{\"k\": \"v\"}') as v6, + cast(cast(5 as int) as variant) as v7, + cast('hello' as variant) as v8, + parse_json(to_json(parse_json('{\"k\": \"v\"}'))) as v9 + """) + val actual = sql("select * from t") + checkAnswer(actual, expected.collect()) + } + } + + Seq( + ( + "basic int parse json", + VariantExpressionEvalUtils.parseJson(UTF8String.fromString("1")), + VariantType + ), + ( + "basic json parse json", + VariantExpressionEvalUtils.parseJson(UTF8String.fromString("{\"k\": \"v\"}")), + VariantType + ), + ( + "basic null parse json", + VariantExpressionEvalUtils.parseJson(UTF8String.fromString("null")), + VariantType + ), + ( + "basic null", + null, + VariantType + ), + ( + "basic array", + new GenericArrayData(Array[Int](1, 2, 3, 4, 5)), + new ArrayType(IntegerType, false) + ), + ( + "basic string", + UTF8String.fromString("literal string"), + StringType + ), + ( + "basic timestamp", + 0L, + TimestampType + ), + ( + "basic int", + 0, + IntegerType + ), + ( + "basic struct", + Literal.default(new StructType().add("col0", StringType)).eval(), + new StructType().add("col0", StringType) + ), + ( + "complex struct with child variant", + Literal.default(new StructType() + .add("col0", StringType) + .add("col1", new StructType().add("col0", VariantType)) + .add("col2", VariantType) + .add("col3", new ArrayType(VariantType, false)) + ).eval(), + new StructType() + .add("col0", StringType) + .add("col1", new StructType().add("col0", VariantType)) + .add("col2", VariantType) + .add("col3", new ArrayType(VariantType, false)) + ), + ( + "basic array with null", + new GenericArrayData(Array[Any](1, 2, null)), + new 
ArrayType(IntegerType, true) + ), + ( + "basic map with null", + new ArrayBasedMapData( + new GenericArrayData(Array[Any](UTF8String.fromString("k1"), UTF8String.fromString("k2"))), + new GenericArrayData(Array[Any](1, null)) + ), + new MapType(StringType, IntegerType, true) + ) + ).foreach { case (testName, value, dt) => + test(s"SPARK-48067: Variant literal `sql` correctly recreates the variant - $testName") { + val l = Literal.create( + VariantExpressionEvalUtils.castToVariant(value, dt.asInstanceOf[DataType]), VariantType) + val jsonString = l.eval().asInstanceOf[VariantVal] + .toJson(DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)) + val expectedSql = s"PARSE_JSON('$jsonString')" + assert(l.sql == expectedSql) + val valueFromLiteralSql = + spark.sql(s"select ${l.sql}").collect()(0).getAs[VariantVal](0) + + // Cast the variants to their specified type to compare for logical equality. + // Currently, variant equality naively compares its value and metadata binaries. However, + // variant equality is more complex than this. + val castVariantExpr = VariantGet( + l, + Literal.create(UTF8String.fromString("$"), StringType), + dt, + true, + Some(DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone).toString()) + ) + val sqlVariantExpr = VariantGet( + Literal.create(valueFromLiteralSql, VariantType), + Literal.create(UTF8String.fromString("$"), StringType), + dt, + true, + Some(DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone).toString()) + ) + checkEvaluation(castVariantExpr, sqlVariantExpr.eval()) + } + } + + test("variant_get size") { + val largeKey = "x" * 1000 + val df = Seq(s"""{ "$largeKey": {"a" : 1 }, + "b" : 2, + "c": [1,2,3,{"$largeKey": 4}] }""").toDF("json") + .selectExpr("parse_json(json) as v") + + // Check Variant with approximate bounds to avoid flakiness if we make minor format changes. + def checkSize(v: VariantVal, minMetadata: Long, maxMetadata: Long, + minValue: Long, maxValue: Long): Unit = { + val mSize = v.getMetadata.length + assert(mSize >= minMetadata) + assert(mSize <= maxMetadata) + val vSize = v.getValue.length + assert(vSize >= minValue) + assert(vSize <= maxValue) + } + + // The full Variant has large metadata (but only one copy of `largeKey`). + checkSize(df.selectExpr("variant_get(v, '$', 'variant')").collect()(0) + .getAs[VariantVal](0), 1000, 1050, 20, 40) + // Extracting Variant or a nested type containing Variant should strip out the large metadata. + checkSize(df.selectExpr("variant_get(v, '$.b', 'variant')").collect()(0) + .getAs[VariantVal](0), 2, 4, 2, 4) + // Behavior is the same without an explicit cast to Variant. + checkSize(df.selectExpr("variant_get(v, '$.b', 'variant')").collect()(0) + .getAs[VariantVal](0), 2, 4, 2, 4) + checkSize(df.selectExpr(s"variant_get(v, '$$.$largeKey', 'variant')").collect()(0) + .getAs[VariantVal](0), 5, 10, 5, 10) + checkSize(df.selectExpr(s"variant_get(v, '$$.$largeKey', 'struct')") + .collect()(0).getStruct(0).getAs[VariantVal](0), 2, 4, 2, 4) + // Only the array element that contains `largeKey` should be large. + checkSize(df.selectExpr("variant_get(v, '$.c', 'array')").collect()(0) + .getSeq[VariantVal](0)(0), 2, 4, 2, 4) + checkSize(df.selectExpr("variant_get(v, '$.c', 'array')").collect()(0) + .getSeq[VariantVal](0)(3), 1000, 1020, 5, 10) + // Cast to a nested type containing Variant should also remove metadata. 
+ val structResult = df.selectExpr(s"cast(v as struct<$largeKey:variant,b:variant>)").collect()(0) + .getStruct(0) + checkSize(structResult.getAs[VariantVal](0), 5, 10, 5, 10) + checkSize(structResult.getAs[VariantVal](1), 2, 4, 2, 4) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/XmlFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/XmlFunctionsSuite.scala index 90a26af917aa9..1364fab3138e3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/XmlFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/XmlFunctionsSuite.scala @@ -22,6 +22,7 @@ import java.util.Locale import scala.jdk.CollectionConverters._ +import org.apache.spark.sql.execution.WholeStageCodegenExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -39,6 +40,16 @@ class XmlFunctionsSuite extends QueryTest with SharedSparkSession { Row(Row(1)) :: Nil) } + test("SPARK-48300: from_xml - Codegen Support") { + withTempView("XmlToStructsTable") { + val dataDF = Seq("""1""").toDF("value") + dataDF.createOrReplaceTempView("XmlToStructsTable") + val df = sql("SELECT from_xml(value, 'a INT') FROM XmlToStructsTable") + assert(df.queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec]) + checkAnswer(df, Row(Row(1)) :: Nil) + } + } + test("from_xml with option (timestampFormat)") { val df = Seq("""""").toDS() val schema = new StructType().add("time", TimestampType) @@ -55,7 +66,7 @@ class XmlFunctionsSuite extends QueryTest with SharedSparkSession { val options = Map("rowTag" -> "foo").asJava checkAnswer( - df.select(from_xml($"value", schema)), + df.select(from_xml($"value", schema, options)), Row(Row(1)) :: Nil) } @@ -110,6 +121,36 @@ class XmlFunctionsSuite extends QueryTest with SharedSparkSession { Row(Row(1, "haa")) :: Nil) } + test("SPARK-48363: from_xml with non struct schema") { + checkError( + exception = intercept[AnalysisException] { + Seq("1").toDS().select(from_xml($"value", lit("ARRAY"), Map[String, String]().asJava)) + }, + errorClass = "INVALID_SCHEMA.NON_STRUCT_TYPE", + parameters = Map( + "inputSchema" -> "\"ARRAY\"", + "dataType" -> "\"ARRAY\"" + ), + context = ExpectedContext(fragment = "from_xml", getCurrentClassCallSitePattern) + ) + + checkError( + exception = intercept[AnalysisException] { + Seq("1").toDF("xml").selectExpr(s"from_xml(xml, 'ARRAY')") + }, + errorClass = "INVALID_SCHEMA.NON_STRUCT_TYPE", + parameters = Map( + "inputSchema" -> "\"ARRAY\"", + "dataType" -> "\"ARRAY\"" + ), + context = ExpectedContext( + fragment = "from_xml(xml, 'ARRAY')", + start = 0, + stop = 26 + ) + ) + } + test("to_xml - struct") { val schema = StructType(StructField("a", IntegerType, nullable = false) :: Nil) val data = Seq(Row(1)) @@ -383,6 +424,22 @@ class XmlFunctionsSuite extends QueryTest with SharedSparkSession { } } + test("SPARK-48296: to_xml - Codegen Support") { + withTempView("StructsToXmlTable") { + val schema = StructType(StructField("a", IntegerType, nullable = false) :: Nil) + val dataDF = spark.createDataFrame(Seq(Row(1)).asJava, schema).withColumn("a", struct($"a")) + dataDF.createOrReplaceTempView("StructsToXmlTable") + val df = sql("SELECT to_xml(a) FROM StructsToXmlTable") + val plan = df.queryExecution.executedPlan + assert(plan.isInstanceOf[WholeStageCodegenExec]) + val expected = + s"""| + | 1 + |""".stripMargin + checkAnswer(df, Seq(Row(expected))) + } + } + test("corrupt record column in the middle") { val schema = new StructType() .add("a", 
IntegerType) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/collation/CollatedFilterPushDownToParquetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollatedFilterPushDownToParquetSuite.scala new file mode 100644 index 0000000000000..ab8e82162ce10 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/collation/CollatedFilterPushDownToParquetSuite.scala @@ -0,0 +1,265 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.collation + +import org.apache.parquet.schema.MessageType + +import org.apache.spark.SparkConf +import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.sql.catalyst.planning.PhysicalOperation +import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} +import org.apache.spark.sql.execution.datasources.parquet.{ParquetFilters, SparkToParquetSchemaConverter} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation +import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan +import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} +import org.apache.spark.sql.sources.{EqualTo, Filter, IsNotNull} +import org.apache.spark.sql.test.SharedSparkSession + +abstract class CollatedFilterPushDownToParquetSuite extends QueryTest + with SharedSparkSession + with AdaptiveSparkPlanHelper { + + val dataSource = "parquet" + val nonCollatedCol = "c0" + val collatedCol = "c1" + val collatedStructCol = "c2" + val collatedStructNestedCol = "f1" + val collatedStructFieldAccess = s"$collatedStructCol.$collatedStructNestedCol" + val collatedArrayCol = "c3" + val collatedMapCol = "c4" + + val lcaseCollation = "'UTF8_LCASE'" + + def getPushedDownFilters(query: DataFrame): Seq[Filter] + + protected def createParquetFilters(schema: MessageType): ParquetFilters = + new ParquetFilters(schema, conf.parquetFilterPushDownDate, conf.parquetFilterPushDownTimestamp, + conf.parquetFilterPushDownDecimal, conf.parquetFilterPushDownStringPredicate, + conf.parquetFilterPushDownInFilterThreshold, + conf.caseSensitiveAnalysis, + RebaseSpec(LegacyBehaviorPolicy.CORRECTED)) + + def testPushDown( + filterString: String, + expectedPushedFilters: Seq[Filter], + expectedRowCount: Int): Unit = { + withTempPath { path => + val df = sql( + s""" + |SELECT + | c as $nonCollatedCol, + | COLLATE(c, $lcaseCollation) as $collatedCol, + | named_struct('$collatedStructNestedCol', + | COLLATE(c, $lcaseCollation)) as $collatedStructCol, + | array(COLLATE(c, $lcaseCollation)) as $collatedArrayCol, + | map(COLLATE(c, $lcaseCollation), 1) as $collatedMapCol + |FROM 
VALUES ('aaa'), ('AAA'), ('bbb') + |as data(c) + |""".stripMargin) + + df.write.format(dataSource).save(path.getAbsolutePath) + + val query = spark.read.format(dataSource).load(path.getAbsolutePath) + .filter(filterString) + + val actualPushedFilters = getPushedDownFilters(query) + assert(actualPushedFilters.toSet === expectedPushedFilters.toSet) + assert(query.count() === expectedRowCount) + } + } + + test("do not push down anything for literal comparison") { + testPushDown( + filterString = s"'aaa' COLLATE UNICODE = 'bbb' COLLATE UNICODE", + expectedPushedFilters = Seq.empty, + expectedRowCount = 0) + } + + test("push down null check for collated column") { + testPushDown( + filterString = s"$collatedCol = 'aaa'", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 2) + } + + test("push down null check for non-equality check") { + testPushDown( + filterString = s"$collatedCol != 'aaa'", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 1) + } + + test("push down null check for greater than check") { + testPushDown( + filterString = s"$collatedCol > 'aaa'", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 1) + } + + test("push down null check for gte check") { + testPushDown( + filterString = s"$collatedCol >= 'aaa'", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 3) + } + + test("push down null check for less than check") { + testPushDown( + filterString = s"$collatedCol < 'aaa'", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 0) + } + + test("push down null check for lte check") { + testPushDown( + filterString = s"$collatedCol <= 'aaa'", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 2) + } + + test("push down null check for STARTSWITH") { + testPushDown( + filterString = s"STARTSWITH($collatedCol, 'a')", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 2) + } + + test("push down null check for ENDSWITH") { + testPushDown( + filterString = s"ENDSWITH($collatedCol, 'a')", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 2) + } + + test("push down null check for CONTAINS") { + testPushDown( + filterString = s"CONTAINS($collatedCol, 'a')", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 2) + } + + test("no push down for IN") { + testPushDown( + filterString = s"$collatedCol IN ('aaa', 'bbb')", + expectedPushedFilters = Seq.empty, + expectedRowCount = 3) + } + + test("push down null check for equality for non-collated column in AND") { + testPushDown( + filterString = s"$collatedCol = 'aaa' AND $nonCollatedCol = 'aaa'", + expectedPushedFilters = + Seq(IsNotNull(collatedCol), IsNotNull(nonCollatedCol), EqualTo(nonCollatedCol, "aaa")), + expectedRowCount = 1) + } + + test("for OR do not push down anything") { + testPushDown( + filterString = s"$collatedCol = 'aaa' OR $nonCollatedCol = 'aaa'", + expectedPushedFilters = Seq.empty, + expectedRowCount = 2) + } + + test("mix OR and AND") { + testPushDown( + filterString = s"$collatedCol = 'aaa' AND ($nonCollatedCol = 'aaa' OR $collatedCol = 'aaa')", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 2) + } + + test("negate check on collated column") { + testPushDown( + filterString = s"NOT($collatedCol == 'aaa')", + expectedPushedFilters = Seq(IsNotNull(collatedCol)), + expectedRowCount = 1) + } + + test("compare entire struct - parquet does not support null check on complex 
types") { + testPushDown( + filterString = s"$collatedStructCol = " + + s"named_struct('$collatedStructNestedCol', collate('aaa', $lcaseCollation))", + expectedPushedFilters = Seq.empty, + expectedRowCount = 2) + } + + test("inner struct field access") { + testPushDown( + filterString = s"$collatedStructFieldAccess = 'aaa'", + expectedPushedFilters = Seq(IsNotNull(collatedStructFieldAccess)), + expectedRowCount = 2) + } + + test("array - parquet does not support null check on complex types") { + testPushDown( + filterString = s"$collatedArrayCol = array(collate('aaa', $lcaseCollation))", + expectedPushedFilters = Seq.empty, + expectedRowCount = 2) + } + + test("map - parquet does not support null check on complex types") { + testPushDown( + filterString = s"map_keys($collatedMapCol) != array(collate('aaa', $lcaseCollation))", + expectedPushedFilters = Seq.empty, + expectedRowCount = 1) + } +} + +class CollatedFilterPushDownToParquetV1Suite extends CollatedFilterPushDownToParquetSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, dataSource) + + override def getPushedDownFilters(query: DataFrame): Seq[Filter] = { + var maybeRelation: Option[HadoopFsRelation] = None + val maybeAnalyzedPredicate = query.queryExecution.optimizedPlan.collect { + case PhysicalOperation(_, filters, + LogicalRelation(relation: HadoopFsRelation, _, _, _)) => + maybeRelation = Some(relation) + filters + }.flatten + + if (maybeAnalyzedPredicate.isEmpty) { + return Seq.empty + } + + val (_, selectedFilters, _) = + DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate) + + val schema = new SparkToParquetSchemaConverter(conf).convert(query.schema) + val parquetFilters = createParquetFilters(schema) + parquetFilters.convertibleFilters(selectedFilters) + } +} + +class CollatedFilterPushDownToParquetV2Suite extends CollatedFilterPushDownToParquetSuite { + override protected def sparkConf: SparkConf = + super + .sparkConf + .set(SQLConf.USE_V1_SOURCE_LIST, "") + + override def getPushedDownFilters(query: DataFrame): Seq[Filter] = { + query.queryExecution.optimizedPlan.collectFirst { + case PhysicalOperation(_, _, + DataSourceV2ScanRelation(_, scan: ParquetScan, _, _, _)) => + scan.pushedFilters.toSeq + }.getOrElse(Seq.empty) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala index 996d7acb1148d..28605958c71da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/AlterTableTests.scala @@ -466,7 +466,7 @@ trait AlterTableTests extends SharedSparkSession with QueryErrorsBase { exception = intercept[AnalysisException] { sql(s"ALTER TABLE $t ADD COLUMNS $field double") }, - errorClass = "FIELDS_ALREADY_EXISTS", + errorClass = "FIELD_ALREADY_EXISTS", parameters = expectedParameters, context = ExpectedContext( fragment = s"ALTER TABLE $t ADD COLUMNS $field double", @@ -1116,7 +1116,7 @@ trait AlterTableTests extends SharedSparkSession with QueryErrorsBase { exception = intercept[AnalysisException] { sql(s"ALTER TABLE $t RENAME COLUMN $field TO $newName") }, - errorClass = "FIELDS_ALREADY_EXISTS", + errorClass = "FIELD_ALREADY_EXISTS", parameters = Map( "op" -> "rename", "fieldNames" -> s"${toSQLId(expectedName)}", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala index 5d5ea6499c49d..7bbb6485c273f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2DataFrameSessionCatalogSuite.scala @@ -110,7 +110,14 @@ class InMemoryTableSessionCatalog extends TestV2SessionCatalogBase[InMemoryTable Option(tables.get(ident)) match { case Some(table) => val properties = CatalogV2Util.applyPropertiesChanges(table.properties, changes) - val schema = CatalogV2Util.applySchemaChanges(table.schema, changes, None, "ALTER TABLE") + val provider = Option(properties.get("provider")) + + val schema = CatalogV2Util.applySchemaChanges( + table.schema, + changes, + provider, + "ALTER TABLE" + ) // fail if the last column in the schema was dropped if (schema.fields.isEmpty) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index d89c0a2525fd9..0382efaf9d7e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -24,7 +24,7 @@ import java.util.Locale import scala.concurrent.duration.MICROSECONDS import scala.jdk.CollectionConverters._ -import org.apache.spark.{SparkException, SparkUnsupportedOperationException} +import org.apache.spark.{SparkException, SparkRuntimeException, SparkUnsupportedOperationException} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.InternalRow @@ -814,14 +814,10 @@ class DataSourceV2SQLSuiteV1Filter if (nullable) { insertNullValueAndCheck() } else { - // TODO assign a error-classes name - checkError( - exception = intercept[SparkException] { - insertNullValueAndCheck() - }, - errorClass = null, - parameters = Map.empty - ) + val exception = intercept[SparkRuntimeException] { + insertNullValueAndCheck() + } + assert(exception.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } } } @@ -1743,6 +1739,16 @@ class DataSourceV2SQLSuiteV1Filter } } + test("SPARK-48709: varchar resolution mismatch for DataSourceV2 CTAS") { + withSQLConf( + SQLConf.STORE_ASSIGNMENT_POLICY.key -> SQLConf.StoreAssignmentPolicy.LEGACY.toString) { + withTable("testcat.ns.t1", "testcat.ns.t2") { + sql("CREATE TABLE testcat.ns.t1 (d1 string, d2 varchar(200)) USING parquet") + sql("CREATE TABLE testcat.ns.t2 USING foo as select * from testcat.ns.t1") + } + } + } + test("ShowCurrentNamespace: basic tests") { def testShowCurrentNamespace(expectedCatalogName: String, expectedNamespace: String): Unit = { val schema = new StructType() @@ -3488,6 +3494,30 @@ class DataSourceV2SQLSuiteV1Filter } } + test("SPARK-48286: Add new column with default value which is not foldable") { + val foldableExpressions = Seq("1", "2 + 1") + withSQLConf(SQLConf.DEFAULT_COLUMN_ALLOWED_PROVIDERS.key -> v2Source) { + withTable("tab") { + spark.sql(s"CREATE TABLE tab (col1 INT DEFAULT 100) USING $v2Source") + val exception = intercept[AnalysisException] { + // Rand function is not foldable + spark.sql(s"ALTER TABLE tab ADD COLUMN col2 DOUBLE DEFAULT rand()") + } + assert(exception.getSqlState == "42623") + assert(exception.errorClass.get == "INVALID_DEFAULT_VALUE.NOT_CONSTANT") + assert(exception.messageParameters("colName") == "`col2`") + 
assert(exception.messageParameters("defaultValue") == "rand()") + assert(exception.messageParameters("statement") == "ALTER TABLE") + } + foldableExpressions.foreach(expr => { + withTable("tab") { + spark.sql(s"CREATE TABLE tab (col1 INT DEFAULT 100) USING $v2Source") + spark.sql(s"ALTER TABLE tab ADD COLUMN col2 DOUBLE DEFAULT $expr") + } + }) + } + } + private def testNotSupportedV2Command( sqlCommand: String, sqlParams: String, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala index ec275fe101fd6..d77a6e8b8ac16 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala @@ -298,6 +298,12 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { Row("bbb", 20, 250.0), Row("bbb", 20, 350.0), Row("ccc", 30, 400.50))) } + private def collectAllShuffles(plan: SparkPlan): Seq[ShuffleExchangeExec] = { + collect(plan) { + case s: ShuffleExchangeExec => s + } + } + private def collectShuffles(plan: SparkPlan): Seq[ShuffleExchangeExec] = { // here we skip collecting shuffle operators that are not associated with SMJ collect(plan) { @@ -346,6 +352,23 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { Column.create("price", FloatType), Column.create("time", TimestampType)) + test("SPARK-48655: group by on partition keys should not introduce additional shuffle") { + val items_partitions = Array(identity("id")) + createTable(items, itemsColumns, items_partitions) + sql(s"INSERT INTO testcat.ns.$items VALUES " + + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + s"(1, 'aa', 41.0, cast('2020-01-02' as timestamp)), " + + s"(2, 'bb', 10.0, cast('2020-01-01' as timestamp)), " + + s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp))") + + val df = sql(s"SELECT MAX(price) AS res FROM testcat.ns.$items GROUP BY id") + val shuffles = collectAllShuffles(df.queryExecution.executedPlan) + assert(shuffles.isEmpty, + "should contain shuffle when not grouping by partition values") + + checkAnswer(df.sort("res"), Seq(Row(10.0), Row(15.5), Row(41.0))) + } + test("partitioned join: join with two partition keys and matching & sorted partitions") { val items_partitions = Array(bucket(8, "id"), days("arrive_time")) createTable(items, itemsColumns, items_partitions) @@ -1136,7 +1159,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { val df = createJoinTestDF(Seq("arrive_time" -> "time")) val shuffles = collectShuffles(df.queryExecution.executedPlan) if (shuffle) { - assert(shuffles.size == 2, "partitioning with transform not work now") + assert(shuffles.size == 1, "partitioning with transform should trigger SPJ") } else { assert(shuffles.size == 2, "should add two side shuffle when bucketing shuffle one side" + " is not enabled") @@ -1227,6 +1250,66 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { } } + test("SPARK-48065: SPJ: allowJoinKeysSubsetOfPartitionKeys is too strict") { + val table1 = "tab1e1" + val table2 = "table2" + val partition = Array(identity("id")) + createTable(table1, columns, partition) + sql(s"INSERT INTO testcat.ns.$table1 VALUES " + + "(1, 'aa', cast('2020-01-01' as timestamp)), " + + "(2, 'bb', cast('2020-01-01' as timestamp)), " + + "(2, 'cc', cast('2020-01-01' as timestamp)), " + + "(3, 'dd', 
cast('2020-01-01' as timestamp)), " + + "(3, 'dd', cast('2020-01-01' as timestamp)), " + + "(3, 'ee', cast('2020-01-01' as timestamp)), " + + "(3, 'ee', cast('2020-01-01' as timestamp))") + + createTable(table2, columns, partition) + sql(s"INSERT INTO testcat.ns.$table2 VALUES " + + "(4, 'zz', cast('2020-01-01' as timestamp)), " + + "(4, 'zz', cast('2020-01-01' as timestamp)), " + + "(3, 'dd', cast('2020-01-01' as timestamp)), " + + "(3, 'dd', cast('2020-01-01' as timestamp)), " + + "(3, 'xx', cast('2020-01-01' as timestamp)), " + + "(3, 'xx', cast('2020-01-01' as timestamp)), " + + "(2, 'ww', cast('2020-01-01' as timestamp))") + + Seq(true, false).foreach { pushDownValues => + Seq(true, false).foreach { partiallyClustered => + withSQLConf( + SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "false", + SQLConf.V2_BUCKETING_PUSH_PART_VALUES_ENABLED.key -> pushDownValues.toString, + SQLConf.V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED.key -> + partiallyClustered.toString, + SQLConf.V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS.key -> "true") { + val df = sql( + s""" + |${selectWithMergeJoinHint("t1", "t2")} + |t1.id AS id, t1.data AS t1data, t2.data AS t2data + |FROM testcat.ns.$table1 t1 JOIN testcat.ns.$table2 t2 + |ON t1.id = t2.id AND t1.data = t2.data ORDER BY t1.id, t1data, t2data + |""".stripMargin) + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.isEmpty, "SPJ should be triggered") + + val scans = collectScans(df.queryExecution.executedPlan) + .map(_.inputRDD.partitions.length) + if (partiallyClustered) { + assert(scans == Seq(8, 8)) + } else { + assert(scans == Seq(4, 4)) + } + checkAnswer(df, Seq( + Row(3, "dd", "dd"), + Row(3, "dd", "dd"), + Row(3, "dd", "dd"), + Row(3, "dd", "dd") + )) + } + } + } + } + test("SPARK-44647: test join key is subset of cluster key " + "with push values and partially-clustered") { val table1 = "tab1e1" @@ -1931,22 +2014,19 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { "(6, 50.0, cast('2023-02-01' as timestamp))") Seq(true, false).foreach { pushdownValues => - Seq(true, false).foreach { partiallyClustered => - withSQLConf( - SQLConf.V2_BUCKETING_SHUFFLE_ENABLED.key -> "true", - SQLConf.V2_BUCKETING_PUSH_PART_VALUES_ENABLED.key -> pushdownValues.toString, - SQLConf.V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED.key - -> partiallyClustered.toString, - SQLConf.V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS.key -> "true") { - val df = createJoinTestDF(Seq("id" -> "item_id")) - val shuffles = collectShuffles(df.queryExecution.executedPlan) - assert(shuffles.size == 1, "SPJ should be triggered") - checkAnswer(df, Seq(Row(1, "aa", 30.0, 42.0), - Row(1, "aa", 30.0, 89.0), - Row(1, "aa", 40.0, 42.0), - Row(1, "aa", 40.0, 89.0), - Row(3, "bb", 10.0, 19.5))) - } + withSQLConf( + SQLConf.V2_BUCKETING_SHUFFLE_ENABLED.key -> "true", + SQLConf.V2_BUCKETING_PUSH_PART_VALUES_ENABLED.key -> pushdownValues.toString, + SQLConf.V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED.key -> "false", + SQLConf.V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS.key -> "true") { + val df = createJoinTestDF(Seq("id" -> "item_id")) + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.size == 1, "SPJ should be triggered") + checkAnswer(df, Seq(Row(1, "aa", 30.0, 42.0), + Row(1, "aa", 30.0, 89.0), + Row(1, "aa", 40.0, 42.0), + Row(1, "aa", 40.0, 89.0), + Row(3, "bb", 10.0, 19.5))) } } } @@ -1992,4 +2072,109 @@ class KeyGroupedPartitioningSuite 
extends DistributionAndOrderingSuiteBase { } } } + + test("SPARK-48012: one-side shuffle with partition transforms") { + val items_partitions = Array(bucket(2, "id"), identity("arrive_time")) + val items_partitions2 = Array(identity("arrive_time"), bucket(2, "id")) + + Seq(items_partitions, items_partitions2).foreach { partition => + catalog.clearTables() + + createTable(items, itemsColumns, partition) + sql(s"INSERT INTO testcat.ns.$items VALUES " + + "(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + "(1, 'bb', 30.0, cast('2020-01-01' as timestamp)), " + + "(1, 'cc', 30.0, cast('2020-01-02' as timestamp)), " + + "(3, 'dd', 10.0, cast('2020-01-01' as timestamp)), " + + "(4, 'ee', 15.5, cast('2020-02-01' as timestamp)), " + + "(5, 'ff', 32.1, cast('2020-03-01' as timestamp))") + + createTable(purchases, purchasesColumns, Array.empty) + sql(s"INSERT INTO testcat.ns.$purchases VALUES " + + "(1, 42.0, cast('2020-01-01' as timestamp)), " + + "(2, 10.7, cast('2020-01-01' as timestamp))," + + "(3, 19.5, cast('2020-02-01' as timestamp))," + + "(4, 56.5, cast('2020-02-01' as timestamp))") + + withSQLConf( + SQLConf.V2_BUCKETING_SHUFFLE_ENABLED.key -> "true") { + val df = createJoinTestDF(Seq("id" -> "item_id", "arrive_time" -> "time")) + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.size == 1, "only shuffle side that does not report partitioning") + + checkAnswer(df, Seq( + Row(1, "bb", 30.0, 42.0), + Row(1, "aa", 40.0, 42.0), + Row(4, "ee", 15.5, 56.5))) + } + } + } + + test("SPARK-48012: one-side shuffle with partition transforms and pushdown values") { + val items_partitions = Array(bucket(2, "id"), identity("arrive_time")) + createTable(items, itemsColumns, items_partitions) + + sql(s"INSERT INTO testcat.ns.$items VALUES " + + "(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + "(1, 'bb', 30.0, cast('2020-01-01' as timestamp)), " + + "(1, 'cc', 30.0, cast('2020-01-02' as timestamp))") + + createTable(purchases, purchasesColumns, Array.empty) + sql(s"INSERT INTO testcat.ns.$purchases VALUES " + + "(1, 42.0, cast('2020-01-01' as timestamp)), " + + "(2, 10.7, cast('2020-01-01' as timestamp))") + + Seq(true, false).foreach { pushDown => { + withSQLConf( + SQLConf.V2_BUCKETING_SHUFFLE_ENABLED.key -> "true", + SQLConf.V2_BUCKETING_PUSH_PART_VALUES_ENABLED.key -> + pushDown.toString) { + val df = createJoinTestDF(Seq("id" -> "item_id", "arrive_time" -> "time")) + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.size == 1, "only shuffle side that does not report partitioning") + + checkAnswer(df, Seq( + Row(1, "bb", 30.0, 42.0), + Row(1, "aa", 40.0, 42.0))) + } + } + } + } + + test("SPARK-48012: one-side shuffle with partition transforms " + + "with fewer join keys than partition keys") { + val items_partitions = Array(bucket(2, "id"), identity("name")) + createTable(items, itemsColumns, items_partitions) + + sql(s"INSERT INTO testcat.ns.$items VALUES " + + "(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + "(1, 'aa', 30.0, cast('2020-01-02' as timestamp)), " + + "(3, 'bb', 10.0, cast('2020-01-01' as timestamp)), " + + "(4, 'cc', 15.5, cast('2020-02-01' as timestamp))") + + createTable(purchases, purchasesColumns, Array.empty) + sql(s"INSERT INTO testcat.ns.$purchases VALUES " + + "(1, 42.0, cast('2020-01-01' as timestamp)), " + + "(1, 89.0, cast('2020-01-03' as timestamp)), " + + "(3, 19.5, cast('2020-02-01' as timestamp)), " + + "(5, 26.0, cast('2023-01-01' as timestamp)), " + + "(6, 50.0, cast('2023-02-01' as 
timestamp))") + + withSQLConf( + SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "false", + SQLConf.V2_BUCKETING_SHUFFLE_ENABLED.key -> "true", + SQLConf.V2_BUCKETING_PUSH_PART_VALUES_ENABLED.key -> "true", + SQLConf.V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED.key -> "false", + SQLConf.V2_BUCKETING_ALLOW_JOIN_KEYS_SUBSET_OF_PARTITION_KEYS.key -> "true") { + val df = createJoinTestDF(Seq("id" -> "item_id")) + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.size == 2, "SPJ should not be triggered for transform expression with" + + "less join keys than partition keys for now.") + checkAnswer(df, Seq(Row(1, "aa", 30.0, 42.0), + Row(1, "aa", 30.0, 89.0), + Row(1, "aa", 40.0, 42.0), + Row(1, "aa", 40.0, 89.0), + Row(3, "bb", 10.0, 19.5))) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoDataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoDataFrameSuite.scala index ed44111c81d2a..c080a66bce257 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoDataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoDataFrameSuite.scala @@ -943,4 +943,32 @@ class MergeIntoDataFrameSuite extends RowLevelOperationSuiteBase { Row(3, Row("y1 ", "y2"), "hr"))) // update (not matched by source) } } + + test("withSchemaEvolution carries over existing when clauses") { + withTempView("source") { + Seq(1, 2, 4).toDF("pk").createOrReplaceTempView("source") + + // an arbitrary merge + val writer1 = spark.table("source") + .mergeInto("dummy", $"col" === $"col") + .whenMatched(col("col") === 1) + .updateAll() + .whenMatched() + .delete() + .whenNotMatched(col("col") === 1) + .insertAll() + .whenNotMatchedBySource(col("col") === 1) + .delete() + val writer2 = writer1.withSchemaEvolution() + + assert(writer1.matchedActions.length === 2) + assert(writer1.notMatchedActions.length === 1) + assert(writer1.notMatchedBySourceActions.length === 1) + + assert(writer1.matchedActions === writer2.matchedActions) + assert(writer1.notMatchedActions === writer2.notMatchedActions) + assert(writer1.notMatchedBySourceActions === writer2.notMatchedBySourceActions) + assert(writer2.schemaEvolutionEnabled) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala index 0b643ca534e39..9d4e4fc016722 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.connector -import org.apache.spark.{SparkException, SparkRuntimeException} +import org.apache.spark.SparkRuntimeException import org.apache.spark.sql.{AnalysisException, Row} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, In, Not} import org.apache.spark.sql.catalyst.optimizer.BuildLeft @@ -1317,7 +1317,7 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase { Seq(1, 4).toDF("pk").createOrReplaceTempView("source") - val e1 = intercept[SparkException] { + val e1 = intercept[SparkRuntimeException] { sql( s"""MERGE INTO $tableNameAsString t |USING source s @@ -1326,9 +1326,9 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase { | UPDATE SET s = named_struct('n_i', null, 'n_l', -1L) |""".stripMargin) } - 
assert(e1.getCause.getMessage.contains("Null value appeared in non-nullable field")) + assert(e1.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") - val e2 = intercept[SparkException] { + val e2 = intercept[SparkRuntimeException] { sql( s"""MERGE INTO $tableNameAsString t |USING source s @@ -1337,9 +1337,9 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase { | UPDATE SET s = named_struct('n_i', null, 'n_l', -1L) |""".stripMargin) } - assert(e2.getCause.getMessage.contains("Null value appeared in non-nullable field")) + assert(e2.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") - val e3 = intercept[SparkException] { + val e3 = intercept[SparkRuntimeException] { sql( s"""MERGE INTO $tableNameAsString t |USING source s @@ -1348,7 +1348,7 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase { | INSERT (pk, s, dep) VALUES (s.pk, named_struct('n_i', null, 'n_l', -1L), 'invalid') |""".stripMargin) } - assert(e3.getCause.getMessage.contains("Null value appeared in non-nullable field")) + assert(e3.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/UpdateTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/UpdateTableSuiteBase.scala index b43101c2e0255..c2ae5f40cfaf6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/UpdateTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/UpdateTableSuiteBase.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.connector -import org.apache.spark.SparkException +import org.apache.spark.SparkRuntimeException import org.apache.spark.sql.Row import org.apache.spark.sql.connector.catalog.{Column, ColumnDefaultValue} import org.apache.spark.sql.connector.expressions.LiteralValue @@ -575,9 +575,12 @@ abstract class UpdateTableSuiteBase extends RowLevelOperationSuiteBase { |{ "pk": 3, "s": { "n_i": 3, "n_l": 33 }, "dep": "hr" } |""".stripMargin) - val e = intercept[SparkException] { - sql(s"UPDATE $tableNameAsString SET s = named_struct('n_i', null, 'n_l', -1L) WHERE pk = 1") - } - assert(e.getCause.getMessage.contains("Null value appeared in non-nullable field")) + checkError( + exception = intercept[SparkRuntimeException] { + sql(s"UPDATE $tableNameAsString SET s = named_struct('n_i', null, 'n_l', -1L) WHERE pk = 1") + }, + errorClass = "NOT_NULL_ASSERT_VIOLATION", + sqlState = "42000", + parameters = Map("walkedTypePath" -> "\ns\nn_i\n")) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala index ee71bd3af1e02..3ab7edb78439c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/V2CommandsCaseSensitivitySuite.scala @@ -306,7 +306,7 @@ class V2CommandsCaseSensitivitySuite None, Some(UnresolvedFieldPosition(ColumnPosition.after("id"))), None))), - "FIELDS_ALREADY_EXISTS", + "FIELD_ALREADY_EXISTS", Map( "op" -> "add", "fieldNames" -> "`ID`", @@ -317,7 +317,7 @@ class V2CommandsCaseSensitivitySuite test("SPARK-36381: Check column name exist case sensitive and insensitive when rename column") { alterTableErrorClass( RenameColumn(table, UnresolvedFieldName(Array("id").toImmutableArraySeq), "DATA"), - "FIELDS_ALREADY_EXISTS", + "FIELD_ALREADY_EXISTS", Map( "op" -> "rename", "fieldNames" -> "`DATA`", diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/connector/catalog/functions/transformFunctions.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/catalog/functions/transformFunctions.scala index 5cdb900901056..5364fc5d62423 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/catalog/functions/transformFunctions.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/catalog/functions/transformFunctions.scala @@ -16,9 +16,11 @@ */ package org.apache.spark.sql.connector.catalog.functions -import java.sql.Timestamp +import java.time.{Instant, LocalDate, ZoneId} +import java.time.temporal.ChronoUnit import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -44,7 +46,13 @@ object YearsFunction extends ScalarFunction[Long] { override def name(): String = "years" override def canonicalName(): String = name() - def invoke(ts: Long): Long = new Timestamp(ts).getYear + 1900 + val UTC: ZoneId = ZoneId.of("UTC") + val EPOCH_LOCAL_DATE: LocalDate = Instant.EPOCH.atZone(UTC).toLocalDate + + def invoke(ts: Long): Long = { + val localDate = DateTimeUtils.microsToInstant(ts).atZone(UTC).toLocalDate + ChronoUnit.YEARS.between(EPOCH_LOCAL_DATE, localDate) + } } object DaysFunction extends BoundFunction { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 4574d3328d48a..958d2b0130d8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -926,6 +926,37 @@ class QueryCompilationErrorsSuite parameters = Map("message" -> "Cannot convert Spark data type \"DUMMY\" to any Parquet type.") ) } + + test("SPARK-48556: Ensure UNRESOLVED_COLUMN is thrown when query has grouping expressions " + + "with invalid column name") { + case class UnresolvedDummyColumnTest(query: String, pos: Int) + + withTable("t1") { + sql("create table t1(a int, b int) using parquet") + val tests = Seq( + UnresolvedDummyColumnTest("select grouping(a), dummy from t1 group by a with rollup", 20), + UnresolvedDummyColumnTest("select dummy, grouping(a) from t1 group by a with rollup", 7), + UnresolvedDummyColumnTest( + "select a, case when grouping(a) = 1 then 0 else b end, count(dummy) from t1 " + + "group by 1 with rollup", + 61), + UnresolvedDummyColumnTest( + "select a, max(dummy), case when grouping(a) = 1 then 0 else b end " + + "from t1 group by 1 with rollup", + 14) + ) + tests.foreach(test => { + checkError( + exception = intercept[AnalysisException] { + sql(test.query) + }, + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + parameters = Map("objectName" -> "`dummy`", "proposal" -> "`a`, `b`"), + context = ExpectedContext(fragment = "dummy", start = test.pos, stop = test.pos + 4) + ) + }) + } + } } class MyCastToString extends SparkUserDefinedFunction( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index d381dae6ea293..b7fb65091ef73 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -32,6 +32,22 @@ class QueryParsingErrorsSuite 
extends QueryTest with SharedSparkSession with SQL intercept[ParseException](sql(sqlText).collect()) } + test("PARSE_STACK_OVERFLOW_ERROR: Stack overflow hit") { + val query = (1 to 20000).map(x => "SELECT 1 as a").mkString(" UNION ALL ") + val e = intercept[ParseException] { + spark.sql(query) + } + checkError( + exception = parseException(query), + errorClass = "FAILED_TO_PARSE_TOO_COMPLEX", + parameters = Map(), + context = ExpectedContext( + query, + start = 0, + stop = query.length - 1) + ) + } + test("EXEC_IMMEDIATE_DUPLICATE_ARGUMENT_ALIASES: duplicate aliases provided in using statement") { val query = "EXECUTE IMMEDIATE 'SELECT 1707 WHERE ? = 1' USING 1 as first" + ", 2 as first, 3 as second, 4 as second, 5 as third" @@ -272,7 +288,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL stop = 27)) } - test("INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_IF_NOT_EXISTS_AND_REPLACE: " + + test("INVALID_SQL_SYNTAX.CREATE_ROUTINE_WITH_IF_NOT_EXISTS_AND_REPLACE: " + "Create function with both if not exists and replace") { val sqlText = """CREATE OR REPLACE FUNCTION IF NOT EXISTS func1 as @@ -281,7 +297,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL checkError( exception = parseException(sqlText), - errorClass = "INVALID_SQL_SYNTAX.CREATE_FUNC_WITH_IF_NOT_EXISTS_AND_REPLACE", + errorClass = "INVALID_SQL_SYNTAX.CREATE_ROUTINE_WITH_IF_NOT_EXISTS_AND_REPLACE", sqlState = "42000", context = ExpectedContext( fragment = sqlText, @@ -631,6 +647,13 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL sqlState = "42K01", parameters = Map("elementType" -> ""), context = ExpectedContext(fragment = "ARRAY", start = 30, stop = 34)) + // Create column of array type without specifying element type in lowercase + checkError( + exception = parseException("CREATE TABLE tbl_120691 (col1 array)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY", + sqlState = "42K01", + parameters = Map("elementType" -> ""), + context = ExpectedContext(fragment = "array", start = 30, stop = 34)) } test("INCOMPLETE_TYPE_DEFINITION: struct type definition is incomplete") { @@ -658,6 +681,12 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL errorClass = "PARSE_SYNTAX_ERROR", sqlState = "42601", parameters = Map("error" -> "'<'", "hint" -> ": missing ')'")) + // Create column of struct type without specifying field type in lowercase + checkError( + exception = parseException("CREATE TABLE tbl_120691 (col1 struct)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT", + sqlState = "42K01", + context = ExpectedContext(fragment = "struct", start = 30, stop = 35)) } test("INCOMPLETE_TYPE_DEFINITION: map type definition is incomplete") { @@ -679,6 +708,12 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession with SQL errorClass = "PARSE_SYNTAX_ERROR", sqlState = "42601", parameters = Map("error" -> "'<'", "hint" -> ": missing ')'")) + // Create column of map type without specifying key/value types in lowercase + checkError( + exception = parseException("SELECT CAST(map('1',2) AS map)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP", + sqlState = "42K01", + context = ExpectedContext(fragment = "map", start = 26, stop = 28)) } test("INVALID_ESC: Escape string must contain only one character") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala 
index e87b90dfdd84a..dc72b4a092aef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.internal.config.IO_ENCRYPTION_ENABLED import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql._ +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.execution.adaptive._ import org.apache.spark.sql.execution.adaptive.AQEShuffleReadExec import org.apache.spark.sql.execution.exchange.ReusedExchangeExec @@ -28,7 +29,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.ArrayImplicits._ -class CoalesceShufflePartitionsSuite extends SparkFunSuite { +class CoalesceShufflePartitionsSuite extends SparkFunSuite with SQLConfHelper { private var originalActiveSparkSession: Option[SparkSession] = _ private var originalInstantiatedSparkSession: Option[SparkSession] = _ @@ -374,72 +375,73 @@ class CoalesceShufflePartitionsSuite extends SparkFunSuite { test("SPARK-24705 adaptive query execution works correctly when exchange reuse enabled") { val test: SparkSession => Unit = { spark: SparkSession => - spark.sql("SET spark.sql.exchange.reuse=true") - val df = spark.range(0, 6, 1).selectExpr("id AS key", "id AS value") - - // test case 1: a query stage has 3 child stages but they are the same stage. - // Final Stage 1 - // ShuffleQueryStage 0 - // ReusedQueryStage 0 - // ReusedQueryStage 0 - val resultDf = df.join(df, "key").join(df, "key") - QueryTest.checkAnswer(resultDf, (0 to 5).map(i => Row(i, i, i, i))) - val finalPlan = resultDf.queryExecution.executedPlan - .asInstanceOf[AdaptiveSparkPlanExec].executedPlan - assert(finalPlan.collect { - case ShuffleQueryStageExec(_, r: ReusedExchangeExec, _) => r - }.length == 2) - assert( - finalPlan.collect { - case r @ CoalescedShuffleRead() => r - }.length == 3) - - - // test case 2: a query stage has 2 parent stages. - // Final Stage 3 - // ShuffleQueryStage 1 - // ShuffleQueryStage 0 - // ShuffleQueryStage 2 - // ReusedQueryStage 0 - val grouped = df.groupBy((col("key") + 1).as("key")).agg(max("value").as("value")) - val resultDf2 = grouped.groupBy(col("key") + 1).max("value") - .union(grouped.groupBy(col("key") + 2).max("value")) - QueryTest.checkAnswer(resultDf2, Row(2, 0) :: Row(3, 0) :: Row(3, 1) :: Row(4, 1) :: - Row(4, 2) :: Row(5, 2) :: Row(5, 3) :: Row(6, 3) :: Row(6, 4) :: Row(7, 4) :: Row(7, 5) :: - Row(8, 5) :: Nil) - - val finalPlan2 = resultDf2.queryExecution.executedPlan - .asInstanceOf[AdaptiveSparkPlanExec].executedPlan - - // The result stage has 2 children - val level1Stages = finalPlan2.collect { case q: QueryStageExec => q } - assert(level1Stages.length == 2) - - assert( - finalPlan2.collect { - case r @ CoalescedShuffleRead() => r - }.length == 2, "finalPlan2") + withSQLConf("spark.sql.exchange.reuse" -> "true") { + val df = spark.range(0, 6, 1).selectExpr("id AS key", "id AS value") + + // test case 1: a query stage has 3 child stages but they are the same stage. 
+ // Final Stage 1 + // ShuffleQueryStage 0 + // ReusedQueryStage 0 + // ReusedQueryStage 0 + val resultDf = df.join(df, "key").join(df, "key") + QueryTest.checkAnswer(resultDf, (0 to 5).map(i => Row(i, i, i, i))) + val finalPlan = resultDf.queryExecution.executedPlan + .asInstanceOf[AdaptiveSparkPlanExec].executedPlan + assert(finalPlan.collect { + case ShuffleQueryStageExec(_, r: ReusedExchangeExec, _) => r + }.length == 2) + assert( + finalPlan.collect { + case r@CoalescedShuffleRead() => r + }.length == 3) + + + // test case 2: a query stage has 2 parent stages. + // Final Stage 3 + // ShuffleQueryStage 1 + // ShuffleQueryStage 0 + // ShuffleQueryStage 2 + // ReusedQueryStage 0 + val grouped = df.groupBy((col("key") + 1).as("key")).agg(max("value").as("value")) + val resultDf2 = grouped.groupBy(col("key") + 1).max("value") + .union(grouped.groupBy(col("key") + 2).max("value")) + QueryTest.checkAnswer(resultDf2, Row(2, 0) :: Row(3, 0) :: Row(3, 1) :: Row(4, 1) :: + Row(4, 2) :: Row(5, 2) :: Row(5, 3) :: Row(6, 3) :: Row(6, 4) :: Row(7, 4) :: Row(7, 5) :: + Row(8, 5) :: Nil) + + val finalPlan2 = resultDf2.queryExecution.executedPlan + .asInstanceOf[AdaptiveSparkPlanExec].executedPlan - level1Stages.foreach(qs => - assert(qs.plan.collect { - case r @ CoalescedShuffleRead() => r - }.length == 1, - "Wrong CoalescedShuffleRead below " + qs.simpleString(3))) - - val leafStages = level1Stages.flatMap { stage => - // All of the child stages of result stage have only one child stage. - val children = stage.plan.collect { case q: QueryStageExec => q } - assert(children.length == 1) - children - } - assert(leafStages.length == 2) + // The result stage has 2 children + val level1Stages = finalPlan2.collect { case q: QueryStageExec => q } + assert(level1Stages.length == 2) + + assert( + finalPlan2.collect { + case r@CoalescedShuffleRead() => r + }.length == 2, "finalPlan2") + + level1Stages.foreach(qs => + assert(qs.plan.collect { + case r@CoalescedShuffleRead() => r + }.length == 1, + "Wrong CoalescedShuffleRead below " + qs.simpleString(3))) + + val leafStages = level1Stages.flatMap { stage => + // All of the child stages of result stage have only one child stage. 
+ val children = stage.plan.collect { case q: QueryStageExec => q } + assert(children.length == 1) + children + } + assert(leafStages.length == 2) - val reusedStages = level1Stages.flatMap { stage => - stage.plan.collect { - case ShuffleQueryStageExec(_, r: ReusedExchangeExec, _) => r + val reusedStages = level1Stages.flatMap { stage => + stage.plan.collect { + case ShuffleQueryStageExec(_, r: ReusedExchangeExec, _) => r + } } + assert(reusedStages.length == 1) } - assert(reusedStages.length == 1) } withSparkSession(test, 400, None) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExecuteImmediateEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExecuteImmediateEndToEndSuite.scala index 41ddcef89b7d4..6b0f0b5582dc5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ExecuteImmediateEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ExecuteImmediateEndToEndSuite.scala @@ -16,7 +16,8 @@ */ package org.apache.spark.sql.execution -import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.{QueryTest} +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.test.SharedSparkSession class ExecuteImmediateEndToEndSuite extends QueryTest with SharedSparkSession { @@ -36,4 +37,30 @@ class ExecuteImmediateEndToEndSuite extends QueryTest with SharedSparkSession { spark.sql("DROP TEMPORARY VARIABLE IF EXISTS parm;") } } + + test("EXEC IMMEDIATE STACK OVERFLOW") { + try { + spark.sql("DECLARE parm = 1;") + val query = (1 to 20000).map(x => "SELECT 1 as a").mkString(" UNION ALL ") + Seq( + s"EXECUTE IMMEDIATE '$query'", + s"EXECUTE IMMEDIATE '$query' INTO parm").foreach { q => + val e = intercept[ParseException] { + spark.sql(q) + } + + checkError( + exception = intercept[ParseException](sql(query).collect()), + errorClass = "FAILED_TO_PARSE_TOO_COMPLEX", + parameters = Map(), + context = ExpectedContext( + query, + start = 0, + stop = query.length - 1) + ) + } + } finally { + spark.sql("DROP TEMPORARY VARIABLE IF EXISTS parm;") + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala index 6e2200380d6cc..31d8dd0740e14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/GlobalTempViewSuite.scala @@ -30,7 +30,7 @@ class GlobalTempViewSuite extends QueryTest with SharedSparkSession { override protected def beforeAll(): Unit = { super.beforeAll() - globalTempDB = spark.sharedState.globalTempViewManager.database + globalTempDB = spark.sharedState.globalTempDB } private var globalTempDB: String = _ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala index f8366b3f7c5fa..936aaba51935a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/HiveResultSuite.scala @@ -61,7 +61,8 @@ class HiveResultSuite extends SharedSparkSession { test("toHiveString correctly handles UDTs") { val point = new ExamplePoint(50.0, 50.0) val tpe = new ExamplePointUDT() - assert(toHiveString((point, tpe), false, getTimeFormatters) === "(50.0, 50.0)") + assert(toHiveString((point, tpe), false, getTimeFormatters, getBinaryFormatter) === + "(50.0, 50.0)") } test("decimal 
formatting in hive result") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index 15de4c5cc5b2d..1400ee25f4319 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -744,6 +744,14 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { } test("SPARK-24500: create union with stream of children") { + @scala.annotation.nowarn("cat=deprecation") + val df = Union(Stream( + Range(1, 1, 1, 1), + Range(1, 2, 1, 1))) + df.queryExecution.executedPlan.execute() + } + + test("SPARK-45685: create union with LazyList of children") { val df = Union(LazyList( Range(1, 1, 1, 1), Range(1, 2, 1, 1))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala index 73e5165829327..3608e7c920767 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -33,6 +33,7 @@ import org.apache.spark.sql.execution.datasources.v2.ShowTablesExec import org.apache.spark.sql.execution.joins.SortMergeJoinExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.storage.ShuffleIndexBlockId import org.apache.spark.util.Utils case class QueryExecutionTestRecord( @@ -314,6 +315,48 @@ class QueryExecutionSuite extends SharedSparkSession { mockCallback.assertExecutedPlanPrepared() } + private def cleanupShuffles(): Unit = { + val blockManager = spark.sparkContext.env.blockManager + blockManager.diskBlockManager.getAllBlocks().foreach { + case ShuffleIndexBlockId(shuffleId, _, _) => + spark.sparkContext.env.shuffleManager.unregisterShuffle(shuffleId) + case _ => + } + } + + test("SPARK-47764: Cleanup shuffle dependencies - DoNotCleanup mode") { + val plan = spark.range(100).repartition(10).logicalPlan + val df = Dataset.ofRows(spark, plan, DoNotCleanup) + df.collect() + + val blockManager = spark.sparkContext.env.blockManager + assert(blockManager.migratableResolver.getStoredShuffles().nonEmpty) + assert(blockManager.diskBlockManager.getAllBlocks().nonEmpty) + cleanupShuffles() + } + + test("SPARK-47764: Cleanup shuffle dependencies - SkipMigration mode") { + val plan = spark.range(100).repartition(10).logicalPlan + val df = Dataset.ofRows(spark, plan, SkipMigration) + df.collect() + + val blockManager = spark.sparkContext.env.blockManager + assert(blockManager.migratableResolver.getStoredShuffles().isEmpty) + assert(blockManager.diskBlockManager.getAllBlocks().nonEmpty) + cleanupShuffles() + } + + test("SPARK-47764: Cleanup shuffle dependencies - RemoveShuffleFiles mode") { + val plan = spark.range(100).repartition(10).logicalPlan + val df = Dataset.ofRows(spark, plan, RemoveShuffleFiles) + df.collect() + + val blockManager = spark.sparkContext.env.blockManager + assert(blockManager.migratableResolver.getStoredShuffles().isEmpty) + assert(blockManager.diskBlockManager.getAllBlocks().isEmpty) + cleanupShuffles() + } + test("SPARK-35378: Return UnsafeRow in CommandResultExecCheck execute methods") { val plan = spark.sql("SHOW FUNCTIONS").queryExecution.executedPlan assert(plan.isInstanceOf[CommandResultExec]) diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala index 48860f381efa8..b8a109919f8f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLExecutionSuite.scala @@ -29,12 +29,13 @@ import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.launcher.SparkLauncher import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart} import org.apache.spark.sql.{Row, SparkSession} +import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart import org.apache.spark.sql.types._ import org.apache.spark.util.ThreadUtils import org.apache.spark.util.Utils.REDACTION_REPLACEMENT_TEXT -class SQLExecutionSuite extends SparkFunSuite { +class SQLExecutionSuite extends SparkFunSuite with SQLConfHelper { test("concurrent query execution (SPARK-10548)") { val conf = new SparkConf() @@ -194,9 +195,9 @@ class SQLExecutionSuite extends SparkFunSuite { start.physicalPlanDescription.toLowerCase(Locale.ROOT).contains("project") }) spark.sql("SELECT 1").collect() - spark.sql("SET k2 = v2") - spark.sql("SET redaction.password = 123") - spark.sql("SELECT 1").collect() + withSQLConf("k2" -> "v2", "redaction.password" -> "123") { + spark.sql("SELECT 1").collect() + } spark.sparkContext.listenerBus.waitUntilEmpty() assert(index.get() == 2) } finally { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 1888844b9b986..f54a4f4606061 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -91,7 +91,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { "objName" -> s"`$SESSION_CATALOG_NAME`.`default`.`jtv1`", "tempObj" -> "VIEW", "tempObjName" -> "`temp_jtv1`")) - val globalTempDB = spark.sharedState.globalTempViewManager.database + val globalTempDB = spark.sharedState.globalTempDB sql("CREATE GLOBAL TEMP VIEW global_temp_jtv1 AS SELECT * FROM jt WHERE id > 0") checkError( exception = intercept[AnalysisException] { @@ -899,46 +899,48 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { test("resolve a view when the dataTypes of referenced table columns changed") { withTable("tab1") { - spark.range(1, 10).selectExpr("id", "id + 1 id1").write.saveAsTable("tab1") - withView("testView") { - sql("CREATE VIEW testView AS SELECT * FROM tab1") - - // Allow casting from IntegerType to LongType - val df = (1 until 10).map(i => (i, i + 1)).toDF("id", "id1") - df.write.format("json").mode(SaveMode.Overwrite).saveAsTable("tab1") - checkAnswer(sql("SELECT * FROM testView ORDER BY id1"), (1 to 9).map(i => Row(i, i + 1))) - - // Casting from DoubleType to LongType might truncate, throw an AnalysisException. 
- val df2 = (1 until 10).map(i => (i.toDouble, i.toDouble)).toDF("id", "id1") - df2.write.format("json").mode(SaveMode.Overwrite).saveAsTable("tab1") - checkError( - exception = intercept[AnalysisException](sql("SELECT * FROM testView")), - errorClass = "CANNOT_UP_CAST_DATATYPE", - parameters = Map( - "expression" -> s"$SESSION_CATALOG_NAME.default.tab1.id", - "sourceType" -> "\"DOUBLE\"", - "targetType" -> "\"BIGINT\"", - "details" -> ("The type path of the target object is:\n\n" + - "You can either add an explicit cast to the input data or " + - "choose a higher precision type of the field in the target object") + withSQLConf("spark.sql.legacy.viewSchemaCompensation" -> "false") { + spark.range(1, 10).selectExpr("id", "id + 1 id1").write.saveAsTable("tab1") + withView("testView") { + sql("CREATE VIEW testView AS SELECT * FROM tab1") + + // Allow casting from IntegerType to LongType + val df = (1 until 10).map(i => (i, i + 1)).toDF("id", "id1") + df.write.format("json").mode(SaveMode.Overwrite).saveAsTable("tab1") + checkAnswer(sql("SELECT * FROM testView ORDER BY id1"), (1 to 9).map(i => Row(i, i + 1))) + + // Casting from DoubleType to LongType might truncate, throw an AnalysisException. + val df2 = (1 until 10).map(i => (i.toDouble, i.toDouble)).toDF("id", "id1") + df2.write.format("json").mode(SaveMode.Overwrite).saveAsTable("tab1") + checkError( + exception = intercept[AnalysisException](sql("SELECT * FROM testView")), + errorClass = "CANNOT_UP_CAST_DATATYPE", + parameters = Map( + "expression" -> s"$SESSION_CATALOG_NAME.default.tab1.id", + "sourceType" -> "\"DOUBLE\"", + "targetType" -> "\"BIGINT\"", + "details" -> ("The type path of the target object is:\n\n" + + "You can either add an explicit cast to the input data or " + + "choose a higher precision type of the field in the target object") + ) ) - ) - // Can't cast from ArrayType to LongType, throw an AnalysisException. - val df3 = (1 until 10).map(i => (i, Seq(i))).toDF("id", "id1") - df3.write.format("json").mode(SaveMode.Overwrite).saveAsTable("tab1") - checkError( - exception = intercept[AnalysisException](sql("SELECT * FROM testView")), - errorClass = "CANNOT_UP_CAST_DATATYPE", - parameters = Map( - "expression" -> s"$SESSION_CATALOG_NAME.default.tab1.id1", - "sourceType" -> "\"ARRAY\"", - "targetType" -> "\"BIGINT\"", - "details" -> ("The type path of the target object is:\n\n" + - "You can either add an explicit cast to the input data or " + - "choose a higher precision type of the field in the target object") + // Can't cast from ArrayType to LongType, throw an AnalysisException. 
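+ // (The view still records BIGINT for `id1`, and an array column cannot be up-cast to it.)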
+ val df3 = (1 until 10).map(i => (i, Seq(i))).toDF("id", "id1") + df3.write.format("json").mode(SaveMode.Overwrite).saveAsTable("tab1") + checkError( + exception = intercept[AnalysisException](sql("SELECT * FROM testView")), + errorClass = "CANNOT_UP_CAST_DATATYPE", + parameters = Map( + "expression" -> s"$SESSION_CATALOG_NAME.default.tab1.id1", + "sourceType" -> "\"ARRAY\"", + "targetType" -> "\"BIGINT\"", + "details" -> ("The type path of the target object is:\n\n" + + "You can either add an explicit cast to the input data or " + + "choose a higher precision type of the field in the target object") + ) ) - ) + } } } } @@ -1100,7 +1102,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { test("local temp view refers global temp view") { withGlobalTempView("v1") { withTempView("v2") { - val globalTempDB = spark.sharedState.globalTempViewManager.database + val globalTempDB = spark.sharedState.globalTempDB sql("CREATE GLOBAL TEMPORARY VIEW v1 AS SELECT 1") sql(s"CREATE TEMPORARY VIEW v2 AS SELECT * FROM ${globalTempDB}.v1") checkAnswer(sql("SELECT * FROM v2"), Seq(Row(1))) @@ -1111,7 +1113,7 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { test("global temp view refers local temp view") { withTempView("v1") { withGlobalTempView("v2") { - val globalTempDB = spark.sharedState.globalTempViewManager.database + val globalTempDB = spark.sharedState.globalTempDB sql("CREATE TEMPORARY VIEW v1 AS SELECT 1") sql(s"CREATE GLOBAL TEMPORARY VIEW v2 AS SELECT * FROM v1") checkAnswer(sql(s"SELECT * FROM ${globalTempDB}.v2"), Seq(Row(1))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala index d2740f9eac789..e75413b804f48 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewTestSuite.scala @@ -536,7 +536,7 @@ class LocalTempViewTestSuite extends TempViewTestSuite with SharedSparkSession { } class GlobalTempViewTestSuite extends TempViewTestSuite with SharedSparkSession { - private def db: String = spark.sharedState.globalTempViewManager.database + private def db: String = spark.sharedState.globalTempDB override protected def viewTypeString: String = "GLOBAL TEMPORARY VIEW" override protected def formattedViewName(viewName: String): String = { s"$db.$viewName" @@ -736,7 +736,8 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { Seq(true, false).foreach { serde => withView(viewName) { createView(viewName, "SELECT 1 AS a") - val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( a) AS SELECT 1 AS a" + val expected = s"CREATE VIEW ${formattedViewName(viewName)} ( a) " + + "WITH SCHEMA COMPENSATION AS SELECT 1 AS a" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } @@ -748,7 +749,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { withView(viewName) { createView(viewName, "SELECT 1 AS a, 2 AS b", Seq("a", "b COMMENT 'b column'")) val expected = s"CREATE VIEW ${formattedViewName(viewName)}" + - s" ( a, b COMMENT 'b column') AS SELECT 1 AS a, 2 AS b" + s" ( a, b COMMENT 'b column') WITH SCHEMA COMPENSATION AS SELECT 1 AS a, 2 AS b" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } @@ -764,7 +765,7 @@ class PersistedViewTestSuite extends SQLViewTestSuite with SharedSparkSession { val expected = s"CREATE VIEW 
${formattedViewName(viewName)} ( c1 COMMENT 'bla', c2)" + " COMMENT 'table comment'" + " TBLPROPERTIES ( 'prop1' = 'value1', 'prop2' = 'value2')" + - " AS SELECT 1 AS c1, '2' AS c2" + " WITH SCHEMA COMPENSATION AS SELECT 1 AS c1, '2' AS c2" assert(getShowCreateDDL(formattedViewName(viewName), serde) == expected) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala index d949342106159..928d732f2a160 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala @@ -130,7 +130,8 @@ class UnsafeRowSerializerSuite extends SparkFunSuite with LocalSparkSession { assert(sorter.numSpills > 0) // Merging spilled files should not throw assertion error - sorter.writePartitionedMapOutput(0, 0, mapOutputWriter) + sorter.writePartitionedMapOutput(0, 0, mapOutputWriter, + taskContext.taskMetrics.shuffleWriteMetrics) } test("SPARK-10403: unsafe row serializer with SortShuffleManager") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index 3aaf61ffba465..4d2d465828924 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -785,6 +785,16 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession } test("SPARK-26680: Stream in groupBy does not cause StackOverflowError") { + @scala.annotation.nowarn("cat=deprecation") + val groupByCols = Stream(col("key")) + val df = Seq((1, 2), (2, 3), (1, 3)).toDF("key", "value") + .groupBy(groupByCols: _*) + .max("value") + + checkAnswer(df, Seq(Row(1, 3), Row(2, 3))) + } + + test("SPARK-45685: LazyList in groupBy does not cause StackOverflowError") { val groupByCols = LazyList(col("key")) val df = Seq((1, 2), (2, 3), (1, 3)).toDF("key", "value") .groupBy(groupByCols: _*) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 39f6aa8505b32..93df399731d42 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -25,11 +25,15 @@ import org.scalatest.PrivateMethodTester import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkException +import org.apache.spark.rdd.RDD import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart} -import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} +import org.apache.spark.shuffle.sort.SortShuffleManager +import org.apache.spark.sql.{DataFrame, Dataset, QueryTest, Row, SparkSession, Strategy} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan} -import org.apache.spark.sql.execution.{CollectLimitExec, ColumnarToRowExec, LocalTableScanExec, PartialReducerPartitionSpec, QueryExecution, ReusedSubqueryExec, ShuffledRowRDD, SortExec, SparkPlan, 
SparkPlanInfo, UnionExec} +import org.apache.spark.sql.execution.{CollectLimitExec, ColumnarToRowExec, EmptyRelationExec, PartialReducerPartitionSpec, QueryExecution, ReusedSubqueryExec, ShuffledRowRDD, SortExec, SparkPlan, SparkPlanInfo, UnaryExecNode, UnionExec} import org.apache.spark.sql.execution.aggregate.BaseAggregateExec import org.apache.spark.sql.execution.columnar.{InMemoryTableScanExec, InMemoryTableScanLike} import org.apache.spark.sql.execution.command.DataWritingCommandExec @@ -61,7 +65,8 @@ class AdaptiveQueryExecSuite setupTestData() - private def runAdaptiveAndVerifyResult(query: String): (SparkPlan, SparkPlan) = { + private def runAdaptiveAndVerifyResult(query: String, + skipCheckAnswer: Boolean = false): (SparkPlan, SparkPlan) = { var finalPlanCnt = 0 var hasMetricsEvent = false val listener = new SparkListener { @@ -85,8 +90,10 @@ class AdaptiveQueryExecSuite assert(planBefore.toString.startsWith("AdaptiveSparkPlan isFinalPlan=false")) val result = dfAdaptive.collect() withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { - val df = sql(query) - checkAnswer(df, result.toImmutableArraySeq) + if (!skipCheckAnswer) { + val df = sql(query) + checkAnswer(df, result.toImmutableArraySeq) + } } val planAfter = dfAdaptive.queryExecution.executedPlan assert(planAfter.toString.startsWith("AdaptiveSparkPlan isFinalPlan=true")) @@ -158,6 +165,12 @@ class AdaptiveQueryExecSuite } } + private def findTopLevelUnion(plan: SparkPlan): Seq[UnionExec] = { + collect(plan) { + case l: UnionExec => l + } + } + private def findReusedExchange(plan: SparkPlan): Seq[ReusedExchangeExec] = { collectWithSubqueries(plan) { case ShuffleQueryStageExec(_, e: ReusedExchangeExec, _) => e @@ -897,6 +910,92 @@ class AdaptiveQueryExecSuite } } + test("SPARK-47148: AQE should avoid to materialize ShuffleQueryStage on the cancellation") { + def createJoinedDF(): DataFrame = { + val df = spark.range(5).toDF("col") + val df2 = spark.range(10).toDF("col").coalesce(2) + val df3 = spark.range(15).toDF("col").filter(Symbol("col") >= 2) + df.join(df2, Seq("col")).join(df3, Seq("col")) + } + + try { + spark.experimental.extraStrategies = TestProblematicCoalesceStrategy :: Nil + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + val joinedDF = createJoinedDF() + + val error = intercept[SparkException] { + joinedDF.collect() + } + assert(error.getMessage() contains "ProblematicCoalesce execution is failed") + + val adaptivePlan = joinedDF.queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec] + + // All QueryStages should be based on ShuffleQueryStageExec + val shuffleQueryStageExecs = collect(adaptivePlan) { + case sqse: ShuffleQueryStageExec => sqse + } + assert(shuffleQueryStageExecs.length == 3, s"Physical Plan should include " + + s"3 ShuffleQueryStages. Physical Plan: $adaptivePlan") + shuffleQueryStageExecs.foreach(sqse => assert(sqse.name.contains("ShuffleQueryStageExec-"))) + // First ShuffleQueryStage is materialized so it needs to be canceled. + assert(shuffleQueryStageExecs(0).shuffle.isMaterializationStarted(), + "Materialization should be started.") + // Second ShuffleQueryStage materialization is failed so + // it is excluded from the cancellation due to earlyFailedStage. 
+ assert(shuffleQueryStageExecs(1).shuffle.isMaterializationStarted(), + "Materialization should be started but it is failed.") + // Last ShuffleQueryStage is not materialized yet so it does not require + // to be canceled and it is just skipped from the cancellation. + assert(!shuffleQueryStageExecs(2).shuffle.isMaterializationStarted(), + "Materialization should not be started.") + } + } finally { + spark.experimental.extraStrategies = Nil + } + } + + test("SPARK-47148: Check if BroadcastQueryStage materialization is started") { + def createJoinedDF(): DataFrame = { + spark.range(10).toDF("col1").createTempView("t1") + spark.range(5).coalesce(2).toDF("col2").createTempView("t2") + spark.range(15).toDF("col3").filter(Symbol("col3") >= 2).createTempView("t3") + sql("SELECT /*+ BROADCAST(t3) */ * FROM (SELECT /*+ BROADCAST(t2) */ * FROM t1 " + + "INNER JOIN t2 ON t1.col1 = t2.col2) t JOIN t3 ON t.col1 = t3.col3;") + } + withTempView("t1", "t2", "t3") { + try { + spark.experimental.extraStrategies = TestProblematicCoalesceStrategy :: Nil + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val joinedDF = createJoinedDF() + + val error = intercept[SparkException] { + joinedDF.collect() + } + assert(error.getMessage() contains "ProblematicCoalesce execution is failed") + + val adaptivePlan = + joinedDF.queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec] + + // All QueryStages should be based on BroadcastQueryStageExec + val broadcastQueryStageExecs = collect(adaptivePlan) { + case bqse: BroadcastQueryStageExec => bqse + } + assert(broadcastQueryStageExecs.length == 2, adaptivePlan) + broadcastQueryStageExecs.foreach { bqse => + assert(bqse.name.contains("BroadcastQueryStageExec-")) + // Both BroadcastQueryStages are materialized at the beginning. 
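// A self-contained sketch of the test technique used in the two SPARK-47148 tests here:
// make execution fail on purpose, then inspect the AdaptiveSparkPlanExec that was built
// anyway. Illustrative only; the failing UDF and query below are arbitrary stand-ins, not
// the patch's TestProblematicCoalesceStrategy.
import org.apache.spark.SparkException
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
import org.apache.spark.sql.functions.udf

object FailedExecutionPlanSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[1]")
      .appName("failed-plan-sketch")
      .config("spark.sql.adaptive.enabled", "true")
      .getOrCreate()
    import spark.implicits._

    // A UDF that always fails, so the final stage of the query cannot complete.
    val boom = udf((k: Long) => if (k >= 0) throw new RuntimeException("boom") else k)
    val df = spark.range(10).groupBy(($"id" % 2).as("k")).count().select(boom($"k"))

    try df.collect() catch { case _: SparkException => () } // the failure is expected

    // The executed plan is still available after the failure; with AQE enabled it is an
    // AdaptiveSparkPlanExec whose query stages can be examined, which is what the
    // surrounding assertions do via isMaterializationStarted().
    df.queryExecution.executedPlan match {
      case adaptive: AdaptiveSparkPlanExec => println(adaptive.treeString)
      case other => println(other.treeString)
    }
    spark.stop()
  }
}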
+ assert(bqse.broadcast.isMaterializationStarted(), + s"${bqse.name}' s materialization should be started.") + } + } + } finally { + spark.experimental.extraStrategies = Nil + } + } + } + test("SPARK-30403: AQE should handle InSubquery") { withSQLConf( SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", @@ -1551,13 +1650,13 @@ class AdaptiveQueryExecSuite val (plan1, adaptivePlan1) = runAdaptiveAndVerifyResult( "SELECT key FROM testData WHERE key = 0 ORDER BY key, value") assert(findTopLevelSort(plan1).size == 1) - assert(stripAQEPlan(adaptivePlan1).isInstanceOf[LocalTableScanExec]) + assert(stripAQEPlan(adaptivePlan1).isInstanceOf[EmptyRelationExec]) val (plan2, adaptivePlan2) = runAdaptiveAndVerifyResult( "SELECT key FROM (SELECT * FROM testData WHERE value = 'no_match' ORDER BY key)" + " WHERE key > rand()") assert(findTopLevelSort(plan2).size == 1) - assert(stripAQEPlan(adaptivePlan2).isInstanceOf[LocalTableScanExec]) + assert(stripAQEPlan(adaptivePlan2).isInstanceOf[EmptyRelationExec]) } } @@ -1565,18 +1664,18 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { val (plan1, adaptivePlan1) = runAdaptiveAndVerifyResult( "SELECT key, count(*) FROM testData WHERE value = 'no_match' GROUP BY key") - assert(!plan1.isInstanceOf[LocalTableScanExec]) - assert(stripAQEPlan(adaptivePlan1).isInstanceOf[LocalTableScanExec]) + assert(!plan1.isInstanceOf[EmptyRelationExec]) + assert(stripAQEPlan(adaptivePlan1).isInstanceOf[EmptyRelationExec]) val (plan2, adaptivePlan2) = runAdaptiveAndVerifyResult( "SELECT key, count(*) FROM testData WHERE value = 'no_match' GROUP BY key limit 1") - assert(!plan2.isInstanceOf[LocalTableScanExec]) - assert(stripAQEPlan(adaptivePlan2).isInstanceOf[LocalTableScanExec]) + assert(!plan2.isInstanceOf[EmptyRelationExec]) + assert(stripAQEPlan(adaptivePlan2).isInstanceOf[EmptyRelationExec]) val (plan3, adaptivePlan3) = runAdaptiveAndVerifyResult( "SELECT count(*) FROM testData WHERE value = 'no_match'") - assert(!plan3.isInstanceOf[LocalTableScanExec]) - assert(!stripAQEPlan(adaptivePlan3).isInstanceOf[LocalTableScanExec]) + assert(!plan3.isInstanceOf[EmptyRelationExec]) + assert(!stripAQEPlan(adaptivePlan3).isInstanceOf[EmptyRelationExec]) } } @@ -1597,7 +1696,7 @@ class AdaptiveQueryExecSuite |""".stripMargin) checkNumUnion(plan1, 1) checkNumUnion(adaptivePlan1, 0) - assert(!stripAQEPlan(adaptivePlan1).isInstanceOf[LocalTableScanExec]) + assert(!stripAQEPlan(adaptivePlan1).isInstanceOf[EmptyRelationExec]) val (plan2, adaptivePlan2) = runAdaptiveAndVerifyResult( """ @@ -1607,7 +1706,7 @@ class AdaptiveQueryExecSuite |""".stripMargin) checkNumUnion(plan2, 1) checkNumUnion(adaptivePlan2, 0) - assert(stripAQEPlan(adaptivePlan2).isInstanceOf[LocalTableScanExec]) + assert(stripAQEPlan(adaptivePlan2).isInstanceOf[EmptyRelationExec]) } } @@ -1876,8 +1975,8 @@ class AdaptiveQueryExecSuite .map(_.getMessage.getFormattedMessage) .filter(_.startsWith("Materialize query stage")) .toArray - assert(materializeLogs(0).startsWith("Materialize query stage BroadcastQueryStageExec")) - assert(materializeLogs(1).startsWith("Materialize query stage ShuffleQueryStageExec")) + assert(materializeLogs(0).startsWith("Materialize query stage: BroadcastQueryStageExec-1")) + assert(materializeLogs(1).startsWith("Materialize query stage: ShuffleQueryStageExec-0")) } test("SPARK-34899: Use origin plan if we can not coalesce shuffle partition") { @@ -2410,6 +2509,28 @@ class AdaptiveQueryExecSuite } } + test("SPARK-48037: Fix SortShuffleWriter lacks shuffle write 
related metrics " + + "resulting in potentially inaccurate data") { + withTable("t3") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.SHUFFLE_PARTITIONS.key -> (SortShuffleManager + .MAX_SHUFFLE_OUTPUT_PARTITIONS_FOR_SERIALIZED_MODE + 1).toString) { + sql("CREATE TABLE t3 USING PARQUET AS SELECT id FROM range(2)") + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + """ + |SELECT id, count(*) + |FROM t3 + |GROUP BY id + |LIMIT 1 + |""".stripMargin, skipCheckAnswer = true) + // The shuffle stage produces two rows and the limit operator should not been optimized out. + assert(findTopLevelLimit(plan).size == 1) + assert(findTopLevelLimit(adaptivePlan).size == 1) + } + } + } + test("SPARK-37063: OptimizeSkewInRebalancePartitions support optimize non-root node") { withTempView("v") { withSQLConf( @@ -2680,6 +2801,35 @@ class AdaptiveQueryExecSuite } } + test("SPARK-48155: AQEPropagateEmptyRelation check remained child for join") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + // Before SPARK-48155, since the AQE will call ValidateSparkPlan, + // all AQE optimize rule won't work and return the origin plan. + // After SPARK-48155, Spark avoid invalid propagate of empty relation. + // Then the UNION first child empty relation can be propagate correctly + // and the JOIN won't be propagated since will generated a invalid plan. + val (_, adaptivePlan) = runAdaptiveAndVerifyResult( + """ + |SELECT /*+ BROADCAST(t3) */ t3.b, count(t3.a) FROM testData2 t1 + |INNER JOIN ( + | SELECT * FROM testData2 + | WHERE b = 0 + | UNION ALL + | SELECT * FROM testData2 + | WHErE b != 0 + |) t2 + |ON t1.b = t2.b AND t1.a = 0 + |RIGHT OUTER JOIN testData2 t3 + |ON t1.a > t3.a + |GROUP BY t3.b + """.stripMargin + ) + assert(findTopLevelBroadcastNestedLoopJoin(adaptivePlan).size == 1) + assert(findTopLevelUnion(adaptivePlan).size == 0) + } + } + test("SPARK-39915: Dataset.repartition(N) may not create N partitions") { withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "6") { // partitioning: HashPartitioning @@ -2703,7 +2853,7 @@ class AdaptiveQueryExecSuite // shuffleOrigin: REPARTITION_BY_NUM assert(spark.range(0).repartition(5).rdd.getNumPartitions == 5) // shuffleOrigin: REBALANCE_PARTITIONS_BY_NONE - assert(spark.range(0).repartition().rdd.getNumPartitions == 0) + assert(spark.range(0).repartition().rdd.getNumPartitions == 1) // through project assert(spark.range(0).selectExpr("id % 3 as c1", "id % 7 as c2") .repartition(5).select($"c2").rdd.getNumPartitions == 5) @@ -2900,3 +3050,26 @@ private case class SimpleShuffleSortCostEvaluator() extends CostEvaluator { SimpleCost(cost) } } + +/** + * Helps to simulate ExchangeQueryStageExec materialization failure. 
+ */ +private object TestProblematicCoalesceStrategy extends Strategy { + private case class TestProblematicCoalesceExec(numPartitions: Int, child: SparkPlan) + extends UnaryExecNode { + override protected def doExecute(): RDD[InternalRow] = + throw new SparkException("ProblematicCoalesce execution is failed") + override def output: Seq[Attribute] = child.output + override protected def withNewChildInternal(newChild: SparkPlan): TestProblematicCoalesceExec = + copy(child = newChild) + } + + override def apply(plan: LogicalPlan): Seq[SparkPlan] = { + plan match { + case org.apache.spark.sql.catalyst.plans.logical.Repartition( + numPartitions, false, child) => + TestProblematicCoalesceExec(numPartitions, planLater(child)) :: Nil + case _ => Nil + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveRuleContextSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveRuleContextSuite.scala new file mode 100644 index 0000000000000..04c9e6c946b45 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveRuleContextSuite.scala @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.{SparkSession, SparkSessionExtensionsProvider} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{ColumnarRule, RangeExec, SparkPlan, SparkStrategy} +import org.apache.spark.sql.execution.aggregate.HashAggregateExec +import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec + +class AdaptiveRuleContextSuite extends SparkFunSuite with AdaptiveSparkPlanHelper { + + private def stop(spark: SparkSession): Unit = { + spark.stop() + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + } + + private def withSession( + builders: Seq[SparkSessionExtensionsProvider])(f: SparkSession => Unit): Unit = { + val builder = SparkSession.builder().master("local[1]") + builders.foreach(builder.withExtensions) + val spark = builder.getOrCreate() + try f(spark) finally { + stop(spark) + } + } + + test("test adaptive rule context") { + withSession( + Seq(_.injectRuntimeOptimizerRule(_ => MyRuleContextForRuntimeOptimization), + _.injectPlannerStrategy(_ => MyRuleContextForPlannerStrategy), + _.injectQueryPostPlannerStrategyRule(_ => MyRuleContextForPostPlannerStrategyRule), + _.injectQueryStagePrepRule(_ => MyRuleContextForPreQueryStageRule), + _.injectQueryStageOptimizerRule(_ => MyRuleContextForQueryStageRule), + _.injectColumnar(_ => MyRuleContextForColumnarRule))) { spark => + val df = spark.range(1, 10, 1, 3).selectExpr("id % 3 as c").groupBy("c").count() + df.collect() + assert(collectFirst(df.queryExecution.executedPlan) { + case s: ShuffleExchangeExec if s.numPartitions == 2 => s + }.isDefined) + } + } + + test("test adaptive rule context with subquery") { + withSession( + Seq(_.injectQueryStagePrepRule(_ => MyRuleContextForQueryStageWithSubquery))) { spark => + spark.sql("select (select count(*) from range(10)), id from range(10)").collect() + } + } +} + +object MyRuleContext { + def checkAndGetRuleContext(): AdaptiveRuleContext = { + val ruleContextOpt = AdaptiveRuleContext.get() + assert(ruleContextOpt.isDefined) + ruleContextOpt.get + } + + def checkRuleContextForQueryStage(plan: SparkPlan): SparkPlan = { + val ruleContext = checkAndGetRuleContext() + assert(!ruleContext.isSubquery) + val stage = plan.find(_.isInstanceOf[ShuffleQueryStageExec]) + if (stage.isDefined && stage.get.asInstanceOf[ShuffleQueryStageExec].isMaterialized) { + assert(ruleContext.isFinalStage) + assert(!ruleContext.configs().get("spark.sql.shuffle.partitions").contains("2")) + } else { + assert(!ruleContext.isFinalStage) + assert(ruleContext.configs().get("spark.sql.shuffle.partitions").contains("2")) + } + plan + } +} + +object MyRuleContextForRuntimeOptimization extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { + MyRuleContext.checkAndGetRuleContext() + plan + } +} + +object MyRuleContextForPlannerStrategy extends SparkStrategy { + override def apply(plan: LogicalPlan): Seq[SparkPlan] = { + plan match { + case _: LogicalQueryStage => + val ruleContext = MyRuleContext.checkAndGetRuleContext() + assert(!ruleContext.configs().get("spark.sql.shuffle.partitions").contains("2")) + Nil + case _ => Nil + } + } +} + +object MyRuleContextForPostPlannerStrategyRule extends Rule[SparkPlan] { + override def apply(plan: SparkPlan): SparkPlan = { + val ruleContext = MyRuleContext.checkAndGetRuleContext() + if 
(plan.find(_.isInstanceOf[RangeExec]).isDefined) { + ruleContext.setConfig("spark.sql.shuffle.partitions", "2") + } + plan + } +} + +object MyRuleContextForPreQueryStageRule extends Rule[SparkPlan] { + override def apply(plan: SparkPlan): SparkPlan = { + val ruleContext = MyRuleContext.checkAndGetRuleContext() + assert(!ruleContext.isFinalStage) + plan + } +} + +object MyRuleContextForQueryStageRule extends Rule[SparkPlan] { + override def apply(plan: SparkPlan): SparkPlan = { + MyRuleContext.checkRuleContextForQueryStage(plan) + } +} + +object MyRuleContextForColumnarRule extends ColumnarRule { + override def preColumnarTransitions: Rule[SparkPlan] = { + plan: SparkPlan => { + if (plan.isInstanceOf[AdaptiveSparkPlanExec]) { + // skip if we are not inside AQE + assert(AdaptiveRuleContext.get().isEmpty) + plan + } else { + MyRuleContext.checkRuleContextForQueryStage(plan) + } + } + } + + override def postColumnarTransitions: Rule[SparkPlan] = { + plan: SparkPlan => { + if (plan.isInstanceOf[AdaptiveSparkPlanExec]) { + // skip if we are not inside AQE + assert(AdaptiveRuleContext.get().isEmpty) + plan + } else { + MyRuleContext.checkRuleContextForQueryStage(plan) + } + } + } +} + +object MyRuleContextForQueryStageWithSubquery extends Rule[SparkPlan] { + override def apply(plan: SparkPlan): SparkPlan = { + val ruleContext = MyRuleContext.checkAndGetRuleContext() + if (plan.exists(_.isInstanceOf[HashAggregateExec])) { + assert(ruleContext.isSubquery) + if (plan.exists(_.isInstanceOf[RangeExec])) { + assert(!ruleContext.isFinalStage) + } else { + assert(ruleContext.isFinalStage) + } + } else { + assert(!ruleContext.isSubquery) + } + plan + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CollationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CollationBenchmark.scala index 7a93c7c495e26..86e9320ae9cde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CollationBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/CollationBenchmark.scala @@ -19,12 +19,12 @@ package org.apache.spark.sql.execution.benchmark import scala.concurrent.duration._ import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} -import org.apache.spark.sql.catalyst.util.CollationFactory +import org.apache.spark.sql.catalyst.util.{CollationFactory, CollationSupport} import org.apache.spark.unsafe.types.UTF8String abstract class CollationBenchmarkBase extends BenchmarkBase { protected val collationTypes: Seq[String] = - Seq("UTF8_BINARY_LCASE", "UNICODE", "UTF8_BINARY", "UNICODE_CI") + Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI") def generateSeqInput(n: Long): Seq[UTF8String] @@ -36,18 +36,19 @@ abstract class CollationBenchmarkBase extends BenchmarkBase { utf8Strings.size * 10, warmupTime = 10.seconds, output = output) - collationTypes.foreach(collationType => { + collationTypes.foreach { collationType => { val collation = CollationFactory.fetchCollation(collationType) benchmark.addCase(s"$collationType") { _ => - sublistStrings.foreach(s1 => - utf8Strings.foreach(s => - (0 to 10).foreach(_ => - collation.equalsFunction(s, s1).booleanValue()) - ) - ) + sublistStrings.foreach { s1 => + utf8Strings.foreach { s => + (0 to 3).foreach { _ => + collation.equalsFunction(s, s1).booleanValue() + } + } + } } } - ) + } benchmark.run() } @@ -59,19 +60,19 @@ abstract class CollationBenchmarkBase extends BenchmarkBase { utf8Strings.size * 10, warmupTime = 10.seconds, output = output) - 
collationTypes.foreach(collationType => { + collationTypes.foreach { collationType => { val collation = CollationFactory.fetchCollation(collationType) benchmark.addCase(s"$collationType") { _ => - sublistStrings.foreach(s1 => - utf8Strings.foreach(s => - (0 to 10).foreach(_ => + sublistStrings.foreach { s1 => + utf8Strings.foreach { s => + (0 to 3).foreach { _ => collation.comparator.compare(s, s1) - ) - ) - ) + } + } + } } } - ) + } benchmark.run() } @@ -85,19 +86,103 @@ abstract class CollationBenchmarkBase extends BenchmarkBase { utf8Strings.size * 10, warmupTime = 10.seconds, output = output) - collationTypes.foreach(collationType => { + collationTypes.foreach { collationType => { val collation = CollationFactory.fetchCollation(collationType) benchmark.addCase(s"$collationType") { _ => - sublistStrings.foreach(_ => - utf8Strings.foreach(s => - (0 to 10).foreach(_ => + sublistStrings.foreach { _ => + utf8Strings.foreach { s => + (0 to 3).foreach { _ => collation.hashFunction.applyAsLong(s) - ) - ) - ) + } + } + } + } + } + } + benchmark.run() + } + + def benchmarkContains( + collationTypes: Seq[String], + utf8Strings: Seq[UTF8String]): Unit = { + val sublistStrings = utf8Strings + + val benchmark = new Benchmark( + "collation unit benchmarks - contains", + utf8Strings.size * 10, + warmupTime = 10.seconds, + output = output) + collationTypes.foreach { collationType => { + val collation = CollationFactory.fetchCollation(collationType) + benchmark.addCase(s"$collationType") { _ => + sublistStrings.foreach { s1 => + utf8Strings.foreach { s => + (0 to 3).foreach { _ => + CollationSupport.Contains.exec( + s, s1, CollationFactory.collationNameToId(collation.collationName) + ) + } + } + } } } - ) + } + benchmark.run() + } + + def benchmarkStartsWith( + collationTypes: Seq[String], + utf8Strings: Seq[UTF8String]): Unit = { + val sublistStrings = utf8Strings + + val benchmark = new Benchmark( + "collation unit benchmarks - startsWith", + utf8Strings.size * 10, + warmupTime = 10.seconds, + output = output) + collationTypes.foreach { collationType => { + val collation = CollationFactory.fetchCollation(collationType) + benchmark.addCase(s"$collationType") { _ => + sublistStrings.foreach { s1 => + utf8Strings.foreach { s => + (0 to 3).foreach { _ => + CollationSupport.StartsWith.exec( + s, s1, CollationFactory.collationNameToId(collation.collationName) + ) + } + } + } + } + } + } + benchmark.run() + } + + def benchmarkEndsWith( + collationTypes: Seq[String], + utf8Strings: Seq[UTF8String]): Unit = { + val sublistStrings = utf8Strings + + val benchmark = new Benchmark( + "collation unit benchmarks - endsWith", + utf8Strings.size * 10, + warmupTime = 10.seconds, + output = output) + collationTypes.foreach { collationType => { + val collation = CollationFactory.fetchCollation(collationType) + benchmark.addCase(s"$collationType") { _ => + sublistStrings.foreach { s1 => + utf8Strings.foreach { s => + (0 to 3).foreach { _ => + CollationSupport.EndsWith.exec( + s, s1, CollationFactory.collationNameToId(collation.collationName) + ) + } + } + } + } + } + } benchmark.run() } } @@ -127,9 +212,13 @@ object CollationBenchmark extends CollationBenchmarkBase { } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { - benchmarkUTFStringEquals(collationTypes, generateSeqInput(10000L)) - benchmarkUTFStringCompare(collationTypes, generateSeqInput(10000L)) - benchmarkUTFStringHashFunction(collationTypes, generateSeqInput(10000L)) + val inputs = generateSeqInput(10000L) + 
benchmarkUTFStringEquals(collationTypes, inputs) + benchmarkUTFStringCompare(collationTypes, inputs) + benchmarkUTFStringHashFunction(collationTypes, inputs) + benchmarkContains(collationTypes, inputs) + benchmarkStartsWith(collationTypes, inputs) + benchmarkEndsWith(collationTypes, inputs) } } @@ -152,8 +241,12 @@ object CollationNonASCIIBenchmark extends CollationBenchmarkBase { } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { - benchmarkUTFStringEquals(collationTypes, generateSeqInput(4000L)) - benchmarkUTFStringCompare(collationTypes, generateSeqInput(4000L)) - benchmarkUTFStringHashFunction(collationTypes, generateSeqInput(4000L)) + val inputs = generateSeqInput(4000L) + benchmarkUTFStringEquals(collationTypes, inputs) + benchmarkUTFStringCompare(collationTypes, inputs) + benchmarkUTFStringHashFunction(collationTypes, inputs) + benchmarkContains(collationTypes, inputs) + benchmarkStartsWith(collationTypes, inputs) + benchmarkEndsWith(collationTypes, inputs) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala index ff01221d0d7c7..94e88a96f37e0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala @@ -127,6 +127,13 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { private def withParquetVersions(f: String => Unit): Unit = Seq("V1", "V2").foreach(f) + private def getExpr(dataType: DataType = IntegerType): String = dataType match { + case BooleanType => "CASE WHEN value % 2 = 0 THEN true ELSE false END" + case ByteType => "cast(value % 128 as byte)" + case ShortType => "cast(value % 32768 as short)" + case _ => s"cast(value % ${Int.MaxValue} as ${dataType.sql})" + } + def numericScanBenchmark(values: Int, dataType: DataType): Unit = { // Benchmarks running through spark sql. 
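// A minimal sketch of the idea behind the getExpr helper introduced above: derive each
// test column with a modulus so the generated values always fit the target type, and the
// cast can no longer overflow when ANSI mode is enabled. Illustrative only; the view name
// and column aliases are arbitrary.
import org.apache.spark.sql.SparkSession

object BoundedCastSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("bounded-cast-sketch").getOrCreate()
    spark.conf.set("spark.sql.ansi.enabled", "true")

    spark.range(0, 1000)
      .selectExpr(
        "cast(id % 128 as byte) as b",     // stays within Byte range
        "cast(id % 32768 as short) as s",  // stays within Short range
        "cast(id % 2147483647 as int) as i")
      .createOrReplaceTempView("t1")

    // Safe to aggregate even with ANSI enabled, because no cast can overflow.
    spark.sql("SELECT sum(b), sum(s), sum(i) FROM t1").show()
    spark.stop()
  }
}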
val sqlBenchmark = new Benchmark( @@ -143,12 +150,14 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { withTempPath { dir => withTempTable("t1", "csvTable", "jsonTable", "parquetV1Table", "parquetV2Table", "orcTable") { import spark.implicits._ - spark.range(values).map(_ => Random.nextLong()).createOrReplaceTempView("t1") + spark.range(values).map(_ => Random.nextLong()) + .selectExpr(getExpr(dataType) + " as id") + .createOrReplaceTempView("t1") - prepareTable(dir, spark.sql(s"SELECT CAST(value as ${dataType.sql}) id FROM t1")) + prepareTable(dir, spark.sql(s"SELECT id FROM t1")) val query = dataType match { - case BooleanType => "sum(cast(id as bigint))" + case BooleanType => "sum(if(cast(id as boolean), 1, 0))" case _ => "sum(id)" } @@ -291,7 +300,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { spark.range(values).map(_ => Random.nextLong()).createOrReplaceTempView("t1") prepareTable(dir, - spark.sql(s"SELECT named_struct('f', CAST(value as ${dataType.sql})) as col FROM t1"), + spark.sql(s"SELECT named_struct('f', ${getExpr(dataType)}) as col FROM t1"), onlyParquetOrc = true) sqlBenchmark.addCase(s"SQL ORC MR") { _ => @@ -416,7 +425,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { prepareTable( dir, - spark.sql("SELECT CAST(value AS INT) AS c1, CAST(value as STRING) AS c2 FROM t1")) + spark.sql(s"SELECT ${getExpr()} c1, CAST(value as STRING) AS c2 FROM t1")) benchmark.addCase("SQL CSV") { _ => spark.sql("select sum(c1), sum(length(c2)) from csvTable").noop() @@ -512,7 +521,8 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { import spark.implicits._ spark.range(values).map(_ => Random.nextLong()).createOrReplaceTempView("t1") - prepareTable(dir, spark.sql("SELECT value % 2 AS p, value AS id FROM t1"), Some("p")) + prepareTable(dir, + spark.sql(s"SELECT value % 2 AS p, ${getExpr()} id FROM t1"), Some("p")) benchmark.addCase("Data column - CSV") { _ => spark.sql("select sum(id) from csvTable").noop() @@ -710,7 +720,7 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { withTempTable("t1", "csvTable", "jsonTable", "parquetV1Table", "parquetV2Table", "orcTable") { import spark.implicits._ val middle = width / 2 - val selectExpr = (1 to width).map(i => s"value as c$i") + val selectExpr = (1 to width).map(i => s"${getExpr()} as c$i") spark.range(values).map(_ => Random.nextLong()).toDF() .selectExpr(selectExpr: _*).createOrReplaceTempView("t1") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala index 6359f1b5f4f47..e5cb3ef8a04c7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DateTimeBenchmark.scala @@ -47,6 +47,12 @@ object DateTimeBenchmark extends SqlBasedBenchmark { .noop() } + private def doBenchmarkAnsiOff(cardinality: Int, exprs: String*): Unit = { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + doBenchmark(cardinality, exprs: _*) + } + } + private def run(cardinality: Int, name: String, exprs: String*): Unit = { codegenBenchmark(name, cardinality) { doBenchmark(cardinality, exprs: _*) @@ -75,7 +81,7 @@ object DateTimeBenchmark extends SqlBasedBenchmark { doBenchmark(N, s"$dt + interval 1 month 2 day") } benchmark.addCase("date + interval(m, d, ms)") { _ => - doBenchmark(N, s"$dt + interval 1 month 2 day 5 hour") + doBenchmarkAnsiOff(N, s"$dt + interval 
1 month 2 day 5 hour") } benchmark.addCase("date - interval(m)") { _ => doBenchmark(N, s"$dt - interval 1 month") @@ -84,7 +90,7 @@ object DateTimeBenchmark extends SqlBasedBenchmark { doBenchmark(N, s"$dt - interval 1 month 2 day") } benchmark.addCase("date - interval(m, d, ms)") { _ => - doBenchmark(N, s"$dt - interval 1 month 2 day 5 hour") + doBenchmarkAnsiOff(N, s"$dt - interval 1 month 2 day 5 hour") } benchmark.addCase("timestamp + interval(m)") { _ => doBenchmark(N, s"$ts + interval 1 month") @@ -161,7 +167,7 @@ object DateTimeBenchmark extends SqlBasedBenchmark { } val dateExpr = "cast(timestamp_seconds(id) as date)" Seq("year", "yyyy", "yy", "mon", "month", "mm").foreach { level => - run(N, s"trunc $level", s"trunc('$level', $dateExpr)") + run(N, s"trunc $level", s"trunc($dateExpr, '$level')") } } runBenchmark("Parsing") { @@ -171,7 +177,7 @@ object DateTimeBenchmark extends SqlBasedBenchmark { run(n, "to timestamp str", timestampStrExpr) run(n, "to_timestamp", s"to_timestamp($timestampStrExpr, $pattern)") run(n, "to_unix_timestamp", s"to_unix_timestamp($timestampStrExpr, $pattern)") - val dateStrExpr = "concat('2019-01-', lpad(mod(id, 25), 2, '0'))" + val dateStrExpr = "concat('2019-01-', lpad(mod(id, 25) + 1, 2, '0'))" run(n, "to date str", dateStrExpr) run(n, "to_date", s"to_date($dateStrExpr, 'yyyy-MM-dd')") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/EncodeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/EncodeBenchmark.scala new file mode 100644 index 0000000000000..76ebd7f41677b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/EncodeBenchmark.scala @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.benchmark.Benchmark + +/** + * Benchmark for encode + * To run this benchmark: + * {{{ + * 1. without sbt: + * bin/spark-submit --class --jars + * 2. build/sbt "sql/Test/runMain " + * 3. generate result: + * SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain " + * Results will be written to "benchmarks/EncodeBenchmark-results.txt". 
+ * }}} + */ +object EncodeBenchmark extends SqlBasedBenchmark { + import spark.implicits._ + private val N = 10L * 1000 * 1000 + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + withTempPath { path => + // scalastyle:off nonascii + val exprs = Seq( + "", + "Spark", + "白日依山尽,黄河入海流。欲穷千里目,更上一层楼。", + "το Spark είναι το πιο δημοφιλές πλαίσιο επεξεργασίας μεγάλων δεδομένων παγκοσμίως", + "세계에서 가장 인기 있는 빅데이터 처리 프레임워크인 Spark", + "Sparkは世界で最も人気のあるビッグデータ処理フレームワークである。") + // scalastyle:off nonascii + + spark.range(N).map { i => + val idx = (i % 6).toInt + val str = exprs(idx) + (str, str * 3, str * 5, str * 9, "") + }.write.parquet(path.getCanonicalPath) + + val benchmark = new Benchmark("encode", N, output = output) + def addBenchmarkCase(charset: String): Unit = { + benchmark.addCase(charset) { _ => + spark.read.parquet(path.getCanonicalPath).selectExpr( + s"encode(_1, '$charset')", + s"encode(_2, '$charset')", + s"encode(_3, '$charset')", + s"encode(_4, '$charset')", + s"encode(_5, '$charset')").noop() + } + } + addBenchmarkCase("UTF-32") + addBenchmarkCase("UTF-16") + addBenchmarkCase("UTF-8") + benchmark.run() + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InExpressionBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InExpressionBenchmark.scala index faaea51c0028d..a3d4cd12a1962 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InExpressionBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/InExpressionBenchmark.scala @@ -46,14 +46,14 @@ object InExpressionBenchmark extends SqlBasedBenchmark { private def runByteBenchmark(numItems: Int, numRows: Long, minNumIters: Int): Unit = { val name = s"$numItems bytes" val values = (Byte.MinValue until Byte.MinValue + numItems).map(v => s"${v}Y") - val df = spark.range(0, numRows).select($"id".cast(ByteType)) + val df = spark.range(0, numRows).selectExpr("mod(id, 256) as id") runBenchmark(name, df, values, numRows, minNumIters) } private def runShortBenchmark(numItems: Int, numRows: Long, minNumIters: Int): Unit = { val name = s"$numItems shorts" val values = (1 to numItems).map(v => s"${v}S") - val df = spark.range(0, numRows).select($"id".cast(ShortType)) + val df = spark.range(0, numRows).selectExpr("mod(id, 32768) as id") runBenchmark(name, df, values, numRows, minNumIters) } @@ -64,14 +64,14 @@ object InExpressionBenchmark extends SqlBasedBenchmark { require(isLookupSwitch(rangeSize, numItems)) val name = s"$numItems shorts (non-compact)" val values = (Short.MinValue until maxValue by step).map(v => s"${v}S") - val df = spark.range(0, numRows).select($"id".cast(ShortType)) + val df = spark.range(0, numRows).selectExpr("mod(id, 32768) as id") runBenchmark(name, df, values, numRows, minNumIters) } private def runIntBenchmark(numItems: Int, numRows: Long, minNumIters: Int): Unit = { val name = s"$numItems ints" val values = 1 to numItems - val df = spark.range(0, numRows).select($"id".cast(IntegerType)) + val df = spark.range(0, numRows).selectExpr("mod(id, 2147483648) as id") runBenchmark(name, df, values, numRows, minNumIters) } @@ -82,7 +82,7 @@ object InExpressionBenchmark extends SqlBasedBenchmark { require(isLookupSwitch(rangeSize, numItems)) val name = s"$numItems ints (non-compact)" val values = Int.MinValue until maxValue.toInt by step.toInt - val df = spark.range(0, numRows).select($"id".cast(IntegerType)) + val df = spark.range(0, numRows).selectExpr("mod(id, 2147483648) as id") 
runBenchmark(name, df, values, numRows, minNumIters) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MetadataStructBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MetadataStructBenchmark.scala index 38fff24abe506..1ed3292a69fa8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MetadataStructBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MetadataStructBenchmark.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.benchmark.Benchmark import org.apache.spark.sql.DataFrame import org.apache.spark.sql.execution.datasources.FileFormat -import org.apache.spark.sql.functions.lit +import org.apache.spark.sql.functions.{concat, lit} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils @@ -39,7 +39,7 @@ object MetadataStructBenchmark extends SqlBasedBenchmark { spark.range(0, NUM_ROWS, 1, 1).toDF("id") .withColumn("num1", $"id" + 10) .withColumn("num2", $"id" / 10) - .withColumn("str", lit("a sample string ") + $"id".cast("string")) + .withColumn("str", concat(lit("a sample string "), $"id".cast("string"))) .write.format(format).save(dir.getAbsolutePath) val df = spark.read.format(format).load(dir.getAbsolutePath) f(df) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index c76ad0434d4f6..d70e25bb026e7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -58,6 +58,7 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark with Logging { .set("spark.sql.crossJoin.enabled", "true") .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") .set("spark.kryo.registrationRequired", "true") + .set("spark.sql.ansi.enabled", "false") SparkSession.builder().config(conf).getOrCreate() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala index f39057013e64b..bdb118b91fa28 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnStatsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.columnar import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.types.PhysicalDataType +import org.apache.spark.sql.types.StringType class ColumnStatsSuite extends SparkFunSuite { testColumnStats(classOf[BooleanColumnStats], BOOLEAN, Array(true, false, 0)) @@ -28,9 +29,9 @@ class ColumnStatsSuite extends SparkFunSuite { testColumnStats(classOf[LongColumnStats], LONG, Array(Long.MaxValue, Long.MinValue, 0)) testColumnStats(classOf[FloatColumnStats], FLOAT, Array(Float.MaxValue, Float.MinValue, 0)) testColumnStats(classOf[DoubleColumnStats], DOUBLE, Array(Double.MaxValue, Double.MinValue, 0)) - testColumnStats(classOf[StringColumnStats], STRING, Array(null, null, 0)) testDecimalColumnStats(Array(null, null, 0)) testIntervalColumnStats(Array(null, null, 0)) + testStringColumnStats(Array(null, null, 0)) def testColumnStats[T <: PhysicalDataType, U <: ColumnStats]( columnStatsClass: Class[U], @@ 
-141,4 +142,60 @@ class ColumnStatsSuite extends SparkFunSuite { } } } + + def testStringColumnStats[T <: PhysicalDataType, U <: ColumnStats]( + initialStatistics: Array[Any]): Unit = { + + Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach(collation => { + val columnType = STRING(StringType(collation)) + + test(s"STRING($collation): empty") { + val columnStats = new StringColumnStats(StringType(collation).collationId) + columnStats.collectedStatistics.zip(initialStatistics).foreach { + case (actual, expected) => assert(actual === expected) + } + } + + test(s"STRING($collation): non-empty") { + import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._ + + val columnStats = new StringColumnStats(StringType(collation).collationId) + val rows = Seq.fill(10)(makeRandomRow(columnType)) ++ Seq.fill(10)(makeNullRow(1)) + rows.foreach(columnStats.gatherStats(_, 0)) + + val values = rows.take(10).map(_.get(0, + ColumnarDataTypeUtils.toLogicalDataType(columnType.dataType))) + val ordering = PhysicalDataType.ordering( + ColumnarDataTypeUtils.toLogicalDataType(columnType.dataType)) + val stats = columnStats.collectedStatistics + + assertResult(values.min(ordering), "Wrong lower bound")(stats(0)) + assertResult(values.max(ordering), "Wrong upper bound")(stats(1)) + assertResult(10, "Wrong null count")(stats(2)) + assertResult(20, "Wrong row count")(stats(3)) + assertResult(stats(4), "Wrong size in bytes") { + rows.map { row => + if (row.isNullAt(0)) 4 else columnType.actualSize(row, 0) + }.sum + } + } + }) + + test("STRING(UTF8_LCASE): collation-defined ordering") { + import org.apache.spark.sql.catalyst.expressions.GenericInternalRow + import org.apache.spark.unsafe.types.UTF8String + + val columnStats = new StringColumnStats(StringType("UTF8_LCASE").collationId) + val rows = Seq("b", "a", "C", "A").map(str => { + val row = new GenericInternalRow(1) + row(0) = UTF8String.fromString(str) + row + }) + rows.foreach(columnStats.gatherStats(_, 0)) + + val stats = columnStats.collectedStatistics + assertResult(UTF8String.fromString("a"), "Wrong lower bound")(stats(0)) + assertResult(UTF8String.fromString("C"), "Wrong upper bound")(stats(1)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala index d79ac8dc35459..a95bda9bf71df 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection} import org.apache.spark.sql.catalyst.types.{PhysicalArrayType, PhysicalDataType, PhysicalMapType, PhysicalStructType} +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.CalendarInterval @@ -40,7 +41,9 @@ class ColumnTypeSuite extends SparkFunSuite { val checks = Map( NULL -> 0, BOOLEAN -> 1, BYTE -> 1, SHORT -> 2, INT -> 4, LONG -> 8, FLOAT -> 4, DOUBLE -> 8, COMPACT_DECIMAL(15, 10) -> 8, LARGE_DECIMAL(20, 10) -> 12, - STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 28, MAP_TYPE -> 68, + STRING(StringType) -> 8, STRING(StringType("UTF8_LCASE")) -> 8, + 
STRING(StringType("UNICODE")) -> 8, STRING(StringType("UNICODE_CI")) -> 8, + BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 28, MAP_TYPE -> 68, CALENDAR_INTERVAL -> 16) checks.foreach { case (columnType, expectedSize) => @@ -73,7 +76,12 @@ class ColumnTypeSuite extends SparkFunSuite { checkActualSize(LONG, Long.MaxValue, 8) checkActualSize(FLOAT, Float.MaxValue, 4) checkActualSize(DOUBLE, Double.MaxValue, 8) - checkActualSize(STRING, "hello", 4 + "hello".getBytes(StandardCharsets.UTF_8).length) + Seq( + "UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI" + ).foreach(collation => { + checkActualSize(STRING(StringType(collation)), + "hello", 4 + "hello".getBytes(StandardCharsets.UTF_8).length) + }) checkActualSize(BINARY, Array.fill[Byte](4)(0.toByte), 4 + 4) checkActualSize(COMPACT_DECIMAL(15, 10), Decimal(0, 15, 10), 8) checkActualSize(LARGE_DECIMAL(20, 10), Decimal(0, 20, 10), 5) @@ -93,7 +101,10 @@ class ColumnTypeSuite extends SparkFunSuite { testNativeColumnType(FLOAT) testNativeColumnType(DOUBLE) testNativeColumnType(COMPACT_DECIMAL(15, 10)) - testNativeColumnType(STRING) + testNativeColumnType(STRING(StringType)) // UTF8_BINARY + testNativeColumnType(STRING(StringType("UTF8_LCASE"))) + testNativeColumnType(STRING(StringType("UNICODE"))) + testNativeColumnType(STRING(StringType("UNICODE_CI"))) testColumnType(NULL) testColumnType(BINARY) @@ -104,11 +115,18 @@ class ColumnTypeSuite extends SparkFunSuite { testColumnType(CALENDAR_INTERVAL) def testNativeColumnType[T <: PhysicalDataType](columnType: NativeColumnType[T]): Unit = { - testColumnType[T#InternalType](columnType) + val typeName = columnType match { + case s: STRING => + val collation = CollationFactory.fetchCollation(s.collationId).collationName + Some(if (collation == "UTF8_BINARY") "STRING" else s"STRING($collation)") + case _ => None + } + testColumnType[T#InternalType](columnType, typeName) } - def testColumnType[JvmType](columnType: ColumnType[JvmType]): Unit = { - + def testColumnType[JvmType]( + columnType: ColumnType[JvmType], + typeName: Option[String] = None): Unit = { val proj = UnsafeProjection.create( Array[DataType](ColumnarDataTypeUtils.toLogicalDataType(columnType.dataType))) val converter = CatalystTypeConverters.createToScalaConverter( @@ -116,8 +134,9 @@ class ColumnTypeSuite extends SparkFunSuite { val seq = (0 until 4).map(_ => proj(makeRandomRow(columnType)).copy()) val totalSize = seq.map(_.getSizeInBytes).sum val bufferSize = Math.max(DEFAULT_BUFFER_SIZE, totalSize) + val testName = typeName.getOrElse(columnType.toString) - test(s"$columnType append/extract") { + test(s"$testName append/extract") { val buffer = ByteBuffer.allocate(bufferSize).order(ByteOrder.nativeOrder()) seq.foreach(r => columnType.append(columnType.getField(r, 0), buffer)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala index e7b509c087b79..d08c34056f565 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnarTestUtils.scala @@ -50,7 +50,7 @@ object ColumnarTestUtils { case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() - case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) + case _: STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case 
CALENDAR_INTERVAL => new CalendarInterval(Random.nextInt(), Random.nextInt(), Random.nextLong()) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala index 169d9356c00cc..ee622793ee0a3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnAccessorSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection} import org.apache.spark.sql.catalyst.types.{PhysicalArrayType, PhysicalMapType, PhysicalStructType} +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.types._ class TestNullableColumnAccessor[JvmType]( @@ -41,21 +42,33 @@ object TestNullableColumnAccessor { class NullableColumnAccessorSuite extends SparkFunSuite { import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._ - Seq( + val stringTypes = Seq( + STRING(StringType), // UTF8_BINARY + STRING(StringType("UTF8_LCASE")), + STRING(StringType("UNICODE")), + STRING(StringType("UNICODE_CI"))) + val otherTypes = Seq( NULL, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, - STRING, BINARY, COMPACT_DECIMAL(15, 10), LARGE_DECIMAL(20, 10), + BINARY, COMPACT_DECIMAL(15, 10), LARGE_DECIMAL(20, 10), STRUCT(PhysicalStructType(Array(StructField("a", StringType)))), ARRAY(PhysicalArrayType(IntegerType, true)), MAP(PhysicalMapType(IntegerType, StringType, true)), CALENDAR_INTERVAL) - .foreach { + + stringTypes.foreach(s => { + val collation = CollationFactory.fetchCollation(s.collationId).collationName + val typeName = if (collation == "UTF8_BINARY") "STRING" else s"STRING($collation)" + testNullableColumnAccessor(s, Some(typeName)) + }) + otherTypes.foreach { testNullableColumnAccessor(_) } def testNullableColumnAccessor[JvmType]( - columnType: ColumnType[JvmType]): Unit = { + columnType: ColumnType[JvmType], + testTypeName: Option[String] = None): Unit = { - val typeName = columnType.getClass.getSimpleName.stripSuffix("$") + val typeName = testTypeName.getOrElse(columnType.getClass.getSimpleName.stripSuffix("$")) val nullRow = makeNullRow(1) test(s"Nullable $typeName column accessor: empty column") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala index 22f557e49ded5..609212c95e987 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/NullableColumnBuilderSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProjection} import org.apache.spark.sql.catalyst.types.{PhysicalArrayType, PhysicalMapType, PhysicalStructType} +import org.apache.spark.sql.catalyst.util.CollationFactory import org.apache.spark.sql.types._ class TestNullableColumnBuilder[JvmType](columnType: ColumnType[JvmType]) @@ -39,21 +40,33 @@ object TestNullableColumnBuilder { class NullableColumnBuilderSuite extends SparkFunSuite { import 
org.apache.spark.sql.execution.columnar.ColumnarTestUtils._ - Seq( + val stringTypes = Seq( + STRING(StringType), // UTF8_BINARY + STRING(StringType("UTF8_LCASE")), + STRING(StringType("UNICODE")), + STRING(StringType("UNICODE_CI"))) + val otherTypes = Seq( BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, - STRING, BINARY, COMPACT_DECIMAL(15, 10), LARGE_DECIMAL(20, 10), + BINARY, COMPACT_DECIMAL(15, 10), LARGE_DECIMAL(20, 10), STRUCT(PhysicalStructType(Array(StructField("a", StringType)))), ARRAY(PhysicalArrayType(IntegerType, true)), MAP(PhysicalMapType(IntegerType, StringType, true)), CALENDAR_INTERVAL) - .foreach { + + stringTypes.foreach(s => { + val collation = CollationFactory.fetchCollation(s.collationId).collationName + val typeName = if (collation == "UTF8_BINARY") "STRING" else s"STRING($collation)" + testNullableColumnBuilder(s, Some(typeName)) + }) + otherTypes.foreach { testNullableColumnBuilder(_) } def testNullableColumnBuilder[JvmType]( - columnType: ColumnType[JvmType]): Unit = { + columnType: ColumnType[JvmType], + testTypeName: Option[String] = None): Unit = { - val typeName = columnType.getClass.getSimpleName.stripSuffix("$") + val typeName = testTypeName.getOrElse(columnType.getClass.getSimpleName.stripSuffix("$")) val dataType = columnType.dataType val proj = UnsafeProjection.create(Array[DataType]( ColumnarDataTypeUtils.toLogicalDataType(dataType))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala index 2da0adf439dae..05ae575305299 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/CompressionSchemeBenchmark.scala @@ -27,6 +27,7 @@ import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.types.PhysicalDataType import org.apache.spark.sql.execution.columnar.{BOOLEAN, INT, LONG, NativeColumnType, SHORT, STRING} +import org.apache.spark.sql.types.StringType import org.apache.spark.util.Utils._ /** @@ -231,8 +232,8 @@ object CompressionSchemeBenchmark extends BenchmarkBase with AllCompressionSchem } testData.rewind() - runEncodeBenchmark("STRING Encode", iters, count, STRING, testData) - runDecodeBenchmark("STRING Decode", iters, count, STRING, testData) + runEncodeBenchmark("STRING Encode", iters, count, STRING(StringType), testData) + runDecodeBenchmark("STRING Decode", iters, count, STRING(StringType), testData) } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala index 10d5e8a0eb9a3..2b2bc7e761368 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/DictionaryEncodingSuite.scala @@ -25,19 +25,27 @@ import org.apache.spark.sql.catalyst.types.PhysicalDataType import org.apache.spark.sql.execution.columnar._ import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._ import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector +import 
org.apache.spark.sql.types.StringType class DictionaryEncodingSuite extends SparkFunSuite { val nullValue = -1 testDictionaryEncoding(new IntColumnStats, INT) testDictionaryEncoding(new LongColumnStats, LONG) - testDictionaryEncoding(new StringColumnStats, STRING, false) + Seq( + "UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI" + ).foreach(collation => { + val dt = StringType(collation) + val typeName = if (collation == "UTF8_BINARY") "STRING" else s"STRING($collation)" + testDictionaryEncoding(new StringColumnStats(dt), STRING(dt), false, Some(typeName)) + }) def testDictionaryEncoding[T <: PhysicalDataType]( columnStats: ColumnStats, columnType: NativeColumnType[T], - testDecompress: Boolean = true): Unit = { + testDecompress: Boolean = true, + testTypeName: Option[String] = None): Unit = { - val typeName = columnType.getClass.getSimpleName.stripSuffix("$") + val typeName = testTypeName.getOrElse(columnType.getClass.getSimpleName.stripSuffix("$")) def buildDictionary(buffer: ByteBuffer) = { (0 until buffer.getInt()).map(columnType.extract(buffer) -> _.toShort).toMap diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala index 00f242a6b9c4b..9b0067fd29832 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/compression/RunLengthEncodingSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.types.PhysicalDataType import org.apache.spark.sql.execution.columnar._ import org.apache.spark.sql.execution.columnar.ColumnarTestUtils._ import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector +import org.apache.spark.sql.types.StringType class RunLengthEncodingSuite extends SparkFunSuite { val nullValue = -1 @@ -31,14 +32,21 @@ class RunLengthEncodingSuite extends SparkFunSuite { testRunLengthEncoding(new ShortColumnStats, SHORT) testRunLengthEncoding(new IntColumnStats, INT) testRunLengthEncoding(new LongColumnStats, LONG) - testRunLengthEncoding(new StringColumnStats, STRING, false) + Seq( + "UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI" + ).foreach(collation => { + val dt = StringType(collation) + val typeName = if (collation == "UTF8_BINARY") "STRING" else s"STRING($collation)" + testRunLengthEncoding(new StringColumnStats(dt), STRING(dt), false, Some(typeName)) + }) def testRunLengthEncoding[T <: PhysicalDataType]( columnStats: ColumnStats, columnType: NativeColumnType[T], - testDecompress: Boolean = true): Unit = { + testDecompress: Boolean = true, + testTypeName: Option[String] = None): Unit = { - val typeName = columnType.getClass.getSimpleName.stripSuffix("$") + val typeName = testTypeName.getOrElse(columnType.getClass.getSimpleName.stripSuffix("$")) def skeleton(uniqueValueCount: Int, inputRuns: Seq[(Int, Int)]): Unit = { // ------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceSetPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceSetPropertiesSuiteBase.scala index c28c7b9db0436..7f5b3de4865c9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceSetPropertiesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceSetPropertiesSuiteBase.scala @@ -83,10 +83,19 @@ trait 
AlterNamespaceSetPropertiesSuiteBase extends QueryTest with DDLCommandTest CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => withNamespace(ns) { sql(s"CREATE NAMESPACE $ns") - val exception = intercept[ParseException] { - sql(s"ALTER NAMESPACE $ns SET PROPERTIES ('$key'='dummyVal')") - } - assert(exception.getMessage.contains(s"$key is a reserved namespace property")) + val sqlText = s"ALTER NAMESPACE $ns SET PROPERTIES ('$key'='dummyVal')" + checkErrorMatchPVals( + exception = intercept[ParseException] { + sql(sqlText) + }, + errorClass = "UNSUPPORTED_FEATURE.SET_NAMESPACE_PROPERTY", + parameters = Map("property" -> key, "msg" -> ".*"), + sqlState = None, + context = ExpectedContext( + fragment = sqlText, + start = 0, + stop = 46 + ns.length + key.length) + ) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceUnsetPropertiesParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceUnsetPropertiesParserSuite.scala new file mode 100644 index 0000000000000..72d307c816664 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceUnsetPropertiesParserSuite.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
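For anyone tracing the `ExpectedContext` values in the reserved-property checks above: the `stop` field is simply the zero-based index of the statement's last character, which is why it is expressed as a fixed offset plus the namespace and key lengths. A minimal standalone sketch of that arithmetic, with hypothetical `ns` and `key` values:

object ExpectedContextOffsetSketch extends App {
  val ns = "spark_catalog.ns1" // hypothetical namespace name
  val key = "location"         // hypothetical reserved property key
  val sqlText = s"ALTER NAMESPACE $ns SET PROPERTIES ('$key'='dummyVal')"
  // The fixed text contributes 47 characters, so the last character sits at index
  // 46 + ns.length + key.length -- exactly the stop offset asserted above.
  assert(sqlText.length - 1 == 46 + ns.length + key.length)
}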
+ */ +package org.apache.spark.sql.execution.command + +import org.apache.spark.SparkThrowable +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedNamespace} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.execution.SparkSqlParser +import org.apache.spark.sql.test.SharedSparkSession + +class AlterNamespaceUnsetPropertiesParserSuite extends AnalysisTest with SharedSparkSession { + + private lazy val parser = new SparkSqlParser() + + private def parseException(sqlText: String): SparkThrowable = { + intercept[ParseException](sql(sqlText).collect()) + } + + private def parsePlan(sqlText: String): LogicalPlan = { + parser.parsePlan(sqlText) + } + + test("unset namespace properties") { + Seq("DATABASE", "SCHEMA", "NAMESPACE").foreach { nsToken => + Seq("PROPERTIES", "DBPROPERTIES").foreach { propToken => + comparePlans( + parsePlan(s"ALTER $nsToken a.b.c UNSET $propToken ('a', 'b', 'c')"), + UnsetNamespacePropertiesCommand( + UnresolvedNamespace(Seq("a", "b", "c")), Seq("a", "b", "c"))) + + comparePlans( + parsePlan(s"ALTER $nsToken a.b.c UNSET $propToken ('a')"), + UnsetNamespacePropertiesCommand(UnresolvedNamespace(Seq("a", "b", "c")), Seq("a"))) + } + } + } + + test("property values must not be set") { + val sql = "ALTER NAMESPACE my_db UNSET PROPERTIES('key_without_value', 'key_with_value'='x')" + checkError( + exception = parseException(sql), + errorClass = "_LEGACY_ERROR_TEMP_0035", + parameters = Map("message" -> "Values should not be specified for key(s): [key_with_value]"), + context = ExpectedContext( + fragment = sql, + start = 0, + stop = 80)) + } + + test("not support clause - IF EXISTS") { + Seq("DATABASE", "SCHEMA", "NAMESPACE").foreach { nsToken => + Seq("PROPERTIES", "DBPROPERTIES").foreach { propToken => + val sql = s"ALTER $nsToken a.b.c UNSET $propToken IF EXISTS ('a', 'b', 'c')" + checkError( + exception = parseException(sql), + errorClass = "PARSE_SYNTAX_ERROR", + parameters = Map("error" -> "'IF'", "hint" -> ": missing '('") + ) + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceUnsetPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceUnsetPropertiesSuiteBase.scala new file mode 100644 index 0000000000000..1d43cc5938487 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterNamespaceUnsetPropertiesSuiteBase.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces} +import org.apache.spark.sql.internal.SQLConf + +/** + * This base suite contains unified tests for the `ALTER NAMESPACE ... UNSET PROPERTIES` command + * that check V1 and V2 table catalogs. The tests that cannot run for all supported catalogs are + * located in more specific test suites: + * + * - V2 table catalog tests: + * `org.apache.spark.sql.execution.command.v2.AlterNamespaceUnsetPropertiesSuite` + * - V1 table catalog tests: + * `org.apache.spark.sql.execution.command.v1.AlterNamespaceUnsetPropertiesSuiteBase` + * - V1 In-Memory catalog: + * `org.apache.spark.sql.execution.command.v1.AlterNamespaceUnsetPropertiesSuite` + * - V1 Hive External catalog: + * `org.apache.spark.sql.hive.execution.command.AlterNamespaceUnsetPropertiesSuite` + */ +trait AlterNamespaceUnsetPropertiesSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "ALTER NAMESPACE ... UNSET PROPERTIES" + + protected def namespace: String + + protected def getProperties(namespace: String): String = { + val propsRow = sql(s"DESCRIBE NAMESPACE EXTENDED $namespace") + .toDF("key", "value") + .where("key like 'Properties%'") + .collect() + assert(propsRow.length == 1) + propsRow(0).getString(1) + } + + test("namespace does not exist") { + val ns = "not_exist" + val e = intercept[AnalysisException] { + sql(s"ALTER NAMESPACE $catalog.$ns UNSET PROPERTIES ('d')") + } + checkError(e, + errorClass = "SCHEMA_NOT_FOUND", + parameters = Map("schemaName" -> s"`$ns`")) + } + + test("basic test") { + val ns = s"$catalog.$namespace" + withNamespace(ns) { + sql(s"CREATE NAMESPACE $ns") + assert(getProperties(ns) === "") + sql(s"ALTER NAMESPACE $ns SET PROPERTIES ('a'='a', 'b'='b', 'c'='c')") + assert(getProperties(ns) === "((a,a), (b,b), (c,c))") + sql(s"ALTER NAMESPACE $ns UNSET PROPERTIES ('b')") + assert(getProperties(ns) === "((a,a), (c,c))") + + // unset non-existent properties + // it will be successful, ignoring non-existent properties + sql(s"ALTER NAMESPACE $ns UNSET PROPERTIES ('b')") + assert(getProperties(ns) === "((a,a), (c,c))") + } + } + + test("test reserved properties") { + import SupportsNamespaces._ + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + val ns = s"$catalog.$namespace" + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "false")) { + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + withNamespace(ns) { + sql(s"CREATE NAMESPACE $ns") + val sqlText = s"ALTER NAMESPACE $ns UNSET PROPERTIES ('$key')" + checkErrorMatchPVals( + exception = intercept[ParseException] { + sql(sqlText) + }, + errorClass = "UNSUPPORTED_FEATURE.SET_NAMESPACE_PROPERTY", + parameters = Map("property" -> key, "msg" -> ".*"), + sqlState = None, + context = ExpectedContext( + fragment = sqlText, + start = 0, + stop = 37 + ns.length + key.length) + ) + } + } + } + withSQLConf((SQLConf.LEGACY_PROPERTY_NON_RESERVED.key, "true")) { + CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.filterNot(_ == PROP_COMMENT).foreach { key => + withNamespace(ns) { + // Set the location explicitly because v2 catalog may not set the default location. + // Without this, `meta.get(key)` below may return null. 
+ sql(s"CREATE NAMESPACE $ns LOCATION 'tmp/prop_test'") + assert(getProperties(ns) === "") + sql(s"ALTER NAMESPACE $ns UNSET PROPERTIES ('$key')") + assert(getProperties(ns) === "", s"$key is a reserved namespace property and ignored") + val meta = spark.sessionState.catalogManager.catalog(catalog) + .asNamespaceCatalog.loadNamespaceMetadata(namespace.split('.')) + assert(!meta.get(key).contains("foo"), + "reserved properties should not have side effects") + } + } + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala index 02c9d318bb46f..ef9ae47847405 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableDropPartitionSuiteBase.scala @@ -236,7 +236,7 @@ trait AlterTableDropPartitionSuiteBase extends QueryTest with DDLCommandTestUtil checkCachedRelation("v1", Seq(Row(0, 0), Row(3, 3))) } - val v2 = s"${spark.sharedState.globalTempViewManager.database}.v2" + val v2 = s"${spark.sharedState.globalTempDB}.v2" withGlobalTempView("v2") { sql(s"CREATE GLOBAL TEMP VIEW v2 AS SELECT * FROM $t") cacheRelation(v2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala index 0aaeb8d2160c3..d91085956e330 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenamePartitionSuiteBase.scala @@ -222,7 +222,7 @@ trait AlterTableRenamePartitionSuiteBase extends QueryTest with DDLCommandTestUt checkCachedRelation("v1", Seq(Row(0, 2), Row(1, 3))) } - val v2 = s"${spark.sharedState.globalTempViewManager.database}.v2" + val v2 = s"${spark.sharedState.globalTempDB}.v2" withGlobalTempView("v2") { sql(s"CREATE GLOBAL TEMP VIEW v2 AS SELECT * FROM $t") cacheRelation(v2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesParserSuite.scala new file mode 100644 index 0000000000000..78abd1a8b7fd3 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesParserSuite.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.SparkThrowable +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, UnresolvedTable} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser.parsePlan +import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.SetTableProperties +import org.apache.spark.sql.test.SharedSparkSession + +class AlterTableSetTblPropertiesParserSuite extends AnalysisTest with SharedSparkSession { + + private def parseException(sqlText: String): SparkThrowable = { + intercept[ParseException](sql(sqlText).collect()) + } + + // ALTER TABLE table_name SET TBLPROPERTIES ('comment' = new_comment); + test("alter table: alter table properties") { + val sql1_table = "ALTER TABLE table_name SET TBLPROPERTIES ('test' = 'test', " + + "'comment' = 'new_comment')" + comparePlans( + parsePlan(sql1_table), + SetTableProperties( + UnresolvedTable(Seq("table_name"), "ALTER TABLE ... SET TBLPROPERTIES", true), + Map("test" -> "test", "comment" -> "new_comment"))) + } + + test("alter table - property values must be set") { + val sql = "ALTER TABLE my_tab SET TBLPROPERTIES('key_without_value', 'key_with_value'='x')" + checkError( + exception = parseException(sql), + errorClass = "_LEGACY_ERROR_TEMP_0035", + parameters = Map("message" -> "Values must be specified for key(s): [key_without_value]"), + context = ExpectedContext( + fragment = sql, + start = 0, + stop = 78)) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesSuiteBase.scala new file mode 100644 index 0000000000000..64b70d709b93f --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableSetTblPropertiesSuiteBase.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command + +import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.catalyst.TableIdentifier + +/** + * This base suite contains unified tests for the `ALTER TABLE .. SET TBLPROPERTIES` + * command that check V1 and V2 table catalogs. 
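As a companion to the `AlterTableSetTblPropertiesParserSuite` above: a minimal sketch of the parse-and-inspect round trip those tests rely on, using only `CatalystSqlParser` (the table name `t` is hypothetical, and no catalog or session is needed):

import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.catalyst.plans.logical.SetTableProperties

CatalystSqlParser.parsePlan("ALTER TABLE t SET TBLPROPERTIES ('k' = 'v')") match {
  // The parser produces an unresolved SetTableProperties node carrying the property map.
  case SetTableProperties(_, props) => assert(props == Map("k" -> "v"))
  case other => sys.error(s"unexpected plan: $other")
}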
The tests that cannot run for all supported + * catalogs are located in more specific test suites: + * + * - V2 table catalog tests: + * `org.apache.spark.sql.execution.command.v2.AlterTableSetTblPropertiesSuite` + * - V1 table catalog tests: + * `org.apache.spark.sql.execution.command.v1.AlterTableSetTblPropertiesSuiteBase` + * - V1 In-Memory catalog: + * `org.apache.spark.sql.execution.command.v1.AlterTableSetTblPropertiesSuite` + * - V1 Hive External catalog: + * `org.apache.spark.sql.hive.execution.command.AlterTableSetTblPropertiesSuite` + */ +trait AlterTableSetTblPropertiesSuiteBase extends QueryTest with DDLCommandTestUtils { + override val command = "ALTER TABLE .. SET TBLPROPERTIES" + + def checkTblProps(tableIdent: TableIdentifier, expectedTblProps: Map[String, String]): Unit + + test("alter table set tblproperties") { + withNamespaceAndTable("ns", "tbl") { t => + sql(s"CREATE TABLE $t (col1 int, col2 string, a int, b int) $defaultUsing") + val tableIdent = TableIdentifier("tbl", Some("ns"), Some(catalog)) + checkTblProps(tableIdent, Map.empty[String, String]) + + sql(s"ALTER TABLE $t SET TBLPROPERTIES ('k1' = 'v1', 'k2' = 'v2', 'k3' = 'v3')") + checkTblProps(tableIdent, Map("k1" -> "v1", "k2" -> "v2", "k3" -> "v3")) + + sql(s"USE $catalog.ns") + sql(s"ALTER TABLE tbl SET TBLPROPERTIES ('k1' = 'v1', 'k2' = 'v2', 'k3' = 'v3')") + checkTblProps(tableIdent, Map("k1" -> "v1", "k2" -> "v2", "k3" -> "v3")) + + sql(s"ALTER TABLE $t SET TBLPROPERTIES ('k1' = 'v1', 'k2' = 'v8')") + checkTblProps(tableIdent, Map("k1" -> "v1", "k2" -> "v8", "k3" -> "v3")) + + // table to alter does not exist + checkError( + exception = intercept[AnalysisException] { + sql("ALTER TABLE does_not_exist SET TBLPROPERTIES ('winner' = 'loser')") + }, + errorClass = "TABLE_OR_VIEW_NOT_FOUND", + parameters = Map("relationName" -> "`does_not_exist`"), + context = ExpectedContext(fragment = "does_not_exist", start = 12, stop = 25) + ) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index e4df8c64e59ef..505f0b4bdea62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -18,7 +18,8 @@ package org.apache.spark.sql.execution.command import org.apache.spark.SparkThrowable -import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, UnresolvedAttribute, UnresolvedFunctionName, UnresolvedIdentifier} +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, GlobalTempView, LocalTempView, SchemaCompensation, UnresolvedAttribute, UnresolvedFunctionName, UnresolvedIdentifier} import org.apache.spark.sql.catalyst.catalog.{ArchiveResource, FileResource, FunctionResource, JarResource} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans @@ -36,6 +37,9 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { super.parseException(parser.parsePlan)(sqlText) } + private def intercept(sqlCommand: String, messages: String*): Unit = + interceptParseException(parser.parsePlan)(sqlCommand, messages: _*)() + private def compareTransformQuery(sql: String, expected: LogicalPlan): Unit = { val plan = parser.parsePlan(sql).asInstanceOf[ScriptTransformation].copy(ioschema = null) comparePlans(plan, expected, 
checkAnalysis = false) @@ -103,18 +107,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { stop = 98)) } - test("alter table - property values must be set") { - val sql = "ALTER TABLE my_tab SET TBLPROPERTIES('key_without_value', 'key_with_value'='x')" - checkError( - exception = parseException(sql), - errorClass = "_LEGACY_ERROR_TEMP_0035", - parameters = Map("message" -> "Values must be specified for key(s): [key_without_value]"), - context = ExpectedContext( - fragment = sql, - start = 0, - stop = 78)) - } - test("alter table unset properties - property values must NOT be set") { val sql = "ALTER TABLE my_tab UNSET TBLPROPERTIES('key_without_value', 'key_with_value'='x')" checkError( @@ -522,7 +514,8 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { Some("SELECT * FROM tab1"), parser.parsePlan("SELECT * FROM tab1"), false, - false) + false, + SchemaCompensation) comparePlans(parsed1, expected1) val v2 = "CREATE TEMPORARY VIEW a AS SELECT * FROM tab1" @@ -569,7 +562,8 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { Some("SELECT * FROM tab1"), parser.parsePlan("SELECT * FROM tab1"), false, - true) + true, + SchemaCompensation) comparePlans(parsed1, expected1) val v2 = @@ -839,4 +833,44 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { parser.parsePlan("SHOW CATALOGS LIKE 'defau*'"), ShowCatalogsCommand(Some("defau*"))) } + + test("Create SQL functions") { + comparePlans( + parser.parsePlan("CREATE TEMP FUNCTION foo() RETURNS INT RETURN 1"), + CreateSQLFunctionCommand( + FunctionIdentifier("foo"), + inputParamText = None, + returnTypeText = "INT", + exprText = Some("1"), + queryText = None, + comment = None, + isDeterministic = None, + containsSQL = None, + isTableFunc = false, + isTemp = true, + ignoreIfExists = false, + replace = false)) + intercept("CREATE FUNCTION foo() RETURNS INT RETURN 1", + "Operation not allowed: creating persistent SQL functions is not supported") + } + + test("create SQL functions with unsupported routine characteristics") { + intercept("CREATE FUNCTION foo() RETURNS INT LANGUAGE blah RETURN 1", + "Operation not allowed: Unsupported language for user defined functions: blah") + + intercept("CREATE FUNCTION foo() RETURNS INT SPECIFIC foo1 RETURN 1", + "Operation not allowed: SQL function with SPECIFIC name is not supported") + + intercept("CREATE FUNCTION foo() RETURNS INT NO SQL RETURN 1", + "Operation not allowed: SQL function with NO SQL is not supported") + + intercept("CREATE FUNCTION foo() RETURNS INT NO SQL CONTAINS SQL RETURN 1", + "Found duplicate clauses: SQL DATA ACCESS") + + intercept("CREATE FUNCTION foo() RETURNS INT RETURNS NULL ON NULL INPUT RETURN 1", + "Operation not allowed: SQL function with RETURNS NULL ON NULL INPUT is not supported") + + intercept("CREATE FUNCTION foo() RETURNS INT SQL SECURITY INVOKER RETURN 1", + "Operation not allowed: SQL function with SQL SECURITY INVOKER is not supported") + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index e8af606d797e3..553b68bec52fe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -327,10 +327,6 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { protected val reversedProperties = Seq(PROP_OWNER) - test("alter table: set properties (datasource 
table)") { - testSetProperties(isDatasourceTable = true) - } - test("alter table: unset properties (datasource table)") { testUnsetProperties(isDatasourceTable = true) } @@ -1117,40 +1113,6 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { ) } - protected def testSetProperties(isDatasourceTable: Boolean): Unit = { - if (!isUsingHiveMetastore) { - assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") - } - val catalog = spark.sessionState.catalog - val tableIdent = TableIdentifier("tab1", Some("dbx")) - createDatabase(catalog, "dbx") - createTable(catalog, tableIdent, isDatasourceTable) - def getProps: Map[String, String] = { - if (isUsingHiveMetastore) { - normalizeCatalogTable(catalog.getTableMetadata(tableIdent)).properties - } else { - catalog.getTableMetadata(tableIdent).properties - } - } - assert(getProps.isEmpty) - // set table properties - sql("ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('andrew' = 'or14', 'kor' = 'bel')") - assert(getProps == Map("andrew" -> "or14", "kor" -> "bel")) - // set table properties without explicitly specifying database - catalog.setCurrentDatabase("dbx") - sql("ALTER TABLE tab1 SET TBLPROPERTIES ('kor' = 'belle', 'kar' = 'bol')") - assert(getProps == Map("andrew" -> "or14", "kor" -> "belle", "kar" -> "bol")) - // table to alter does not exist - checkError( - exception = intercept[AnalysisException] { - sql("ALTER TABLE does_not_exist SET TBLPROPERTIES ('winner' = 'loser')") - }, - errorClass = "TABLE_OR_VIEW_NOT_FOUND", - parameters = Map("relationName" -> "`does_not_exist`"), - context = ExpectedContext(fragment = "does_not_exist", start = 12, stop = 25) - ) - } - protected def testUnsetProperties(isDatasourceTable: Boolean): Unit = { if (!isUsingHiveMetastore) { assert(isDatasourceTable, "InMemoryCatalog only supports data source tables") @@ -2260,7 +2222,7 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { ) withGlobalTempView("src") { - val globalTempDB = spark.sharedState.globalTempViewManager.database + val globalTempDB = spark.sharedState.globalTempDB sql("CREATE GLOBAL TEMP VIEW src AS SELECT 1 AS a, '2' AS b") sql(s"CREATE TABLE t4 LIKE $globalTempDB.src USING parquet") val table = catalog.getTableMetadata(TableIdentifier("t4")) @@ -2437,6 +2399,100 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { ) } } + + test("Change column collation") { + withTable("t1", "t2", "t3", "t4") { + // Plain `StringType`. + sql("CREATE TABLE t1(col STRING) USING parquet") + sql("INSERT INTO t1 VALUES ('a')") + checkAnswer(sql("SELECT COLLATION(col) FROM t1"), Row("UTF8_BINARY")) + sql("ALTER TABLE t1 ALTER COLUMN col TYPE STRING COLLATE UTF8_LCASE") + checkAnswer(sql("SELECT COLLATION(col) FROM t1"), Row("UTF8_LCASE")) + + // Invalid "ALTER COLUMN" to Integer. + val alterInt = "ALTER TABLE t1 ALTER COLUMN col TYPE INTEGER" + checkError( + exception = intercept[AnalysisException] { + sql(alterInt) + }, + errorClass = "NOT_SUPPORTED_CHANGE_COLUMN", + parameters = Map( + "originType" -> "\"STRING COLLATE UTF8_LCASE\"", + "originName" -> "`col`", + "table" -> "`spark_catalog`.`default`.`t1`", + "newType" -> "\"INT\"", + "newName" -> "`col`" + ), + context = ExpectedContext(fragment = alterInt, start = 0, stop = alterInt.length - 1) + ) + + // `ArrayType` with collation. 
+ sql("CREATE TABLE t2(col ARRAY) USING parquet") + sql("INSERT INTO t2 VALUES (ARRAY('a'))") + checkAnswer(sql("SELECT COLLATION(col[0]) FROM t2"), Row("UTF8_BINARY")) + sql("ALTER TABLE t2 ALTER COLUMN col TYPE ARRAY") + checkAnswer(sql("SELECT COLLATION(col[0]) FROM t2"), Row("UTF8_LCASE")) + + // `MapType` with collation. + sql("CREATE TABLE t3(col MAP) USING parquet") + sql("INSERT INTO t3 VALUES (MAP('k', 'v'))") + checkAnswer(sql("SELECT COLLATION(col['k']) FROM t3"), Row("UTF8_BINARY")) + sql( + """ + |ALTER TABLE t3 ALTER COLUMN col TYPE + |MAP""".stripMargin) + checkAnswer(sql("SELECT COLLATION(col['k']) FROM t3"), Row("UTF8_LCASE")) + + // Invalid change of map key collation. + val alterMap = + "ALTER TABLE t3 ALTER COLUMN col TYPE " + + "MAP" + checkError( + exception = intercept[AnalysisException] { + sql(alterMap) + }, + errorClass = "NOT_SUPPORTED_CHANGE_COLUMN", + parameters = Map( + "originType" -> "\"MAP\"", + "originName" -> "`col`", + "table" -> "`spark_catalog`.`default`.`t3`", + "newType" -> "\"MAP\"", + "newName" -> "`col`" + ), + context = ExpectedContext(fragment = alterMap, start = 0, stop = alterMap.length - 1) + ) + + // `StructType` with collation. + sql("CREATE TABLE t4(col STRUCT) USING parquet") + sql("INSERT INTO t4 VALUES (NAMED_STRUCT('a', 'value'))") + checkAnswer(sql("SELECT COLLATION(col.a) FROM t4"), Row("UTF8_BINARY")) + sql("ALTER TABLE t4 ALTER COLUMN col TYPE STRUCT") + checkAnswer(sql("SELECT COLLATION(col.a) FROM t4"), Row("UTF8_LCASE")) + } + } + + test("Invalid collation change on partition and bucket columns") { + withTable("t1", "t2") { + sql("CREATE TABLE t1(col STRING, i INTEGER) USING parquet PARTITIONED BY (col)") + checkError( + exception = intercept[AnalysisException] { + sql("ALTER TABLE t1 ALTER COLUMN col TYPE STRING COLLATE UTF8_LCASE") + }, + errorClass = "CANNOT_ALTER_PARTITION_COLUMN", + sqlState = "428FR", + parameters = Map("tableName" -> "`spark_catalog`.`default`.`t1`", "columnName" -> "`col`") + ) + sql("CREATE TABLE t2(col STRING) USING parquet CLUSTERED BY (col) INTO 1 BUCKETS") + checkError( + exception = intercept[AnalysisException] { + sql("ALTER TABLE t2 ALTER COLUMN col TYPE STRING COLLATE UTF8_LCASE") + }, + errorClass = "CANNOT_ALTER_COLLATION_BUCKET_COLUMN", + sqlState = "428FR", + parameters = Map("tableName" -> "`spark_catalog`.`default`.`t2`", "columnName" -> "`col`") + ) + } + } } object FakeLocalFsFileSystem { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 28ea2c9bec1ab..f004ab7137f79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1586,7 +1586,7 @@ class PlanResolutionSuite extends AnalysisTest { // basic val sql1 = s""" - |MERGE INTO $target AS target + |MERGE WITH SCHEMA EVOLUTION INTO $target AS target |USING $source AS source |ON target.i = source.i |WHEN MATCHED AND (target.s='delete') THEN DELETE @@ -1608,12 +1608,14 @@ class PlanResolutionSuite extends AnalysisTest { insertAssigns)), Seq(DeleteAction(Some(EqualTo(ndl: AttributeReference, StringLiteral("delete")))), UpdateAction(Some(EqualTo(nul: AttributeReference, StringLiteral("update"))), - notMatchedBySourceUpdateAssigns))) => + notMatchedBySourceUpdateAssigns)), + withSchemaEvolution) => checkMergeConditionResolution(target, source, 
mergeCondition) checkMatchedClausesResolution(target, source, Some(dl), Some(ul), updateAssigns) checkNotMatchedClausesResolution(target, source, Some(il), insertAssigns) checkNotMatchedBySourceClausesResolution(target, Some(ndl), Some(nul), notMatchedBySourceUpdateAssigns) + assert(withSchemaEvolution === true) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) } @@ -1638,11 +1640,13 @@ class PlanResolutionSuite extends AnalysisTest { StringLiteral("update"))), updateAssigns)), Seq(InsertAction(Some(EqualTo(il: AttributeReference, StringLiteral("insert"))), insertAssigns)), - Seq()) => + Seq(), + withSchemaEvolution) => checkMergeConditionResolution(target, source, mergeCondition) checkMatchedClausesResolution(target, source, Some(dl), Some(ul), updateAssigns, starInUpdate = true) checkNotMatchedClausesResolution(target, source, Some(il), insertAssigns) + assert(withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) } @@ -1663,11 +1667,13 @@ class PlanResolutionSuite extends AnalysisTest { mergeCondition, Seq(UpdateAction(None, updateAssigns)), Seq(InsertAction(None, insertAssigns)), - Seq()) => + Seq(), + withSchemaEvolution) => checkMergeConditionResolution(target, source, mergeCondition) checkMatchedClausesResolution(target, source, None, None, updateAssigns, starInUpdate = true) checkNotMatchedClausesResolution(target, source, None, insertAssigns) + assert(withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) } @@ -1692,12 +1698,14 @@ class PlanResolutionSuite extends AnalysisTest { Seq(DeleteAction(Some(_)), UpdateAction(None, updateAssigns)), Seq(InsertAction(None, insertAssigns)), Seq(DeleteAction(Some(EqualTo(_: AttributeReference, StringLiteral("delete")))), - UpdateAction(None, notMatchedBySourceUpdateAssigns))) => + UpdateAction(None, notMatchedBySourceUpdateAssigns)), + withSchemaEvolution) => checkMergeConditionResolution(target, source, mergeCondition) checkMatchedClausesResolution(target, source, None, None, updateAssigns) checkNotMatchedClausesResolution(target, source, None, insertAssigns) checkNotMatchedBySourceClausesResolution(target, None, None, notMatchedBySourceUpdateAssigns) + assert(withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) } @@ -1727,12 +1735,14 @@ class PlanResolutionSuite extends AnalysisTest { insertAssigns)), Seq(DeleteAction(Some(EqualTo(ndl: AttributeReference, StringLiteral("delete")))), UpdateAction(Some(EqualTo(nul: AttributeReference, StringLiteral("update"))), - notMatchedBySourceUpdateAssigns))) => + notMatchedBySourceUpdateAssigns)), + withSchemaEvolution) => checkMergeConditionResolution(target, source, mergeCondition) checkMatchedClausesResolution(target, source, Some(dl), Some(ul), updateAssigns) checkNotMatchedClausesResolution(target, source, Some(il), insertAssigns) checkNotMatchedBySourceClausesResolution(target, Some(ndl), Some(nul), notMatchedBySourceUpdateAssigns) + assert(withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) } @@ -1764,12 +1774,14 @@ class PlanResolutionSuite extends AnalysisTest { insertAssigns)), Seq(DeleteAction(Some(EqualTo(ndl: AttributeReference, StringLiteral("delete")))), UpdateAction(Some(EqualTo(nul: AttributeReference, StringLiteral("update"))), - notMatchedBySourceUpdateAssigns))) => + notMatchedBySourceUpdateAssigns)), + withSchemaEvolution) => 
checkMergeConditionResolution(target, source, mergeCondition) checkMatchedClausesResolution(target, source, Some(dl), Some(ul), updateAssigns) checkNotMatchedClausesResolution(target, source, Some(il), insertAssigns) checkNotMatchedBySourceClausesResolution(target, Some(ndl), Some(nul), notMatchedBySourceUpdateAssigns) + assert(withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) } @@ -1837,6 +1849,7 @@ class PlanResolutionSuite extends AnalysisTest { case other => fail("unexpected second not matched by source action " + other) } + assert(m.withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) @@ -1905,6 +1918,7 @@ class PlanResolutionSuite extends AnalysisTest { Seq(Assignment(_: AttributeReference, Literal(42, IntegerType)))) => case other => fail("unexpected second not matched by source action " + other) } + assert(m.withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) @@ -2009,6 +2023,7 @@ class PlanResolutionSuite extends AnalysisTest { assert(m.matchedActions.length == 2) assert(m.notMatchedActions.length == 1) assert(m.notMatchedBySourceActions.length == 2) + assert(m.withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) @@ -2045,7 +2060,8 @@ class PlanResolutionSuite extends AnalysisTest { Seq(InsertAction( Some(EqualTo(il: AttributeReference, StringLiteral("a"))), insertAssigns)), - Seq(DeleteAction(Some(_)), UpdateAction(None, secondUpdateAssigns))) => + Seq(DeleteAction(Some(_)), UpdateAction(None, secondUpdateAssigns)), + withSchemaEvolution) => val ti = target.output.find(_.name == "i").get val ts = target.output.find(_.name == "s").get val si = source.output.find(_.name == "i").get @@ -2064,6 +2080,7 @@ class PlanResolutionSuite extends AnalysisTest { assert(secondUpdateAssigns.size == 1) // UPDATE key is resolved with target table only, so column `s` is not ambiguous. 
assert(secondUpdateAssigns.head.key.asInstanceOf[AttributeReference].sameRef(ts)) + assert(withSchemaEvolution === false) case p => fail("Expect MergeIntoTable, but got:\n" + p.treeString) } @@ -2150,7 +2167,8 @@ class PlanResolutionSuite extends AnalysisTest { _, Seq(), Seq(), - notMatchedBySourceActions) => + notMatchedBySourceActions, + withSchemaEvolution) => assert(notMatchedBySourceActions.length == 2) notMatchedBySourceActions(0) match { case DeleteAction(Some(EqualTo(dl: AttributeReference, StringLiteral("b")))) => @@ -2171,6 +2189,7 @@ class PlanResolutionSuite extends AnalysisTest { assert(us.sameRef(ti)) case other => fail("unexpected second not matched by source action " + other) } + assert(withSchemaEvolution === false) } val sql7 = @@ -2205,6 +2224,7 @@ class PlanResolutionSuite extends AnalysisTest { case u: MergeIntoTable => assert(u.targetTable.isInstanceOf[UnresolvedRelation]) assert(u.sourceTable.isInstanceOf[UnresolvedRelation]) + assert(u.withSchemaEvolution === false) case _ => fail("Expect MergeIntoTable, but got:\n" + parsed.treeString) } @@ -2283,6 +2303,7 @@ class PlanResolutionSuite extends AnalysisTest { assert(s2.functionName == "varcharTypeWriteSideCheck") case other => fail("Expect UpdateAction, but got: " + other) } + assert(m.withSchemaEvolution === false) case other => fail("Expect MergeIntoTable, but got:\n" + other.treeString) } } @@ -2304,12 +2325,14 @@ class PlanResolutionSuite extends AnalysisTest { _, Seq(DeleteAction(None)), Seq(InsertAction(None, insertAssigns)), - Nil) => + Nil, + withSchemaEvolution) => // There is only one assignment, the missing col is not filled with default value assert(insertAssigns.size == 1) // Special case: Spark does not resolve any columns in MERGE if table accepts any schema. assert(insertAssigns.head.key.asInstanceOf[UnresolvedAttribute].name == "target.i") assert(insertAssigns.head.value.asInstanceOf[UnresolvedAttribute].name == "DEFAULT") + assert(withSchemaEvolution === false) case l => fail("Expected unresolved MergeIntoTable, but got:\n" + l.treeString) } @@ -2824,11 +2847,9 @@ class PlanResolutionSuite extends AnalysisTest { assert(desc.viewText.isEmpty) assert(desc.viewQueryColumnNames.isEmpty) assert(desc.storage.locationUri.isEmpty) - assert(desc.storage.inputFormat == - Some("org.apache.hadoop.mapred.TextInputFormat")) - assert(desc.storage.outputFormat == - Some("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat")) - assert(desc.storage.serde == Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe")) + assert(desc.storage.inputFormat.isEmpty) + assert(desc.storage.outputFormat.isEmpty) + assert(desc.storage.serde.isEmpty) assert(desc.storage.properties.isEmpty) assert(desc.properties.isEmpty) assert(desc.comment.isEmpty) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala index c88217221ab76..d6b91bcf3eb8e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowTablesSuiteBase.scala @@ -351,6 +351,7 @@ trait ShowTablesSuiteBase extends QueryTest with DDLCommandTestUtils { |Created By: |Type: VIEW |View Text: SELECT id FROM $catalog.$namespace.$table + |View Schema Mode: BINDING |View Catalog and Namespace: spark_catalog.default |View Query Output Columns: [id] |Schema: root @@ -377,6 +378,7 @@ trait ShowTablesSuiteBase extends QueryTest 
with DDLCommandTestUtils { |Created By: |Type: VIEW |View Text: SELECT id FROM $catalog.$namespace.$table + |View Schema Mode: BINDING |View Catalog and Namespace: spark_catalog.default |View Query Output Columns: [id] |Schema: root @@ -394,6 +396,7 @@ trait ShowTablesSuiteBase extends QueryTest with DDLCommandTestUtils { |Created By: |Type: VIEW |View Text: SELECT id FROM $catalog.$namespace.$table + |View Schema Mode: BINDING |View Catalog and Namespace: spark_catalog.default |View Query Output Columns: [id] |Schema: root diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TruncateTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TruncateTableSuiteBase.scala index facbfa3dedf8c..982c568d09a79 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TruncateTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/TruncateTableSuiteBase.scala @@ -206,7 +206,7 @@ trait TruncateTableSuiteBase extends QueryTest with DDLCommandTestUtils { ) } - val v2 = s"${spark.sharedState.globalTempViewManager.database}.v2" + val v2 = s"${spark.sharedState.globalTempDB}.v2" withGlobalTempView("v2") { sql(s"CREATE GLOBAL TEMP VIEW v2 AS SELECT * FROM $t") checkError( @@ -245,7 +245,7 @@ trait TruncateTableSuiteBase extends QueryTest with DDLCommandTestUtils { checkCachedRelation("v1", Seq(Row(0, 0, 0))) } - val v2 = s"${spark.sharedState.globalTempViewManager.database}.v2" + val v2 = s"${spark.sharedState.globalTempDB}.v2" withGlobalTempView("v2") { sql(s"INSERT INTO $t PARTITION (width = 10, length = 10) SELECT 10") sql(s"CREATE GLOBAL TEMP VIEW v2 AS SELECT * FROM $t") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterNamespaceUnsetPropertiesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterNamespaceUnsetPropertiesSuiteBase.scala new file mode 100644 index 0000000000000..da7fdbba16b0b --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterNamespaceUnsetPropertiesSuiteBase.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.execution.command + +/** + * This base suite contains unified tests for the `ALTER NAMESPACE ... UNSET PROPERTIES` command + * that checks V1 table catalogs. 
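Referring back to the `PlanResolutionSuite` changes above: the new `withSchemaEvolution` field on `MergeIntoTable` is set by an optional clause between `MERGE` and `INTO`. A hedged syntax sketch with hypothetical table names (actually executing it additionally requires a catalog whose tables accept MERGE):

// Without the WITH SCHEMA EVOLUTION clause, the tests above assert the flag stays false.
spark.sql(
  """MERGE WITH SCHEMA EVOLUTION INTO target_tbl AS target
    |USING source_tbl AS source
    |ON target.i = source.i
    |WHEN MATCHED THEN UPDATE SET *
    |WHEN NOT MATCHED THEN INSERT *
    |""".stripMargin)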
The tests that cannot run for all V1 catalogs are located in more + * specific test suites: + * + * - V1 In-Memory catalog: + * `org.apache.spark.sql.execution.command.v1.AlterNamespaceUnsetPropertiesSuite` + * - V1 Hive External catalog: + * `org.apache.spark.sql.hive.execution.command.AlterNamespaceUnsetPropertiesSuite` + */ +trait AlterNamespaceUnsetPropertiesSuiteBase extends command.AlterNamespaceUnsetPropertiesSuiteBase + with command.TestsV1AndV2Commands { + override def namespace: String = "db" +} + +/** + * The class contains tests for the `ALTER NAMESPACE ... UNSET PROPERTIES` command to + * check V1 In-Memory table catalog. + */ +class AlterNamespaceUnsetPropertiesSuite extends AlterNamespaceUnsetPropertiesSuiteBase + with CommandSuiteBase { + override def commandVersion: String = + super[AlterNamespaceUnsetPropertiesSuiteBase].commandVersion +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala index 71f04159638aa..dac99c8ff7023 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableAddPartitionSuite.scala @@ -133,7 +133,7 @@ trait AlterTableAddPartitionSuiteBase extends command.AlterTableAddPartitionSuit checkCachedRelation("v1", Seq(Row(0, 0), Row(0, 1), Row(0, 2))) } - val v2 = s"${spark.sharedState.globalTempViewManager.database}.v2" + val v2 = s"${spark.sharedState.globalTempDB}.v2" withGlobalTempView("v2") { sql(s"CREATE GLOBAL TEMP VIEW v2 AS SELECT * FROM $t") cacheRelation(v2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableSetTblPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableSetTblPropertiesSuite.scala new file mode 100644 index 0000000000000..e74e5d4fc9ea5 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/AlterTableSetTblPropertiesSuite.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v1 + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.execution.command +import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION + +/** + * This base suite contains unified tests for the `ALTER TABLE .. SET TBLPROPERTIES` + * command that check V1 table catalogs. 
The tests that cannot run for all V1 catalogs + * are located in more specific test suites: + * + * - V1 In-Memory catalog: + * `org.apache.spark.sql.execution.command.v1.AlterTableSetTblPropertiesSuite` + * - V1 Hive External catalog: + * `org.apache.spark.sql.hive.execution.command.AlterTableSetTblPropertiesSuite` + */ +trait AlterTableSetTblPropertiesSuiteBase extends command.AlterTableSetTblPropertiesSuiteBase { + + private[sql] lazy val sessionCatalog = spark.sessionState.catalog + + private def isUsingHiveMetastore: Boolean = { + spark.sparkContext.conf.get(CATALOG_IMPLEMENTATION) == "hive" + } + + private def normalizeTblProps(props: Map[String, String]): Map[String, String] = { + props.filterNot(p => Seq("transient_lastDdlTime").contains(p._1)) + } + + private def getTableProperties(tableIdent: TableIdentifier): Map[String, String] = { + sessionCatalog.getTableMetadata(tableIdent).properties + } + + override def checkTblProps(tableIdent: TableIdentifier, + expectedTblProps: Map[String, String]): Unit = { + val actualTblProps = getTableProperties(tableIdent) + if (isUsingHiveMetastore) { + assert(normalizeTblProps(actualTblProps) == expectedTblProps) + } else { + assert(actualTblProps == expectedTblProps) + } + } +} + +class AlterTableSetTblPropertiesSuite + extends AlterTableSetTblPropertiesSuiteBase with CommandSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala index 4b4742910bd18..9be802b5f1fea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowTablesSuite.scala @@ -205,6 +205,7 @@ class ShowTablesSuite extends ShowTablesSuiteBase with CommandSuiteBase { |Type: VIEW |View Text: SELECT id FROM $catalog.$namespace.$table |View Original Text: SELECT id FROM $catalog.$namespace.$table + |View Schema Mode: COMPENSATION |View Catalog and Namespace: $catalog.$namespace |View Query Output Columns: [id] |Schema: root diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterNamespaceUnsetPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterNamespaceUnsetPropertiesSuite.scala new file mode 100644 index 0000000000000..352238eda2eab --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterNamespaceUnsetPropertiesSuite.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import org.apache.spark.sql.execution.command + +/** + * The class contains tests for the `ALTER NAMESPACE ... 
UNSET PROPERTIES` command to check V2 + * table catalogs. + */ +class AlterNamespaceUnsetPropertiesSuite extends command.AlterNamespaceUnsetPropertiesSuiteBase + with CommandSuiteBase { + + override def namespace: String = "ns1.ns2" +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala index b733666f0d84a..defa026c0e281 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableAddPartitionSuite.scala @@ -105,7 +105,7 @@ class AlterTableAddPartitionSuite checkCachedRelation("v1", Seq(Row(0, 0), Row(0, 1), Row(1, 2))) } - val v2 = s"${spark.sharedState.globalTempViewManager.database}.v2" + val v2 = s"${spark.sharedState.globalTempDB}.v2" withGlobalTempView(v2) { sql(s"CREATE GLOBAL TEMP VIEW v2 AS SELECT * FROM $t") cacheRelation(v2) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableSetTblPropertiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableSetTblPropertiesSuite.scala new file mode 100644 index 0000000000000..7d7b2ad8686ee --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/AlterTableSetTblPropertiesSuite.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.command.v2 + +import scala.jdk.CollectionConverters._ + +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.connector.catalog.{Identifier, Table} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.CatalogHelper +import org.apache.spark.sql.execution.command + +/** + * The class contains tests for the `ALTER TABLE .. SET TBLPROPERTIES` command to + * check V2 table catalogs. 
+ */ +class AlterTableSetTblPropertiesSuite + extends command.AlterTableSetTblPropertiesSuiteBase with CommandSuiteBase { + + private def normalizeTblProps(props: Map[String, String]): Map[String, String] = { + props.filterNot(p => Seq("provider", "owner").contains(p._1)) + } + + private def getTableMetadata(tableIndent: TableIdentifier): Table = { + val nameParts = tableIndent.nameParts + val v2Catalog = spark.sessionState.catalogManager.catalog(nameParts.head).asTableCatalog + val namespace = nameParts.drop(1).init.toArray + v2Catalog.loadTable(Identifier.of(namespace, nameParts.last)) + } + + override def checkTblProps(tableIdent: TableIdentifier, + expectedTblProps: Map[String, String]): Unit = { + val actualTblProps = getTableMetadata(tableIdent).properties.asScala.toMap + assert(normalizeTblProps(actualTblProps) === expectedTblProps) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala index 2b9ec97bace1e..9f0396ab60e32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategySuite.scala @@ -327,8 +327,10 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { test("SPARK-41636: selectFilters returns predicates in deterministic order") { - val predicates = Seq(EqualTo($"id", 1), EqualTo($"id", 2), - EqualTo($"id", 3), EqualTo($"id", 4), EqualTo($"id", 5), EqualTo($"id", 6)) + val idColAttribute = AttributeReference("id", IntegerType)() + val predicates = Seq(EqualTo(idColAttribute, 1), EqualTo(idColAttribute, 2), + EqualTo(idColAttribute, 3), EqualTo(idColAttribute, 4), EqualTo(idColAttribute, 5), + EqualTo(idColAttribute, 6)) val (unhandledPredicates, pushedFilters, handledFilters) = DataSourceStrategy.selectFilters(FakeRelation(), predicates) @@ -338,4 +340,21 @@ class DataSourceStrategySuite extends PlanTest with SharedSparkSession { }) assert(handledFilters.isEmpty) } + + test("SPARK-48431: Push filters on columns with UTF8_BINARY collation") { + val colAttr = $"col".string("UTF8_BINARY") + testTranslateFilter(EqualTo(colAttr, Literal("value")), Some(sources.EqualTo("col", "value"))) + testTranslateFilter(Not(EqualTo(colAttr, Literal("value"))), + Some(sources.Not(sources.EqualTo("col", "value")))) + testTranslateFilter(LessThan(colAttr, Literal("value")), + Some(sources.LessThan("col", "value"))) + testTranslateFilter(LessThan(colAttr, Literal("value")), Some(sources.LessThan("col", "value"))) + testTranslateFilter(LessThanOrEqual(colAttr, Literal("value")), + Some(sources.LessThanOrEqual("col", "value"))) + testTranslateFilter(GreaterThan(colAttr, Literal("value")), + Some(sources.GreaterThan("col", "value"))) + testTranslateFilter(GreaterThanOrEqual(colAttr, Literal("value")), + Some(sources.GreaterThanOrEqual("col", "value"))) + testTranslateFilter(IsNotNull(colAttr), Some(sources.IsNotNull("col"))) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index 110c330f16956..6399eb6da049f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -31,7 +31,7 @@ import 
org.mockito.Mockito.{mock, when} import org.apache.spark.{SparkException, SparkRuntimeException} import org.apache.spark.metrics.source.HiveCatalogMetrics -import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf} @@ -547,6 +547,66 @@ class FileIndexSuite extends SharedSparkSession { assert(fileIndex.leafFileStatuses.toSeq == statuses) } + test("SPARK-48649: Ignore invalid partitions") { + // Table: + // id part_col + // 1 1 + // 2 2 + val df = spark.range(1, 3, 1, 2).toDF("id") + .withColumn("part_col", col("id")) + + withTempPath { directoryPath => + df.write + .mode("overwrite") + .format("parquet") + .partitionBy("part_col") + .save(directoryPath.getCanonicalPath) + + // Rename one of the folders. + new File(directoryPath, "part_col=1").renameTo(new File(directoryPath, "undefined")) + + // By default, we expect the invalid path assertion to trigger. + val ex = intercept[AssertionError] { + spark.read + .format("parquet") + .load(directoryPath.getCanonicalPath) + .collect() + } + assert(ex.getMessage.contains("Conflicting directory structures detected")) + + // With the config enabled, we should only read the valid partition. + withSQLConf(SQLConf.IGNORE_INVALID_PARTITION_PATHS.key -> "true") { + assert( + spark.read + .format("parquet") + .load(directoryPath.getCanonicalPath) + .collect() === Seq(Row(2, 2))) + } + + // Data source option override takes precedence. + withSQLConf(SQLConf.IGNORE_INVALID_PARTITION_PATHS.key -> "true") { + val ex = intercept[AssertionError] { + spark.read + .format("parquet") + .option(FileIndexOptions.IGNORE_INVALID_PARTITION_PATHS, "false") + .load(directoryPath.getCanonicalPath) + .collect() + } + assert(ex.getMessage.contains("Conflicting directory structures detected")) + } + + // Data source option override takes precedence. 
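+ // Here the SQL conf disables ignoring invalid partition paths, but the per-read option re-enables it, so only the valid partition is returned.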
+ withSQLConf(SQLConf.IGNORE_INVALID_PARTITION_PATHS.key -> "false") { + assert( + spark.read + .format("parquet") + .option(FileIndexOptions.IGNORE_INVALID_PARTITION_PATHS, "true") + .load(directoryPath.getCanonicalPath) + .collect() === Seq(Row(2, 2))) + } + } + } + test("expire FileStatusCache if TTL is configured") { val previousValue = SQLConf.get.getConf(StaticSQLConf.METADATA_CACHE_TTL_SECONDS) try { @@ -585,9 +645,10 @@ class FileIndexSuite extends SharedSparkSession { } test("SPARK-40667: validate FileIndex Options") { - assert(FileIndexOptions.getAllOptions.size == 7) + assert(FileIndexOptions.getAllOptions.size == 8) // Please add validation on any new FileIndex options here assert(FileIndexOptions.isValidOption("ignoreMissingFiles")) + assert(FileIndexOptions.isValidOption("ignoreInvalidPartitionPaths")) assert(FileIndexOptions.isValidOption("timeZone")) assert(FileIndexOptions.isValidOption("recursiveFileLookup")) assert(FileIndexOptions.isValidOption("basePath")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala index 5256043289d5e..fefb16a351fdb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/ReadSchemaSuite.scala @@ -161,7 +161,9 @@ class ParquetReadSchemaSuite with HideColumnInTheMiddleTest with AddNestedColumnTest with HideNestedColumnTest - with ChangePositionTest { + with ChangePositionTest + with IntegralTypeTest + with ToDoubleTypeTest { override val format: String = "parquet" @@ -183,7 +185,9 @@ class VectorizedParquetReadSchemaSuite with HideColumnInTheMiddleTest with AddNestedColumnTest with HideNestedColumnTest - with ChangePositionTest { + with ChangePositionTest + with IntegralTypeTest + with ToDoubleTypeTest { override val format: String = "parquet" @@ -205,7 +209,9 @@ class MergedParquetReadSchemaSuite with HideColumnInTheMiddleTest with AddNestedColumnTest with HideNestedColumnTest - with ChangePositionTest { + with ChangePositionTest + with IntegralTypeTest + with ToDoubleTypeTest { override val format: String = "parquet" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala index 51c9b960a8eab..3762241719acd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala @@ -82,7 +82,7 @@ class SaveIntoDataSourceCommandSuite extends QueryTest with SharedSparkSession { val df = spark.range(1).selectExpr( "cast('a' as binary) a", "true b", "cast(1 as byte) c", "1.23 d", "'abc'", - "'abc' COLLATE UTF8_BINARY_LCASE") + "'abc' COLLATE UTF8_LCASE") dataSource.planForWriting(SaveMode.ErrorIfExists, df.logicalPlan) // Variant and Interval types are disallowed by default. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/V1WriteCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/V1WriteCommandSuite.scala index ce43edb79c127..04a7b4834f4b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/V1WriteCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/V1WriteCommandSuite.scala @@ -214,8 +214,8 @@ class V1WriteCommandSuite extends QueryTest with SharedSparkSession with V1Write val executedPlan = FileFormatWriter.executedPlan.get val plan = if (enabled) { - assert(executedPlan.isInstanceOf[WriteFilesExec]) - executedPlan.asInstanceOf[WriteFilesExec].child + assert(executedPlan.isInstanceOf[WriteFilesExecBase]) + executedPlan.asInstanceOf[WriteFilesExecBase].child } else { executedPlan.transformDown { case a: AdaptiveSparkPlanExec => a.executedPlan @@ -261,8 +261,8 @@ class V1WriteCommandSuite extends QueryTest with SharedSparkSession with V1Write val executedPlan = FileFormatWriter.executedPlan.get val plan = if (enabled) { - assert(executedPlan.isInstanceOf[WriteFilesExec]) - executedPlan.asInstanceOf[WriteFilesExec].child + assert(executedPlan.isInstanceOf[WriteFilesExecBase]) + executedPlan.asInstanceOf[WriteFilesExecBase].child } else { executedPlan.transformDown { case a: AdaptiveSparkPlanExec => a.executedPlan diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 22ea133ee19aa..f7ea8a735068e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -80,6 +80,7 @@ abstract class CSVSuite private val valueMalformedFile = "test-data/value-malformed.csv" private val badAfterGoodFile = "test-data/bad_after_good.csv" private val malformedRowFile = "test-data/malformedRow.csv" + private val charFile = "test-data/char.csv" /** Verifies data and schema. 
*/ private def verifyCars( @@ -1246,14 +1247,13 @@ abstract class CSVSuite val exp = spark.sql("select timestamp_ntz'2020-12-12 12:12:12' as col0") for (pattern <- patterns) { withTempPath { path => - val actualPath = path.toPath.toUri.toURL.toString val ex = intercept[SparkException] { exp.write.format("csv").option("timestampNTZFormat", pattern).save(path.getAbsolutePath) } checkErrorMatchPVals( exception = ex, errorClass = "TASK_WRITE_FAILED", - parameters = Map("path" -> s"$actualPath.*")) + parameters = Map("path" -> s".*${path.getName}.*")) val msg = ex.getCause.getMessage assert( msg.contains("Unsupported field: OffsetSeconds") || @@ -1488,16 +1488,21 @@ abstract class CSVSuite val e = intercept[SparkException] { spark.read.csv(inputFile.toURI.toString).collect() } - checkError( + checkErrorMatchPVals( exception = e, errorClass = "FAILED_READ_FILE.NO_HINT", - parameters = Map("path" -> inputFile.toPath.toUri.toString) + parameters = Map("path" -> s".*${inputFile.getName}.*") ) assert(e.getCause.isInstanceOf[EOFException]) assert(e.getCause.getMessage === "Unexpected end of input stream") val e2 = intercept[SparkException] { spark.read.option("multiLine", true).csv(inputFile.toURI.toString).collect() } + checkErrorMatchPVals( + exception = e2, + errorClass = "FAILED_READ_FILE.NO_HINT", + parameters = Map("path" -> s".*${inputFile.getName}.*") + ) assert(e2.getCause.getCause.getCause.isInstanceOf[EOFException]) assert(e2.getCause.getCause.getCause.getMessage === "Unexpected end of input stream") } @@ -3342,6 +3347,29 @@ abstract class CSVSuite expected) } } + + test("SPARK-48241: CSV parsing failure with char/varchar type columns") { + withTable("charVarcharTable") { + spark.sql( + s""" + |CREATE TABLE charVarcharTable( + | color char(4), + | name varchar(10)) + |USING csv + |OPTIONS ( + | header "true", + | path "${testFile(charFile)}" + |) + """.stripMargin) + val expected = Seq( + Row("pink", "Bob"), + Row("blue", "Mike"), + Row("grey", "Tom")) + checkAnswer( + sql("SELECT * FROM charVarcharTable"), + expected) + } + } } class CSVv1Suite extends CSVSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index f3c332bab1833..9e5ecc08e24a2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1924,10 +1924,10 @@ abstract class JsonSuite val e = intercept[SparkException] { spark.read.json(inputFile.toURI.toString).collect() } - checkError( + checkErrorMatchPVals( exception = e, errorClass = "FAILED_READ_FILE.NO_HINT", - parameters = Map("path" -> inputFile.toPath.toUri.toString)) + parameters = Map("path" -> s".*${inputFile.getName}.*")) assert(e.getCause.isInstanceOf[EOFException]) assert(e.getCause.getMessage === "Unexpected end of input stream") val e2 = intercept[SparkException] { @@ -3039,14 +3039,13 @@ abstract class JsonSuite val exp = spark.sql("select timestamp_ntz'2020-12-12 12:12:12' as col0") for (pattern <- patterns) { withTempPath { path => - val actualPath = path.toPath.toUri.toURL.toString val err = intercept[SparkException] { exp.write.option("timestampNTZFormat", pattern).json(path.getAbsolutePath) } checkErrorMatchPVals( exception = err, errorClass = "TASK_WRITE_FAILED", - parameters = Map("path" -> s"$actualPath.*")) + parameters = Map("path" -> s".*${path.getName}.*")) val msg = 
err.getCause.getMessage assert( @@ -3728,7 +3727,7 @@ abstract class JsonSuite } test("SPARK-40667: validate JSON Options") { - assert(JSONOptions.getAllOptions.size == 28) + assert(JSONOptions.getAllOptions.size == 29) // Please add validation on any new Json options here assert(JSONOptions.isValidOption("samplingRatio")) assert(JSONOptions.isValidOption("primitivesAsString")) @@ -3756,6 +3755,7 @@ abstract class JsonSuite assert(JSONOptions.isValidOption("columnNameOfCorruptRecord")) assert(JSONOptions.isValidOption("timeZone")) assert(JSONOptions.isValidOption("writeNonAsciiCharacterAsCodePoint")) + assert(JSONOptions.isValidOption("singleVariantColumn")) assert(JSONOptions.isValidOption("encoding")) assert(JSONOptions.isValidOption("charset")) // Please add validation on any new Json options with alternative here @@ -3864,6 +3864,64 @@ abstract class JsonSuite } } } + + test("SPARK-48148: values are unchanged when read as string") { + withTempPath { path => + def extractData( + jsonString: String, + expectedInexactData: Seq[String], + expectedExactData: Seq[String], + multiLine: Boolean = false): Unit = { + Seq(jsonString).toDF() + .repartition(1) + .write + .mode("overwrite") + .text(path.getAbsolutePath) + + withClue("Exact string parsing") { + withSQLConf(SQLConf.JSON_EXACT_STRING_PARSING.key -> "true") { + val df = spark.read + .schema("data STRING") + .option("multiLine", multiLine.toString) + .json(path.getAbsolutePath) + checkAnswer(df, expectedExactData.map(d => Row(d))) + } + } + + withClue("Inexact string parsing") { + withSQLConf(SQLConf.JSON_EXACT_STRING_PARSING.key -> "false") { + val df = spark.read + .schema("data STRING") + .option("multiLine", multiLine.toString) + .json(path.getAbsolutePath) + checkAnswer(df, expectedInexactData.map(d => Row(d))) + } + } + } + extractData( + """{"data": {"white": "space"}}""", + expectedInexactData = Seq("""{"white":"space"}"""), + expectedExactData = Seq("""{"white": "space"}""") + ) + extractData( + """{"data": ["white", "space"]}""", + expectedInexactData = Seq("""["white","space"]"""), + expectedExactData = Seq("""["white", "space"]""") + ) + val granularFloat = "-999.99999999999999999999999999999999995" + extractData( + s"""{"data": {"v": ${granularFloat}}}""", + expectedInexactData = Seq("""{"v":-1000.0}"""), + expectedExactData = Seq(s"""{"v": ${granularFloat}}""") + ) + extractData( + s"""{"data": {"white":\n"space"}}""", + expectedInexactData = Seq("""{"white":"space"}"""), + expectedExactData = Seq(s"""{"white":\n"space"}"""), + multiLine = true + ) + } + } } class JsonV1Suite extends JsonSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 9fb490dd823ad..02e1c70cc8cb7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -1208,7 +1208,7 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession test("SPARK-7837 Do not close output writer twice when commitTask() fails") { withSQLConf(SQLConf.FILE_COMMIT_PROTOCOL_CLASS.key -> - classOf[SQLHadoopMapReduceCommitProtocol].getCanonicalName) { + classOf[SQLHadoopMapReduceCommitProtocol].getCanonicalName) { // Using a output committer that always fail when committing a task, so that both // `commitTask()` and `abortTask()` are invoked. 
val extraOptions = Map[String, String]( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala index fffc9e2b19246..baa11df302b04 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetInteroperabilitySuite.scala @@ -213,8 +213,8 @@ class ParquetInteroperabilitySuite extends ParquetCompatibilityTest with SharedS // predicates because (a) in ParquetFilters, we ignore TimestampType and (b) parquet // does not read statistics from int96 fields, as they are unsigned. See // scalastyle:off line.size.limit - // https://github.com/apache/parquet-mr/blob/2fd62ee4d524c270764e9b91dca72e5cf1a005b7/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java#L419 - // https://github.com/apache/parquet-mr/blob/2fd62ee4d524c270764e9b91dca72e5cf1a005b7/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java#L348 + // https://github.com/apache/parquet-java/blob/2fd62ee4d524c270764e9b91dca72e5cf1a005b7/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java#L419 + // https://github.com/apache/parquet-java/blob/2fd62ee4d524c270764e9b91dca72e5cf1a005b7/parquet-hadoop/src/main/java/org/apache/parquet/format/converter/ParquetMetadataConverter.java#L348 // scalastyle:on line.size.limit // // Just to be defensive in case anything ever changes in parquet, this test checks diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index 183c4f71df6c6..a6ad147c865d2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -112,7 +112,8 @@ abstract class ParquetPartitionDiscoverySuite "hdfs://host:9000/path/a=10.5/b=hello") var exception = intercept[AssertionError] { - parsePartitions(paths.map(new Path(_)), true, Set.empty[Path], None, true, true, timeZoneId) + parsePartitions( + paths.map(new Path(_)), true, Set.empty[Path], None, true, true, timeZoneId, false) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -129,7 +130,8 @@ abstract class ParquetPartitionDiscoverySuite None, true, true, - timeZoneId) + timeZoneId, + false) // Valid paths = Seq( @@ -145,7 +147,8 @@ abstract class ParquetPartitionDiscoverySuite None, true, true, - timeZoneId) + timeZoneId, + false) // Valid paths = Seq( @@ -161,7 +164,8 @@ abstract class ParquetPartitionDiscoverySuite None, true, true, - timeZoneId) + timeZoneId, + false) // Invalid paths = Seq( @@ -177,7 +181,8 @@ abstract class ParquetPartitionDiscoverySuite None, true, true, - timeZoneId) + timeZoneId, + false) } assert(exception.getMessage().contains("Conflicting directory structures detected")) @@ -200,7 +205,8 @@ abstract class ParquetPartitionDiscoverySuite None, true, true, - timeZoneId) + timeZoneId, + false) } assert(exception.getMessage().contains("Conflicting directory structures detected")) } @@ -296,7 +302,8 @@ 
abstract class ParquetPartitionDiscoverySuite None, true, true, - timeZoneId) + timeZoneId, + false) assert(actualSpec.partitionColumns === spec.partitionColumns) assert(actualSpec.partitions.length === spec.partitions.length) actualSpec.partitions.zip(spec.partitions).foreach { case (actual, expected) => @@ -427,7 +434,7 @@ abstract class ParquetPartitionDiscoverySuite def check(paths: Seq[String], spec: PartitionSpec): Unit = { val actualSpec = parsePartitions(paths.map(new Path(_)), false, Set.empty[Path], None, - true, true, timeZoneId) + true, true, timeZoneId, false) assert(actualSpec === spec) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index a329d3fdc3cbc..4d413efe50430 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -369,16 +369,14 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS } withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> sqlConf) { - withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "false") { - val exception = intercept[SparkException] { - testIgnoreCorruptFiles(options) - }.getCause - assert(exception.getMessage().contains("is not a Parquet file")) - val exception2 = intercept[SparkException] { - testIgnoreCorruptFilesWithoutSchemaInfer(options) - }.getCause - assert(exception2.getMessage().contains("is not a Parquet file")) - } + val exception = intercept[SparkException] { + testIgnoreCorruptFiles(options) + }.getCause + assert(exception.getMessage().contains("is not a Parquet file")) + val exception2 = intercept[SparkException] { + testIgnoreCorruptFilesWithoutSchemaInfer(options) + }.getCause + assert(exception2.getMessage().contains("is not a Parquet file")) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala index 35e1a38376dd8..f2d04a9c28f2a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala @@ -502,7 +502,7 @@ class ParquetVectorizedSuite extends QueryTest with ParquetTest with SharedSpark val ty = parquetSchema.asGroupType().getType("a").asPrimitiveType() val cd = new ColumnDescriptor(Seq("a").toArray, ty, 0, maxDef) val repetitionLevels = Array.fill[Int](inputValues.length)(0) - val definitionLevels = inputValues.map(v => if (v == null) 0 else 1) + val definitionLevels = inputValues.map(v => if (v == null) 0 else maxDef) val memPageStore = new MemPageStore(expectedValues.length) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactorySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactorySuite.scala new file mode 100644 index 0000000000000..bd20307974416 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/FileWriterFactorySuite.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +import org.mockito.Mockito._ +import org.scalatest.PrivateMethodTester + +import org.apache.spark.SparkFunSuite +import org.apache.spark.internal.io.FileCommitProtocol +import org.apache.spark.sql.execution.datasources.WriteJobDescription +import org.apache.spark.util.SerializableConfiguration + +class FileWriterFactorySuite extends SparkFunSuite with PrivateMethodTester { + + test("SPARK-48484: V2Write uses different TaskAttemptIds for different task attempts") { + val jobDescription = mock(classOf[WriteJobDescription]) + when(jobDescription.serializableHadoopConf).thenReturn( + new SerializableConfiguration(new Configuration(false))) + val committer = mock(classOf[FileCommitProtocol]) + + val writerFactory = FileWriterFactory(jobDescription, committer) + val createTaskAttemptContext = + PrivateMethod[TaskAttemptContextImpl](Symbol("createTaskAttemptContext")) + + val attemptContext = + writerFactory.invokePrivate(createTaskAttemptContext(0, 1)) + val attemptContext1 = + writerFactory.invokePrivate(createTaskAttemptContext(0, 2)) + assert(attemptContext.getTaskAttemptID.getTaskID == attemptContext1.getTaskAttemptID.getTaskID) + assert(attemptContext.getTaskAttemptID.getId != attemptContext1.getTaskAttemptID.getId) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/DerbyTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/DerbyTableCatalogSuite.scala index e3714e6044955..d793ef526c47b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/DerbyTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/DerbyTableCatalogSuite.scala @@ -51,4 +51,12 @@ class DerbyTableCatalogSuite extends QueryTest with SharedSparkSession { checkAnswer(sql(s"SHOW TABLES IN derby.test1"), Row("test1", "TABLE2", false)) } } + + test("SPARK-48439: Calculate suitable precision and scale for DECIMAL type") { + withTable("derby.test1.table1") { + sql("CREATE TABLE derby.test1.table1 (c1 decimal(38, 18))") + sql("INSERT INTO derby.test1.table1 VALUES (1.123456789123456789)") + checkAnswer(sql("SELECT * FROM derby.test1.table1"), Row(1.12345678912)) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index f4e7921e88bc2..d2ff33e104772 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -200,7 +200,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { exception = intercept[AnalysisException] { sql(s"ALTER TABLE $tableName ADD COLUMNS (c3 DOUBLE)") }, - errorClass = "FIELDS_ALREADY_EXISTS", + errorClass = "FIELD_ALREADY_EXISTS", parameters = Map( "op" -> "add", "fieldNames" -> "`c3`", @@ -239,7 +239,7 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { exception = intercept[AnalysisException] { sql(s"ALTER TABLE $tableName RENAME COLUMN C TO C0") }, - errorClass = "FIELDS_ALREADY_EXISTS", + errorClass = "FIELD_ALREADY_EXISTS", parameters = Map( "op" -> "rename", "fieldNames" -> "`C0`", @@ -619,15 +619,15 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { test("CREATE TABLE with table property") { withTable("h2.test.new_table") { - checkError( + checkErrorMatchPVals( exception = intercept[AnalysisException] { sql("CREATE TABLE h2.test.new_table(i INT, j STRING)" + " TBLPROPERTIES('ENGINE'='tableEngineName')") }, - errorClass = "FAILED_JDBC.UNCLASSIFIED", + errorClass = "FAILED_JDBC.CREATE_TABLE", parameters = Map( - "url" -> "jdbc:", - "message" -> "Failed table creation: test.new_table")) + "url" -> "jdbc:.*", + "tableName" -> "`test`.`new_table`")) } } @@ -639,14 +639,14 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { } test("SPARK-42904: CREATE TABLE with char/varchar with invalid char length") { - checkError( + checkErrorMatchPVals( exception = intercept[AnalysisException]{ sql("CREATE TABLE h2.test.new_table(c CHAR(1000000001))") }, - errorClass = "FAILED_JDBC.UNCLASSIFIED", + errorClass = "FAILED_JDBC.CREATE_TABLE", parameters = Map( - "url" -> "jdbc:", - "message" -> "Failed table creation: test.new_table")) + "url" -> "jdbc:.*", + "tableName" -> "`test`.`new_table`")) } test("SPARK-42955: Skip classifyException and wrap AnalysisException for SparkThrowable") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceReadSuite.scala index c800168b507a8..e6cdd0dce9efa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/state/StateDataSourceReadSuite.scala @@ -18,16 +18,18 @@ package org.apache.spark.sql.execution.datasources.v2.state import java.io.{File, FileWriter} +import org.apache.hadoop.conf.Configuration import org.scalatest.Assertions -import org.apache.spark.SparkUnsupportedOperationException +import org.apache.spark.{SparkException, SparkUnsupportedOperationException} import org.apache.spark.io.CompressionCodec import org.apache.spark.sql.{AnalysisException, DataFrame, Encoders, Row} import org.apache.spark.sql.catalyst.expressions.{BoundReference, GenericInternalRow} import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.datasources.v2.state.utils.SchemaUtil import org.apache.spark.sql.execution.streaming.{CommitLog, MemoryStream, OffsetSeqLog} -import org.apache.spark.sql.execution.streaming.state.{HDFSBackedStateStoreProvider, RocksDBStateStoreProvider, StateStore} +import org.apache.spark.sql.execution.streaming.state._ +import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf 
import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types.{IntegerType, StructType} @@ -194,6 +196,78 @@ class StateDataSourceNegativeTestSuite extends StateDataSourceTestBase { } } } + + test("ERROR: snapshotStartBatchId specified as a negative value") { + withTempDir { tempDir => + val exc = intercept[StateDataSourceInvalidOptionValueIsNegative] { + spark.read.format("statestore") + // trick to bypass getting the last committed batch before validating operator ID + .option(StateSourceOptions.BATCH_ID, 0) + .option(StateSourceOptions.SNAPSHOT_START_BATCH_ID, -1) + .load(tempDir.getAbsolutePath) + } + checkError(exc, "STDS_INVALID_OPTION_VALUE.IS_NEGATIVE", "42616", + Map("optionName" -> StateSourceOptions.SNAPSHOT_START_BATCH_ID)) + } + } + + test("ERROR: snapshotPartitionId specified as a negative value") { + withTempDir { tempDir => + val exc = intercept[StateDataSourceInvalidOptionValueIsNegative] { + spark.read.format("statestore") + // trick to bypass getting the last committed batch before validating operator ID + .option(StateSourceOptions.BATCH_ID, 0) + .option(StateSourceOptions.SNAPSHOT_PARTITION_ID, -1) + .load(tempDir.getAbsolutePath) + } + checkError(exc, "STDS_INVALID_OPTION_VALUE.IS_NEGATIVE", "42616", + Map("optionName" -> StateSourceOptions.SNAPSHOT_PARTITION_ID)) + } + } + + test("ERROR: snapshotStartBatchId specified without snapshotPartitionId or vice versa") { + withTempDir { tempDir => + val exc = intercept[StateDataSourceUnspecifiedRequiredOption] { + spark.read.format("statestore") + // trick to bypass getting the last committed batch before validating operator ID + .option(StateSourceOptions.BATCH_ID, 0) + .option(StateSourceOptions.SNAPSHOT_START_BATCH_ID, 0) + .load(tempDir.getAbsolutePath) + } + checkError(exc, "STDS_REQUIRED_OPTION_UNSPECIFIED", "42601", + Map("optionName" -> StateSourceOptions.SNAPSHOT_PARTITION_ID)) + } + + withTempDir { tempDir => + val exc = intercept[StateDataSourceUnspecifiedRequiredOption] { + spark.read.format("statestore") + // trick to bypass getting the last committed batch before validating operator ID + .option(StateSourceOptions.BATCH_ID, 0) + .option(StateSourceOptions.SNAPSHOT_PARTITION_ID, 0) + .load(tempDir.getAbsolutePath) + } + checkError(exc, "STDS_REQUIRED_OPTION_UNSPECIFIED", "42601", + Map("optionName" -> StateSourceOptions.SNAPSHOT_START_BATCH_ID)) + } + } + + test("ERROR: snapshotStartBatchId is greater than snapshotEndBatchId") { + withTempDir { tempDir => + val startBatchId = 1 + val endBatchId = 0 + val exc = intercept[StateDataSourceInvalidOptionValue] { + spark.read.format("statestore") + // trick to bypass getting the last committed batch before validating operator ID + .option(StateSourceOptions.SNAPSHOT_START_BATCH_ID, startBatchId) + .option(StateSourceOptions.BATCH_ID, endBatchId) + .load(tempDir.getAbsolutePath) + } + checkError(exc, "STDS_INVALID_OPTION_VALUE.WITH_MESSAGE", "42616", + Map( + "optionName" -> StateSourceOptions.SNAPSHOT_START_BATCH_ID, + "message" -> s"value should be less than or equal to $endBatchId")) + } + } } /** @@ -301,35 +375,138 @@ class StateDataSourceSQLConfigSuite extends StateDataSourceTestBase { } class HDFSBackedStateDataSourceReadSuite extends StateDataSourceReadSuite { + override protected def newStateStoreProvider(): HDFSBackedStateStoreProvider = + new HDFSBackedStateStoreProvider + override def beforeAll(): Unit = { super.beforeAll() spark.conf.set(SQLConf.STATE_STORE_PROVIDER_CLASS.key, - classOf[HDFSBackedStateStoreProvider].getName) + 
newStateStoreProvider().getClass.getName) + // make sure we have a snapshot for every two delta files + // HDFS maintenance task will not count the latest delta file, which has the same version + // as the snapshot version + spark.conf.set(SQLConf.STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT.key, 1) + } + + test("ERROR: snapshot of version not found") { + testSnapshotNotFound() + } + + test("provider.replayReadStoreFromSnapshot(snapshotVersion, endVersion)") { + testGetReadStoreWithStartVersion() + } + + test("option snapshotPartitionId") { + testSnapshotPartitionId() + } + + test("snapshotStatBatchId on limit state") { + testSnapshotOnLimitState("hdfs") + } + + test("snapshotStatBatchId on aggregation state") { + testSnapshotOnAggregateState("hdfs") + } + + test("snapshotStatBatchId on deduplication state") { + testSnapshotOnDeduplicateState("hdfs") + } + + test("snapshotStatBatchId on join state") { + testSnapshotOnJoinState("hdfs", 1) + testSnapshotOnJoinState("hdfs", 2) } } class RocksDBStateDataSourceReadSuite extends StateDataSourceReadSuite { + override protected def newStateStoreProvider(): RocksDBStateStoreProvider = + new RocksDBStateStoreProvider + override def beforeAll(): Unit = { super.beforeAll() spark.conf.set(SQLConf.STATE_STORE_PROVIDER_CLASS.key, - classOf[RocksDBStateStoreProvider].getName) + newStateStoreProvider().getClass.getName) spark.conf.set("spark.sql.streaming.stateStore.rocksdb.changelogCheckpointing.enabled", "false") } } -class RocksDBWithChangelogCheckpointStateDataSourceReaderSuite extends StateDataSourceReadSuite { +class RocksDBWithChangelogCheckpointStateDataSourceReaderSuite extends +StateDataSourceReadSuite { + override protected def newStateStoreProvider(): RocksDBStateStoreProvider = + new RocksDBStateStoreProvider + override def beforeAll(): Unit = { super.beforeAll() spark.conf.set(SQLConf.STATE_STORE_PROVIDER_CLASS.key, - classOf[RocksDBStateStoreProvider].getName) + newStateStoreProvider().getClass.getName) spark.conf.set("spark.sql.streaming.stateStore.rocksdb.changelogCheckpointing.enabled", "true") + // make sure we have a snapshot for every other checkpoint + // RocksDB maintenance task will count the latest checkpoint, so we need to set it to 2 + spark.conf.set(SQLConf.STATE_STORE_MIN_DELTAS_FOR_SNAPSHOT.key, 2) + } + + test("ERROR: snapshot of version not found") { + testSnapshotNotFound() + } + + test("provider.getReadStore(snapshotVersion, endVersion)") { + testGetReadStoreWithStartVersion() + } + + test("option snapshotPartitionId") { + testSnapshotPartitionId() + } + + test("snapshotStatBatchId on limit state") { + testSnapshotOnLimitState("rocksdb") + } + + test("snapshotStatBatchId on aggregation state") { + testSnapshotOnAggregateState("rocksdb") + } + + test("snapshotStatBatchId on deduplication state") { + testSnapshotOnDeduplicateState("rocksdb") + } + + test("snapshotStatBatchId on join state") { + testSnapshotOnJoinState("rocksdb", 1) + testSnapshotOnJoinState("rocksdb", 2) } } abstract class StateDataSourceReadSuite extends StateDataSourceTestBase with Assertions { + import testImplicits._ + import StateStoreTestsHelper._ + + protected val keySchema: StructType = StateStoreTestsHelper.keySchema + protected val valueSchema: StructType = StateStoreTestsHelper.valueSchema + + protected def newStateStoreProvider(): StateStoreProvider + + /** + * Calls the overridable [[newStateStoreProvider]] to create the state store provider instance. + * Initialize it with the configuration set by child classes. 
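+ * The provider is initialized with the test key and value schemas, a no-prefix key state encoder, and column families disabled.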
+ * + * @param checkpointDir path to store state information + * @return instance of class extending [[StateStoreProvider]] + */ + private def getNewStateStoreProvider(checkpointDir: String): StateStoreProvider = { + val provider = newStateStoreProvider() + provider.init( + StateStoreId(checkpointDir, 0, 0), + keySchema, + valueSchema, + NoPrefixKeyStateEncoderSpec(keySchema), + useColumnFamilies = false, + StateStoreConf(spark.sessionState.conf), + new Configuration) + provider + } + test("simple aggregation, state ver 1") { testStreamingAggregation(1) } @@ -796,4 +973,228 @@ abstract class StateDataSourceReadSuite extends StateDataSourceTestBase with Ass testForSide("right") } } + + protected def testSnapshotNotFound(): Unit = { + withTempDir { tempDir => + val provider = getNewStateStoreProvider(tempDir.getAbsolutePath) + for (i <- 1 to 4) { + val store = provider.getStore(i - 1) + put(store, "a", i, i) + store.commit() + provider.doMaintenance() // create a snapshot every other delta file + } + + val exc = intercept[SparkException] { + provider.asInstanceOf[SupportsFineGrainedReplay] + .replayReadStateFromSnapshot(1, 2) + } + checkError(exc, "CANNOT_LOAD_STATE_STORE.UNCATEGORIZED") + } + } + + protected def testGetReadStoreWithStartVersion(): Unit = { + withTempDir { tempDir => + val provider = getNewStateStoreProvider(tempDir.getAbsolutePath) + for (i <- 1 to 4) { + val store = provider.getStore(i - 1) + put(store, "a", i, i) + store.commit() + provider.doMaintenance() + } + + val result = + provider.asInstanceOf[SupportsFineGrainedReplay] + .replayReadStateFromSnapshot(2, 3) + + assert(get(result, "a", 1).get == 1) + assert(get(result, "a", 2).get == 2) + assert(get(result, "a", 3).get == 3) + assert(get(result, "a", 4).isEmpty) + + provider.close() + } + } + + protected def testSnapshotPartitionId(): Unit = { + withTempDir { tempDir => + val inputData = MemoryStream[Int] + val df = inputData.toDF().limit(10) + + testStream(df)( + StartStream(checkpointLocation = tempDir.getAbsolutePath), + AddData(inputData, 1, 2, 3, 4), + CheckLastBatch(1, 2, 3, 4) + ) + + val stateDf = spark.read.format("statestore") + .option(StateSourceOptions.SNAPSHOT_START_BATCH_ID, 0) + .option(StateSourceOptions.SNAPSHOT_PARTITION_ID, 0) + .option(StateSourceOptions.BATCH_ID, 0) + .load(tempDir.getAbsolutePath) + + // should result in only one partition && should not throw error in planning stage + assert(stateDf.rdd.getNumPartitions == 1) + + // should throw error when partition id is out of range + val stateDfError = spark.read.format("statestore") + .option(StateSourceOptions.SNAPSHOT_START_BATCH_ID, 0) + .option( + StateSourceOptions.SNAPSHOT_PARTITION_ID, 1) + .option(StateSourceOptions.BATCH_ID, 0) + .load(tempDir.getAbsolutePath) + + val exc = intercept[StateStoreSnapshotPartitionNotFound] { + stateDfError.show() + } + assert(exc.getErrorClass === "CANNOT_LOAD_STATE_STORE.SNAPSHOT_PARTITION_ID_NOT_FOUND") + } + } + + private def testSnapshotStateDfAgainstStateDf(resourceDir: File): Unit = { + val stateSnapshotDf = spark.read.format("statestore") + .option("snapshotPartitionId", 0) + .option("snapshotStartBatchId", 1) + .load(resourceDir.getAbsolutePath) + + val stateDf = spark.read.format("statestore") + .load(resourceDir.getAbsolutePath) + .filter(col("partition_id") === 0) + + checkAnswer(stateSnapshotDf, stateDf) + } + + protected def testSnapshotOnLimitState(providerName: String): Unit = { + /** The golden files are generated by: + withSQLConf({ + SQLConf.STREAMING_MAINTENANCE_INTERVAL.key -> 
"100" + }) { + val inputData = MemoryStream[(Int, Long)] + val query = inputData.toDF().limit(10) + testStream(query)( + StartStream(checkpointLocation = <...>), + AddData(inputData, (1, 1L), (2, 2L), (3, 3L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (4, 4L), (5, 5L), (6, 6L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (7, 7L), (8, 8L), (9, 9L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (10, 10L), (11, 11L), (12, 12L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) } + ) + } + */ + val resourceUri = this.getClass.getResource( + s"/structured-streaming/checkpoint-version-4.0.0/$providerName/limit/" + ).toURI + + testSnapshotStateDfAgainstStateDf(new File(resourceUri)) + } + + protected def testSnapshotOnAggregateState(providerName: String): Unit = { + /** The golden files are generated by: + withSQLConf({ + SQLConf.STREAMING_MAINTENANCE_INTERVAL.key -> "100" + }) { + val inputData = MemoryStream[(Int, Long)] + val query = inputData.toDF().groupBy("_1").count() + testStream(query, OutputMode.Update)( + StartStream(checkpointLocation = <...>), + AddData(inputData, (1, 1L), (2, 2L), (3, 3L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (2, 2L), (3, 3L), (4, 4L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (3, 3L), (4, 4L), (5, 5L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (4, 4L), (5, 5L), (6, 6L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) } + ) + } + */ + val resourceUri = this.getClass.getResource( + s"/structured-streaming/checkpoint-version-4.0.0/$providerName/dedup/" + ).toURI + + testSnapshotStateDfAgainstStateDf(new File(resourceUri)) + } + + protected def testSnapshotOnDeduplicateState(providerName: String): Unit = { + /** The golden files are generated by: + withSQLConf({ + SQLConf.STREAMING_MAINTENANCE_INTERVAL.key -> "100" + }) { + val inputData = MemoryStream[(Int, Long)] + val query = inputData.toDF().dropDuplicates("_1") + testStream(query)( + StartStream(checkpointLocation = <...>), + AddData(inputData, (1, 1L), (2, 2L), (3, 3L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (2, 2L), (3, 3L), (4, 4L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (3, 3L), (4, 4L), (5, 5L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (4, 4L), (5, 5L), (6, 6L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) } + ) + } + */ + val resourceUri = this.getClass.getResource( + s"/structured-streaming/checkpoint-version-4.0.0/$providerName/dedup/" + ).toURI + + testSnapshotStateDfAgainstStateDf(new File(resourceUri)) + } + + protected def testSnapshotOnJoinState(providerName: String, stateVersion: Int): Unit = { + /** The golden files are generated by: + withSQLConf({ + SQLConf.STREAMING_JOIN_STATE_FORMAT_VERSION.key -> stateVersion.toString + SQLConf.STREAMING_MAINTENANCE_INTERVAL.key -> "100" + }) { + val inputData = MemoryStream[(Int, Long)] + val query = getStreamStreamJoinQuery(inputData) + testStream(query)( + StartStream(checkpointLocation = <...>), + AddData(inputData, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (6, 6L), (7, 7L), (8, 8L), (9, 9L), (10, 10L)), + ProcessAllAvailable(), + 
Execute { _ => Thread.sleep(2000) }, + AddData(inputData, (11, 11L), (12, 12L), (13, 13L), (14, 14L), (15, 15L)), + ProcessAllAvailable(), + Execute { _ => Thread.sleep(2000) } + ) + } + */ + val resourceUri = this.getClass.getResource( + s"/structured-streaming/checkpoint-version-4.0.0/$providerName/join$stateVersion/" + ).toURI + + val resourceDir = new File(resourceUri) + + val stateSnapshotDf = spark.read.format("statestore") + .option("snapshotPartitionId", 2) + .option("snapshotStartBatchId", 1) + .option("joinSide", "left") + .load(resourceDir.getAbsolutePath) + + val stateDf = spark.read.format("statestore") + .option("joinSide", "left") + .load(resourceDir.getAbsolutePath) + .filter(col("partition_id") === 2) + + checkAnswer(stateSnapshotDf, stateDf) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala index 3c408ab8ee90e..930cc29878108 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala @@ -77,6 +77,20 @@ class XmlSuite override def excluded: Seq[String] = Seq( s"Propagate Hadoop configs from $dataSourceFormat options to underlying file system") + private val baseOptions = Map("rowTag" -> "ROW") + + private def readData( + xmlString: String, + schemaOpt: Option[StructType], + options: Map[String, String] = Map.empty): DataFrame = { + val ds = spark.createDataset(spark.sparkContext.parallelize(Seq(xmlString)))(Encoders.STRING) + if (schemaOpt.isDefined) { + spark.read.schema(schemaOpt.get).options(options).xml(ds) + } else { + spark.read.options(options).xml(ds) + } + } + // Tests test("DSL test") { @@ -252,10 +266,10 @@ class XmlSuite .xml(inputFile) .collect() } - checkError( + checkErrorMatchPVals( exception = exceptionInParsing, errorClass = "FAILED_READ_FILE.NO_HINT", - parameters = Map("path" -> Path.of(inputFile).toUri.toString)) + parameters = Map("path" -> s".*$inputFile.*")) checkError( exception = exceptionInParsing.getCause.asInstanceOf[SparkException], errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", @@ -284,10 +298,10 @@ class XmlSuite .xml(inputFile) .show() } - checkError( + checkErrorMatchPVals( exception = exceptionInParsing, errorClass = "FAILED_READ_FILE.NO_HINT", - parameters = Map("path" -> Path.of(inputFile).toUri.toString)) + parameters = Map("path" -> s".*$inputFile.*")) checkError( exception = exceptionInParsing.getCause.asInstanceOf[SparkException], errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", @@ -1206,14 +1220,16 @@ class XmlSuite } test("test XSD validation") { - val basketDF = spark.read - .option("rowTag", "basket") - .option("inferSchema", true) - .option("rowValidationXSDPath", getTestResourcePath(resDir + "basket.xsd") - .replace("file:/", "/")) - .xml(getTestResourcePath(resDir + "basket.xml")) - // Mostly checking it doesn't fail - assert(basketDF.selectExpr("entry[0].key").head().getLong(0) === 9027) + Seq("basket.xsd", "include-example/first.xsd").foreach { xsdFile => + val basketDF = spark.read + .option("rowTag", "basket") + .option("inferSchema", true) + .option("rowValidationXSDPath", getTestResourcePath(resDir + xsdFile) + .replace("file:/", "/")) + .xml(getTestResourcePath(resDir + "basket.xml")) + // Mostly checking it doesn't fail + assert(basketDF.selectExpr("entry[0].key").head().getLong(0) === 9027) + } } test("test XSD 
validation with validation error") { @@ -1279,26 +1295,6 @@ class XmlSuite assert(result.select("decoded._foo").head().getString(0) === "bar") } - /* - test("from_xml array basic test") { - val xmlData = - """12345dave guy - |67890other guy""".stripMargin - val df = Seq((8, xmlData)).toDF("number", "payload") - val xmlSchema = ArrayType( - StructType( - StructField("pid", IntegerType) :: - StructField("name", StringType) :: Nil)) - val expectedSchema = df.schema.add("decoded", xmlSchema) - val result = df.withColumn("decoded", - from_xml(df.col("payload"), xmlSchema)) - assert(expectedSchema === result.schema) - // TODO: ArrayType and MapType support in from_xml - // assert(result.selectExpr("decoded[0].pid").head().getInt(0) === 12345) - // assert(result.selectExpr("decoded[1].pid").head().getInt(1) === 67890) - } - */ - test("from_xml error test") { // XML contains error val xmlData = @@ -2445,7 +2441,6 @@ class XmlSuite val exp = spark.sql("select timestamp_ntz'2020-12-12 12:12:12' as col0") for (pattern <- patterns) { withTempPath { path => - val actualPath = path.toPath.toUri.toURL.toString val err = intercept[SparkException] { exp.write.option("timestampNTZFormat", pattern) .option("rowTag", "ROW").xml(path.getAbsolutePath) @@ -2453,7 +2448,7 @@ class XmlSuite checkErrorMatchPVals( exception = err, errorClass = "TASK_WRITE_FAILED", - parameters = Map("path" -> s"$actualPath.*")) + parameters = Map("path" -> s".*${path.getName}.*")) val msg = err.getCause.getMessage assert( msg.contains("Unsupported field: OffsetSeconds") || @@ -2625,6 +2620,18 @@ class XmlSuite val expectedResults3 = Seq.range(1, 18).map(Row(_)) checkAnswer(results3, expectedResults3) + + val results4 = spark.read.format("xml") + .option("rowTag", "ROW") + .load(getTestResourcePath(resDir + "cdata-no-ignore.xml")) + + val expectedResults4 = Seq( + Row("1"), + Row("2"), + Row("3"), + Row("4"), + Row("5")) + checkAnswer(results4, expectedResults4) } test("capture values interspersed between elements - nested struct") { @@ -2960,11 +2967,10 @@ class XmlSuite .mode(SaveMode.Overwrite) .xml(path) } - val actualPath = Path.of(dir.getAbsolutePath).toUri.toURL.toString.stripSuffix("/") checkErrorMatchPVals( exception = e, errorClass = "TASK_WRITE_FAILED", - parameters = Map("path" -> s"$actualPath.*")) + parameters = Map("path" -> s".*${dir.getName}.*")) assert(e.getCause.isInstanceOf[XMLStreamException]) assert(e.getCause.getMessage.contains(errorMsg)) } @@ -3013,6 +3019,149 @@ class XmlSuite } } } + + ///////////////////////////////////// + // Projection, sorting, filtering // + ///////////////////////////////////// + test("select with string xml object") { + val xmlString = + s""" + | + | John + | 3 + | + |""".stripMargin + val schema = new StructType() + .add("name", StringType) + .add("metadata", StringType) + val df = readData(xmlString, Some(schema), baseOptions) + checkAnswer(df.select("name"), Seq(Row("John"))) + } + + test("select with duplicate field name in string xml object") { + val xmlString = + s""" + | + | c + | d + | + |""".stripMargin + val schema = new StructType() + .add("a", StringType) + .add("b", StringType) + val df = readData(xmlString, Some(schema), baseOptions) + val dfWithSchemaInference = readData(xmlString, None, baseOptions) + Seq(df, dfWithSchemaInference).foreach { df => + checkAnswer(df.select("b"), Seq(Row("d"))) + } + } + + test("select nested struct objects") { + val xmlString = + s""" + | + | + | + | 1 + | 2 + | + | + | + |""".stripMargin + val schema = new StructType() + .add( + 
"struct", + new StructType() + .add("innerStruct", new StructType().add("field1", LongType).add("field2", LongType)) + ) + val df = readData(xmlString, Some(schema), baseOptions) + val dfWithSchemaInference = readData(xmlString, None, baseOptions) + Seq(df, dfWithSchemaInference).foreach { df => + checkAnswer(df.select("struct"), Seq(Row(Row(Row(1, 2))))) + checkAnswer(df.select("struct.innerStruct"), Seq(Row(Row(1, 2)))) + } + } + + test("select a struct of lists") { + val xmlString = + s""" + | + | + | 1 + | 2 + | 3 + | + | + |""".stripMargin + val schema = new StructType() + .add( + "struct", + new StructType() + .add("array", ArrayType(StructType(StructField("field", LongType) :: Nil)))) + + val df = readData(xmlString, Some(schema), baseOptions) + val dfWithSchemaInference = readData(xmlString, None, baseOptions) + Seq(df, dfWithSchemaInference).foreach { df => + checkAnswer(df.select("struct"), Seq(Row(Row(Array(Row(1), Row(2), Row(3)))))) + checkAnswer(df.select("struct.array"), Seq(Row(Array(Row(1), Row(2), Row(3))))) + } + } + + test("select complex objects") { + val xmlString = + s""" + | + | 1 + | + | value2 + | + | 3 + | + | value4 + | + | 5 + | 1 + | value6 + | 2 + | 7 + | + | value8 + | string + | 9 + | + | value10 + | + | + | 3 + | 11 + | 4 + | + | string + | value12 + | + | 13 + | 3 + | value14 + | + | 15 + | + | + | value16 + | + | + |""".stripMargin + val df = readData(xmlString, None, baseOptions ++ Map("valueTag" -> "VALUE")) + checkAnswer(df.select("struct1.VALUE"), Seq(Row(Seq("value2", "15")))) + checkAnswer(df.select("struct1.struct2.array1"), Seq(Row(Seq( + Row(Seq("value4", "value8", "9"), "string", Row(Seq("5", "value6", "7"), Seq(1, 2))), + Row(Seq("value12"), "string", Row(Seq("11"), Seq(3, 4))) + )))) + checkAnswer(df.select("struct1.struct2.array1.struct3"), Seq(Row(Seq( + Row(Seq("5", "value6", "7"), Seq(1, 2)), + Row(Seq("11"), Seq(3, 4)) + )))) + checkAnswer(df.select("struct1.struct2.array1.string"), Seq(Row(Seq("string", "string")))) + } } // Mock file system that checks the number of open files diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/parsers/StaxXmlParserUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/parsers/StaxXmlParserUtilsSuite.scala index a4ac25b036c41..ad5b176f71f7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/parsers/StaxXmlParserUtilsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/parsers/StaxXmlParserUtilsSuite.scala @@ -73,17 +73,34 @@ final class StaxXmlParserUtilsSuite extends SparkFunSuite with BeforeAndAfterAll val input = Sam Mad Dog Smith1 922 - val parser = factory.createXMLEventReader(new StringReader(input.toString)) - // We assume here it's reading the value within `id` field. 
- StaxXmlParserUtils.skipUntil(parser, XMLStreamConstants.CHARACTERS) - StaxXmlParserUtils.skipChildren(parser) - assert(parser.nextEvent().asEndElement().getName.getLocalPart === "info") - parser.next() - StaxXmlParserUtils.skipChildren(parser) - assert(parser.nextEvent().asEndElement().getName.getLocalPart === "abc") - parser.next() - StaxXmlParserUtils.skipChildren(parser) - assert(parser.nextEvent().asEndElement().getName.getLocalPart === "test") + val xmlOptions = new XmlOptions() + // skip the entire row + val parser1 = factory.createXMLEventReader(new StringReader(input.toString)) + StaxXmlParserUtils.skipUntil(parser1, XMLStreamConstants.START_ELEMENT) + StaxXmlParserUtils.skipChildren(parser1, "ROW", xmlOptions) + assert(parser1.peek().getEventType === XMLStreamConstants.END_DOCUMENT) + + // skip and respectively + val parser2 = factory.createXMLEventReader(new StringReader(input.toString)) + StaxXmlParserUtils.skipUntil(parser2, XMLStreamConstants.CHARACTERS) + // skip + val elementName1 = + StaxXmlParserUtils.getName(parser2.nextEvent().asStartElement().getName, xmlOptions) + StaxXmlParserUtils.skipChildren(parser2, elementName1, xmlOptions) + assert(parser2.peek().getEventType === XMLStreamConstants.START_ELEMENT) + val elementName2 = + StaxXmlParserUtils.getName(parser2.peek().asStartElement().getName, xmlOptions) + assert( + StaxXmlParserUtils + .getName(parser2.peek().asStartElement().getName, xmlOptions) == elementName2 + ) + // skip + parser2.nextEvent() + StaxXmlParserUtils.skipChildren(parser2, elementName2, xmlOptions) + assert(parser2.peek().getEventType === XMLStreamConstants.END_ELEMENT) + assert( + StaxXmlParserUtils.getName(parser2.peek().asEndElement().getName, xmlOptions) == "info" + ) } test("XML Input Factory disables DTD parsing") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonStreamingDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonStreamingDataSourceSuite.scala index 6f4bd1888fbb4..2d1449bd96cb5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonStreamingDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonStreamingDataSourceSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.{AnalysisException, DataFrame, Row} import org.apache.spark.sql.IntegratedUDFTestUtils.{createUserDefinedPythonDataSource, shouldTestPandasUDFs} import org.apache.spark.sql.execution.datasources.v2.python.{PythonDataSourceV2, PythonMicroBatchStream, PythonStreamingSourceOffset} -import org.apache.spark.sql.execution.streaming.{MemoryStream, ProcessingTimeTrigger} +import org.apache.spark.sql.execution.streaming.{CommitLog, MemoryStream, OffsetSeqLog, ProcessingTimeTrigger} import org.apache.spark.sql.streaming.StreamingQueryException import org.apache.spark.sql.types.StructType import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -36,11 +36,11 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { val waitTimeout = 15.seconds - protected def simpleDataStreamReaderScript: String = + protected def testDataStreamReaderScript: String = """ |from pyspark.sql.datasource import DataSourceStreamReader, InputPartition | - |class SimpleDataStreamReader(DataSourceStreamReader): + |class TestDataStreamReader(DataSourceStreamReader): | current = 0 | def initialOffset(self): | return {"offset": {"partition-1": 0}} @@ -57,6 +57,43 @@ class PythonStreamingDataSourceSuite extends 
PythonDataSourceSuiteBase { | yield (partition.value,) |""".stripMargin + protected def simpleDataStreamReaderScript: String = + """ + |from pyspark.sql.datasource import SimpleDataSourceStreamReader + | + |class SimpleDataStreamReader(SimpleDataSourceStreamReader): + | def initialOffset(self): + | return {"partition-1": 0} + | def read(self, start: dict): + | start_idx = start["partition-1"] + | it = iter([(i, ) for i in range(start_idx, start_idx + 2)]) + | return (it, {"partition-1": start_idx + 2}) + | def readBetweenOffsets(self, start: dict, end: dict): + | start_idx = start["partition-1"] + | end_idx = end["partition-1"] + | return iter([(i, ) for i in range(start_idx, end_idx)]) + |""".stripMargin + + protected def simpleDataStreamReaderWithEmptyBatchScript: String = + """ + |from pyspark.sql.datasource import SimpleDataSourceStreamReader + | + |class SimpleDataStreamReader(SimpleDataSourceStreamReader): + | def initialOffset(self): + | return {"partition-1": 0} + | def read(self, start: dict): + | start_idx = start["partition-1"] + | if start_idx % 4 == 0: + | it = iter([(i, ) for i in range(start_idx, start_idx + 2)]) + | else: + | it = iter([]) + | return (it, {"partition-1": start_idx + 2}) + | def readBetweenOffsets(self, start: dict, end: dict): + | start_idx = start["partition-1"] + | end_idx = end["partition-1"] + | return iter([(i, ) for i in range(start_idx, end_idx)]) + |""".stripMargin + protected def errorDataStreamReaderScript: String = """ |from pyspark.sql.datasource import DataSourceStreamReader, InputPartition @@ -117,11 +154,11 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { val dataSourceScript = s""" |from pyspark.sql.datasource import DataSource - |$simpleDataStreamReaderScript + |$testDataStreamReaderScript | |class $dataSourceName(DataSource): | def streamReader(self, schema): - | return SimpleDataStreamReader() + | return TestDataStreamReader() |""".stripMargin val inputSchema = StructType.fromDDL("input BINARY") @@ -144,7 +181,7 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { stream.stop() } - test("Read from simple data stream source") { + test("SimpleDataSourceStreamReader run query and restart") { assume(shouldTestPandasUDFs) val dataSourceScript = s""" @@ -154,9 +191,264 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { |class $dataSourceName(DataSource): | def schema(self) -> str: | return "id INT" - | def streamReader(self, schema): + | def simpleStreamReader(self, schema): + | return SimpleDataStreamReader() + |""".stripMargin + val dataSource = createUserDefinedPythonDataSource(dataSourceName, dataSourceScript) + spark.dataSource.registerPython(dataSourceName, dataSource) + assert(spark.sessionState.dataSourceManager.dataSourceExists(dataSourceName)) + withTempDir { dir => + val path = dir.getAbsolutePath + val checkpointDir = new File(path, "checkpoint") + val df = spark.readStream.format(dataSourceName).load() + + val stopSignal1 = new CountDownLatch(1) + + val q1 = df + .writeStream + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .foreachBatch((df: DataFrame, batchId: Long) => { + df.cache() + checkAnswer(df, Seq(Row(batchId * 2), Row(batchId * 2 + 1))) + if (batchId == 10) stopSignal1.countDown() + }) + .start() + stopSignal1.await() + assert(q1.recentProgress.forall(_.numInputRows == 2)) + q1.stop() + q1.awaitTermination() + + val stopSignal2 = new CountDownLatch(1) + val q2 = df + .writeStream + .option("checkpointLocation", 
checkpointDir.getAbsolutePath) + .foreachBatch((df: DataFrame, batchId: Long) => { + df.cache() + checkAnswer(df, Seq(Row(batchId * 2), Row(batchId * 2 + 1))) + if (batchId == 20) stopSignal2.countDown() + }).start() + stopSignal2.await() + assert(q2.recentProgress.forall(_.numInputRows == 2)) + q2.stop() + q2.awaitTermination() + } + } + + // Verify that the prefetch and cache pattern of SimpleDataSourceStreamReader handles empty + // data batches correctly. + test("SimpleDataSourceStreamReader read empty batch") { + assume(shouldTestPandasUDFs) + val dataSourceScript = + s""" + |from pyspark.sql.datasource import DataSource + |$simpleDataStreamReaderWithEmptyBatchScript + | + |class $dataSourceName(DataSource): + | def schema(self) -> str: + | return "id INT" + | def simpleStreamReader(self, schema): | return SimpleDataStreamReader() |""".stripMargin + val dataSource = createUserDefinedPythonDataSource(dataSourceName, dataSourceScript) + spark.dataSource.registerPython(dataSourceName, dataSource) + assert(spark.sessionState.dataSourceManager.dataSourceExists(dataSourceName)) + withTempDir { dir => + val path = dir.getAbsolutePath + val checkpointDir = new File(path, "checkpoint") + val df = spark.readStream.format(dataSourceName).load() + + val stopSignal = new CountDownLatch(1) + + val q = df + .writeStream + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .foreachBatch((df: DataFrame, batchId: Long) => { + df.cache() + if (batchId % 2 == 0) { + checkAnswer(df, Seq(Row(batchId * 2), Row(batchId * 2 + 1))) + } else { + assert(df.isEmpty) + } + if (batchId == 10) stopSignal.countDown() + }) + .start() + stopSignal.await() + q.stop() + q.awaitTermination() + } + } + + test("SimpleDataSourceStreamReader read exactly once") { + assume(shouldTestPandasUDFs) + val dataSourceScript = + s""" + |from pyspark.sql.datasource import DataSource + |$simpleDataStreamReaderScript + | + |class $dataSourceName(DataSource): + | def schema(self) -> str: + | return "id INT" + | def simpleStreamReader(self, schema): + | return SimpleDataStreamReader() + |""".stripMargin + val dataSource = createUserDefinedPythonDataSource(dataSourceName, dataSourceScript) + spark.dataSource.registerPython(dataSourceName, dataSource) + assert(spark.sessionState.dataSourceManager.dataSourceExists(dataSourceName)) + withTempDir { dir => + val path = dir.getAbsolutePath + val checkpointDir = new File(path, "checkpoint") + val outputDir = new File(path, "output") + val df = spark.readStream.format(dataSourceName).load() + var lastBatchId = 0 + // Restart the streaming query multiple times to verify the exactly-once guarantee. + for (i <- 1 to 5) { + + if (i % 2 == 0) { + // Remove the last entry of the commit log to test replaying a microbatch during restart. + val offsetLog = new OffsetSeqLog( + spark, new File(checkpointDir, "offsets").getCanonicalPath) + val commitLog = new CommitLog( + spark, new File(checkpointDir, "commits").getCanonicalPath) + commitLog.purgeAfter(offsetLog.getLatest().get._1 - 1) + } + + val q = df + .writeStream + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .format("json") + .start(outputDir.getAbsolutePath) + + while (q.recentProgress.length < 5) { + Thread.sleep(200) + } + q.stop() + q.awaitTermination() + lastBatchId = q.lastProgress.batchId.toInt + } + assert(lastBatchId > 20) + val rowCount = spark.read.format("json").load(outputDir.getAbsolutePath).count() + // There may be one uncommitted batch that is not recorded in query progress.
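+ // (its output is already written to the sink even though no commit entry was recorded for it)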
+ // The number of batches can be lastBatchId + 1 or lastBatchId + 2. + assert(rowCount == 2 * (lastBatchId + 1) || rowCount == 2 * (lastBatchId + 2)) + checkAnswer(spark.read.format("json").load(outputDir.getAbsolutePath), + (0 until rowCount.toInt).map(Row(_))) + } + } + + test("initialOffset() method not implemented in SimpleDataSourceStreamReader") { + assume(shouldTestPandasUDFs) + val initialOffsetNotImplementedScript = + s""" + |from pyspark.sql.datasource import DataSource + |from pyspark.sql.datasource import SimpleDataSourceStreamReader + |class ErrorDataStreamReader(SimpleDataSourceStreamReader): + | ... + | + |class $errorDataSourceName(DataSource): + | def simpleStreamReader(self, schema): + | return ErrorDataStreamReader() + |""".stripMargin + val inputSchema = StructType.fromDDL("input BINARY") + + val dataSource = + createUserDefinedPythonDataSource(errorDataSourceName, initialOffsetNotImplementedScript) + spark.dataSource.registerPython(errorDataSourceName, dataSource) + val pythonDs = new PythonDataSourceV2 + pythonDs.setShortName("ErrorDataSource") + + def testMicroBatchStreamError(action: String, msg: String) + (func: PythonMicroBatchStream => Unit): Unit = { + val stream = new PythonMicroBatchStream( + pythonDs, errorDataSourceName, inputSchema, CaseInsensitiveStringMap.empty()) + val err = intercept[SparkException] { + func(stream) + } + checkErrorMatchPVals(err, + errorClass = "PYTHON_STREAMING_DATA_SOURCE_RUNTIME_ERROR", + parameters = Map( + "action" -> action, + "msg" -> "(.|\\n)*" + )) + assert(err.getMessage.contains(msg)) + assert(err.getMessage.contains("ErrorDataSource")) + stream.stop() + } + + testMicroBatchStreamError( + "initialOffset", "[NOT_IMPLEMENTED] initialOffset is not implemented") { + stream => stream.initialOffset() + } + + // Users don't need to implement latestOffset for SimpleDataSourceStreamReader. + // The latestOffset method of the simple stream reader invokes initialOffset() and read(), + // so the method reported as not implemented is initialOffset().
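+ // (the action is still reported as latestOffset, the JVM-side call that failed, while the error message names the missing Python method)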
+ testMicroBatchStreamError( + "latestOffset", "[NOT_IMPLEMENTED] initialOffset is not implemented") { + stream => stream.latestOffset() + } + } + + test("read() method throws error in SimpleDataSourceStreamReader") { + assume(shouldTestPandasUDFs) + val initialOffsetNotImplementedScript = + s""" + |from pyspark.sql.datasource import DataSource + |from pyspark.sql.datasource import SimpleDataSourceStreamReader + |class ErrorDataStreamReader(SimpleDataSourceStreamReader): + | def initialOffset(self): + | return {"partition": 1} + | def read(self, start): + | raise Exception("error reading available data") + | + |class $errorDataSourceName(DataSource): + | def simpleStreamReader(self, schema): + | return ErrorDataStreamReader() + |""".stripMargin + val inputSchema = StructType.fromDDL("input BINARY") + + val dataSource = + createUserDefinedPythonDataSource(errorDataSourceName, initialOffsetNotImplementedScript) + spark.dataSource.registerPython(errorDataSourceName, dataSource) + val pythonDs = new PythonDataSourceV2 + pythonDs.setShortName("ErrorDataSource") + + def testMicroBatchStreamError(action: String, msg: String) + (func: PythonMicroBatchStream => Unit): Unit = { + val stream = new PythonMicroBatchStream( + pythonDs, errorDataSourceName, inputSchema, CaseInsensitiveStringMap.empty()) + val err = intercept[SparkException] { + func(stream) + } + checkErrorMatchPVals(err, + errorClass = "PYTHON_STREAMING_DATA_SOURCE_RUNTIME_ERROR", + parameters = Map( + "action" -> action, + "msg" -> "(.|\\n)*" + )) + assert(err.getMessage.contains(msg)) + assert(err.getMessage.contains("ErrorDataSource")) + stream.stop() + } + + testMicroBatchStreamError( + "latestOffset", "Exception: error reading available data") { + stream => stream.latestOffset() + } + } + + test("Read from test data stream source") { + assume(shouldTestPandasUDFs) + val dataSourceScript = + s""" + |from pyspark.sql.datasource import DataSource + |$testDataStreamReaderScript + | + |class $dataSourceName(DataSource): + | def schema(self) -> str: + | return "id INT" + | def streamReader(self, schema): + | return TestDataStreamReader() + |""".stripMargin val dataSource = createUserDefinedPythonDataSource(dataSourceName, dataSourceScript) spark.dataSource.registerPython(dataSourceName, dataSource) @@ -178,6 +470,42 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { q.awaitTermination() } + // Verify that the socket between the Python runner and the JVM doesn't time out with a large trigger interval. + test("Read from test data stream source, trigger interval=20 seconds") { + assume(shouldTestPandasUDFs) + val dataSourceScript = + s""" + |from pyspark.sql.datasource import DataSource + |$testDataStreamReaderScript + | + |class $dataSourceName(DataSource): + | def schema(self) -> str: + | return "id INT" + | def streamReader(self, schema): + | return TestDataStreamReader() + |""".stripMargin + + val dataSource = createUserDefinedPythonDataSource(dataSourceName, dataSourceScript) + spark.dataSource.registerPython(dataSourceName, dataSource) + assert(spark.sessionState.dataSourceManager.dataSourceExists(dataSourceName)) + val df = spark.readStream.format(dataSourceName).load() + + val stopSignal = new CountDownLatch(1) + + val q = df.writeStream.foreachBatch((df: DataFrame, batchId: Long) => { + // checkAnswer may materialize the dataframe more than once + // Cache here to make sure the numInputRows metric is consistent.
+ df.cache() + checkAnswer(df, Seq(Row(batchId * 2), Row(batchId * 2 + 1))) + if (batchId >= 2) stopSignal.countDown() + }).trigger(ProcessingTimeTrigger(20 * 1000)).start() + stopSignal.await() + assert(q.recentProgress.forall(_.numInputRows == 2)) + q.stop() + q.awaitTermination() + assert(q.exception.isEmpty) + } + test("Streaming data source read with custom partitions") { assume(shouldTestPandasUDFs) val dataSourceScript = @@ -188,7 +516,7 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { | self.start = start | self.end = end | - |class SimpleDataStreamReader(DataSourceStreamReader): + |class TestDataStreamReader(DataSourceStreamReader): | current = 0 | def initialOffset(self): | return {"offset": 0} @@ -210,7 +538,7 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { | return "id INT" | | def streamReader(self, schema): - | return SimpleDataStreamReader() + | return TestDataStreamReader() |""".stripMargin val dataSource = createUserDefinedPythonDataSource(dataSourceName, dataSourceScript) spark.dataSource.registerPython(dataSourceName, dataSource) @@ -303,7 +631,6 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { assert(err.getMessage.contains("error reading data")) } - test("Method not implemented in stream reader") { assume(shouldTestPandasUDFs) val dataSourceScript = @@ -476,6 +803,46 @@ class PythonStreamingDataSourceSuite extends PythonDataSourceSuiteBase { } } + // Verify that the commit runner works correctly with a large trigger interval. + test(s"data source stream write, trigger interval=20 seconds") { + assume(shouldTestPandasUDFs) + val dataSource = + createUserDefinedPythonDataSource(dataSourceName, simpleDataStreamWriterScript) + spark.dataSource.registerPython(dataSourceName, dataSource) + val inputData = MemoryStream[Int](numPartitions = 3) + val df = inputData.toDF() + withTempDir { dir => + val path = dir.getAbsolutePath + val checkpointDir = new File(path, "checkpoint") + checkpointDir.mkdir() + val outputDir = new File(path, "output") + outputDir.mkdir() + val q = df + .writeStream + .format(dataSourceName) + .option("checkpointLocation", checkpointDir.getAbsolutePath) + .trigger(ProcessingTimeTrigger(20 * 1000)) + .start(outputDir.getAbsolutePath) + def resultDf: DataFrame = spark.read.format("json") + .load(outputDir.getAbsolutePath) + + inputData.addData(1 to 3) + eventually(timeout(waitTimeout * 5)) { + assert(q.lastProgress.batchId >= 1) + } + checkAnswer(resultDf, (1 to 3).map(Row(_))) + + inputData.addData(4 to 6) + eventually(timeout(waitTimeout * 5)) { + assert(q.lastProgress.batchId >= 2) + } + checkAnswer(resultDf, (1 to 6).map(Row(_))) + q.stop() + q.awaitTermination() + assert(q.exception.isEmpty) + } + } + test("streaming sink write commit and abort") { assume(shouldTestPandasUDFs) // The data source write the number of rows and partitions into batchId.json in diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala index 3101281251b1b..2e56ad0ab4160 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.execution.python -import org.apache.spark.sql.{IntegratedUDFTestUtils, QueryTest} -import org.apache.spark.sql.functions.count +import org.apache.spark.sql.{AnalysisException,
IntegratedUDFTestUtils, QueryTest} +import org.apache.spark.sql.functions.{array, count, transform} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.LongType @@ -112,4 +112,16 @@ class PythonUDFSuite extends QueryTest with SharedSparkSession { val pandasTestUDF = TestGroupedAggPandasUDF(name = udfName) assert(df.agg(pandasTestUDF(df("id"))).schema.fieldNames.exists(_.startsWith(udfName))) } + + test("SPARK-48706: Negative test case for Python UDF in higher order functions") { + assume(shouldTestPythonUDFs) + checkError( + exception = intercept[AnalysisException] { + spark.range(1).select(transform(array("id"), x => pythonTestUDF(x))).collect() + }, + errorClass = "UNSUPPORTED_FEATURE.LAMBDA_FUNCTION_WITH_PYTHON_UDF", + parameters = Map("funcName" -> "\"pyUDF(namedlambdavariable())\""), + context = ExpectedContext( + "transform", s".*${this.getClass.getSimpleName}.*")) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala index 989597ae041db..1eaf1d24056da 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDTFSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.python import org.apache.spark.api.python.PythonEvalType import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest, Row} import org.apache.spark.sql.catalyst.expressions.{Add, Alias, Expression, FunctionTableSubqueryArgumentExpression, Literal} +import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, OneRowRelation, Project, Repartition, RepartitionByExpression, Sort, SubqueryAlias} import org.apache.spark.sql.functions.lit import org.apache.spark.sql.internal.SQLConf @@ -363,4 +364,29 @@ class PythonUDTFSuite extends QueryTest with SharedSparkSession { Row("abc")) } } + + test("SPARK-48180: Analyzer bug with multiple ORDER BY items for input table argument") { + assume(shouldTestPythonUDFs) + spark.udtf.registerPython("testUDTF", pythonUDTF) + checkError( + exception = intercept[ParseException](sql( + """ + |SELECT * FROM testUDTF( + | TABLE(SELECT 1 AS device_id, 2 AS data_ds) + | WITH SINGLE PARTITION + | ORDER BY device_id, data_ds) + |""".stripMargin)), + errorClass = "_LEGACY_ERROR_TEMP_0064", + parameters = Map("msg" -> + ("The table function call includes a table argument with an invalid " + + "partitioning/ordering specification: the ORDER BY clause included multiple " + + "expressions without parentheses surrounding them; please add parentheses around these " + + "expressions and then retry the query again")), + context = ExpectedContext( + fragment = "TABLE(SELECT 1 AS device_id, 2 AS data_ds)\n " + + "WITH SINGLE PARTITION\n " + + "ORDER BY device_id, data_ds", + start = 27, + stop = 122)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/MapStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/MapStateSuite.scala index 572fc2429273b..5b304c55dd5a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/MapStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/MapStateSuite.scala @@ -17,12 +17,14 @@ package org.apache.spark.sql.execution.streaming.state +import 
java.time.Duration import java.util.UUID +import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.Encoders import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.execution.streaming.{ImplicitGroupingKeyTracker, StatefulProcessorHandleImpl} -import org.apache.spark.sql.streaming.{ListState, MapState, TimeMode, ValueState} +import org.apache.spark.sql.execution.streaming.{ImplicitGroupingKeyTracker, MapStateImplWithTTL, StatefulProcessorHandleImpl} +import org.apache.spark.sql.streaming.{ListState, MapState, TimeMode, TTLConfig, ValueState} import org.apache.spark.sql.types.{BinaryType, StructType} /** @@ -167,4 +169,90 @@ class MapStateSuite extends StateVariableSuiteBase { assert(mapTestState2.iterator().toList === List(("k2", 4))) } } + + test("test Map state TTL") { + tryWithProviderResource(newStoreProviderWithStateVariable(true)) { provider => + val store = provider.getStore(0) + val timestampMs = 10 + val handle = new StatefulProcessorHandleImpl(store, UUID.randomUUID(), + Encoders.STRING.asInstanceOf[ExpressionEncoder[Any]], TimeMode.ProcessingTime(), + batchTimestampMs = Some(timestampMs)) + + val ttlConfig = TTLConfig(ttlDuration = Duration.ofMinutes(1)) + val testState: MapStateImplWithTTL[String, String] = + handle.getMapState[String, String]("testState", Encoders.STRING, + Encoders.STRING, ttlConfig).asInstanceOf[MapStateImplWithTTL[String, String]] + ImplicitGroupingKeyTracker.setImplicitKey("test_key") + testState.updateValue("k1", "v1") + assert(testState.getValue("k1") === "v1") + assert(testState.getWithoutEnforcingTTL("k1").get === "v1") + + val ttlExpirationMs = timestampMs + 60000 + var ttlValue = testState.getTTLValue("k1") + assert(ttlValue.isDefined) + assert(ttlValue.get._2 === ttlExpirationMs) + var ttlStateValueIterator = testState.getKeyValuesInTTLState().map(_._2) + assert(ttlStateValueIterator.hasNext) + + // increment batchProcessingTime, or watermark and ensure expired value is not returned + val nextBatchHandle = new StatefulProcessorHandleImpl(store, UUID.randomUUID(), + Encoders.STRING.asInstanceOf[ExpressionEncoder[Any]], + TimeMode.ProcessingTime(), batchTimestampMs = Some(ttlExpirationMs)) + + val nextBatchTestState: MapStateImplWithTTL[String, String] = + nextBatchHandle.getMapState[String, String]( + "testState", Encoders.STRING, Encoders.STRING, ttlConfig) + .asInstanceOf[MapStateImplWithTTL[String, String]] + + ImplicitGroupingKeyTracker.setImplicitKey("test_key") + + // ensure get does not return the expired value + assert(!nextBatchTestState.exists()) + assert(nextBatchTestState.getValue("k1") === null) + + // ttl value should still exist in state + ttlValue = nextBatchTestState.getTTLValue("k1") + assert(ttlValue.isDefined) + assert(ttlValue.get._2 === ttlExpirationMs) + ttlStateValueIterator = nextBatchTestState.getKeyValuesInTTLState().map(_._2) + assert(ttlStateValueIterator.hasNext) + assert(ttlStateValueIterator.next() === ttlExpirationMs) + assert(ttlStateValueIterator.isEmpty) + + // getWithoutTTL should still return the expired value + assert(nextBatchTestState.getWithoutEnforcingTTL("k1").get === "v1") + + nextBatchTestState.clear() + assert(!nextBatchTestState.exists()) + assert(nextBatchTestState.getValue("k1") === null) + } + } + + test("test negative or zero TTL duration throws error") { + tryWithProviderResource(newStoreProviderWithStateVariable(true)) { provider => + val store = provider.getStore(0) + val batchTimestampMs = 10 + val handle = new 
StatefulProcessorHandleImpl(store, UUID.randomUUID(), + Encoders.STRING.asInstanceOf[ExpressionEncoder[Any]], + TimeMode.ProcessingTime(), batchTimestampMs = Some(batchTimestampMs)) + + Seq(null, Duration.ZERO, Duration.ofMinutes(-1)).foreach { ttlDuration => + val ttlConfig = TTLConfig(ttlDuration) + val ex = intercept[SparkUnsupportedOperationException] { + handle.getMapState[String, String]( + "testState", Encoders.STRING, Encoders.STRING, ttlConfig) + } + + checkError( + ex, + errorClass = "STATEFUL_PROCESSOR_TTL_DURATION_MUST_BE_POSITIVE", + parameters = Map( + "operationType" -> "update", + "stateName" -> "testState" + ), + matchPVals = true + ) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala index ab2afa1b8a617..ea54fb997ca2e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala @@ -19,8 +19,11 @@ package org.apache.spark.sql.execution.streaming.state import java.io._ import java.nio.charset.Charset +import java.util.concurrent.Executors import scala.collection.mutable +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration._ import scala.language.implicitConversions import org.apache.commons.io.FileUtils @@ -874,6 +877,41 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared ) } + testWithChangelogCheckpointingEnabled("RocksDBFileManager: " + + "background snapshot upload doesn't acquire RocksDB instance lock") { + // Create a custom ExecutionContext + implicit val ec: ExecutionContext = ExecutionContext + .fromExecutor(Executors.newSingleThreadExecutor()) + + val remoteDir = Utils.createTempDir().toString + val conf = dbConf.copy(lockAcquireTimeoutMs = 10000, minDeltasForSnapshot = 0) + new File(remoteDir).delete() // to make sure that the directory gets created + + withDB(remoteDir, conf = conf) { db => + db.load(0) + db.put("0", "0") + db.commit() + + // Acquire lock + db.load(1) + db.put("1", "1") + + // Run doMaintenance in another thread + val maintenanceFuture = Future { + db.doMaintenance() + } + + val timeout = 5.seconds + + // Ensure that maintenance task runs without being blocked by task thread + ThreadUtils.awaitResult(maintenanceFuture, timeout) + assert(snapshotVersionsPresent(remoteDir) == Seq(1)) + + // Release lock + db.commit() + } + } + testWithChangelogCheckpointingEnabled("RocksDBFileManager: read and write changelog") { val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") val fileManager = new RocksDBFileManager( @@ -1699,6 +1737,11 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared db.load(0) db.put("a", "1") db.commit() + if (boundedMemoryUsage == "true") { + assert(db.metricsOpt.get.totalMemUsageBytes === 0) + } else { + assert(db.metricsOpt.get.totalMemUsageBytes > 0) + } db.getWriteBufferManagerAndCache() } @@ -1709,6 +1752,11 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared db.load(0) db.put("a", "1") db.commit() + if (boundedMemoryUsage == "true") { + assert(db.metricsOpt.get.totalMemUsageBytes === 0) + } else { + assert(db.metricsOpt.get.totalMemUsageBytes > 0) + } db.getWriteBufferManagerAndCache() } @@ -1758,6 +1806,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with 
Shared db.remove("a") db.put("c", "3") db.commit() + assert(db.metricsOpt.get.totalMemUsageBytes === 0) } } finally { RocksDBMemoryManager.resetWriteBufferManagerAndCache @@ -1930,7 +1979,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } testWithChangelogCheckpointingEnabled("time travel 4 -" + - " validate successful RocksDB load") { + " validate successful RocksDB load when metadata file is overwritten") { val remoteDir = Utils.createTempDir().toString val conf = dbConf.copy(minDeltasForSnapshot = 2, compactOnCommit = false) new File(remoteDir).delete() // to make sure that the directory gets created @@ -1945,8 +1994,7 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared db.load(1) db.put("3", "3") - // do maintenance - upload any latest snapshots so far - // would fail to acquire lock and no snapshots would be uploaded + // upload any latest snapshots so far db.doMaintenance() db.commit() // upload newly created snapshot 2.zip @@ -1958,6 +2006,47 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } + testWithChangelogCheckpointingEnabled("time travel 5 -" + + "validate successful RocksDB load when metadata file is not overwritten") { + // Ensure commit doesn't modify the latestSnapshot that doMaintenance will upload + val fmClass = "org.apache.spark.sql.execution.streaming.state." + + "NoOverwriteFileSystemBasedCheckpointFileManager" + withTempDir { dir => + val conf = dbConf.copy(minDeltasForSnapshot = 0) // create snapshot every commit + val hadoopConf = new Configuration() + hadoopConf.set(STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, fmClass) + + val remoteDir = dir.getCanonicalPath + withDB(remoteDir, conf = conf, hadoopConf = hadoopConf) { db => + db.load(0) + db.put("a", "1") + db.commit() + + // load previous version, and recreate the snapshot + db.load(0) + db.put("a", "1") + + // upload version 1 snapshot created above + db.doMaintenance() + assert(snapshotVersionsPresent(remoteDir) == Seq(1)) + + db.commit() // create snapshot again + + // load version 1 - should succeed + withDB(remoteDir, version = 1, conf = conf, hadoopConf = hadoopConf) { db => + } + + // upload recently created snapshot + db.doMaintenance() + assert(snapshotVersionsPresent(remoteDir) == Seq(1)) + + // load version 1 again - should succeed + withDB(remoteDir, version = 1, conf = conf, hadoopConf = hadoopConf) { db => + } + } + } + } + test("validate Rocks DB SST files do not have a VersionIdMismatch" + " when metadata file is not overwritten - scenario 1") { val fmClass = "org.apache.spark.sql.execution.streaming.state." 
+ @@ -2257,7 +2346,11 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared numKeys: Int): Unit = { val checkpointDir = Utils.createTempDir().getAbsolutePath // local dir to create checkpoints generateFiles(checkpointDir, fileToLengths) - fileManager.saveCheckpointToDfs(checkpointDir, version, numKeys) + fileManager.saveCheckpointToDfs( + checkpointDir, + version, + numKeys, + fileManager.captureFileMapReference()) } def loadAndVerifyCheckpointFiles( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala index a089a05469f75..feab7a5fa3b0a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala @@ -19,10 +19,12 @@ package org.apache.spark.sql.execution.streaming.state import java.util.UUID -import scala.util.Random +import scala.util.{Random, Try} import org.apache.hadoop.conf.Configuration +import org.apache.spark.SparkUnsupportedOperationException +import org.apache.spark.sql.execution.streaming.StatefulOperatorStateInfo import org.apache.spark.sql.execution.streaming.state.StateStoreTestsHelper.newDir import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -65,12 +67,12 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { private val keySchemaWithCollation = new StructType() .add(StructField("key1", IntegerType, nullable = true)) - .add(StructField("key2", StringType("UTF8_BINARY_LCASE"), nullable = true)) + .add(StructField("key2", StringType("UTF8_LCASE"), nullable = true)) .add(StructField("key3", structSchema, nullable = true)) private val valueSchemaWithCollation = new StructType() .add(StructField("value1", IntegerType, nullable = true)) - .add(StructField("value2", StringType("UTF8_BINARY_LCASE"), nullable = true)) + .add(StructField("value2", StringType("UTF8_LCASE"), nullable = true)) .add(StructField("value3", structSchema, nullable = true)) // Checks on adding/removing (nested) field. 
@@ -253,9 +255,9 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { test("SPARK-47776: checking for compatibility with collation change in key") { verifyException(keySchema, valueSchema, keySchemaWithCollation, valueSchema, - ignoreValueSchema = false) + ignoreValueSchema = false, keyCollationChecks = true) verifyException(keySchemaWithCollation, valueSchema, keySchema, valueSchema, - ignoreValueSchema = false) + ignoreValueSchema = false, keyCollationChecks = true) } test("SPARK-47776: checking for compatibility with collation change in value") { @@ -287,47 +289,47 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { StructType(newFields) } - private def runSchemaChecker( - dir: String, - queryId: UUID, - newKeySchema: StructType, - newValueSchema: StructType, - ignoreValueSchema: Boolean): Unit = { - // in fact, Spark doesn't support online state schema change, so need to check - // schema only once for each running of JVM - val providerId = StateStoreProviderId( - StateStoreId(dir, opId, partitionId), queryId) - - new StateSchemaCompatibilityChecker(providerId, hadoopConf) - .check(newKeySchema, newValueSchema, ignoreValueSchema = ignoreValueSchema) - } - private def verifyException( oldKeySchema: StructType, oldValueSchema: StructType, newKeySchema: StructType, newValueSchema: StructType, - ignoreValueSchema: Boolean = false): Unit = { + ignoreValueSchema: Boolean = false, + keyCollationChecks: Boolean = false): Unit = { val dir = newDir() - val queryId = UUID.randomUUID() - runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema, - ignoreValueSchema = ignoreValueSchema) - - val e = intercept[StateSchemaNotCompatible] { - runSchemaChecker(dir, queryId, newKeySchema, newValueSchema, - ignoreValueSchema = ignoreValueSchema) + val runId = UUID.randomUUID() + val stateInfo = StatefulOperatorStateInfo(dir, runId, opId, 0, 200) + val formatValidationForValue = !ignoreValueSchema + val extraOptions = Map(StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG + -> formatValidationForValue.toString) + + val result = Try( + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(stateInfo, hadoopConf, + oldKeySchema, oldValueSchema, spark.sessionState, extraOptions) + ).toEither.fold(Some(_), _ => None) + + val ex = if (result.isDefined) { + result.get.asInstanceOf[SparkUnsupportedOperationException] + } else { + intercept[SparkUnsupportedOperationException] { + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(stateInfo, hadoopConf, + newKeySchema, newValueSchema, spark.sessionState, extraOptions) + } } - assert(e.getMessage.contains("Provided schema doesn't match to the schema for existing state!")) - assert(e.getMessage.contains(newKeySchema.toString())) - assert(e.getMessage.contains(oldKeySchema.toString())) - - if (ignoreValueSchema) { - assert(!e.getMessage.contains(newValueSchema.toString())) - assert(!e.getMessage.contains(oldValueSchema.toString())) + // collation checks are also performed in this path. so we need to check for them explicitly. 
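+ // (a collated key column trips the binary-inequality check, so that is the error asserted here rather than a schema mismatch)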
+ if (keyCollationChecks) { + assert(ex.getMessage.contains("Binary inequality column is not supported")) + assert(ex.getErrorClass === "STATE_STORE_UNSUPPORTED_OPERATION_BINARY_INEQUALITY") } else { - assert(e.getMessage.contains(newValueSchema.toString())) - assert(e.getMessage.contains(oldValueSchema.toString())) + if (ignoreValueSchema) { + // if value schema is ignored, the mismatch has to be on the key schema + assert(ex.getErrorClass === "STATE_STORE_KEY_SCHEMA_NOT_COMPATIBLE") + } else { + assert(ex.getErrorClass === "STATE_STORE_KEY_SCHEMA_NOT_COMPATIBLE" || + ex.getErrorClass === "STATE_STORE_VALUE_SCHEMA_NOT_COMPATIBLE") + } + assert(ex.getMessage.contains("does not match existing")) } } @@ -338,10 +340,16 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { newValueSchema: StructType, ignoreValueSchema: Boolean = false): Unit = { val dir = newDir() - val queryId = UUID.randomUUID() - runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema, - ignoreValueSchema = ignoreValueSchema) - runSchemaChecker(dir, queryId, newKeySchema, newValueSchema, - ignoreValueSchema = ignoreValueSchema) + val runId = UUID.randomUUID() + val stateInfo = StatefulOperatorStateInfo(dir, runId, opId, 0, 200) + val formatValidationForValue = !ignoreValueSchema + val extraOptions = Map(StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG + -> formatValidationForValue.toString) + + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(stateInfo, hadoopConf, + oldKeySchema, oldValueSchema, spark.sessionState, extraOptions) + + StateSchemaCompatibilityChecker.validateAndMaybeEvolveStateSchema(stateInfo, hadoopConf, + newKeySchema, newValueSchema, spark.sessionState, extraOptions) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index 4523a14ca1ccd..2c4111ec026ae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -30,6 +30,8 @@ import scala.util.Random import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ +import org.json4s.DefaultFormats +import org.json4s.jackson.JsonMethods import org.scalatest.{BeforeAndAfter, PrivateMethodTester} import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ @@ -41,6 +43,7 @@ import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeProj import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorSuite.withCoordinatorRef +import org.apache.spark.sql.execution.streaming.state.StateStoreValueRowFormatValidationFailure import org.apache.spark.sql.functions.count import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -388,6 +391,44 @@ class StateStoreSuite extends StateStoreSuiteBase[HDFSBackedStateStoreProvider] } } + test("SPARK-48105: state store unload/close happens during the maintenance") { + tryWithProviderResource( + newStoreProvider(opId = Random.nextInt(), partition = 0, minDeltasForSnapshot = 1)) { + provider => + val store = provider.getStore(0).asInstanceOf[provider.HDFSBackedStateStore] + val values = (1 to 20) + val keys = values.map(i => ("a" + i)) + keys.zip(values).map{case (k, 
v) => put(store, k, 0, v)} + // commit state store with 20 keys. + store.commit() + // get the state store iterator: mimic the case in which the iterator is held by the + // maintenance thread. + val storeIterator = store.iterator() + + // the store iterator should still be valid as the maintenance thread may already + // hold it and be doing snapshotting even though the state store is unloaded. + val outputKeys = new mutable.ArrayBuffer[String] + val outputValues = new mutable.ArrayBuffer[Int] + var cnt = 0 + while (storeIterator.hasNext) { + if (cnt == 10) { + // Mimic the case where the provider is loaded in another executor in the middle of + // iteration. When this happens, the provider will be unloaded and closed in + // the current executor. + provider.close() + } + val unsafeRowPair = storeIterator.next() + val (key, _) = keyRowToData(unsafeRowPair.key) + outputKeys.append(key) + outputValues.append(valueRowToData(unsafeRowPair.value)) + + cnt = cnt + 1 + } + assert(keys.sorted === outputKeys.sorted) + assert(values.sorted === outputValues.sorted) + } + } + test("maintenance") { val conf = new SparkConf() .setMaster("local") @@ -1568,12 +1609,12 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] // By default, when there is an invalid pair of value row and value schema, it should throw val keyRow = dataToKeyRow("key", 1) val valueRow = dataToValueRow(2) - val e = intercept[InvalidUnsafeRowException] { + val e = intercept[StateStoreValueRowFormatValidationFailure] { // Here valueRow doesn't match with prefixKeySchema StateStoreProvider.validateStateRowFormat( keyRow, keySchema, valueRow, keySchema, getDefaultStoreConf()) } - assert(e.getMessage.contains("The streaming query failed by state format invalidation")) + assert(e.getMessage.contains("The streaming query failed to validate written state")) // When sqlConf.stateStoreFormatValidationEnabled is set to false and // StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG is set to true, @@ -1588,6 +1629,30 @@ abstract class StateStoreSuiteBase[ProviderClass <: StateStoreProvider] keyRow, keySchema, valueRow, keySchema, storeConf) } + test("test serialization and deserialization of NoPrefixKeyStateEncoderSpec") { + implicit val formats: DefaultFormats.type = DefaultFormats + val encoderSpec = NoPrefixKeyStateEncoderSpec(keySchema) + val jsonMap = JsonMethods.parse(encoderSpec.json).extract[Map[String, Any]] + val deserializedEncoderSpec = KeyStateEncoderSpec.fromJson(keySchema, jsonMap) + assert(encoderSpec == deserializedEncoderSpec) + } + + test("test serialization and deserialization of PrefixKeyScanStateEncoderSpec") { + implicit val formats: DefaultFormats.type = DefaultFormats + val encoderSpec = PrefixKeyScanStateEncoderSpec(keySchema, 1) + val jsonMap = JsonMethods.parse(encoderSpec.json).extract[Map[String, Any]] + val deserializedEncoderSpec = KeyStateEncoderSpec.fromJson(keySchema, jsonMap) + assert(encoderSpec == deserializedEncoderSpec) + } + + test("test serialization and deserialization of RangeKeyScanStateEncoderSpec") { + implicit val formats: DefaultFormats.type = DefaultFormats + val encoderSpec = RangeKeyScanStateEncoderSpec(keySchema, Seq(1)) + val jsonMap = JsonMethods.parse(encoderSpec.json).extract[Map[String, Any]] + val deserializedEncoderSpec = KeyStateEncoderSpec.fromJson(keySchema, jsonMap) + assert(encoderSpec == deserializedEncoderSpec) + } + /** Return a new provider with a random id */ def newStoreProvider(): ProviderClass diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StatefulProcessorHandleSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StatefulProcessorHandleSuite.scala index aafbf4df60af7..52bdb0213c7e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StatefulProcessorHandleSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StatefulProcessorHandleSuite.scala @@ -255,6 +255,24 @@ class StatefulProcessorHandleSuite extends StateVariableSuiteBase { } } + test("ttl States are populated for mapState and timeMode=ProcessingTime") { + tryWithProviderResource(newStoreProviderWithStateVariable(true)) { provider => + val store = provider.getStore(0) + val handle = new StatefulProcessorHandleImpl(store, + UUID.randomUUID(), keyExprEncoder, TimeMode.ProcessingTime(), + batchTimestampMs = Some(10)) + + val mapStateWithTTL = handle.getMapState("testState", + Encoders.STRING, Encoders.STRING, TTLConfig(Duration.ofHours(1))) + + // create another state without TTL, this should not be captured in the handle + handle.getMapState("testState", Encoders.STRING, Encoders.STRING) + + assert(handle.ttlStates.size() === 1) + assert(handle.ttlStates.get(0) === mapStateWithTTL) + } + } + test("ttl States are not populated for timeMode=None") { tryWithProviderResource(newStoreProviderWithStateVariable(true)) { provider => val store = provider.getStore(0) @@ -263,6 +281,7 @@ class StatefulProcessorHandleSuite extends StateVariableSuiteBase { handle.getValueState("testValueState", Encoders.STRING) handle.getListState("testListState", Encoders.STRING) + handle.getMapState("testMapState", Encoders.STRING, Encoders.STRING) assert(handle.ttlStates.isEmpty) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index 7cce6086c6fd8..aca968745d198 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -476,6 +476,180 @@ class ColumnVectorSuite extends SparkFunSuite with SQLHelper { assert(testVector.getDoubles(0, 3)(2) == 1342.17729d) } + def check(expected: Seq[Any], testVector: WritableColumnVector): Unit = { + expected.zipWithIndex.foreach { + case (v: Integer, idx) => + assert(testVector.getInt(idx) == v) + assert(testVector.getInts(0, testVector.capacity)(idx) == v) + case (v: Short, idx) => + assert(testVector.getShort(idx) == v) + assert(testVector.getShorts(0, testVector.capacity)(idx) == v) + case (v: Byte, idx) => + assert(testVector.getByte(idx) == v) + assert(testVector.getBytes(0, testVector.capacity)(idx) == v) + case (v: Long, idx) => + assert(testVector.getLong(idx) == v) + assert(testVector.getLongs(0, testVector.capacity)(idx) == v) + case (v: Float, idx) => + assert(testVector.getFloat(idx) == v) + assert(testVector.getFloats(0, testVector.capacity)(idx) == v) + case (v: Double, idx) => + assert(testVector.getDouble(idx) == v) + assert(testVector.getDoubles(0, testVector.capacity)(idx) == v) + case (null, idx) => testVector.isNullAt(idx) + case (_, idx) => assert(false, s"Unexpected value at $idx") + } + + // Verify ColumnarArray.copy() works as expected + val arr = new ColumnarArray(testVector, 0, testVector.capacity) + assert(arr.toSeq(testVector.dataType) == expected) + 
assert(arr.copy().toSeq(testVector.dataType) == expected) + } + + testVectors("getInts with dictionary and nulls", 3, IntegerType) { testVector => + // Validate without dictionary + val expected = Seq(1, null, 3) + expected.foreach { + case i: Integer => testVector.appendInt(i) + case _ => testVector.appendNull() + } + check(expected, testVector) + + // Validate with dictionary + val expectedDictionary = Seq(7, null, 9) + val dictArray = (Seq(-1, -1) ++ expectedDictionary.map { + case i: Integer => i.toInt + case _ => -1 + }).toArray + val dict = new ColumnDictionary(dictArray) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 2) + testVector.getDictionaryIds.putInt(1, -1) // This is a null, so the entry should be ignored + testVector.getDictionaryIds.putInt(2, 4) + check(expectedDictionary, testVector) + } + + testVectors("getShorts with dictionary and nulls", 3, ShortType) { testVector => + // Validate without dictionary + val expected = Seq(1.toShort, null, 3.toShort) + expected.foreach { + case i: Short => testVector.appendShort(i) + case _ => testVector.appendNull() + } + check(expected, testVector) + + // Validate with dictionary + val expectedDictionary = Seq(7.toShort, null, 9.toShort) + val dictArray = (Seq(-1, -1) ++ expectedDictionary.map { + case i: Short => i.toInt + case _ => -1 + }).toArray + val dict = new ColumnDictionary(dictArray) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 2) + testVector.getDictionaryIds.putInt(1, -1) // This is a null, so the entry should be ignored + testVector.getDictionaryIds.putInt(2, 4) + check(expectedDictionary, testVector) + } + + testVectors("getBytes with dictionary and nulls", 3, ByteType) { testVector => + // Validate without dictionary + val expected = Seq(1.toByte, null, 3.toByte) + expected.foreach { + case i: Byte => testVector.appendByte(i) + case _ => testVector.appendNull() + } + check(expected, testVector) + + // Validate with dictionary + val expectedDictionary = Seq(7.toByte, null, 9.toByte) + val dictArray = (Seq(-1, -1) ++ expectedDictionary.map { + case i: Byte => i.toInt + case _ => -1 + }).toArray + val dict = new ColumnDictionary(dictArray) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 2) + testVector.getDictionaryIds.putInt(1, -1) // This is a null, so the entry should be ignored + testVector.getDictionaryIds.putInt(2, 4) + check(expectedDictionary, testVector) + } + + testVectors("getLongs with dictionary and nulls", 3, LongType) { testVector => + // Validate without dictionary + val expected = Seq(2147483L, null, 2147485L) + expected.foreach { + case i: Long => testVector.appendLong(i) + case _ => testVector.appendNull() + } + check(expected, testVector) + + // Validate with dictionary + val expectedDictionary = Seq(2147483648L, null, 2147483650L) + val dictArray = (Seq(-1L, -1L) ++ expectedDictionary.map { + case i: Long => i + case _ => -1L + }).toArray + val dict = new ColumnDictionary(dictArray) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 2) + testVector.getDictionaryIds.putInt(1, -1) // This is a null, so the entry should be ignored + testVector.getDictionaryIds.putInt(2, 4) + check(expectedDictionary, testVector) + } + + testVectors("getFloats with dictionary and nulls", 3, FloatType) { testVector => + // Validate without dictionary + val expected = Seq(1.1f, 
null, 3.3f) + expected.foreach { + case i: Float => testVector.appendFloat(i) + case _ => testVector.appendNull() + } + check(expected, testVector) + + // Validate with dictionary + val expectedDictionary = Seq(0.1f, null, 0.3f) + val dictArray = (Seq(-1f, -1f) ++ expectedDictionary.map { + case i: Float => i + case _ => -1f + }).toArray + val dict = new ColumnDictionary(dictArray) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 2) + testVector.getDictionaryIds.putInt(1, -1) // This is a null, so the entry should be ignored + testVector.getDictionaryIds.putInt(2, 4) + check(expectedDictionary, testVector) + } + + testVectors("getDoubles with dictionary and nulls", 3, DoubleType) { testVector => + // Validate without dictionary + val expected = Seq(1.1d, null, 3.3d) + expected.foreach { + case i: Double => testVector.appendDouble(i) + case _ => testVector.appendNull() + } + check(expected, testVector) + + // Validate with dictionary + val expectedDictionary = Seq(1342.17727d, null, 1342.17729d) + val dictArray = (Seq(-1d, -1d) ++ expectedDictionary.map { + case i: Double => i + case _ => -1d + }).toArray + val dict = new ColumnDictionary(dictArray) + testVector.setDictionary(dict) + testVector.reserveDictionaryIds(3) + testVector.getDictionaryIds.putInt(0, 2) + testVector.getDictionaryIds.putInt(1, -1) // This is a null, so the entry should be ignored + testVector.getDictionaryIds.putInt(2, 4) + check(expectedDictionary, testVector) + } + test("[SPARK-22092] off-heap column vector reallocation corrupts array data") { withVector(new OffHeapColumnVector(8, arrayType)) { testVector => val data = testVector.arrayData() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 19251330cffe3..bf5d1b24af219 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -141,7 +141,10 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { // Examples demonstrate alternative syntax, see SPARK-45574 "org.apache.spark.sql.catalyst.expressions.Cast", // Examples demonstrate alternative syntax, see SPARK-47012 - "org.apache.spark.sql.catalyst.expressions.Collate" + "org.apache.spark.sql.catalyst.expressions.Collate", + classOf[ShiftLeft].getName, + classOf[ShiftRight].getName, + classOf[ShiftRightUnsigned].getName ) spark.sessionState.functionRegistry.listFunction().foreach { funcId => val info = spark.sessionState.catalog.lookupFunctionInfo(funcId) @@ -222,6 +225,9 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { // Throws an error "org.apache.spark.sql.catalyst.expressions.RaiseErrorExpressionBuilder", "org.apache.spark.sql.catalyst.expressions.AssertTrue", + // Requires dynamic class loading not available in this test suite. 
+ "org.apache.spark.sql.catalyst.expressions.FromAvro", + "org.apache.spark.sql.catalyst.expressions.ToAvro", classOf[CurrentUser].getName, // The encrypt expression includes a random initialization vector to its encrypted result classOf[AesEncrypt].getName) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala index cd6894ee43711..26011af37bf42 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfEntrySuite.scala @@ -107,7 +107,7 @@ class SQLConfEntrySuite extends SparkFunSuite { test("stringConf") { val key = "spark.sql.SQLConfEntrySuite.string" - val confEntry = buildConf(key).stringConf.createWithDefault(null) + val confEntry = buildConf(key).stringConf.createWithDefault("") assert(conf.getConf(confEntry, "abc") === "abc") conf.setConf(confEntry, "abcd") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala index 18a06e83c076f..404ec865c1b00 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/internal/SQLConfSuite.scala @@ -121,16 +121,12 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { test(s"SPARK-35168: ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS} should respect" + s" ${SQLConf.SHUFFLE_PARTITIONS.key}") { - spark.sessionState.conf.clear() - try { - sql(s"SET ${SQLConf.ADAPTIVE_EXECUTION_ENABLED.key}=true") - sql(s"SET ${SQLConf.COALESCE_PARTITIONS_ENABLED.key}=true") - sql(s"SET ${SQLConf.COALESCE_PARTITIONS_INITIAL_PARTITION_NUM.key}=1") - sql(s"SET ${SQLConf.SHUFFLE_PARTITIONS.key}=2") + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.COALESCE_PARTITIONS_ENABLED.key -> "true", + SQLConf.COALESCE_PARTITIONS_INITIAL_PARTITION_NUM.key -> "1", + SQLConf.SHUFFLE_PARTITIONS.key -> "2") { checkAnswer(sql(s"SET ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS}"), Row(SQLConf.SHUFFLE_PARTITIONS.key, "2")) - } finally { - spark.sessionState.conf.clear() } } @@ -243,9 +239,9 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { } test("invalid conf value") { - spark.sessionState.conf.clear() val e = intercept[IllegalArgumentException] { - sql(s"set ${SQLConf.CASE_SENSITIVE.key}=10") + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "10") { + } } assert(e.getMessage === s"${SQLConf.CASE_SENSITIVE.key} should be boolean, but was 10") } @@ -506,7 +502,7 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { } test("SPARK-47765: set collation") { - Seq("UNICODE", "UNICODE_CI", "utf8_binary_lcase", "utf8_binary").foreach { collation => + Seq("UNICODE", "UNICODE_CI", "utf8_lcase", "utf8_binary").foreach { collation => sql(s"set collation $collation") assert(spark.conf.get(SQLConf.DEFAULT_COLLATION) === collation.toUpperCase(Locale.ROOT)) } @@ -519,8 +515,16 @@ class SQLConfSuite extends QueryTest with SharedSparkSession { parameters = Map( "confValue" -> "UNICODE_C", "confName" -> "spark.sql.session.collation.default", - "proposal" -> "UNICODE_CI" + "proposals" -> "UNICODE" )) + + withSQLConf(SQLConf.COLLATION_ENABLED.key -> "false") { + checkError( + exception = intercept[AnalysisException](sql(s"SET COLLATION UNICODE_CI")), + errorClass = "UNSUPPORTED_FEATURE.COLLATION", + parameters = Map.empty + ) + } } test("SPARK-43028: config not found 
error") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 88bb53cc7488d..e4116b565818e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -915,7 +915,7 @@ class JDBCSuite extends QueryTest with SharedSparkSession { test("DB2Dialect type mapping") { val db2Dialect = JdbcDialects.get("jdbc:db2://127.0.0.1/db") assert(db2Dialect.getJDBCType(StringType).map(_.databaseTypeDefinition).get == "CLOB") - assert(db2Dialect.getJDBCType(BooleanType).map(_.databaseTypeDefinition).get == "CHAR(1)") + assert(db2Dialect.getJDBCType(BooleanType).map(_.databaseTypeDefinition).get == "BOOLEAN") assert(db2Dialect.getJDBCType(ShortType).map(_.databaseTypeDefinition).get == "SMALLINT") assert(db2Dialect.getJDBCType(ByteType).map(_.databaseTypeDefinition).get == "SMALLINT") // test db2 dialect mappings on read @@ -955,6 +955,27 @@ class JDBCSuite extends QueryTest with SharedSparkSession { Some(DoubleType)) assert(mySqlDialect.getCatalystType(java.sql.Types.CHAR, "JSON", Int.MaxValue, metadata) === Some(StringType)) + assert(mySqlDialect.getCatalystType(java.sql.Types.TIMESTAMP, "DATETIME", 1, + metadata.putBoolean("isTimestampNTZ", false)) === Some(TimestampType)) + assert(mySqlDialect.getCatalystType(java.sql.Types.TIMESTAMP, "DATETIME", 1, + metadata.putBoolean("isTimestampNTZ", true)) === Some(TimestampNTZType)) + withSQLConf(SQLConf.LEGACY_MYSQL_TIMESTAMPNTZ_MAPPING_ENABLED.key -> "true") { + // in legacy mode, fallback to common mapping + assert(mySqlDialect.getCatalystType(java.sql.Types.TIMESTAMP, "TIMESTAMP", 1, + metadata.putBoolean("isTimestampNTZ", true)) === None) + mySqlDialect.getJDBCType(TimestampNTZType).foreach { jdbcType => + assert(jdbcType.databaseTypeDefinition === "TIMESTAMP") + } + } + withSQLConf(SQLConf.LEGACY_MYSQL_TIMESTAMPNTZ_MAPPING_ENABLED.key -> "false") { + Seq(true, false).foreach(isTimestampNTZ => { + assert(mySqlDialect.getCatalystType(java.sql.Types.TIMESTAMP, "TIMESTAMP", 1, + metadata.putBoolean("isTimestampNTZ", isTimestampNTZ)) === Some(TimestampType)) + }) + mySqlDialect.getJDBCType(TimestampNTZType).foreach { jdbcType => + assert(jdbcType.databaseTypeDefinition === "DATETIME") + } + } } test("SPARK-35446: MySQLDialect type mapping of float") { @@ -1333,6 +1354,7 @@ class JDBCSuite extends QueryTest with SharedSparkSession { assert(getJdbcType(oracleDialect, ByteType) == "NUMBER(3)") assert(getJdbcType(oracleDialect, ShortType) == "NUMBER(5)") assert(getJdbcType(oracleDialect, StringType) == "VARCHAR2(255)") + assert(getJdbcType(oracleDialect, VarcharType(100)) == "VARCHAR2(100)") assert(getJdbcType(oracleDialect, BinaryType) == "BLOB") assert(getJdbcType(oracleDialect, DateType) == "DATE") assert(getJdbcType(oracleDialect, TimestampType) == "TIMESTAMP WITH LOCAL TIME ZONE") @@ -1371,9 +1393,9 @@ class JDBCSuite extends QueryTest with SharedSparkSession { test("SPARK-16387: Reserved SQL words are not escaped by JDBC writer") { val df = spark.createDataset(Seq("a", "b", "c")).toDF("order") val schema = JdbcUtils.schemaString( + JdbcDialects.get("jdbc:mysql://localhost:3306/temp"), df.schema, - df.sparkSession.sessionState.conf.caseSensitiveAnalysis, - "jdbc:mysql://localhost:3306/temp") + df.sparkSession.sessionState.conf.caseSensitiveAnalysis) assert(schema.contains("`order` LONGTEXT")) } @@ -1455,16 +1477,11 @@ class JDBCSuite extends QueryTest with 
SharedSparkSession { } } - test("SPARK-15648: teradataDialect StringType data mapping") { - val teradataDialect = JdbcDialects.get("jdbc:teradata://127.0.0.1/db") - assert(teradataDialect.getJDBCType(StringType). - map(_.databaseTypeDefinition).get == "VARCHAR(255)") - } - - test("SPARK-15648: teradataDialect BooleanType data mapping") { - val teradataDialect = JdbcDialects.get("jdbc:teradata://127.0.0.1/db") - assert(teradataDialect.getJDBCType(BooleanType). - map(_.databaseTypeDefinition).get == "CHAR(1)") + test("SPARK-48399: TeradataDialect jdbc data mapping") { + val dialect = JdbcDialects.get("jdbc:teradata://127.0.0.1/db") + assert(dialect.getJDBCType(StringType).map(_.databaseTypeDefinition).get == "VARCHAR(255)") + assert(dialect.getJDBCType(BooleanType).map(_.databaseTypeDefinition).get == "CHAR(1)") + assert(dialect.getJDBCType(ByteType).map(_.databaseTypeDefinition).get == "BYTEINT") } test("SPARK-38846: TeradataDialect catalyst type mapping") { @@ -2181,4 +2198,12 @@ class JDBCSuite extends QueryTest with SharedSparkSession { dialect = JdbcDialects.get("jdbc:dummy:dummy_host:dummy_port/dummy_db") assert(dialect === NoopDialect) } + + test("SPARK-47882: createTableColumnTypes need to be mapped to database types") { + val dialect = JdbcDialects.get("jdbc:oracle:dummy_host:dummy_port/dummy_db") + val schema = new StructType().add("b", "boolean") + val schemaStr = + JdbcUtils.schemaString(dialect, schema, caseSensitive = false, Some("b boolean")) + assert(schemaStr === """"b" NUMBER(1) """) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 1b3672cdba5a4..e1a7971b283cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -369,6 +369,20 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel } } + test("null value for option exception") { + val df = spark.read + .option("pushDownOffset", null) + .table("h2.test.employee") + checkError( + exception = intercept[AnalysisException] { + df.collect() + }, + errorClass = "NULL_DATA_SOURCE_OPTION", + parameters = Map( + "option" -> "pushDownOffset") + ) + } + test("simple scan with OFFSET") { val df1 = spark.read .table("h2.test.employee") @@ -1305,7 +1319,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df5 = spark.table("h2.test.address").filter($"email".startsWith("abc_'%")) checkFiltersRemoved(df5) checkPushedInfo(df5, - raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_\'\%%' ESCAPE '\']") + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_''\%%' ESCAPE '\']") checkAnswer(df5, Seq(Row("abc_'%def@gmail.com"))) val df6 = spark.table("h2.test.address").filter($"email".endsWith("_def@gmail.com")) @@ -1336,7 +1350,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df10 = spark.table("h2.test.address").filter($"email".endsWith("_'%def@gmail.com")) checkFiltersRemoved(df10) checkPushedInfo(df10, - raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_\'\%def@gmail.com' ESCAPE '\']") + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_''\%def@gmail.com' ESCAPE '\']") checkAnswer(df10, Seq(Row("abc_'%def@gmail.com"))) val df11 = spark.table("h2.test.address").filter($"email".contains("c_d")) @@ -1364,7 +1378,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df15 = 
spark.table("h2.test.address").filter($"email".contains("c_'%d")) checkFiltersRemoved(df15) checkPushedInfo(df15, - raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_\'\%d%' ESCAPE '\']") + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_''\%d%' ESCAPE '\']") checkAnswer(df15, Seq(Row("abc_'%def@gmail.com"))) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index 0d9dc2f76faf1..76a092b552f98 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -406,19 +406,21 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { test("SPARK-10849: test schemaString - from createTableColumnTypes option values") { def testCreateTableColDataTypes(types: Seq[String]): Unit = { + val dialect = JdbcDialects.get(url1) val colTypes = types.zipWithIndex.map { case (t, i) => (s"col$i", t) } val schema = colTypes .foldLeft(new StructType())((schema, colType) => schema.add(colType._1, colType._2)) val createTableColTypes = colTypes.map { case (col, dataType) => s"$col $dataType" }.mkString(", ") - val expectedSchemaStr = - colTypes.map { case (col, dataType) => s""""$col" $dataType """ }.mkString(", ") + val expectedSchemaStr = schema.map { f => + s""""${f.name}" ${JdbcUtils.getJdbcType(f.dataType, dialect).databaseTypeDefinition} """ + }.mkString(", ") assert(JdbcUtils.schemaString( + dialect, schema, spark.sessionState.conf.caseSensitiveAnalysis, - url1, Option(createTableColTypes)) == expectedSchemaStr) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 93698fdd7bc0f..e3e385e9d1810 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -23,7 +23,7 @@ import java.time.{Duration, Period} import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataOutputStream, Path, RawLocalFileSystem} -import org.apache.spark.{SparkArithmeticException, SparkException} +import org.apache.spark.{SparkArithmeticException, SparkException, SparkRuntimeException} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} @@ -953,10 +953,10 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { spark.sessionState.catalog.createTable(newTable, false) sql("INSERT INTO TABLE test_table SELECT 1, 'a'") - val msg = intercept[SparkException] { + val msg = intercept[SparkRuntimeException] { sql("INSERT INTO TABLE test_table SELECT 2, null") - }.getCause.getMessage - assert(msg.contains("Null value appeared in non-nullable field")) + } + assert(msg.getErrorClass == "NOT_NULL_ASSERT_VIOLATION") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index fd3d59af7e6b8..ca4f2a7f26ced 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -2305,7 +2305,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } // batch 5 will trigger list operation though the batch 4 should have 1 
unseen file: - // 1 is smaller than the threshold (refer FileStreamSource.DISCARD_UNSEEN_FILES_RATIO), + // 1 is smaller than the threshold (refer FileStreamOptions.discardCachedInputRatio), // hence unseen files for batch 4 will be discarded. val offsetBatch = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(10)) .asInstanceOf[FileStreamSourceOffset] @@ -2357,6 +2357,142 @@ class FileStreamSourceSuite extends FileStreamSourceTest { } } + test("Options for caching unread files") { + withCountListingLocalFileSystemAsLocalFileSystem { + withThreeTempDirs { case (src, meta, tmp) => + val options = Map("latestFirst" -> "false", "maxFilesPerTrigger" -> "10", + "maxCachedFiles" -> "12", "discardCachedInputRatio" -> "0.1") + val scheme = CountListingLocalFileSystem.scheme + val source = new FileStreamSource(spark, s"$scheme:///${src.getCanonicalPath}/*/*", "text", + StructType(Nil), Seq.empty, meta.getCanonicalPath, options) + val _metadataLog = PrivateMethod[FileStreamSourceLog](Symbol("metadataLog")) + val metadataLog = source invokePrivate _metadataLog() + + def verifyBatch( + offset: FileStreamSourceOffset, + expectedBatchId: Long, + inputFiles: Seq[File], + expectedFileOffset: Int, + expectedFilesInBatch: Int, + expectedListingCount: Int): Unit = { + val batchId = offset.logOffset + assert(batchId === expectedBatchId) + + val files = metadataLog.get(batchId).getOrElse(Array.empty[FileEntry]) + assert(files.forall(_.batchId == batchId)) + + val actualInputFiles = files.map { p => p.sparkPath.toUri.getPath } + val expectedInputFiles = inputFiles.slice( + expectedFileOffset, + expectedFileOffset + expectedFilesInBatch + ) + .map(_.getCanonicalPath) + assert(actualInputFiles === expectedInputFiles) + + assert(expectedListingCount === CountListingLocalFileSystem.pathToNumListStatusCalled + .get(src.getCanonicalPath).map(_.get()).getOrElse(0)) + } + + CountListingLocalFileSystem.resetCount() + + // provide 44 files in src, with sequential "last modified" to guarantee ordering + val inputFiles = (0 to 43).map { idx => + val f = createFile(idx.toString, new File(src, idx.toString), tmp) + f.setLastModified(idx * 10000) + f + } + + // first 3 batches only perform 1 listing + // batch 0 processes 10 (12 cached) + // batch 1 processes 10 from cache (2 cached) + // batch 2 processes 2 from cache (0 cached) since + // discardCachedInputRatio is less than threshold + val offsetBatch0 = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(10)) + .asInstanceOf[FileStreamSourceOffset] + verifyBatch(offsetBatch0, expectedBatchId = 0, inputFiles, + expectedFileOffset = 0, expectedFilesInBatch = 10, expectedListingCount = 1) + val offsetBatch1 = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(10)) + .asInstanceOf[FileStreamSourceOffset] + verifyBatch(offsetBatch1, expectedBatchId = 1, inputFiles, + expectedFileOffset = 10, expectedFilesInBatch = 10, expectedListingCount = 1) + val offsetBatch2 = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(10)) + .asInstanceOf[FileStreamSourceOffset] + verifyBatch(offsetBatch2, expectedBatchId = 2, inputFiles, + expectedFileOffset = 20, expectedFilesInBatch = 2, expectedListingCount = 1) + + // next 3 batches perform another listing + // batch 3 processes 10 (12 cached) + // batch 4 processes 10 from cache (2 cached) + // batch 5 processes 2 from cache (0 cached) since + // discardCachedInputRatio is less than threshold + val offsetBatch3 = source.latestOffset(FileStreamSourceOffset(-1L), 
ReadLimit.maxFiles(10)) + .asInstanceOf[FileStreamSourceOffset] + verifyBatch(offsetBatch3, expectedBatchId = 3, inputFiles, + expectedFileOffset = 22, expectedFilesInBatch = 10, expectedListingCount = 2) + val offsetBatch4 = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(10)) + .asInstanceOf[FileStreamSourceOffset] + verifyBatch(offsetBatch4, expectedBatchId = 4, inputFiles, + expectedFileOffset = 32, expectedFilesInBatch = 10, expectedListingCount = 2) + val offsetBatch5 = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(10)) + .asInstanceOf[FileStreamSourceOffset] + verifyBatch(offsetBatch5, expectedBatchId = 5, inputFiles, + expectedFileOffset = 42, expectedFilesInBatch = 2, expectedListingCount = 2) + + // validate no remaining files and another listing is performed + val offsetBatch = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(10)) + .asInstanceOf[FileStreamSourceOffset] + assert(5 === offsetBatch.logOffset) + assert(3 === CountListingLocalFileSystem.pathToNumListStatusCalled + .get(src.getCanonicalPath).map(_.get()).getOrElse(0)) + } + } + } + + test("SPARK-48314: Don't cache unread files when using Trigger.AvailableNow") { + withCountListingLocalFileSystemAsLocalFileSystem { + withThreeTempDirs { case (src, meta, tmp) => + val options = Map("latestFirst" -> "false", "maxFilesPerTrigger" -> "5", + "maxCachedFiles" -> "2") + val scheme = CountListingLocalFileSystem.scheme + val source = new FileStreamSource(spark, s"$scheme:///${src.getCanonicalPath}/*/*", "text", + StructType(Nil), Seq.empty, meta.getCanonicalPath, options) + val _metadataLog = PrivateMethod[FileStreamSourceLog](Symbol("metadataLog")) + val metadataLog = source invokePrivate _metadataLog() + + // provide 20 files in src, with sequential "last modified" to guarantee ordering + (0 to 19).map { idx => + val f = createFile(idx.toString, new File(src, idx.toString), tmp) + f.setLastModified(idx * 10000) + f + } + + source.prepareForTriggerAvailableNow() + CountListingLocalFileSystem.resetCount() + + var offset = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(5)) + .asInstanceOf[FileStreamSourceOffset] + var files = metadataLog.get(offset.logOffset).getOrElse(Array.empty[FileEntry]) + + // All files are already tracked in allFilesForTriggerAvailableNow + assert(0 === CountListingLocalFileSystem.pathToNumListStatusCalled + .get(src.getCanonicalPath).map(_.get()).getOrElse(0)) + // Should be 5 files in the batch based on maxFiles limit + assert(files.length == 5) + + // Reading again leverages the files already tracked in allFilesForTriggerAvailableNow, + // so no more listings need to happen + offset = source.latestOffset(FileStreamSourceOffset(-1L), ReadLimit.maxFiles(5)) + .asInstanceOf[FileStreamSourceOffset] + files = metadataLog.get(offset.logOffset).getOrElse(Array.empty[FileEntry]) + + assert(0 === CountListingLocalFileSystem.pathToNumListStatusCalled + .get(src.getCanonicalPath).map(_.get()).getOrElse(0)) + // Should be 5 files in the batch since cached files are ignored + assert(files.length == 5) + } + } + } + test("SPARK-31962: file stream source shouldn't allow modifiedBefore/modifiedAfter") { def formatTime(time: LocalDateTime): String = { time.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 32822994c81cb..b4f29fa9f01ac 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.execution.{LocalLimitExec, SimpleMode, SparkPlan} import org.apache.spark.sql.execution.command.ExplainCommand import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.execution.streaming.sources.{ContinuousMemoryStream, MemorySink} +import org.apache.spark.sql.execution.streaming.sources.{ContinuousMemoryStream, ForeachBatchUserFuncException, MemorySink} import org.apache.spark.sql.execution.streaming.state.{KeyStateEncoderSpec, StateStore, StateStoreConf, StateStoreId, StateStoreProvider} import org.apache.spark.sql.expressions.Window import org.apache.spark.sql.functions._ @@ -1185,6 +1185,17 @@ class StreamSuite extends StreamTest { checkAnswer(spark.sql("select * from output"), Row("true")) } + private val py4JInterruptedExceptions = Seq( + classOf[InterruptedException].getName, + classOf[InterruptedIOException].getName, + classOf[ClosedByInterruptException].getName).map { s => + new py4j.Py4JException( + s""" + |py4j.protocol.Py4JJavaError: An error occurred while calling o44.count. + |: $s + |""".stripMargin) + } + for (e <- Seq( new InterruptedException, new InterruptedIOException, @@ -1192,16 +1203,8 @@ class StreamSuite extends StreamTest { new UncheckedIOException("test", new ClosedByInterruptException), new ExecutionException("test", new InterruptedException), new UncheckedExecutionException("test", new InterruptedException)) ++ - Seq( - classOf[InterruptedException].getName, - classOf[InterruptedIOException].getName, - classOf[ClosedByInterruptException].getName).map { s => - new py4j.Py4JException( - s""" - |py4j.protocol.Py4JJavaError: An error occurred while calling o44.count. - |: $s - |""".stripMargin) - }) { + py4JInterruptedExceptions ++ + py4JInterruptedExceptions.map { e => ForeachBatchUserFuncException(e) }) { test(s"view ${e.getClass.getSimpleName} [${e.getMessage}] as a normal query stop") { ThrowingExceptionInCreateSource.createSourceLatch = new CountDownLatch(1) ThrowingExceptionInCreateSource.exception = e @@ -1323,6 +1326,36 @@ class StreamSuite extends StreamTest { } } } + + test("isInterruptionException should correctly unwrap classic py4j InterruptedException") { + val e1 = new py4j.Py4JException( + """ + |py4j.protocol.Py4JJavaError: An error occurred while calling o1073599.sql. + |: java.util.concurrent.ExecutionException: java.lang.InterruptedException + |""".stripMargin) + val febError1 = ForeachBatchUserFuncException(e1) + assert(StreamExecution.isInterruptionException(febError1, spark.sparkContext)) + + // scalastyle:off line.size.limit + val e2 = new py4j.Py4JException( + """ + |py4j.protocol.Py4JJavaError: An error occurred while calling o2141502.saveAsTable. + |: org.apache.spark.SparkException: Job aborted. 
+ |at org.apache.spark.sql.errors.QueryExecutionErrors$.jobAbortedError(QueryExecutionErrors.scala:882) + |at org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$1(FileFormatWriter.scala:334) + | + |org.apache.spark.sql.execution.streaming.StreamExecution.withAttributionTags(StreamExecution.scala:82) + |at org.apache.spark.sql.execution.streaming.StreamExecution.org$apache$spark$sql$execution$streaming$StreamExecution$$runStream(StreamExecution.scala:339) + |at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.$anonfun$run$2(StreamExecution.scala:262) + |at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) + |at org.apache.spark.sql.execution.streaming.StreamExecution$$anon$1.run(StreamExecution.scala:262) + |*Caused by: java.lang.InterruptedException + |at java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(AbstractQueuedSynchronizer.java:1000)* + |""".stripMargin) + // scalastyle:on line.size.limit + val febError2 = ForeachBatchUserFuncException(e2) + assert(StreamExecution.isInterruptionException(febError2, spark.sparkContext)) + } } abstract class FakeSource extends StreamSourceProvider { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala index d7401897ff6a4..7439c7ab6d6e1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamTest.scala @@ -346,7 +346,8 @@ trait StreamTest extends QueryTest with SharedSparkSession with TimeLimits with def testStream( _stream: Dataset[_], outputMode: OutputMode = OutputMode.Append, - extraOptions: Map[String, String] = Map.empty)(actions: StreamAction*): Unit = synchronized { + extraOptions: Map[String, String] = Map.empty, + sink: MemorySink = new MemorySink())(actions: StreamAction*): Unit = synchronized { import org.apache.spark.sql.streaming.util.StreamManualClock // `synchronized` is added to prevent the user from calling multiple `testStream`s concurrently @@ -359,7 +360,6 @@ trait StreamTest extends QueryTest with SharedSparkSession with TimeLimits with var currentStream: StreamExecution = null var lastStream: StreamExecution = null val awaiting = new mutable.HashMap[Int, OffsetV2]() // source index -> offset to wait for - val sink = new MemorySink val resetConfValues = mutable.Map[String, Option[String]]() val defaultCheckpointLocation = Utils.createTempDir(namePrefix = "streaming.metadata").getCanonicalPath diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala index 8d79cf4af7717..bcf0d4ac46655 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingAggregationSuite.scala @@ -25,7 +25,7 @@ import scala.annotation.tailrec import org.apache.commons.io.FileUtils import org.scalatest.Assertions -import org.apache.spark.{SparkEnv, SparkException} +import org.apache.spark.{SparkEnv, SparkException, SparkUnsupportedOperationException} import org.apache.spark.rdd.BlockRDD import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.InternalRow @@ -36,7 +36,7 @@ import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import 
org.apache.spark.sql.execution.exchange.Exchange import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.sources.MemorySink -import org.apache.spark.sql.execution.streaming.state.{StateSchemaNotCompatible, StateStore, StreamingAggregationStateManager} +import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreValueSchemaNotCompatible, StreamingAggregationStateManager} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.OutputMode._ @@ -782,11 +782,11 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { testStream(aggregated, Update())( StartStream(checkpointLocation = tempDir.getAbsolutePath), AddData(inputData, 21), - ExpectFailure[SparkException] { e => + ExpectFailure[StateStoreValueSchemaNotCompatible] { e => val stateSchemaExc = findStateSchemaNotCompatible(e) assert(stateSchemaExc.isDefined) val msg = stateSchemaExc.get.getMessage - assert(msg.contains("Provided schema doesn't match to the schema for existing state")) + assert(msg.contains("does not match existing")) // other verifications are presented in StateStoreSuite } ) @@ -909,9 +909,10 @@ class StreamingAggregationSuite extends StateStoreMetricsTest with Assertions { } @tailrec - private def findStateSchemaNotCompatible(exc: Throwable): Option[StateSchemaNotCompatible] = { + private def findStateSchemaNotCompatible(exc: Throwable): + Option[SparkUnsupportedOperationException] = { exc match { - case e1: StateSchemaNotCompatible => Some(e1) + case e1: SparkUnsupportedOperationException => Some(e1) case e1 if e1.getCause != null => findStateSchemaNotCompatible(e1.getCause) case _ => None } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index 5c3d8d877f390..854893b1f033e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -21,11 +21,13 @@ import java.io.File import org.apache.commons.io.FileUtils +import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.StringType import org.apache.spark.tags.SlowSQLTest import org.apache.spark.util.Utils @@ -451,28 +453,29 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { } } - test("SPARK-39650: recovery from checkpoint having all columns as value schema") { - // NOTE: We are also changing the schema of input compared to the checkpoint. In the checkpoint - // we define the input schema as (String, Int). - val inputData = MemoryStream[(String, Int, String)] - val dedupe = inputData.toDS().dropDuplicates("_1") + Seq("3.3.0", "3.5.1").foreach { sparkVersion => + test("SPARK-39650: recovery from checkpoint having all columns as value schema " + + s"with sparkVersion=$sparkVersion") { + // NOTE: We are also changing the schema of input compared to the checkpoint. + // In the checkpoint we define the input schema as (String, Int). 
+ val inputData = MemoryStream[(String, Int, String)] + val dedupe = inputData.toDS().dropDuplicates("_1") - // The fix will land after Spark 3.3.0, hence we can check backward compatibility with - // checkpoint being built from Spark 3.3.0. - val resourceUri = this.getClass.getResource( - "/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/").toURI + val resourcePath = "/structured-streaming/checkpoint-version-" + sparkVersion + + "-streaming-deduplication/" + val resourceUri = this.getClass.getResource(resourcePath).toURI - val checkpointDir = Utils.createTempDir().getCanonicalFile - // Copy the checkpoint to a temp dir to prevent changes to the original. - // Not doing this will lead to the test passing on the first run, but fail subsequent runs. - FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) - inputData.addData(("a", 1, "dummy")) - inputData.addData(("a", 2, "dummy"), ("b", 3, "dummy")) + inputData.addData(("a", 1, "dummy")) + inputData.addData(("a", 2, "dummy"), ("b", 3, "dummy")) - testStream(dedupe, Append)( - StartStream(checkpointLocation = checkpointDir.getAbsolutePath), - /* + testStream(dedupe, Append)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + /* Note: The checkpoint was generated using the following input in Spark version 3.3.0 AddData(inputData, ("a", 1)), CheckLastBatch(("a", 1)), @@ -480,8 +483,95 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { CheckLastBatch(("b", 3)) */ - AddData(inputData, ("a", 5, "a"), ("b", 2, "b"), ("c", 9, "c")), - CheckLastBatch(("c", 9, "c")) + AddData(inputData, ("a", 5, "a"), ("b", 2, "b"), ("c", 9, "c")), + CheckLastBatch(("c", 9, "c")) + ) + } + } + + Seq("3.3.0", "3.5.1").foreach { sparkVersion => + test("SPARK-39650: recovery from checkpoint with changes on key schema " + + s"are not allowed with sparkVersion=$sparkVersion") { + // NOTE: We are also changing the schema of input compared to the checkpoint. + // In the checkpoint we define the input schema as (String, Int). + val inputData = MemoryStream[(String, Int, String)] + val dedupe = inputData.toDS().dropDuplicates("_1", "_2") + + val resourcePath = "/structured-streaming/checkpoint-version-" + sparkVersion + + "-streaming-deduplication/" + val resourceUri = this.getClass.getResource(resourcePath).toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. 
+ FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + inputData.addData(("a", 1, "dummy")) + inputData.addData(("a", 2, "dummy"), ("b", 3, "dummy")) + + // trying to evolve the key schema is not allowed and should throw an exception + val ex = intercept[StreamingQueryException] { + testStream(dedupe, Append)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + AddData(inputData, ("a", 5, "a"), ("b", 2, "b"), ("c", 9, "c")), + CheckLastBatch(("a", 5, "a"), ("b", 2, "b"), ("c", 9, "c")) + ) + } + + // verify that the key schema not compatible error is thrown + checkError( + ex.getCause.asInstanceOf[SparkUnsupportedOperationException], + errorClass = "STATE_STORE_KEY_SCHEMA_NOT_COMPATIBLE", + parameters = Map("storedKeySchema" -> ".*", + "newKeySchema" -> ".*"), + matchPVals = true + ) + } + } + + test("collation aware deduplication") { + val inputData = MemoryStream[(String, Int)] + val result = inputData.toDF() + .select(col("_1") + .try_cast(StringType("UTF8_BINARY")).as("str"), + col("_2").as("int")) + .dropDuplicates("str") + + testStream(result, Append)( + AddData(inputData, "a" -> 1), + CheckLastBatch("a" -> 1), + assertNumStateRows(total = 1, updated = 1, droppedByWatermark = 0), + AddData(inputData, "a" -> 2), // Dropped + CheckLastBatch(), + assertNumStateRows(total = 1, updated = 0, droppedByWatermark = 0), + // scalastyle:off + AddData(inputData, "ä" -> 1), + CheckLastBatch("ä" -> 1), + // scalastyle:on + assertNumStateRows(total = 2, updated = 1, droppedByWatermark = 0) + ) + } + + test("non-binary collation aware deduplication not supported") { + val inputData = MemoryStream[(String)] + val result = inputData.toDF() + .select(col("value") + .try_cast(StringType("UTF8_LCASE")).as("str")) + .dropDuplicates("str") + + val ex = intercept[StreamingQueryException] { + testStream(result, Append)( + AddData(inputData, "a"), + CheckLastBatch("a")) + } + + checkError( + ex.getCause.asInstanceOf[SparkUnsupportedOperationException], + errorClass = "STATE_STORE_UNSUPPORTED_OPERATION_BINARY_INEQUALITY", + parameters = Map( + "schema" -> ".+\"str\":\"spark.UTF8_LCASE\".+" + ), + matchPVals = true ) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index e05cb4d3c35ce..5e9bdad8fd825 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -27,6 +27,7 @@ import scala.util.Random import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter +import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression} @@ -688,6 +689,146 @@ class StreamingInnerJoinSuite extends StreamingJoinSuite { ) } + test("SPARK-48687 - restore the stream-stream inner join query from Spark 3.5 and " + + "changing the join condition (key schema) should fail the query") { + // NOTE: We are also changing the schema of input compared to the checkpoint. + // In the checkpoint we define the input schema as (Int, Long), which does not have name + // in both left and right. 
+ val inputStream = MemoryStream[(Int, Long, String)] + val df = inputStream.toDS() + .select(col("_1").as("value"), timestamp_seconds($"_2").as("timestamp"), + col("_3").as("name")) + + val leftStream = df.select(col("value").as("leftId"), + col("timestamp").as("leftTime"), col("name").as("leftName")) + + val rightStream = df + // Introduce misses for ease of debugging + .where(col("value") % 2 === 0) + .select(col("value").as("rightId"), + col("timestamp").as("rightTime"), col("name").as("rightName")) + + val query = leftStream + .withWatermark("leftTime", "5 seconds") + .join( + rightStream.withWatermark("rightTime", "5 seconds"), + expr("rightId = leftId AND leftName = rightName AND rightTime >= leftTime AND " + + "rightTime <= leftTime + interval 5 seconds"), + joinType = "inner") + .select(col("leftId"), col("leftTime").cast("int"), + col("leftName"), + col("rightId"), col("rightTime").cast("int"), + col("rightName")) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.5.1-streaming-join/").toURI + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + inputStream.addData((1, 1L, "a"), (2, 2L, "b"), (3, 3L, "c"), (4, 4L, "d"), (5, 5L, "e")) + + val ex = intercept[StreamingQueryException] { + testStream(query)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + /* + Note: The checkpoint was generated using the following input in Spark version 3.5.1 + The base query is different because it does not use the leftName/rightName columns + as part of the join keys/condition that is used as part of the key schema. + + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + // batch 1 - global watermark = 0 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L) + // right: (2, 2L), (4, 4L) + CheckNewAnswer((2, 2L, 2, 2L), (4, 4L, 4, 4L)), + */ + AddData(inputStream, (6, 6L, "a"), (7, 7L, "a"), (8, 8L, "a"), (9, 9L, "a"), + (10, 10L, "a")), + CheckNewAnswer((6, 6L, "a", 6, 6L, "a"), (8, 8L, "a", 8, 8L, "a"), + (10, 10L, "a", 10, 10L, "a")) + ) + } + + checkError( + ex.getCause.asInstanceOf[SparkUnsupportedOperationException], + errorClass = "STATE_STORE_KEY_SCHEMA_NOT_COMPATIBLE", + parameters = Map("storedKeySchema" -> ".*", + "newKeySchema" -> ".*"), + matchPVals = true + ) + } + + test("SPARK-48687 - restore the stream-stream inner join query from Spark 3.5 and " + + "changing the value schema should fail the query") { + // NOTE: We are also changing the schema of input compared to the checkpoint. + // In the checkpoint we define the input schema as (Int, Long), which does not have name + // in both left and right. 
+ val inputStream = MemoryStream[(Int, Long, String)] + val df = inputStream.toDS() + .select(col("_1").as("value"), timestamp_seconds($"_2").as("timestamp"), + col("_3").as("name")) + + val leftStream = df.select(col("value").as("leftId"), + col("timestamp").as("leftTime"), col("name").as("leftName")) + + val rightStream = df + // Introduce misses for ease of debugging + .where(col("value") % 2 === 0) + .select(col("value").as("rightId"), + col("timestamp").as("rightTime"), col("name").as("rightName")) + + val query = leftStream + .withWatermark("leftTime", "5 seconds") + .join( + rightStream.withWatermark("rightTime", "5 seconds"), + expr("rightId = leftId AND rightTime >= leftTime AND " + + "rightTime <= leftTime + interval 5 seconds"), + joinType = "inner") + .select(col("leftId"), col("leftTime").cast("int"), + col("leftName"), + col("rightId"), col("rightTime").cast("int"), + col("rightName")) + + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.5.1-streaming-join/").toURI + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + inputStream.addData((1, 1L, "a"), (2, 2L, "b"), (3, 3L, "c"), (4, 4L, "d"), (5, 5L, "e")) + + val ex = intercept[StreamingQueryException] { + testStream(query)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + /* + Note: The checkpoint was generated using the following input in Spark version 3.5.1 + The base query is different because it does not use the leftName/rightName columns + as part of the generated output that is used as part of the value schema. 
+ + AddData(inputStream, (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L)), + // batch 1 - global watermark = 0 + // states + // left: (1, 1L), (2, 2L), (3, 3L), (4, 4L), (5, 5L) + // right: (2, 2L), (4, 4L) + CheckNewAnswer((2, 2L, 2, 2L), (4, 4L, 4, 4L)), + */ + AddData(inputStream, (6, 6L, "a"), (7, 7L, "a"), (8, 8L, "a"), (9, 9L, "a"), + (10, 10L, "a")), + CheckNewAnswer((6, 6L, "a", 6, 6L, "a"), (8, 8L, "a", 8, 8L, "a"), + (10, 10L, "a", 10, 10L, "a")) + ) + } + + checkError( + ex.getCause.asInstanceOf[SparkUnsupportedOperationException], + errorClass = "STATE_STORE_VALUE_SCHEMA_NOT_COMPATIBLE", + parameters = Map("storedValueSchema" -> ".*", + "newValueSchema" -> ".*"), + matchPVals = true + ) + } + test("SPARK-35896: metrics in StateOperatorProgress are output correctly") { val input1 = MemoryStream[Int] val input2 = MemoryStream[Int] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryHashPartitionVerifySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryHashPartitionVerifySuite.scala index 3423b8b8cb287..3d8c20af3b384 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryHashPartitionVerifySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryHashPartitionVerifySuite.scala @@ -31,11 +31,24 @@ import org.apache.spark.sql.catalyst.expressions.{BoundReference, GenericInterna import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.types.{BinaryType, DataType, DoubleType, FloatType, IntegerType, LongType, StringType, StructType, TimestampType} +/** + * To run the test suite: + * {{{ + * build/sbt "sql/testOnly *StreamingQueryHashPartitionVerifySuite" + * }}} + * + * To re-generate the golden file with size limit under 10Mb, run: + * {{{ + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *StreamingQueryHashPartitionVerifySuite" + * -Dspark.sql.test.randomDataGenerator.maxStrLen=100 + * -Dspark.sql.test.randomDataGenerator.maxArraySize=4 + * }}} + */ class StreamingQueryHashPartitionVerifySuite extends StreamTest { - // Configs for golden file - private val goldenFileURI = - this.getClass.getResource("/structured-streaming/partition-tests/").toURI + // A golden file directory in `src/test` instead of `target` directory. 
+ private val goldenFileURI = getWorkspaceFilePath( + "sql", "core", "src", "test", "resources", "structured-streaming", "partition-tests").toUri private val schemaFileName = "randomSchemas" // files for storing random input schemas private val rowAndPartIdFilename = @@ -152,9 +165,6 @@ class StreamingQueryHashPartitionVerifySuite extends StreamTest { val rowAndPartIdFile = new File(goldenFileURI.getPath, rowAndPartIdFilename) if (regenerateGoldenFiles) { - // To limit the golden file size under 10Mb, please set the final val MAX_STR_LEN: Int = 100 - // and final val MAX_ARR_SIZE: Int = 4 in org.apache.spark.sql.RandomDataGenerator - val random = new Random() val schemas = getRandomSchemas(random) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryOptimizationCorrectnessSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryOptimizationCorrectnessSuite.scala index efc84c8e4c7cf..782badaef924f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryOptimizationCorrectnessSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryOptimizationCorrectnessSuite.scala @@ -21,7 +21,8 @@ import java.sql.Timestamp import org.apache.spark.sql.Row import org.apache.spark.sql.execution.streaming.MemoryStream -import org.apache.spark.sql.functions.{lit, window} +import org.apache.spark.sql.functions.{expr, lit, window} +import org.apache.spark.sql.internal.SQLConf /** * This test ensures that any optimizations done by Spark SQL optimizer are @@ -416,4 +417,111 @@ class StreamingQueryOptimizationCorrectnessSuite extends StreamTest { ) } } + + test("SPARK-48267: regression test, stream-stream union followed by stream-batch join") { + withTempDir { dir => + val input1 = MemoryStream[Int] + val input2 = MemoryStream[Int] + + val df1 = input1.toDF().withColumn("code", lit(1)) + val df2 = input2.toDF().withColumn("code", lit(null)) + + // NOTE: The column 'ref_code' is known to be non-nullable. + val batchDf = spark.range(1, 5).select($"id".as("ref_code")) + + val unionDf = df1.union(df2) + .join(batchDf, expr("code = ref_code")) + .select("value") + + testStream(unionDf)( + StartStream(checkpointLocation = dir.getAbsolutePath), + + AddData(input1, 1, 2, 3), + CheckNewAnswer(1, 2, 3), + + AddData(input2, 1, 2, 3), + // Before SPARK-47305, this test failed with the error message below: + // org.apache.spark.sql.streaming.StreamingQueryException: Stream-stream join without + // equality predicate is not supported.; + // Join Inner + // :- StreamingDataSourceV2ScanRelation[value#3] MemoryStreamDataSource + // +- LocalRelation + // Note that LocalRelation is actually a batch source (Range) but due to + // a bug, it was incorrectly marked as streaming. SPARK-47305 fixed the bug.
+ CheckNewAnswer() + ) + } + } + + test("SPARK-48481: DISTINCT with empty stream source should retain AGGREGATE") { + def doTest(numExpectedStatefulOperatorsForOneEmptySource: Int): Unit = { + withTempView("tv1", "tv2") { + val inputStream1 = MemoryStream[Int] + val ds1 = inputStream1.toDS() + ds1.createOrReplaceTempView("tv1") + + val inputStream2 = MemoryStream[Int] + val ds2 = inputStream2.toDS() + ds2.createOrReplaceTempView("tv2") + + // DISTINCT is rewritten to AGGREGATE, hence an AGGREGATE for each source + val unioned = spark.sql( + """ + | WITH u AS ( + | SELECT DISTINCT value AS value FROM tv1 + | ), v AS ( + | SELECT DISTINCT value AS value FROM tv2 + | ) + | SELECT value FROM u UNION ALL SELECT value FROM v + |""".stripMargin + ) + + testStream(unioned, OutputMode.Update())( + MultiAddData(inputStream1, 1, 1, 2)(inputStream2, 1, 1, 2), + CheckNewAnswer(1, 2, 1, 2), + Execute { qe => + val stateOperators = qe.lastProgress.stateOperators + // Aggregate should be the "stateful" one + assert(stateOperators.length === 2) + stateOperators.zipWithIndex.foreach { case (op, id) => + assert(op.numRowsUpdated === 2, s"stateful OP ID: $id") + } + }, + AddData(inputStream2, 2, 2, 3), + // NOTE: this is probably far from user expectation to have 2 as output given the user + // intends deduplication, but the behavior is still correct with the rewritten node and + // output mode: Aggregate & Update mode. + // TODO: Probably we should disallow DISTINCT or rewrite to + // dropDuplicates(WithinWatermark) for streaming sources? + CheckNewAnswer(2, 3), + Execute { qe => + val stateOperators = qe.lastProgress.stateOperators + // Aggregate should be the "stateful" one + assert(stateOperators.length === numExpectedStatefulOperatorsForOneEmptySource) + val opWithUpdatedRows = stateOperators.zipWithIndex.filterNot(_._1.numRowsUpdated == 0) + assert(opWithUpdatedRows.length === 1) + // If this were dropDuplicates, numRowsUpdated should have been 1.
+ assert(opWithUpdatedRows.head._1.numRowsUpdated === 2, + s"stateful OP ID: ${opWithUpdatedRows.head._2}") + }, + AddData(inputStream1, 4, 4, 5), + CheckNewAnswer(4, 5), + Execute { qe => + val stateOperators = qe.lastProgress.stateOperators + assert(stateOperators.length === numExpectedStatefulOperatorsForOneEmptySource) + val opWithUpdatedRows = stateOperators.zipWithIndex.filterNot(_._1.numRowsUpdated == 0) + assert(opWithUpdatedRows.length === 1) + assert(opWithUpdatedRows.head._1.numRowsUpdated === 2, + s"stateful OP ID: ${opWithUpdatedRows.head._2}") + } + ) + } + } + + doTest(numExpectedStatefulOperatorsForOneEmptySource = 2) + + withSQLConf(SQLConf.STREAMING_OPTIMIZE_ONE_ROW_PLAN_ENABLED.key -> "true") { + doTest(numExpectedStatefulOperatorsForOneEmptySource = 1) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala index dce27bdc5d1ca..e748ae8e7d7df 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQueryStatusAndProgressSuite.scala @@ -172,6 +172,7 @@ class StreamingQueryStatusAndProgressSuite extends StreamTest with Eventually { test("StreamingQueryProgress - json") { assert(compact(parse(testProgress1.json)) === testProgress1.json) assert(compact(parse(testProgress2.json)) === testProgress2.json) + assert(compact(parse(testProgress3.json)) === testProgress3.json) } test("StreamingQueryProgress - toString") { @@ -499,6 +500,28 @@ object StreamingQueryStatusAndProgressSuite { "event_b2" -> row(schema2, 200L, "fzo", "baz")).asJava) ) + val testProgress3 = new StreamingQueryProgress( + id = UUID.randomUUID, + runId = UUID.randomUUID, + name = "myName", + timestamp = "2024-05-28T00:00:00.233Z", + batchId = 2L, + batchDuration = 0L, + durationMs = null, + eventTime = null, + stateOperators = Array(new StateOperatorProgress(operatorName = "op1", + numRowsTotal = 0, numRowsUpdated = 1, allUpdatesTimeMs = 1, numRowsRemoved = 2, + allRemovalsTimeMs = 34, commitTimeMs = 23, memoryUsedBytes = 3, numRowsDroppedByWatermark = 0, + numShufflePartitions = 2, numStateStoreInstances = 2, + customMetrics = new java.util.HashMap(Map("stateOnCurrentVersionSizeBytes" -> 2L, + "loadedMapCacheHitCount" -> 1L, "loadedMapCacheMissCount" -> 0L) + .transform((_, v) => long2Long(v)).asJava) + )), + sources = Array(), + sink = SinkProgress("sink", None), + observedMetrics = null + ) + val testStatus = new StreamingQueryStatus("active", true, false) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 504c0b334e426..061b353879d14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -1364,10 +1364,39 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi ) } + test("Collation aware streaming") { + withTable("parquet_streaming_tbl") { + spark.sql( + """ + |CREATE TABLE parquet_streaming_tbl + |( + | key STRING COLLATE UTF8_LCASE, + | value_stream INTEGER + |) USING parquet""".stripMargin) + + val streamDf = spark.readStream.table("parquet_streaming_tbl") + val filteredDf = streamDf.filter("key = 'aaa'") + + val clock = new 
StreamManualClock() + testStream(filteredDf)( + StartStream(triggerClock = clock, trigger = Trigger.ProcessingTime(100)), + Execute { _ => + spark.createDataFrame(Seq("aaa" -> 1, "AAA" -> 2, "bbb" -> 3, "aa" -> 4)) + .toDF("key", "value_stream") + .write.format("parquet").mode(SaveMode.Append) + .saveAsTable("parquet_streaming_tbl") + }, + AdvanceManualClock(150), + waitUntilBatchProcessed(clock), + CheckLastBatch(("aaa", 1), ("AAA", 2)) + ) + } + } + test("SPARK-47776: streaming aggregation having binary inequality column in the grouping " + "key must be disallowed") { val tableName = "parquet_dummy_tbl" - val collationName = "UTF8_BINARY_LCASE" + val collationName = "UTF8_LCASE" withTable(tableName) { sql( @@ -1396,13 +1425,30 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi ex.getCause.asInstanceOf[SparkUnsupportedOperationException], errorClass = "STATE_STORE_UNSUPPORTED_OPERATION_BINARY_INEQUALITY", parameters = Map( - "schema" -> ".+\"type\":\"string collate UTF8_BINARY_LCASE\".+" + "schema" -> ".+\"c1\":\"spark.UTF8_LCASE\".+" ), matchPVals = true ) } } + test("SPARK-48447: check state store provider class before invoking the constructor") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[Object].getCanonicalName) { + val input = MemoryStream[Int] + input.addData(1) + val query = input.toDF().limit(2).writeStream + .trigger(Trigger.AvailableNow()) + .format("console") + .start() + val ex = intercept[StreamingQueryException] { + query.processAllAvailable() + } + assert(ex.getMessage.contains( + s"The given State Store Provider ${classOf[Object].getCanonicalName} does not " + + "extend org.apache.spark.sql.execution.streaming.state.StateStoreProvider.")) + } + } + private def checkExceptionMessage(df: DataFrame): Unit = { withTempDir { outputDir => withTempDir { checkpointDir => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala index 4827d06d64d07..8a9d4d42ef2b5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingStateStoreFormatCompatibilitySuite.scala @@ -23,11 +23,11 @@ import scala.annotation.tailrec import org.apache.commons.io.FileUtils -import org.apache.spark.SparkException +import org.apache.spark.{SparkException, SparkUnsupportedOperationException} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Complete import org.apache.spark.sql.execution.streaming.MemoryStream -import org.apache.spark.sql.execution.streaming.state.{InvalidUnsafeRowException, StateSchemaNotCompatible} +import org.apache.spark.sql.execution.streaming.state.{StateStoreKeyRowFormatValidationFailure, StateStoreValueRowFormatValidationFailure} import org.apache.spark.sql.functions._ import org.apache.spark.tags.SlowSQLTest import org.apache.spark.util.Utils @@ -253,8 +253,9 @@ class StreamingStateStoreFormatCompatibilitySuite extends StreamTest { @tailrec private def findStateSchemaException(exc: Throwable): Boolean = { exc match { - case _: StateSchemaNotCompatible => true - case _: InvalidUnsafeRowException => true + case _: SparkUnsupportedOperationException => true + case _: StateStoreKeyRowFormatValidationFailure => true + case _: StateStoreValueRowFormatValidationFailure => true case e1 if 
e1.getCause != null => findStateSchemaException(e1.getCause) case _ => false } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateTTLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateTTLSuite.scala new file mode 100644 index 0000000000000..bf46c802fdea4 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithMapStateTTLSuite.scala @@ -0,0 +1,322 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.streaming + +import java.time.Duration + +import org.apache.spark.sql.Encoders +import org.apache.spark.sql.execution.streaming.{MapStateImplWithTTL, MemoryStream} +import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.util.StreamManualClock + +class MapStateSingleKeyTTLProcessor(ttlConfig: TTLConfig) + extends StatefulProcessor[String, InputEvent, OutputEvent] { + + @transient private var _mapState: MapStateImplWithTTL[String, Int] = _ + + override def init( + outputMode: OutputMode, + timeMode: TimeMode): Unit = { + _mapState = getHandle + .getMapState("mapState", Encoders.STRING, Encoders.scalaInt, ttlConfig) + .asInstanceOf[MapStateImplWithTTL[String, Int]] + } + + override def handleInputRows( + key: String, + inputRows: Iterator[InputEvent], + timerValues: TimerValues, + expiredTimerInfo: ExpiredTimerInfo): Iterator[OutputEvent] = { + var results = List[OutputEvent]() + + for (row <- inputRows) { + val resultIter = processRow(row, _mapState) + resultIter.foreach { r => + results = r :: results + } + } + + results.iterator + } + + def processRow( + row: InputEvent, + mapState: MapStateImplWithTTL[String, Int]): Iterator[OutputEvent] = { + var results = List[OutputEvent]() + val key = row.key + val userKey = "key" + if (row.action == "get") { + if (mapState.containsKey(userKey)) { + results = OutputEvent(key, mapState.getValue(userKey), isTTLValue = false, -1) :: results + } + } else if (row.action == "get_without_enforcing_ttl") { + val currState = mapState.getWithoutEnforcingTTL(userKey) + if (currState.isDefined) { + results = OutputEvent(key, currState.get, isTTLValue = false, -1) :: results + } + } else if (row.action == "get_ttl_value_from_state") { + val ttlValue = mapState.getTTLValue(userKey) + if (ttlValue.isDefined) { + val value = ttlValue.get._1 + val ttlExpiration = ttlValue.get._2 + results = OutputEvent(key, value, isTTLValue = true, ttlExpiration) :: results + } + } else if (row.action == "put") { + mapState.updateValue(userKey, row.value) + } else if (row.action == "get_values_in_ttl_state") { + val ttlValues = mapState.getKeyValuesInTTLState() + ttlValues.foreach { v => + 
results = OutputEvent(key, -1, isTTLValue = true, ttlValue = v._2) :: results + } + } + + results.iterator + } +} + +case class MapInputEvent( + key: String, + userKey: String, + action: String, + value: Int) + +case class MapOutputEvent( + key: String, + userKey: String, + value: Int, + isTTLValue: Boolean, + ttlValue: Long) + +class MapStateTTLProcessor(ttlConfig: TTLConfig) + extends StatefulProcessor[String, MapInputEvent, MapOutputEvent] { + + @transient private var _mapState: MapStateImplWithTTL[String, Int] = _ + + override def init( + outputMode: OutputMode, + timeMode: TimeMode): Unit = { + _mapState = getHandle + .getMapState("mapState", Encoders.STRING, Encoders.scalaInt, ttlConfig) + .asInstanceOf[MapStateImplWithTTL[String, Int]] + } + + override def handleInputRows( + key: String, + inputRows: Iterator[MapInputEvent], + timerValues: TimerValues, + expiredTimerInfo: ExpiredTimerInfo): Iterator[MapOutputEvent] = { + var results = List[MapOutputEvent]() + + for (row <- inputRows) { + val resultIter = processRow(row, _mapState) + resultIter.foreach { r => + results = r :: results + } + } + + results.iterator + } + + def processRow( + row: MapInputEvent, + mapState: MapStateImplWithTTL[String, Int]): Iterator[MapOutputEvent] = { + var results = List[MapOutputEvent]() + val key = row.key + val userKey = row.userKey + if (row.action == "get") { + if (mapState.containsKey(userKey)) { + results = MapOutputEvent(key, userKey, mapState.getValue(userKey), + isTTLValue = false, -1) :: results + } + } else if (row.action == "get_without_enforcing_ttl") { + val currState = mapState.getWithoutEnforcingTTL(userKey) + if (currState.isDefined) { + results = MapOutputEvent(key, userKey, currState.get, isTTLValue = false, -1) :: results + } + } else if (row.action == "get_ttl_value_from_state") { + val ttlValue = mapState.getTTLValue(userKey) + if (ttlValue.isDefined) { + val value = ttlValue.get._1 + val ttlExpiration = ttlValue.get._2 + results = MapOutputEvent(key, userKey, value, isTTLValue = true, ttlExpiration) :: results + } + } else if (row.action == "put") { + mapState.updateValue(userKey, row.value) + } else if (row.action == "get_values_in_ttl_state") { + val ttlValues = mapState.getKeyValuesInTTLState() + ttlValues.foreach { elem => + results = MapOutputEvent(key, elem._1, -1, isTTLValue = true, ttlValue = elem._2) :: results + } + } else if (row.action == "iterator") { + val iter = mapState.iterator() + iter.foreach { elem => + results = MapOutputEvent(key, elem._1, elem._2, isTTLValue = false, -1) :: results + } + } + + results.iterator + } +} + +class TransformWithMapStateTTLSuite extends TransformWithStateTTLTest { + + import testImplicits._ + override def getProcessor(ttlConfig: TTLConfig): + StatefulProcessor[String, InputEvent, OutputEvent] = { + new MapStateSingleKeyTTLProcessor(ttlConfig) + } + + override def getStateTTLMetricName: String = "numMapStateWithTTLVars" + + test("validate state is evicted with multiple user keys") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName, + SQLConf.SHUFFLE_PARTITIONS.key -> "1") { + + val inputStream = MemoryStream[MapInputEvent] + val ttlConfig = TTLConfig(ttlDuration = Duration.ofMinutes(1)) + val result = inputStream.toDS() + .groupByKey(x => x.key) + .transformWithState( + new MapStateTTLProcessor(ttlConfig), + TimeMode.ProcessingTime(), + OutputMode.Append()) + + val clock = new StreamManualClock + testStream(result)( + StartStream(Trigger.ProcessingTime("1 second"), triggerClock = 
clock), + AddData(inputStream, MapInputEvent("k1", "key1", "put", 1)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(), + AddData(inputStream, MapInputEvent("k1", "key1", "get", -1)), + AdvanceManualClock(30 * 1000), + CheckNewAnswer(MapOutputEvent("k1", "key1", 1, isTTLValue = false, -1)), + AddData(inputStream, MapInputEvent("k1", "key2", "put", 2)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(), + // advance clock to expire first key + AdvanceManualClock(30 * 1000), + AddData(inputStream, MapInputEvent("k1", "key1", "get", -1), + MapInputEvent("k1", "key2", "get", -1)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer(MapOutputEvent("k1", "key2", 2, isTTLValue = false, -1)), + StopStream + ) + } + } + + test("verify iterator doesn't return expired keys") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName, + SQLConf.SHUFFLE_PARTITIONS.key -> "1") { + + val inputStream = MemoryStream[MapInputEvent] + val ttlConfig = TTLConfig(ttlDuration = Duration.ofMinutes(1)) + val result = inputStream.toDS() + .groupByKey(x => x.key) + .transformWithState( + new MapStateTTLProcessor(ttlConfig), + TimeMode.ProcessingTime(), + OutputMode.Append()) + + val clock = new StreamManualClock + testStream(result)( + StartStream(Trigger.ProcessingTime("1 second"), triggerClock = clock), + AddData(inputStream, + MapInputEvent("k1", "key1", "put", 1), + MapInputEvent("k1", "key2", "put", 2) + ), + AdvanceManualClock(1 * 1000), // batch timestamp: 1000 + CheckNewAnswer(), + AddData(inputStream, + MapInputEvent("k1", "key1", "get", -1), + MapInputEvent("k1", "key2", "get", -1) + ), + AdvanceManualClock(30 * 1000), // batch timestamp: 31000 + CheckNewAnswer( + MapOutputEvent("k1", "key1", 1, isTTLValue = false, -1), + MapOutputEvent("k1", "key2", 2, isTTLValue = false, -1) + ), + // get values from ttl state + AddData(inputStream, + MapInputEvent("k1", "", "get_values_in_ttl_state", -1) + ), + AdvanceManualClock(1 * 1000), // batch timestamp: 32000 + CheckNewAnswer( + MapOutputEvent("k1", "key1", -1, isTTLValue = true, 61000), + MapOutputEvent("k1", "key2", -1, isTTLValue = true, 61000) + ), + // advance clock to expire first two values + AdvanceManualClock(30 * 1000), // batch timestamp: 62000 + AddData(inputStream, + MapInputEvent("k1", "key3", "put", 3), + MapInputEvent("k1", "key4", "put", 4), + MapInputEvent("k1", "key5", "put", 5), + MapInputEvent("k1", "", "iterator", -1) + ), + AdvanceManualClock(1 * 1000), // batch timestamp: 63000 + CheckNewAnswer( + MapOutputEvent("k1", "key3", 3, isTTLValue = false, -1), + MapOutputEvent("k1", "key4", 4, isTTLValue = false, -1), + MapOutputEvent("k1", "key5", 5, isTTLValue = false, -1) + ), + AddData(inputStream, + MapInputEvent("k1", "", "get_values_in_ttl_state", -1) + ), + AdvanceManualClock(1 * 1000), + CheckNewAnswer( + MapOutputEvent("k1", "key3", -1, isTTLValue = true, 123000), + MapOutputEvent("k1", "key4", -1, isTTLValue = true, 123000), + MapOutputEvent("k1", "key5", -1, isTTLValue = true, 123000) + ), + // get all values without enforcing ttl + AddData(inputStream, + MapInputEvent("k1", "key1", "get_without_enforcing_ttl", -1), + MapInputEvent("k1", "key2", "get_without_enforcing_ttl", -1), + MapInputEvent("k1", "key3", "get_without_enforcing_ttl", -1), + MapInputEvent("k1", "key4", "get_without_enforcing_ttl", -1), + MapInputEvent("k1", "key5", "get_without_enforcing_ttl", -1) + ), + AdvanceManualClock(1 * 1000), + CheckNewAnswer( + MapOutputEvent("k1", "key3", 3, isTTLValue = false, -1), + 
MapOutputEvent("k1", "key4", 4, isTTLValue = false, -1), + MapOutputEvent("k1", "key5", 5, isTTLValue = false, -1) + ), + // check that updating a key updates its TTL + AddData(inputStream, MapInputEvent("k1", "key3", "put", 3)), + AdvanceManualClock(1 * 1000), + AddData(inputStream, MapInputEvent("k1", "", "get_values_in_ttl_state", -1)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer( + MapOutputEvent("k1", "key3", -1, isTTLValue = true, 123000), + MapOutputEvent("k1", "key3", -1, isTTLValue = true, 126000), + MapOutputEvent("k1", "key4", -1, isTTLValue = true, 123000), + MapOutputEvent("k1", "key5", -1, isTTLValue = true, 123000) + ), + AddData(inputStream, MapInputEvent("k1", "key3", "get_ttl_value_from_state", -1)), + AdvanceManualClock(1 * 1000), + CheckNewAnswer( + MapOutputEvent("k1", "key3", 3, isTTLValue = true, 126000) + ), + StopStream + ) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateChainingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateChainingSuite.scala new file mode 100644 index 0000000000000..5388d6f1fb68a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateChainingSuite.scala @@ -0,0 +1,411 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.streaming + +import java.sql.Timestamp +import java.time.{Instant, LocalDateTime, ZoneId} + +import org.apache.spark.{SparkRuntimeException, SparkThrowable} +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.ExtendedAnalysisException +import org.apache.spark.sql.execution.streaming.{MemoryStream, StreamExecution} +import org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider +import org.apache.spark.sql.functions.window +import org.apache.spark.sql.internal.SQLConf + +case class InputEventRow( + key: String, + eventTime: Timestamp, + event: String) + +case class OutputRow( + key: String, + outputEventTime: Timestamp, + count: Int) + +class TestStatefulProcessor + extends StatefulProcessor[String, InputEventRow, OutputRow] { + override def init(outputMode: OutputMode, timeMode: TimeMode): Unit = {} + + override def handleInputRows( + key: String, + inputRows: Iterator[InputEventRow], + timerValues: TimerValues, + expiredTimerInfo: ExpiredTimerInfo): Iterator[OutputRow] = { + if (inputRows.isEmpty) { + Iterator.empty + } else { + var minEventTime = inputRows.next().eventTime + var count = 1 + inputRows.foreach { row => + if (row.eventTime.before(minEventTime)) { + minEventTime = row.eventTime + } + count += 1 + } + Iterator.single(OutputRow(key, minEventTime, count)) + } + } +} + +class InputCountStatefulProcessor[T] + extends StatefulProcessor[String, T, Int] { + override def init(outputMode: OutputMode, timeMode: TimeMode): Unit = {} + + override def handleInputRows( + key: String, + inputRows: Iterator[T], + timerValues: TimerValues, + expiredTimerInfo: ExpiredTimerInfo): Iterator[Int] = { + Iterator.single(inputRows.size) + } +} + +/** + * Emits output row with timestamp older than current watermark for batchId > 0. 
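+ * Used to verify that the query fails with EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED
+ * once the watermark advances past the emitted event time (1 ms after epoch).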
+ */ +class StatefulProcessorEmittingRowsOlderThanWatermark + extends StatefulProcessor[String, InputEventRow, OutputRow] { + override def init(outputMode: OutputMode, timeMode: TimeMode): Unit = {} + + override def handleInputRows( + key: String, + inputRows: Iterator[InputEventRow], + timerValues: TimerValues, + expiredTimerInfo: ExpiredTimerInfo): Iterator[OutputRow] = { + Iterator.single( + OutputRow( + key, + // always emit value with eventTime 1 which will fail after first batch, as + // watermark will move past 0L + Timestamp.from(Instant.ofEpochMilli(1)), + inputRows.size)) + } +} + +case class Window( + start: Timestamp, + end: Timestamp) + +case class AggEventRow( + window: Window, + count: Long) + +class TransformWithStateChainingSuite extends StreamTest { + import testImplicits._ + + test("watermark is propagated correctly for next stateful operator" + + " after transformWithState") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + "outputEventTime", + OutputMode.Append()) + .groupBy(window($"outputEventTime", "1 minute")) + .count() + .as[AggEventRow] + + testStream(result, OutputMode.Append())( + AddData(inputData, InputEventRow("k1", timestamp("2024-01-01 00:00:00"), "e1")), + // watermark should be 1 minute behind `2024-01-01 00:00:00`, nothing is + // emitted as all records have timestamp > epoch + CheckNewAnswer(), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2023-12-31 23:59:00")) + }, + AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + // global watermark should now be 1 minute behind `2024-02-01 00:00:00`. 
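+ // which closes the 2024-01-01 00:00:00 window and emits its aggregate below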
+ CheckNewAnswer(AggEventRow( + Window(timestamp("2024-01-01 00:00:00"), timestamp("2024-01-01 00:01:00")), 1) + ), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2024-01-31 23:59:00")) + }, + AddData(inputData, InputEventRow("k1", timestamp("2024-02-02 00:00:00"), "e1")), + CheckNewAnswer(AggEventRow( + Window(timestamp("2024-02-01 00:00:00"), timestamp("2024-02-01 00:01:00")), 1) + ) + ) + } + } + + test("passing eventTime column to transformWithState fails if" + + " no watermark is defined") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + + val ex = intercept[AnalysisException] { + inputData.toDS() + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + "outputEventTime", + OutputMode.Append()) + } + + checkError(ex, "CANNOT_ASSIGN_EVENT_TIME_COLUMN_WITHOUT_WATERMARK") + } + } + + test("missing eventTime column to transformWithState fails the query if" + + " another stateful operator is added") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + TimeMode.None(), + OutputMode.Append()) + .groupBy(window($"outputEventTime", "1 minute")) + .count() + + val ex = intercept[ExtendedAnalysisException] { + testStream(result, OutputMode.Append())( + StartStream() + ) + } + assert(ex.getMessage.contains("there are streaming aggregations on" + + " streaming DataFrames/DataSets without watermark")) + } + } + + test("chaining multiple transformWithState operators") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + "outputEventTime", + OutputMode.Append()) + .groupByKey(x => x.key) + .transformWithState( + new InputCountStatefulProcessor[OutputRow](), + TimeMode.None(), + OutputMode.Append() + ) + + testStream(result, OutputMode.Append())( + AddData(inputData, InputEventRow("k1", timestamp("2024-01-01 00:00:00"), "e1")), + CheckNewAnswer(1), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2023-12-31 23:59:00")) + }, + AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + CheckNewAnswer(1), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2024-01-31 23:59:00")) + }, + AddData(inputData, InputEventRow("k1", timestamp("2024-02-02 00:00:00"), "e1")), + CheckNewAnswer(1) + ) + } + } + + test("dropDuplicateWithWatermark after transformWithState operator" + + " fails if watermark column is not provided") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + TimeMode.None(), + OutputMode.Append()) + .dropDuplicatesWithinWatermark() + + val ex = intercept[ExtendedAnalysisException] { + testStream(result, OutputMode.Append())( + StartStream() + ) + } + 
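+ // without an assigned event-time column the transformWithState output carries no watermark,
+ // so the analyzer rejects dropDuplicatesWithinWatermark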
assert(ex.getMessage.contains("dropDuplicatesWithinWatermark is not supported on" + + " streaming DataFrames/DataSets without watermark")) + } + } + + test("dropDuplicateWithWatermark after transformWithState operator") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + "outputEventTime", + OutputMode.Append()) + .dropDuplicatesWithinWatermark() + + testStream(result, OutputMode.Append())( + AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1"), + InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + CheckNewAnswer(OutputRow("k1", timestamp("2024-02-01 00:00:00"), 2)), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2024-01-31 23:59:00")) + } + ) + } + } + + test("query fails if the output dataset does not contain specified eventTimeColumn") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + + val ex = intercept[ExtendedAnalysisException] { + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + "missingEventTimeColumn", + OutputMode.Append()) + + testStream(result, OutputMode.Append())( + StartStream() + ) + } + + checkError(ex, "UNRESOLVED_COLUMN.WITH_SUGGESTION", + parameters = Map( + "objectName" -> "`missingEventTimeColumn`", + "proposal" -> "`outputEventTime`, `count`, `key`")) + } + } + + test("query fails if the output dataset contains rows older than current watermark") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + val result = inputData.toDS() + .withWatermark("eventTime", "1 minute") + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new StatefulProcessorEmittingRowsOlderThanWatermark(), + "outputEventTime", + OutputMode.Append()) + + testStream(result, OutputMode.Append())( + AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + // after first batch, the rows are emitted with timestamp 1 ms after epoch + CheckNewAnswer(OutputRow("k1", Timestamp.from(Instant.ofEpochMilli(1)), 1)), + // this batch would fail now, because watermark will move past 1ms after epoch + AddData(inputData, InputEventRow("k1", timestamp("2024-02-02 00:00:00"), "e1")), + ExpectFailure[SparkRuntimeException] { ex => + checkError(ex.asInstanceOf[SparkThrowable], + "EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED", + parameters = Map("currentWatermark" -> "1706774340000", + "emittedRowEventTime" -> "1000")) + } + ) + } + } + + test("ensure that watermark delay is resolved from a view") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + inputData.toDS() + .withWatermark("eventTime", "1 minute") + .createTempView("tempViewWithWatermark") + + val result = spark.readStream.table("tempViewWithWatermark") + .as[InputEventRow] + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + "outputEventTime", + OutputMode.Append()) + + testStream(result, OutputMode.Append())( + AddData(inputData, 
InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + CheckNewAnswer(OutputRow("k1", timestamp("2024-02-01 00:00:00"), 1)), + Execute("assertWatermarkEquals") { q => + assertWatermarkEquals(q, timestamp("2024-01-31 23:59:00")) + } + ) + } + } + + test("ensure that query fails if there is no watermark when reading from a view") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName) { + val inputData = MemoryStream[InputEventRow] + inputData.toDS() + .createTempView("tempViewWithoutWatermark") + + val ex = intercept[AnalysisException] { + val result = spark.readStream.table("tempViewWithoutWatermark") + .as[InputEventRow] + .groupByKey(x => x.key) + .transformWithState[OutputRow]( + new TestStatefulProcessor(), + "outputEventTime", + OutputMode.Append()) + + testStream(result, OutputMode.Append())( + AddData(inputData, InputEventRow("k1", timestamp("2024-02-01 00:00:00"), "e1")), + ExpectFailure[SparkRuntimeException] { ex => + checkError(ex.asInstanceOf[AnalysisException], + "CANNOT_ASSIGN_EVENT_TIME_COLUMN_WITHOUT_WATERMARK") + } + ) + } + + checkError(ex, "CANNOT_ASSIGN_EVENT_TIME_COLUMN_WITHOUT_WATERMARK") + } + } + + private def timestamp(str: String): Timestamp = { + Timestamp.valueOf(str) + } + + private def assertWatermarkEquals(q: StreamExecution, watermark: Timestamp): Unit = { + val queryWatermark = getQueryWatermark(q) + assert(queryWatermark.isDefined) + assert(queryWatermark.get === watermark) + } + + private def getQueryWatermark(q: StreamExecution): Option[Timestamp] = { + import scala.jdk.CollectionConverters._ + val eventTimeMap = q.lastProgress.eventTime.asScala + val queryWatermark = eventTimeMap.get("watermark") + queryWatermark.map { v => + val instant = Instant.parse(v) + val local = LocalDateTime.ofInstant(instant, ZoneId.systemDefault()) + Timestamp.valueOf(local) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateSuite.scala index 0057af44d3e37..5e408dc999f82 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/TransformWithStateSuite.scala @@ -25,7 +25,8 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{Dataset, Encoders} import org.apache.spark.sql.catalyst.util.stringToFile import org.apache.spark.sql.execution.streaming._ -import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithChangelogCheckpointingEnabled, RocksDBStateStoreProvider, StatefulProcessorCannotPerformOperationWithInvalidHandleState, StateStoreMultipleColumnFamiliesNotSupportedException} +import org.apache.spark.sql.execution.streaming.TransformWithStateKeyValueRowSchema.{KEY_ROW_SCHEMA, VALUE_ROW_SCHEMA} +import org.apache.spark.sql.execution.streaming.state.{AlsoTestWithChangelogCheckpointingEnabled, ColumnFamilySchemaV1, NoPrefixKeyStateEncoderSpec, RocksDBStateStoreProvider, StatefulProcessorCannotPerformOperationWithInvalidHandleState, StateSchemaV3File, StateStoreMultipleColumnFamiliesNotSupportedException} import org.apache.spark.sql.functions.timestamp_seconds import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.util.StreamManualClock @@ -784,6 +785,79 @@ class TransformWithStateSuite extends StateStoreMetricsTest } } } + + test("transformWithState - verify StateSchemaV3 serialization and deserialization" + + " works with one batch") { 
+ withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName, + SQLConf.SHUFFLE_PARTITIONS.key -> + TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS.toString) { + withTempDir { checkpointDir => + val schema = List(ColumnFamilySchemaV1( + "countState", + KEY_ROW_SCHEMA, + VALUE_ROW_SCHEMA, + NoPrefixKeyStateEncoderSpec(KEY_ROW_SCHEMA), + None + )) + + val schemaFile = new StateSchemaV3File( + spark.sessionState.newHadoopConf(), checkpointDir.getCanonicalPath) + val path = schemaFile.addWithUUID(0, schema) + + assert(schemaFile.getWithPath(path) == schema) + } + } + } + + test("transformWithState - verify StateSchemaV3 serialization and deserialization" + + " works with multiple batches") { + withSQLConf(SQLConf.STATE_STORE_PROVIDER_CLASS.key -> + classOf[RocksDBStateStoreProvider].getName, + SQLConf.SHUFFLE_PARTITIONS.key -> + TransformWithStateSuiteUtils.NUM_SHUFFLE_PARTITIONS.toString) { + withTempDir { checkpointDir => + + val schema0 = List(ColumnFamilySchemaV1( + "countState", + KEY_ROW_SCHEMA, + VALUE_ROW_SCHEMA, + NoPrefixKeyStateEncoderSpec(KEY_ROW_SCHEMA), + None + )) + + val schema1 = List( + ColumnFamilySchemaV1( + "countState", + KEY_ROW_SCHEMA, + VALUE_ROW_SCHEMA, + NoPrefixKeyStateEncoderSpec(KEY_ROW_SCHEMA), + None + ), + ColumnFamilySchemaV1( + "mostRecent", + KEY_ROW_SCHEMA, + VALUE_ROW_SCHEMA, + NoPrefixKeyStateEncoderSpec(KEY_ROW_SCHEMA), + None + ) + ) + + val schemaFile = new StateSchemaV3File(spark.sessionState.newHadoopConf(), + checkpointDir.getCanonicalPath) + val path0 = schemaFile.addWithUUID(0, schema0) + + assert(schemaFile.getWithPath(path0) == schema0) + + // test the case where we are trying to add the schema after + // restarting after a few batches + val path1 = schemaFile.addWithUUID(3, schema1) + val latestSchema = schemaFile.getWithPath(path1) + + assert(latestSchema == schema1) + } + } + } } class TransformWithStateValidationSuite extends StateStoreMetricsTest { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala index 7da2bb47038ed..5fbf379644f6d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala @@ -339,8 +339,7 @@ private[sql] trait SQLTestUtilsBase val tableIdent = spark.sessionState.sqlParser.parseTableIdentifier(tableName) val cascade = !spark.sessionState.catalog.isTempView(tableIdent) spark.sharedState.cacheManager.uncacheQuery( - spark, - spark.table(tableName).logicalPlan, + spark.table(tableName), cascade = cascade, blocking = true) } diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py index 053e11d10295b..dc48a5a6155ed 100644 --- a/sql/gen-sql-functions-docs.py +++ b/sql/gen-sql-functions-docs.py @@ -163,7 +163,8 @@ def _make_pretty_examples(jspark, infos): pretty_output = "" for info in infos: - if info.examples.startswith("\n Examples:"): + if (info.examples.startswith("\n Examples:") + and info.name.lower() not in ("from_avro", "to_avro")): output = [] output.append("-- %s" % info.name) query_examples = filter(lambda x: x.startswith(" > "), info.examples.split("\n")) diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/AbstractService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/AbstractService.java index 6481cf15075a7..b31d024eeeeb9 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/AbstractService.java +++ 
b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/AbstractService.java @@ -21,8 +21,11 @@ import java.util.List; import org.apache.hadoop.hive.conf.HiveConf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * AbstractService. @@ -30,7 +33,7 @@ */ public abstract class AbstractService implements Service { - private static final Logger LOG = LoggerFactory.getLogger(AbstractService.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(AbstractService.class); /** * Service state: initially {@link STATE#NOTINITED}. @@ -85,7 +88,7 @@ public synchronized void init(HiveConf hiveConf) { ensureCurrentState(STATE.NOTINITED); this.hiveConf = hiveConf; changeState(STATE.INITED); - LOG.info("Service:" + getName() + " is inited."); + LOG.info("Service:{} is inited.", MDC.of(LogKeys.SERVICE_NAME$.MODULE$, getName())); } /** @@ -100,7 +103,7 @@ public synchronized void start() { startTime = System.currentTimeMillis(); ensureCurrentState(STATE.INITED); changeState(STATE.STARTED); - LOG.info("Service:" + getName() + " is started."); + LOG.info("Service:{} is started.", MDC.of(LogKeys.SERVICE_NAME$.MODULE$, getName())); } /** @@ -121,7 +124,7 @@ public synchronized void stop() { } ensureCurrentState(STATE.STARTED); changeState(STATE.STOPPED); - LOG.info("Service:" + getName() + " is stopped."); + LOG.info("Service:{} is stopped.", MDC.of(LogKeys.SERVICE_NAME$.MODULE$, getName())); } @Override diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CompositeService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CompositeService.java index 55c1aa52b95ca..663bcdb86f9f6 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CompositeService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CompositeService.java @@ -23,8 +23,11 @@ import java.util.List; import org.apache.hadoop.hive.conf.HiveConf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * CompositeService. @@ -32,7 +35,7 @@ */ public class CompositeService extends AbstractService { - private static final Logger LOG = LoggerFactory.getLogger(CompositeService.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(CompositeService.class); private final List serviceList = new ArrayList(); @@ -70,7 +73,7 @@ public synchronized void start() { } super.start(); } catch (Throwable e) { - LOG.error("Error starting services " + getName(), e); + LOG.error("Error starting services {}", e, MDC.of(LogKeys.SERVICE_NAME$.MODULE$, getName())); // Note that the state of the failed service is still INITED and not // STARTED. 
Even though the last service is not started completely, still // call stop() on all services including failed service to make sure cleanup @@ -100,7 +103,7 @@ private synchronized void stop(int numOfServicesStarted) { try { service.stop(); } catch (Throwable t) { - LOG.info("Error stopping " + service.getName(), t); + LOG.info("Error stopping {}", t, MDC.of(LogKeys.SERVICE_NAME$.MODULE$, service.getName())); } } } @@ -123,7 +126,8 @@ public void run() { // Stop the Composite Service compositeService.stop(); } catch (Throwable t) { - LOG.info("Error stopping " + compositeService.getName(), t); + LOG.info("Error stopping {}", t, + MDC.of(LogKeys.SERVICE_NAME$.MODULE$, compositeService.getName())); } } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java index 4b8d2cb1536cd..c315478939c8d 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java @@ -21,8 +21,9 @@ import java.security.NoSuchAlgorithmException; import org.apache.commons.codec.binary.Base64; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; /** * The cookie signer generates a signature based on SHA digest @@ -33,7 +34,7 @@ public class CookieSigner { private static final String SIGNATURE = "&s="; private static final String SHA_STRING = "SHA-256"; private byte[] secretBytes; - private static final Logger LOG = LoggerFactory.getLogger(CookieSigner.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(CookieSigner.class); /** * Constructor diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceOperations.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceOperations.java index 434676aa8d215..92d733c563cab 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceOperations.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceOperations.java @@ -18,15 +18,18 @@ package org.apache.hive.service; import org.apache.hadoop.hive.conf.HiveConf; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * ServiceOperations. 
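 * Static utility methods for service lifecycle handling, e.g. stopQuietly(), which logs and swallows stop failures.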
* */ public final class ServiceOperations { - private static final Logger LOG = LoggerFactory.getLogger(ServiceOperations.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(ServiceOperations.class); private ServiceOperations() { } @@ -129,9 +132,8 @@ public static Exception stopQuietly(Service service) { try { stop(service); } catch (Exception e) { - LOG.warn("When stopping the service " + service.getName() - + " : " + e, - e); + LOG.warn("When stopping the service {}", e, + MDC.of(LogKeys.SERVICE_NAME$.MODULE$, service.getName())); return e; } return null; diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceUtils.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceUtils.java index 7552bda57dc0b..25db121207bbf 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceUtils.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/ServiceUtils.java @@ -18,7 +18,7 @@ import java.io.IOException; -import org.slf4j.Logger; +import org.apache.spark.internal.SparkLogger; public class ServiceUtils { @@ -52,7 +52,7 @@ public static int indexOfDomainMatch(String userName) { * @param log the log to record problems to at debug level. Can be null. * @param closeables the objects to close */ - public static void cleanup(Logger log, java.io.Closeable... closeables) { + public static void cleanup(SparkLogger log, java.io.Closeable... closeables) { for (java.io.Closeable c : closeables) { if (c != null) { try { diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java index e3316cef241c3..ecbda2661e960 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java @@ -27,9 +27,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.HiveMetaStore; -import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler; -import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.shims.HadoopShims.KerberosNameShim; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.hive.thrift.DBTokenStore; @@ -44,16 +42,19 @@ import org.apache.thrift.TProcessorFactory; import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * This class helps in some aspects of authentication. It creates the proper Thrift classes for the * given configuration as well as helps with authenticating requests. 
*/ public class HiveAuthFactory { - private static final Logger LOG = LoggerFactory.getLogger(HiveAuthFactory.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(HiveAuthFactory.class); public enum AuthTypes { NOSASL("NOSASL"), @@ -132,16 +133,15 @@ public HiveAuthFactory(HiveConf conf) throws TTransportException, IOException { HiveConf.ConfVars.METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS); if (tokenStoreClass.equals(DBTokenStore.class.getName())) { - HMSHandler baseHandler = new HiveMetaStore.HMSHandler( - "new db based metaserver", conf, true); - rawStore = baseHandler.getMS(); + // Follows https://issues.apache.org/jira/browse/HIVE-12270 + rawStore = Hive.class; } delegationTokenManager.startDelegationTokenSecretManager( conf, rawStore, ServerMode.HIVESERVER2); saslServer.setSecretManager(delegationTokenManager.getSecretManager()); } - catch (MetaException|IOException e) { + catch (IOException e) { throw new TTransportException("Failed to start token manager", e); } } @@ -288,9 +288,9 @@ public String verifyDelegationToken(String delegationToken) throws HiveSQLExcept try { return delegationTokenManager.verifyDelegationToken(delegationToken); } catch (IOException e) { - String msg = "Error verifying delegation token " + delegationToken; - LOG.error(msg, e); - throw new HiveSQLException(msg, "08S01", e); + String msg = "Error verifying delegation token"; + LOG.error(msg + " {}", e, MDC.of(LogKeys.TOKEN$.MODULE$, delegationToken)); + throw new HiveSQLException(msg + delegationToken, "08S01", e); } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java index 08a8258db06f2..e307bdab04498 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HttpAuthUtils.java @@ -39,8 +39,11 @@ import org.ietf.jgss.GSSManager; import org.ietf.jgss.GSSName; import org.ietf.jgss.Oid; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * Utility functions for HTTP mode authentication. 
@@ -50,7 +53,7 @@ public final class HttpAuthUtils { public static final String AUTHORIZATION = "Authorization"; public static final String BASIC = "Basic"; public static final String NEGOTIATE = "Negotiate"; - private static final Logger LOG = LoggerFactory.getLogger(HttpAuthUtils.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(HttpAuthUtils.class); private static final String COOKIE_ATTR_SEPARATOR = "&"; private static final String COOKIE_CLIENT_USER_NAME = "cu"; private static final String COOKIE_CLIENT_RAND_NUMBER = "rn"; @@ -109,7 +112,8 @@ public static String getUserNameFromCookieToken(String tokenStr) { Map map = splitCookieToken(tokenStr); if (!map.keySet().equals(COOKIE_ATTRIBUTES)) { - LOG.error("Invalid token with missing attributes " + tokenStr); + LOG.error("Invalid token with missing attributes {}", + MDC.of(LogKeys.TOKEN$.MODULE$, tokenStr)); return null; } return map.get(COOKIE_CLIENT_USER_NAME); @@ -129,7 +133,7 @@ private static Map splitCookieToken(String tokenStr) { String part = st.nextToken(); int separator = part.indexOf(COOKIE_KEY_VALUE_SEPARATOR); if (separator == -1) { - LOG.error("Invalid token string " + tokenStr); + LOG.error("Invalid token string {}", MDC.of(LogKeys.TOKEN$.MODULE$, tokenStr)); return null; } String key = part.substring(0, separator); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java index 175412ed98c6c..ef91f94eeec2b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/KerberosSaslHelper.java @@ -30,6 +30,7 @@ import org.apache.thrift.TProcessorFactory; import org.apache.thrift.transport.TSaslClientTransport; import org.apache.thrift.transport.TTransport; +import org.apache.thrift.transport.TTransportException; public final class KerberosSaslHelper { @@ -68,8 +69,8 @@ public static TTransport createSubjectAssumedTransport(String principal, new TSaslClientTransport("GSSAPI", null, names[0], names[1], saslProps, null, underlyingTransport); return new TSubjectAssumingTransport(saslTransport); - } catch (SaslException se) { - throw new IOException("Could not instantiate SASL transport", se); + } catch (SaslException | TTransportException se) { + throw new IOException("Could not instantiate transport", se); } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java index c06f6ec34653f..5ac29950f4f85 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/PlainSaslHelper.java @@ -38,6 +38,7 @@ import org.apache.thrift.transport.TSaslClientTransport; import org.apache.thrift.transport.TSaslServerTransport; import org.apache.thrift.transport.TTransport; +import org.apache.thrift.transport.TTransportException; import org.apache.thrift.transport.TTransportFactory; public final class PlainSaslHelper { @@ -64,7 +65,7 @@ public static TTransportFactory getPlainTransportFactory(String authTypeStr) } public static TTransport getPlainTransport(String username, String password, - TTransport underlyingTransport) throws SaslException { + TTransport underlyingTransport) throws SaslException, TTransportException { return new 
TSaslClientTransport("PLAIN", null, null, null, new HashMap(), new PlainCallbackHandler(username, password), underlyingTransport); } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java index 1205d21be6be6..e0091d6c04fe7 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/TSetIpAddressProcessor.java @@ -25,8 +25,9 @@ import org.apache.thrift.transport.TSaslServerTransport; import org.apache.thrift.transport.TSocket; import org.apache.thrift.transport.TTransport; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; /** * This class is responsible for setting the ipAddress for operations executed via HiveServer2. @@ -38,18 +39,19 @@ */ public class TSetIpAddressProcessor extends TCLIService.Processor { - private static final Logger LOGGER = LoggerFactory.getLogger(TSetIpAddressProcessor.class.getName()); + private static final SparkLogger LOGGER = SparkLoggerFactory.getLogger(TSetIpAddressProcessor.class); public TSetIpAddressProcessor(Iface iface) { super(iface); } @Override - public boolean process(final TProtocol in, final TProtocol out) throws TException { + public void process(final TProtocol in, final TProtocol out) throws TException { setIpAddress(in); setUserName(in); try { - return super.process(in, out); + super.process(in, out); + return; } finally { THREAD_LOCAL_USER_NAME.remove(); THREAD_LOCAL_IP_ADDRESS.remove(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java index caccb0c4b76f7..86fb725d3a3cc 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIService.java @@ -49,8 +49,11 @@ import org.apache.hive.service.rpc.thrift.TRowSet; import org.apache.hive.service.rpc.thrift.TTableSchema; import org.apache.hive.service.server.HiveServer2; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * CLIService. 
@@ -58,7 +61,7 @@ */ public class CLIService extends CompositeService implements ICLIService { - private static final Logger LOG = LoggerFactory.getLogger(CLIService.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(CLIService.class); public static final TProtocolVersion SERVER_VERSION; @@ -99,8 +102,9 @@ public synchronized void init(HiveConf hiveConf) { String principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL); String keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB); if (principal.isEmpty() || keyTabFile.isEmpty()) { - LOG.info("SPNego httpUGI not created, spNegoPrincipal: " + principal + - ", ketabFile: " + keyTabFile); + LOG.info("SPNego httpUGI not created, spNegoPrincipal: {}, keytabFile: {}", + MDC.of(LogKeys.PRINCIPAL$.MODULE$, principal), + MDC.of(LogKeys.KEYTAB_FILE$.MODULE$, keyTabFile)); } else { try { this.httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf); @@ -457,7 +461,8 @@ public OperationStatus getOperationStatus(OperationHandle opHandle) LOG.trace(opHandle + ": The background operation was cancelled", e); } catch (ExecutionException e) { // The background operation thread was aborted - LOG.warn(opHandle + ": The background operation was aborted", e); + LOG.warn("{}: The background operation was aborted", e, + MDC.of(LogKeys.OPERATION_HANDLE$.MODULE$, opHandle)); } catch (InterruptedException e) { // No op, this thread was interrupted // In this case, the call might return sooner than long polling timeout @@ -551,7 +556,7 @@ public String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory au String owner, String renewer) throws HiveSQLException { String delegationToken = sessionManager.getSession(sessionHandle) .getDelegationToken(authFactory, owner, renewer); - LOG.info(sessionHandle + ": getDelegationToken()"); + LOG.info("{}: getDelegationToken()", MDC.of(LogKeys.SESSION_HANDLE$.MODULE$, sessionHandle)); return delegationToken; } @@ -559,14 +564,14 @@ public String getDelegationToken(SessionHandle sessionHandle, HiveAuthFactory au public void cancelDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, String tokenStr) throws HiveSQLException { sessionManager.getSession(sessionHandle).cancelDelegationToken(authFactory, tokenStr); - LOG.info(sessionHandle + ": cancelDelegationToken()"); + LOG.info("{}: cancelDelegationToken()", MDC.of(LogKeys.SESSION_HANDLE$.MODULE$, sessionHandle)); } @Override public void renewDelegationToken(SessionHandle sessionHandle, HiveAuthFactory authFactory, String tokenStr) throws HiveSQLException { sessionManager.getSession(sessionHandle).renewDelegationToken(authFactory, tokenStr); - LOG.info(sessionHandle + ": renewDelegationToken()"); + LOG.info("{}: renewDelegationToken()", MDC.of(LogKeys.SESSION_HANDLE$.MODULE$, sessionHandle)); } @Override diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java index 629d9abdac2c0..4331f6829fbf3 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java @@ -30,8 +30,11 @@ import org.apache.thrift.protocol.TCompactProtocol; import org.apache.thrift.protocol.TProtocol; import org.apache.thrift.transport.TIOStreamTransport; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import 
org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * ColumnBasedSet. @@ -44,7 +47,7 @@ public class ColumnBasedSet implements RowSet { private final List columns; private byte[] blob; private boolean isBlobBased = false; - public static final Logger LOG = LoggerFactory.getLogger(ColumnBasedSet.class); + public static final SparkLogger LOG = SparkLoggerFactory.getLogger(ColumnBasedSet.class); public ColumnBasedSet(TableSchema schema) { descriptors = schema.toTypeDescriptors(); @@ -68,7 +71,7 @@ public ColumnBasedSet(TRowSet tRowSet) throws TException { try { tvalue.read(protocol); } catch (TException e) { - LOG.error(e.getMessage(), e); + LOG.error("{}", e, MDC.of(LogKeys.ERROR$.MODULE$, e.getMessage())); throw new TException("Error reading column value from the row set blob", e); } columns.add(new ColumnBuffer(tvalue)); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java index 96c16beac7c4d..0b71b606b9d65 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/ClassicTableTypeMapping.java @@ -28,8 +28,11 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Multimap; import org.apache.hadoop.hive.metastore.TableType; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * ClassicTableTypeMapping. 
@@ -40,7 +43,7 @@ */ public class ClassicTableTypeMapping implements TableTypeMapping { - private static final Logger LOG = LoggerFactory.getLogger(ClassicTableTypeMapping.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(ClassicTableTypeMapping.class); public enum ClassicTableTypes { TABLE, @@ -69,7 +72,8 @@ public ClassicTableTypeMapping() { public String[] mapToHiveType(String clientTypeName) { Collection hiveTableType = clientToHiveMap.get(clientTypeName.toUpperCase()); if (hiveTableType == null) { - LOG.warn("Not supported client table type " + clientTypeName); + LOG.warn("Not supported client table type {}", + MDC.of(LogKeys.TABLE_TYPE$.MODULE$, clientTypeName)); return new String[] {clientTypeName}; } return Iterables.toArray(hiveTableType, String.class); @@ -79,7 +83,8 @@ public String[] mapToHiveType(String clientTypeName) { public String mapToClientType(String hiveTypeName) { String clientTypeName = hiveToClientMap.get(hiveTypeName); if (clientTypeName == null) { - LOG.warn("Invalid hive table type " + hiveTypeName); + LOG.warn("Invalid hive table type {}", + MDC.of(LogKeys.TABLE_TYPE$.MODULE$, hiveTypeName)); return hiveTypeName; } return clientTypeName; diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/Operation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/Operation.java index ad42925207d69..f488a411c31f3 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/Operation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/Operation.java @@ -38,15 +38,18 @@ import org.apache.hive.service.rpc.thrift.TProtocolVersion; import org.apache.hive.service.rpc.thrift.TRowSet; import org.apache.hive.service.rpc.thrift.TTableSchema; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; public abstract class Operation { protected final HiveSession parentSession; private OperationState state = OperationState.INITIALIZED; private final OperationHandle opHandle; private HiveConf configuration; - public static final Logger LOG = LoggerFactory.getLogger(Operation.class.getName()); + public static final SparkLogger LOG = SparkLoggerFactory.getLogger(Operation.class); public static final FetchOrientation DEFAULT_FETCH_ORIENTATION = FetchOrientation.FETCH_NEXT; public static final long DEFAULT_FETCH_MAX_ROWS = 100; protected boolean hasResultSet; @@ -208,8 +211,8 @@ protected void createOperationLog() { // create log file try { if (operationLogFile.exists()) { - LOG.warn("The operation log file should not exist, but it is already there: " + - operationLogFile.getAbsolutePath()); + LOG.warn("The operation log file should not exist, but it is already there: {}", + MDC.of(LogKeys.PATH$.MODULE$, operationLogFile.getAbsolutePath())); operationLogFile.delete(); } if (!operationLogFile.createNewFile()) { @@ -217,13 +220,15 @@ protected void createOperationLog() { // If it can be read/written, keep its contents and use it. 
if (!operationLogFile.canRead() || !operationLogFile.canWrite()) { LOG.warn("The already existed operation log file cannot be recreated, " + - "and it cannot be read or written: " + operationLogFile.getAbsolutePath()); + "and it cannot be read or written: {}", + MDC.of(LogKeys.PATH$.MODULE$, operationLogFile.getAbsolutePath())); isOperationLogEnabled = false; return; } } } catch (Exception e) { - LOG.warn("Unable to create operation log file: " + operationLogFile.getAbsolutePath(), e); + LOG.warn("Unable to create operation log file: {}", e, + MDC.of(LogKeys.PATH$.MODULE$, operationLogFile.getAbsolutePath())); isOperationLogEnabled = false; return; } @@ -232,8 +237,8 @@ protected void createOperationLog() { try { operationLog = new OperationLog(opHandle.toString(), operationLogFile, parentSession.getHiveConf()); } catch (FileNotFoundException e) { - LOG.warn("Unable to instantiate OperationLog object for operation: " + - opHandle, e); + LOG.warn("Unable to instantiate OperationLog object for operation: {}", e, + MDC.of(LogKeys.OPERATION_HANDLE$.MODULE$, opHandle)); isOperationLogEnabled = false; return; } @@ -283,8 +288,9 @@ public void run() throws HiveSQLException { protected void cleanupOperationLog() { if (isOperationLogEnabled) { if (operationLog == null) { - LOG.error("Operation [ " + opHandle.getHandleIdentifier() + " ] " - + "logging is enabled, but its OperationLog object cannot be found."); + LOG.error("Operation [ {} ] logging is enabled, " + + "but its OperationLog object cannot be found.", + MDC.of(LogKeys.OPERATION_HANDLE_ID$.MODULE$, opHandle.getHandleIdentifier())); } else { operationLog.close(); } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java index bb68c840496ad..fd8266d1a9acc 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java @@ -40,15 +40,18 @@ import org.apache.hive.service.rpc.thrift.TRowSet; import org.apache.hive.service.rpc.thrift.TTableSchema; import org.apache.logging.log4j.core.Appender; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * OperationManager. 
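 * Tracks the operations created for each session and removes the ones that time out.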
* */ public class OperationManager extends AbstractService { - private static final Logger LOG = LoggerFactory.getLogger(OperationManager.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(OperationManager.class); private final Map handleToOperation = new HashMap(); @@ -289,7 +292,8 @@ public List removeExpiredOperations(OperationHandle[] handles) { for (OperationHandle handle : handles) { Operation operation = removeTimedOutOperation(handle); if (operation != null) { - LOG.warn("Operation " + handle + " is timed-out and will be closed"); + LOG.warn("Operation {} is timed-out and will be closed", + MDC.of(LogKeys.OPERATION_HANDLE$.MODULE$, handle)); removed.add(operation); } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java index e00d2705d4172..4b55453ec7a8b 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImpl.java @@ -22,6 +22,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -69,8 +70,11 @@ import org.apache.hive.service.rpc.thrift.TRowSet; import org.apache.hive.service.rpc.thrift.TTableSchema; import org.apache.hive.service.server.ThreadWithGarbageCleanup; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import static org.apache.hadoop.hive.conf.SystemVariables.ENV_PREFIX; import static org.apache.hadoop.hive.conf.SystemVariables.HIVECONF_PREFIX; @@ -91,7 +95,7 @@ public class HiveSessionImpl implements HiveSession { private String ipAddress; private static final String FETCH_WORK_SERDE_CLASS = "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"; - private static final Logger LOG = LoggerFactory.getLogger(HiveSessionImpl.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(HiveSessionImpl.class); private SessionManager sessionManager; private OperationManager operationManager; private final Set opHandleSet = new HashSet(); @@ -116,7 +120,7 @@ public HiveSessionImpl(TProtocolVersion protocol, String username, String passwo ShimLoader.getHadoopShims().refreshDefaultQueue(hiveConf, username); } } catch (IOException e) { - LOG.warn("Error setting scheduler queue: " + e, e); + LOG.warn("Error setting scheduler queue: ", e); } // Set an explicit session name to control the download directory name hiveConf.set("hive.session.id", @@ -146,8 +150,8 @@ public void open(Map sessionConfMap) throws HiveSQLException { sessionState.loadAuxJars(); sessionState.loadReloadableAuxJars(); } catch (IOException e) { - String msg = "Failed to load reloadable jar file path: " + e; - LOG.error(msg, e); + String msg = "Failed to load reloadable jar file path."; + LOG.error("{}", e, MDC.of(LogKeys.ERROR$.MODULE$, msg)); throw new HiveSQLException(msg, e); } // Process global init file: .hiverc @@ -168,7 +172,7 @@ protected BufferedReader loadFile(String fileName) throws IOException { FileInputStream initStream = null; BufferedReader bufferedReader = null; initStream = new FileInputStream(fileName); - 
bufferedReader = new BufferedReader(new InputStreamReader(initStream)); + bufferedReader = new BufferedReader(new InputStreamReader(initStream, StandardCharsets.UTF_8)); return bufferedReader; } @@ -197,7 +201,8 @@ private void processGlobalInitFile() { hivercFile = new File(hivercFile, SessionManager.HIVERCFILE); } if (hivercFile.isFile()) { - LOG.info("Running global init file: " + hivercFile); + LOG.info("Running global init file: {}", + MDC.of(LogKeys.GLOBAL_INIT_FILE$.MODULE$, hivercFile)); int rc = processor.processFile(hivercFile.getAbsolutePath()); if (rc != 0) { LOG.error("Failed on initializing global .hiverc file"); @@ -297,28 +302,29 @@ private static void setConf(String varname, String key, String varvalue, boolean @Override public void setOperationLogSessionDir(File operationLogRootDir) { if (!operationLogRootDir.exists()) { - LOG.warn("The operation log root directory is removed, recreating: " + - operationLogRootDir.getAbsolutePath()); + LOG.warn("The operation log root directory is removed, recreating: {}", + MDC.of(LogKeys.PATH$.MODULE$, operationLogRootDir.getAbsolutePath())); if (!operationLogRootDir.mkdirs()) { - LOG.warn("Unable to create operation log root directory: " + - operationLogRootDir.getAbsolutePath()); + LOG.warn("Unable to create operation log root directory: {}", + MDC.of(LogKeys.PATH$.MODULE$, operationLogRootDir.getAbsolutePath())); } } if (!operationLogRootDir.canWrite()) { - LOG.warn("The operation log root directory is not writable: " + - operationLogRootDir.getAbsolutePath()); + LOG.warn("The operation log root directory is not writable: {}", + MDC.of(LogKeys.PATH$.MODULE$, operationLogRootDir.getAbsolutePath())); } sessionLogDir = new File(operationLogRootDir, sessionHandle.getHandleIdentifier().toString()); isOperationLogEnabled = true; if (!sessionLogDir.exists()) { if (!sessionLogDir.mkdir()) { - LOG.warn("Unable to create operation log session directory: " + - sessionLogDir.getAbsolutePath()); + LOG.warn("Unable to create operation log session directory: {}", + MDC.of(LogKeys.PATH$.MODULE$, sessionLogDir.getAbsolutePath())); isOperationLogEnabled = false; } } if (isOperationLogEnabled) { - LOG.info("Operation log session directory is created: " + sessionLogDir.getAbsolutePath()); + LOG.info("Operation log session directory is created: {}", + MDC.of(LogKeys.PATH$.MODULE$, sessionLogDir.getAbsolutePath())); } } @@ -653,7 +659,8 @@ public void close() throws HiveSQLException { try { operationManager.closeOperation(opHandle); } catch (Exception e) { - LOG.warn("Exception is thrown closing operation " + opHandle, e); + LOG.warn("Exception is thrown closing operation {}", e, + MDC.of(LogKeys.OPERATION_HANDLE$.MODULE$, opHandle)); } } opHandleSet.clear(); @@ -693,13 +700,14 @@ private void cleanupPipeoutFile() { (dir, name) -> name.startsWith(sessionID) && name.endsWith(".pipeout")); if (fileAry == null) { - LOG.error("Unable to access pipeout files in " + lScratchDir); + LOG.error("Unable to access pipeout files in {}", + MDC.of(LogKeys.LOCAL_SCRATCH_DIR$.MODULE$, lScratchDir)); } else { for (File file : fileAry) { try { FileUtils.forceDelete(file); } catch (Exception e) { - LOG.error("Failed to cleanup pipeout file: " + file, e); + LOG.error("Failed to cleanup pipeout file: {}", e, MDC.of(LogKeys.PATH$.MODULE$, file)); } } } @@ -710,7 +718,8 @@ private void cleanupSessionLogDir() { try { FileUtils.forceDelete(sessionLogDir); } catch (Exception e) { - LOG.error("Failed to cleanup session log dir: " + sessionHandle, e); + LOG.error("Failed to cleanup 
session log dir: {}", e, + MDC.of(LogKeys.SESSION_HANDLE$.MODULE$, sessionHandle)); } } } @@ -759,7 +768,8 @@ private void closeTimedOutOperations(List operations) { try { operation.close(); } catch (Exception e) { - LOG.warn("Exception is thrown closing timed-out operation " + operation.getHandle(), e); + LOG.warn("Exception is thrown closing timed-out operation {}", e, + MDC.of(LogKeys.OPERATION_HANDLE$.MODULE$, operation.getHandle())); } } } finally { diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java index 514b19eb7111a..0ec13424fd0f5 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/HiveSessionImplwithUGI.java @@ -28,8 +28,6 @@ import org.apache.hive.service.auth.HiveAuthFactory; import org.apache.hive.service.cli.HiveSQLException; import org.apache.hive.service.rpc.thrift.TProtocolVersion; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; /** * @@ -41,23 +39,13 @@ public class HiveSessionImplwithUGI extends HiveSessionImpl { private UserGroupInformation sessionUgi = null; private String delegationTokenStr = null; - private Hive sessionHive = null; private HiveSession proxySession = null; - static final Logger LOG = LoggerFactory.getLogger(HiveSessionImplwithUGI.class); public HiveSessionImplwithUGI(TProtocolVersion protocol, String username, String password, HiveConf hiveConf, String ipAddress, String delegationToken) throws HiveSQLException { super(protocol, username, password, hiveConf, ipAddress); setSessionUGI(username); setDelegationToken(delegationToken); - - // create a new metastore connection for this particular user session - Hive.set(null); - try { - sessionHive = Hive.getWithoutRegisterFns(getHiveConf()); - } catch (HiveException e) { - throw new HiveSQLException("Failed to setup metastore connection", e); - } } // setup appropriate UGI for the session @@ -85,15 +73,6 @@ public String getDelegationToken() { return this.delegationTokenStr; } - @Override - protected synchronized void acquire(boolean userAccess) { - super.acquire(userAccess); - // if we have a metastore connection with impersonation, then set it first - if (sessionHive != null) { - Hive.set(sessionHive); - } - } - /** * Close the file systems for the session and remove it from the FileSystem cache. * Cancel the session's delegation token and close the metastore connection diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java index fa342feacc7f4..3f60fd00b82a7 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/session/SessionManager.java @@ -38,8 +38,11 @@ import org.apache.hive.service.rpc.thrift.TProtocolVersion; import org.apache.hive.service.server.HiveServer2; import org.apache.hive.service.server.ThreadFactoryWithGarbageCleanup; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * SessionManager. 
@@ -47,7 +50,7 @@ */ public class SessionManager extends CompositeService { - private static final Logger LOG = LoggerFactory.getLogger(SessionManager.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(SessionManager.class); public static final String HIVERCFILE = ".hiverc"; private HiveConf hiveConf; private final Map handleToSession = @@ -84,13 +87,15 @@ public synchronized void init(HiveConf hiveConf) { private void createBackgroundOperationPool() { int poolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS); - LOG.info("HiveServer2: Background operation thread pool size: " + poolSize); + LOG.info("HiveServer2: Background operation thread pool size: {}", + MDC.of(LogKeys.THREAD_POOL_SIZE$.MODULE$, poolSize)); int poolQueueSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE); - LOG.info("HiveServer2: Background operation thread wait queue size: " + poolQueueSize); + LOG.info("HiveServer2: Background operation thread wait queue size: {}", + MDC.of(LogKeys.THREAD_POOL_WAIT_QUEUE_SIZE$.MODULE$, poolQueueSize)); long keepAliveTime = HiveConf.getTimeVar( hiveConf, ConfVars.HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME, TimeUnit.SECONDS); - LOG.info( - "HiveServer2: Background operation thread keepalive time: " + keepAliveTime + " seconds"); + LOG.info("HiveServer2: Background operation thread keepalive time: {} ms", + MDC.of(LogKeys.THREAD_POOL_KEEPALIVE_TIME$.MODULE$, keepAliveTime * 1000L)); // Create a thread pool with #poolSize threads // Threads terminate when they are idle for more than the keepAliveTime @@ -115,26 +120,27 @@ private void initOperationLogRootDir() { isOperationLogEnabled = true; if (operationLogRootDir.exists() && !operationLogRootDir.isDirectory()) { - LOG.warn("The operation log root directory exists, but it is not a directory: " + - operationLogRootDir.getAbsolutePath()); + LOG.warn("The operation log root directory exists, but it is not a directory: {}", + MDC.of(LogKeys.PATH$.MODULE$, operationLogRootDir.getAbsolutePath())); isOperationLogEnabled = false; } if (!operationLogRootDir.exists()) { if (!operationLogRootDir.mkdirs()) { - LOG.warn("Unable to create operation log root directory: " + - operationLogRootDir.getAbsolutePath()); + LOG.warn("Unable to create operation log root directory: {}", + MDC.of(LogKeys.PATH$.MODULE$, operationLogRootDir.getAbsolutePath())); isOperationLogEnabled = false; } } if (isOperationLogEnabled) { - LOG.info("Operation log root directory is created: " + operationLogRootDir.getAbsolutePath()); + LOG.info("Operation log root directory is created: {}", + MDC.of(LogKeys.PATH$.MODULE$, operationLogRootDir.getAbsolutePath())); try { FileUtils.forceDeleteOnExit(operationLogRootDir); } catch (IOException e) { - LOG.warn("Failed to schedule cleanup HS2 operation logging root dir: " + - operationLogRootDir.getAbsolutePath(), e); + LOG.warn("Failed to schedule cleanup HS2 operation logging root dir: {}", e, + MDC.of(LogKeys.PATH$.MODULE$, operationLogRootDir.getAbsolutePath())); } } } @@ -164,12 +170,14 @@ public void run() { if (sessionTimeout > 0 && session.getLastAccessTime() + sessionTimeout <= current && (!checkOperation || session.getNoOperationTime() > sessionTimeout)) { SessionHandle handle = session.getSessionHandle(); - LOG.warn("Session " + handle + " is Timed-out (last access : " + - new Date(session.getLastAccessTime()) + ") and will be closed"); + LOG.warn("Session {} is Timed-out (last access : {}) and will be closed", + MDC.of(LogKeys.SESSION_HANDLE$.MODULE$, handle), + 
MDC.of(LogKeys.LAST_ACCESS_TIME$.MODULE$, new Date(session.getLastAccessTime()))); try { closeSession(handle); } catch (HiveSQLException e) { - LOG.warn("Exception is thrown closing session " + handle, e); + LOG.warn("Exception is thrown closing session {}", e, + MDC.of(LogKeys.SESSION_HANDLE$.MODULE$, handle)); } } else { session.closeExpiredOperations(); @@ -210,8 +218,9 @@ public synchronized void stop() { try { backgroundOperationPool.awaitTermination(timeout, TimeUnit.SECONDS); } catch (InterruptedException e) { - LOG.warn("HIVE_SERVER2_ASYNC_EXEC_SHUTDOWN_TIMEOUT = " + timeout + - " seconds has been exceeded. RUNNING background operations will be shut down", e); + LOG.warn("HIVE_SERVER2_ASYNC_EXEC_SHUTDOWN_TIMEOUT = {} ms has been exceeded. " + + "RUNNING background operations will be shut down", e, + MDC.of(LogKeys.TIMEOUT$.MODULE$, timeout * 1000)); } backgroundOperationPool = null; } @@ -223,8 +232,8 @@ private void cleanupLoggingRootDir() { try { FileUtils.forceDelete(operationLogRootDir); } catch (Exception e) { - LOG.warn("Failed to cleanup root dir of HS2 logging: " + operationLogRootDir - .getAbsolutePath(), e); + LOG.warn("Failed to cleanup root dir of HS2 logging: {}", e, + MDC.of(LogKeys.PATH$.MODULE$, operationLogRootDir.getAbsolutePath())); } } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java index 4d99496876fdc..c7fa7b5f3e0ac 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftBinaryCLIService.java @@ -91,16 +91,10 @@ protected void initializeServer() { // Server args int maxMessageSize = hiveConf.getIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE); - int requestTimeout = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT, TimeUnit.SECONDS); - int beBackoffSlotLength = (int) hiveConf.getTimeVar( - HiveConf.ConfVars.HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH, TimeUnit.MILLISECONDS); TThreadPoolServer.Args sargs = new TThreadPoolServer.Args(serverSocket) .processorFactory(processorFactory).transportFactory(transportFactory) .protocolFactory(new TBinaryProtocol.Factory()) .inputProtocolFactory(new TBinaryProtocol.Factory(true, true, maxMessageSize, maxMessageSize)) - .requestTimeout(requestTimeout).requestTimeoutUnit(TimeUnit.SECONDS) - .beBackoffSlotLength(beBackoffSlotLength).beBackoffSlotLengthUnit(TimeUnit.MILLISECONDS) .executorService(executorService); // TCP Server diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java index 4b18e2950a3de..07af0013846ba 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftCLIService.java @@ -41,8 +41,11 @@ import org.apache.thrift.server.ServerContext; import org.apache.thrift.server.TServerEventHandler; import org.apache.thrift.transport.TTransport; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * 
ThriftCLIService. @@ -50,7 +53,7 @@ */ public abstract class ThriftCLIService extends AbstractService implements TCLIService.Iface, Runnable { - public static final Logger LOG = LoggerFactory.getLogger(ThriftCLIService.class.getName()); + public static final SparkLogger LOG = SparkLoggerFactory.getLogger(ThriftCLIService.class); protected CLIService cliService; private static final TStatus OK_STATUS = new TStatus(TStatusCode.SUCCESS_STATUS); @@ -83,6 +86,16 @@ public void setSessionHandle(SessionHandle sessionHandle) { public SessionHandle getSessionHandle() { return sessionHandle; } + + @Override + public T unwrap(Class aClass) { + return null; + } + + @Override + public boolean isWrapperFor(Class aClass) { + return false; + } } public ThriftCLIService(CLIService service, String serviceName) { @@ -106,7 +119,7 @@ public void deleteContext(ServerContext serverContext, try { cliService.closeSession(sessionHandle); } catch (HiveSQLException e) { - LOG.warn("Failed to close session: " + e, e); + LOG.warn("Failed to close session: ", e); } } } @@ -236,7 +249,8 @@ private TStatus notSupportTokenErrorStatus() { @Override public TOpenSessionResp OpenSession(TOpenSessionReq req) throws TException { - LOG.info("Client protocol version: " + req.getClient_protocol()); + LOG.info("Client protocol version: {}", + MDC.of(LogKeys.PROTOCOL_VERSION$.MODULE$, req.getClient_protocol())); TOpenSessionResp resp = new TOpenSessionResp(); try { SessionHandle sessionHandle = getSessionHandle(req, resp); @@ -272,7 +286,7 @@ public TSetClientInfoResp SetClientInfo(TSetClientInfoReq req) throws TException sb.append(e.getKey()).append(" = ").append(e.getValue()); } if (sb != null) { - LOG.info("{}", sb); + LOG.info("{}", MDC.of(LogKeys.SET_CLIENT_INFO_REQUEST$.MODULE$, sb)); } } return new TSetClientInfoResp(OK_STATUS); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java index b0bede741cb19..d9bf361fdef63 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/thrift/ThriftHttpServlet.java @@ -55,8 +55,11 @@ import org.ietf.jgss.GSSManager; import org.ietf.jgss.GSSName; import org.ietf.jgss.Oid; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; /** * @@ -66,7 +69,7 @@ public class ThriftHttpServlet extends TServlet { private static final long serialVersionUID = 1L; - public static final Logger LOG = LoggerFactory.getLogger(ThriftHttpServlet.class.getName()); + public static final SparkLogger LOG = SparkLoggerFactory.getLogger(ThriftHttpServlet.class); private final String authType; private final UserGroupInformation serviceUGI; private final UserGroupInformation httpUGI; @@ -174,7 +177,8 @@ protected void doPost(HttpServletRequest request, HttpServletResponse response) } else { response.addCookie(hs2Cookie); } - LOG.info("Cookie added for clientUserName " + clientUserName); + LOG.info("Cookie added for clientUserName {}", + MDC.of(LogKeys.USER_NAME$.MODULE$, clientUserName)); } super.doPost(request, response); } @@ -228,7 +232,7 @@ private String getClientNameFromCookie(Cookie[] cookies) { String userName = HttpAuthUtils.getUserNameFromCookieToken(currValue); 
if (userName == null) { - LOG.warn("Invalid cookie token " + currValue); + LOG.warn("Invalid cookie token {}", MDC.of(LogKeys.TOKEN$.MODULE$, currValue)); continue; } //We have found a valid cookie in the client request. diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java index ad5ca51b9e63d..46ee775e8dd49 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/HiveServer2.java @@ -36,9 +36,11 @@ import org.apache.hive.service.cli.thrift.ThriftBinaryCLIService; import org.apache.hive.service.cli.thrift.ThriftCLIService; import org.apache.hive.service.cli.thrift.ThriftHttpCLIService; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.internal.LogKeys; +import org.apache.spark.internal.MDC; import org.apache.spark.util.ShutdownHookManager; import org.apache.spark.util.SparkExitCode; @@ -47,7 +49,7 @@ * */ public class HiveServer2 extends CompositeService { - private static final Logger LOG = LoggerFactory.getLogger(HiveServer2.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(HiveServer2.class); private CLIService cliService; private ThriftCLIService thriftCLIService; @@ -142,8 +144,8 @@ private static void startHiveServer2() throws Throwable { if (++attempts >= maxAttempts) { throw new Error("Max start attempts " + maxAttempts + " exhausted", throwable); } else { - LOG.warn("Error starting HiveServer2 on attempt " + attempts - + ", will retry in 60 seconds", throwable); + LOG.warn("Error starting HiveServer2 on attempt {}, will retry in 60 seconds", + throwable, MDC.of(LogKeys.NUM_RETRY$.MODULE$, attempts)); try { Thread.sleep(60L * 1000L); } catch (InterruptedException e) { @@ -159,7 +161,7 @@ public static void main(String[] args) { ServerOptionsProcessor oproc = new ServerOptionsProcessor("hiveserver2"); ServerOptionsProcessorResponse oprocResponse = oproc.parse(args); - HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG); + HiveStringUtils.startupShutdownMessage(HiveServer2.class, args, LOG.getSlf4jLogger()); // Call the executor which will execute the appropriate command based on the parsed options oprocResponse.getServerOptionsExecutor().execute(); diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java index afaa1403bfdcd..16d8540b40560 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/server/ThreadWithGarbageCleanup.java @@ -22,8 +22,9 @@ import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.RawStore; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; + +import org.apache.spark.internal.SparkLogger; +import org.apache.spark.internal.SparkLoggerFactory; /** * A HiveServer2 thread used to construct new server threads. @@ -31,7 +32,7 @@ * when killed by its corresponding ExecutorService. 
*/ public class ThreadWithGarbageCleanup extends Thread { - private static final Logger LOG = LoggerFactory.getLogger(ThreadWithGarbageCleanup.class); + private static final SparkLogger LOG = SparkLoggerFactory.getLogger(ThreadWithGarbageCleanup.class); Map threadRawStoreMap = ThreadFactoryWithGarbageCleanup.getThreadRawStoreMap(); diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala index c0a0f2e42f5f7..66319fff2468a 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/RowSetUtils.scala @@ -25,7 +25,7 @@ import scala.language.implicitConversions import org.apache.hive.service.rpc.thrift._ import org.apache.spark.sql.Row -import org.apache.spark.sql.execution.HiveResult.{toHiveString, TimeFormatters} +import org.apache.spark.sql.execution.HiveResult._ import org.apache.spark.sql.types.{BinaryType, BooleanType, ByteType, DataType, DoubleType, FloatType, IntegerType, LongType, ShortType, StringType} object RowSetUtils { @@ -38,12 +38,11 @@ object RowSetUtils { startRowOffSet: Long, rows: Seq[Row], schema: Array[DataType], - protocolVersion: TProtocolVersion, - timeFormatters: TimeFormatters): TRowSet = { + protocolVersion: TProtocolVersion): TRowSet = { if (protocolVersion.getValue < TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6.getValue) { - toRowBasedSet(startRowOffSet, rows, schema, timeFormatters) + toRowBasedSet(startRowOffSet, rows, schema, getTimeFormatters, getBinaryFormatter) } else { - toColumnBasedSet(startRowOffSet, rows, schema, timeFormatters) + toColumnBasedSet(startRowOffSet, rows, schema, getTimeFormatters, getBinaryFormatter) } } @@ -51,13 +50,14 @@ object RowSetUtils { startRowOffSet: Long, rows: Seq[Row], schema: Array[DataType], - timeFormatters: TimeFormatters): TRowSet = { + timeFormatters: TimeFormatters, + binaryFormatter: BinaryFormatter): TRowSet = { val tRows = rows.map { row => val tRow = new TRow() var j = 0 val columnSize = row.length while (j < columnSize) { - val columnValue = toTColumnValue(j, row, schema(j), timeFormatters) + val columnValue = toTColumnValue(j, row, schema(j), timeFormatters, binaryFormatter) tRow.addToColVals(columnValue) j += 1 } @@ -70,13 +70,14 @@ object RowSetUtils { startRowOffSet: Long, rows: Seq[Row], schema: Array[DataType], - timeFormatters: TimeFormatters): TRowSet = { + timeFormatters: TimeFormatters, + binaryFormatter: BinaryFormatter): TRowSet = { val rowSize = rows.length val tRowSet = new TRowSet(startRowOffSet, new java.util.ArrayList[TRow](rowSize)) var i = 0 val columnSize = schema.length while (i < columnSize) { - val tColumn = toTColumn(rows, i, schema(i), timeFormatters) + val tColumn = toTColumn(rows, i, schema(i), timeFormatters, binaryFormatter) tRowSet.addToColumns(tColumn) i += 1 } @@ -84,7 +85,11 @@ object RowSetUtils { } private def toTColumn( - rows: Seq[Row], ordinal: Int, typ: DataType, timeFormatters: TimeFormatters): TColumn = { + rows: Seq[Row], + ordinal: Int, + typ: DataType, + timeFormatters: TimeFormatters, + binaryFormatter: BinaryFormatter): TColumn = { val nulls = new java.util.BitSet() typ match { case BooleanType => @@ -137,7 +142,7 @@ object RowSetUtils { val value = if (row.isNullAt(ordinal)) { "" } else { - toHiveString((row.get(ordinal), typ), nested = true, timeFormatters) + toHiveString((row.get(ordinal), typ), 
nested = true, timeFormatters, binaryFormatter) } values.add(value) i += 1 @@ -170,7 +175,8 @@ object RowSetUtils { ordinal: Int, row: Row, dataType: DataType, - timeFormatters: TimeFormatters): TColumnValue = { + timeFormatters: TimeFormatters, + binaryFormatter: BinaryFormatter): TColumnValue = { dataType match { case BooleanType => val boolValue = new TBoolValue @@ -226,7 +232,8 @@ object RowSetUtils { case _ => val tStrValue = new TStringValue if (!row.isNullAt(ordinal)) { - val value = toHiveString((row.get(ordinal), dataType), nested = false, timeFormatters) + val value = toHiveString( + (row.get(ordinal), dataType), nested = false, timeFormatters, binaryFormatter) tStrValue.setValue(value) } TColumnValue.stringVal(tStrValue) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index f8f58cd422b67..51a5e88aa633e 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -30,12 +30,11 @@ import org.apache.hive.service.cli.operation.ExecuteStatementOperation import org.apache.hive.service.cli.session.HiveSession import org.apache.hive.service.rpc.thrift.{TCLIServiceConstants, TColumnDesc, TPrimitiveTypeEntry, TRowSet, TTableSchema, TTypeDesc, TTypeEntry, TTypeId, TTypeQualifiers, TTypeQualifierValue} -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{HIVE_OPERATION_STATE, STATEMENT_ID, TIMEOUT, USER_NAME} +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.{DataFrame, Row, SQLContext} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND -import org.apache.spark.sql.execution.HiveResult.getTimeFormatters import org.apache.spark.sql.internal.{SQLConf, VariableSubstitution} import org.apache.spark.sql.types._ import org.apache.spark.util.{Utils => SparkUtils} @@ -84,7 +83,7 @@ private[hive] class SparkExecuteStatementOperation( val sparkType = new StructType().add("Result", "string") SparkExecuteStatementOperation.toTTableSchema(sparkType) } else { - logInfo(s"Result Schema: ${result.schema.sql}") + logInfo(log"Result Schema: ${MDC(LogKeys.SCHEMA, result.schema.sql)}") SparkExecuteStatementOperation.toTTableSchema(result.schema) } } @@ -119,14 +118,16 @@ private[hive] class SparkExecuteStatementOperation( val rows = iter.take(maxRows).toList log.debug(s"Returning result set with ${rows.length} rows from offsets " + s"[${iter.getFetchStart}, ${iter.getPosition}) with $statementId") - RowSetUtils.toTRowSet(offset, rows, dataTypes, getProtocolVersion, getTimeFormatters) + RowSetUtils.toTRowSet(offset, rows, dataTypes, getProtocolVersion) } def getResultSetSchema: TTableSchema = resultSchema override def runInternal(): Unit = { setState(OperationState.PENDING) - logInfo(s"Submitting query '$redactedStatement' with $statementId") + logInfo( + log"Submitting query '${MDC(LogKeys.REDACTED_STATEMENT, redactedStatement)}' with " + + log"${MDC(LogKeys.STATEMENT_ID, statementId)}") HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, @@ -213,10 +214,12 @@ private[hive] 
class SparkExecuteStatementOperation( try { synchronized { if (getStatus.getState.isTerminal) { - logInfo(s"Query with $statementId in terminal state before it started running") + logInfo( + log"Query with ${MDC(LogKeys.STATEMENT_ID, statementId)} in terminal state " + + log"before it started running") return } else { - logInfo(s"Running query with $statementId") + logInfo(log"Running query with ${MDC(STATEMENT_ID, statementId)}") setState(OperationState.RUNNING) } } @@ -285,7 +288,9 @@ private[hive] class SparkExecuteStatementOperation( def timeoutCancel(): Unit = { synchronized { if (!getStatus.getState.isTerminal) { - logInfo(s"Query with $statementId timed out after $timeout seconds") + logInfo( + log"Query with ${MDC(LogKeys.STATEMENT_ID, statementId)} timed out " + + log"after ${MDC(LogKeys.TIMEOUT, timeout)} seconds") setState(OperationState.TIMEDOUT) cleanup() HiveThriftServer2.eventManager.onStatementTimeout(statementId) @@ -296,7 +301,7 @@ private[hive] class SparkExecuteStatementOperation( override def cancel(): Unit = { synchronized { if (!getStatus.getState.isTerminal) { - logInfo(s"Cancel query with $statementId") + logInfo(log"Cancel query with ${MDC(STATEMENT_ID, statementId)}") setState(OperationState.CANCELED) cleanup() HiveThriftServer2.eventManager.onStatementCanceled(statementId) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala index 01ef78cde8956..fd99a5b246d9b 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetCatalogsOperation.scala @@ -22,7 +22,8 @@ import org.apache.hive.service.cli.OperationState import org.apache.hive.service.cli.operation.GetCatalogsOperation import org.apache.hive.service.cli.session.HiveSession -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SQLContext /** @@ -40,7 +41,7 @@ private[hive] class SparkGetCatalogsOperation( override def runInternal(): Unit = { val logMsg = "Listing catalogs" - logInfo(s"$logMsg with $statementId") + logInfo(log"Listing catalogs with ${MDC(STATEMENT_ID, statementId)}") setState(OperationState.RUNNING) // Always use the latest class loader provided by executionHive's state. 
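Not part of the diff: a minimal sketch of the structured-logging pattern these Scala hunks migrate to, assuming Spark's internal `Logging`, `LogKeys`, and `MDC` are on the classpath; the class below is hypothetical. The Java hunks earlier in the diff use the Java-facing equivalent, `SparkLoggerFactory.getLogger(...)` together with `MDC.of(LogKeys.X$.MODULE$, ...)`.

```scala
import org.apache.spark.internal.{Logging, LogKeys, MDC}

// Hypothetical class, used only to illustrate the log"..." interpolator seen above.
class ExampleOperation(statementId: String) extends Logging {
  def run(): Unit = {
    // s"..." interpolation becomes log"...": each MDC(LogKeys.X, value) attaches a
    // structured key/value pair to the emitted record in addition to the message text.
    logInfo(log"Running query with ${MDC(LogKeys.STATEMENT_ID, statementId)}")
  }
}
```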
val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala index 5dd8caf3f221d..507dfc2ec50eb 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetColumnsOperation.scala @@ -27,7 +27,8 @@ import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetColumnsOperation import org.apache.hive.service.cli.session.HiveSession -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.SessionCatalog @@ -60,7 +61,14 @@ private[hive] class SparkGetColumnsOperation( // Do not change cmdStr. It's used for Hive auditing and authorization. val cmdStr = s"catalog : $catalogName, schemaPattern : $schemaName, tablePattern : $tableName" val logMsg = s"Listing columns '$cmdStr, columnName : $columnName'" - logInfo(s"$logMsg with $statementId") + + val catalogNameStr = if (catalogName == null) "null" else catalogName + val schemaNameStr = if (schemaName == null) "null" else schemaName + logInfo(log"Listing columns 'catalog : ${MDC(CATALOG_NAME, catalogNameStr)}, " + + log"schemaPattern : ${MDC(DATABASE_NAME, schemaNameStr)}, " + + log"tablePattern : ${MDC(TABLE_NAME, tableName)}, " + + log"columnName : ${MDC(COLUMN_NAME, columnName)}' " + + log"with ${MDC(STATEMENT_ID, statementId)}") setState(OperationState.RUNNING) // Always use the latest class loader provided by executionHive's state. @@ -101,7 +109,7 @@ private[hive] class SparkGetColumnsOperation( } // Global temporary views - val globalTempViewDb = catalog.globalTempViewManager.database + val globalTempViewDb = catalog.globalTempDatabase val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName)) if (databasePattern.matcher(globalTempViewDb).matches()) { catalog.globalTempViewManager.listViewNames(tablePattern).foreach { globalTempView => diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala index 53a94a128c0ef..b060bf3d4ec8d 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetFunctionsOperation.scala @@ -27,7 +27,7 @@ import org.apache.hive.service.cli.operation.GetFunctionsOperation import org.apache.hive.service.cli.operation.MetadataOperation.DEFAULT_HIVE_CATALOG import org.apache.hive.service.cli.session.HiveSession -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.sql.SQLContext /** @@ -51,9 +51,11 @@ private[hive] class SparkGetFunctionsOperation( override def runInternal(): Unit = { // Do not change cmdStr. It's used for Hive auditing and authorization. 
- val cmdStr = s"catalog : $catalogName, schemaPattern : $schemaName" - val logMsg = s"Listing functions '$cmdStr, functionName : $functionName'" - logInfo(s"$logMsg with $statementId") + val cmdMDC = log"catalog : ${MDC(LogKeys.CATALOG_NAME, catalogName)}, " + + log"schemaPattern : ${MDC(LogKeys.DATABASE_NAME, schemaName)}" + val logMDC = log"Listing functions '" + cmdMDC + + log", functionName : ${MDC(LogKeys.FUNCTION_NAME, functionName)}'" + logInfo(logMDC + log" with ${MDC(LogKeys.STATEMENT_ID, statementId)}") setState(OperationState.RUNNING) // Always use the latest class loader provided by executionHive's state. val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader @@ -69,13 +71,13 @@ private[hive] class SparkGetFunctionsOperation( // authorize this call on the schema objects val privObjs = HivePrivilegeObjectUtils.getHivePrivDbObjects(matchingDbs.asJava) - authorizeMetaGets(HiveOperationType.GET_FUNCTIONS, privObjs, cmdStr) + authorizeMetaGets(HiveOperationType.GET_FUNCTIONS, privObjs, cmdMDC.message) } HiveThriftServer2.eventManager.onStatementStart( statementId, parentSession.getSessionHandle.getSessionId.toString, - logMsg, + logMDC.message, statementId, parentSession.getUsername) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala index 45cfa86ba9343..db1cf201b2e92 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetSchemasOperation.scala @@ -25,7 +25,8 @@ import org.apache.hive.service.cli.operation.GetSchemasOperation import org.apache.hive.service.cli.operation.MetadataOperation.DEFAULT_HIVE_CATALOG import org.apache.hive.service.cli.session.HiveSession -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SQLContext /** @@ -49,7 +50,13 @@ private[hive] class SparkGetSchemasOperation( // Do not change cmdStr. It's used for Hive auditing and authorization. val cmdStr = s"catalog : $catalogName, schemaPattern : $schemaName" val logMsg = s"Listing databases '$cmdStr'" - logInfo(s"$logMsg with $statementId") + + val catalogNameStr = if (catalogName == null) "null" else catalogName + val schemaNameStr = if (schemaName == null) "null" else schemaName + logInfo(log"Listing databases 'catalog : ${MDC(CATALOG_NAME, catalogNameStr)}, " + + log"schemaPattern : ${MDC(DATABASE_NAME, schemaNameStr)}' " + + log"with ${MDC(STATEMENT_ID, statementId)}") + setState(OperationState.RUNNING) // Always use the latest class loader provided by executionHive's state. 
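Also not part of the diff: a sketch of the `MessageWithContext` composition used in the SparkGetFunctionsOperation hunk above, under the same assumptions and with hypothetical names. Concatenated `log"..."` fragments keep their MDC pairs, and `.message` recovers the plain rendered string for callers such as the Hive authorization helpers that still take a `String`.

```scala
import org.apache.spark.internal.{Logging, LogKeys, MDC}

class ExampleMetadataOperation(catalogName: String, schemaName: String) extends Logging {
  def describe(): String = {
    // log"..." fragments concatenate into one MessageWithContext, preserving every MDC.
    val cmdMDC = log"catalog : ${MDC(LogKeys.CATALOG_NAME, catalogName)}, " +
      log"schemaPattern : ${MDC(LogKeys.DATABASE_NAME, schemaName)}"
    logInfo(cmdMDC)
    // .message drops the structured context and returns the rendered text only.
    cmdMDC.message
  }
}
```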
val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader @@ -72,7 +79,7 @@ private[hive] class SparkGetSchemasOperation( rowSet.addRow(Array[AnyRef](dbName, DEFAULT_HIVE_CATALOG)) } - val globalTempViewDb = sqlContext.sessionState.catalog.globalTempViewManager.database + val globalTempViewDb = sqlContext.sessionState.catalog.globalTempDatabase val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName)) if (schemaName == null || schemaName.isEmpty || databasePattern.matcher(globalTempViewDb).matches()) { diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala index 9e31b8baad78e..a0c6cd1dcd92f 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTableTypesOperation.scala @@ -24,7 +24,8 @@ import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetTableTypesOperation import org.apache.hive.service.cli.session.HiveSession -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog.CatalogTableType @@ -44,7 +45,7 @@ private[hive] class SparkGetTableTypesOperation( override def runInternal(): Unit = { statementId = UUID.randomUUID().toString val logMsg = "Listing table types" - logInfo(s"$logMsg with $statementId") + logInfo(log"Listing table types with ${MDC(STATEMENT_ID, statementId)}") setState(OperationState.RUNNING) // Always use the latest class loader provided by executionHive's state. 
val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala index 38aaed0be2148..9d90878050678 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTablesOperation.scala @@ -27,7 +27,8 @@ import org.apache.hive.service.cli._ import org.apache.hive.service.cli.operation.GetTablesOperation import org.apache.hive.service.cli.session.HiveSession -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.catalog.CatalogTableType._ @@ -57,7 +58,14 @@ private[hive] class SparkGetTablesOperation( val cmdStr = s"catalog : $catalogName, schemaPattern : $schemaName" val tableTypesStr = if (tableTypes == null) "null" else tableTypes.asScala.mkString(",") val logMsg = s"Listing tables '$cmdStr, tableTypes : $tableTypesStr, tableName : $tableName'" - logInfo(s"$logMsg with $statementId") + + val catalogNameStr = if (catalogName == null) "null" else catalogName + val schemaNameStr = if (schemaName == null) "null" else schemaName + logInfo(log"Listing tables 'catalog: ${MDC(CATALOG_NAME, catalogNameStr)}, " + + log"schemaPattern: ${MDC(DATABASE_NAME, schemaNameStr)}, " + + log"tableTypes: ${MDC(TABLE_TYPES, tableTypesStr)}, " + + log"tableName: ${MDC(TABLE_NAME, tableName)}' " + + log"with ${MDC(STATEMENT_ID, statementId)}") setState(OperationState.RUNNING) // Always use the latest class loader provided by executionHive's state. 
val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader @@ -95,7 +103,7 @@ private[hive] class SparkGetTablesOperation( // Temporary views and global temporary views if (tableTypes == null || tableTypes.isEmpty || tableTypes.contains(VIEW.name)) { - val globalTempViewDb = catalog.globalTempViewManager.database + val globalTempViewDb = catalog.globalTempDatabase val databasePattern = Pattern.compile(CLIServiceUtils.patternToRegex(schemaName)) val tempViews = if (databasePattern.matcher(globalTempViewDb).matches()) { catalog.listTables(globalTempViewDb, tablePattern, includeLocalTempViews = true) diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala index cecb0dec72c80..9ae62ed2fed74 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkGetTypeInfoOperation.scala @@ -26,7 +26,8 @@ import org.apache.hive.service.cli.OperationState import org.apache.hive.service.cli.operation.GetTypeInfoOperation import org.apache.hive.service.cli.session.HiveSession -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.SQLContext /** @@ -45,7 +46,7 @@ private[hive] class SparkGetTypeInfoOperation( override def runInternal(): Unit = { statementId = UUID.randomUUID().toString val logMsg = "Listing type info" - logInfo(s"$logMsg with $statementId") + logInfo(log"Listing type info with ${MDC(STATEMENT_ID, statementId)}") setState(OperationState.RUNNING) // Always use the latest class loader provided by executionHive's state. 
val executionHiveClassLoader = sqlContext.sharedState.jarClassLoader diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala index d5874fe776655..11e4817fe2a4c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkOperation.scala @@ -22,7 +22,7 @@ import org.apache.hive.service.cli.operation.Operation import org.apache.spark.SparkContext import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{HIVE_OPERATION_TYPE, STATEMENT_ID} +import org.apache.spark.internal.LogKeys.{HIVE_OPERATION_TYPE, STATEMENT_ID} import org.apache.spark.sql.{SparkSession, SQLContext} import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.catalog.CatalogTableType @@ -50,7 +50,7 @@ private[hive] trait SparkOperation extends Operation with Logging { abstract override def close(): Unit = { super.close() cleanup() - logInfo(s"Close statement with $statementId") + logInfo(log"Close statement with ${MDC(STATEMENT_ID, statementId)}") HiveThriftServer2.eventManager.onOperationClosed(statementId) } diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 888c086e90422..e64e1c283e27c 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -41,7 +41,7 @@ import sun.misc.{Signal, SignalHandler} import org.apache.spark.{ErrorMessageFormat, SparkConf, SparkThrowable, SparkThrowableHelper} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.util.SQLKeywordUtils diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala index 7262bc22dc429..46537f75f1a11 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIService.scala @@ -33,8 +33,8 @@ import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.server.HiveServer2 -import org.slf4j.Logger +import org.apache.spark.internal.SparkLogger import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.util.SQLKeywordUtils import org.apache.spark.sql.errors.QueryExecutionErrors @@ -113,10 +113,10 @@ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLC private[thriftserver] trait ReflectedCompositeService { this: AbstractService => - private val logInfo = (msg: String) => getAncestorField[Logger](this, 3, "LOG").info(msg) + private val logInfo = (msg: String) => 
getAncestorField[SparkLogger](this, 3, "LOG").info(msg) private val logError = (msg: String, e: Throwable) => - getAncestorField[Logger](this, 3, "LOG").error(msg, e) + getAncestorField[SparkLogger](this, 3, "LOG").error(msg, e) def initCompositeService(hiveConf: HiveConf): Unit = { // Emulating `CompositeService.init(hiveConf)` diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala index 29e468aaa9fe6..a9c5d3e250797 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLDriver.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse import org.apache.spark.SparkThrowable import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.COMMAND +import org.apache.spark.internal.LogKeys.COMMAND import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.plans.logical.CommandResult import org.apache.spark.sql.execution.{QueryExecution, SQLExecution} diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala index 8b7e9b00cb52b..8d03d5f848b76 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2Listener.scala @@ -26,7 +26,7 @@ import org.apache.hive.service.server.HiveServer2 import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.internal.config.Status.LIVE_ENTITY_UPDATE_PERIOD import org.apache.spark.scheduler._ import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.ExecutionState diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala index 3f9eb02d39b01..026b2388c593c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerQueryTestSuite.scala @@ -30,7 +30,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.SQLQueryTestSuite import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.util.fileToString -import org.apache.spark.sql.execution.HiveResult.{getTimeFormatters, toHiveString, TimeFormatters} +import org.apache.spark.sql.execution.HiveResult.{getBinaryFormatter, getTimeFormatters, toHiveString, BinaryFormatter, TimeFormatters} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.TimestampTypes import org.apache.spark.sql.types._ @@ -131,13 +131,15 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ } // Run the SQL queries preparing them for comparison. 
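Not part of the diff: a rough sketch of how the new `BinaryFormatter` is threaded through `HiveResult.toHiveString` alongside `TimeFormatters`, mirroring the RowSetUtils and test-suite hunks; it assumes this branch's `HiveResult` API and uses hypothetical sample values.

```scala
import java.nio.charset.StandardCharsets
import java.sql.Timestamp

import org.apache.spark.sql.execution.HiveResult.{getBinaryFormatter, getTimeFormatters, toHiveString}
import org.apache.spark.sql.types.{BinaryType, TimestampType}

object FormatterSketch {
  def main(args: Array[String]): Unit = {
    // Formatters depend on the active SQL conf, so they are resolved once and reused per value.
    val timeFormatters = getTimeFormatters
    val binaryFormatter = getBinaryFormatter

    val ts = toHiveString((Timestamp.valueOf("2024-01-01 00:00:00"), TimestampType),
      false, timeFormatters, binaryFormatter)
    val bin = toHiveString(("spark".getBytes(StandardCharsets.UTF_8), BinaryType),
      false, timeFormatters, binaryFormatter)
    println(s"$ts\t$bin")
  }
}
```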
- val outputs: Seq[QueryTestOutput] = queries.map { sql => - val (_, output) = handleExceptions(getNormalizedResult(statement, sql)) - // We might need to do some query canonicalization in the future. - ExecutionOutput( - sql = sql, - schema = Some(""), - output = output.mkString("\n").replaceAll("\\s+$", "")) + val outputs: Seq[QueryTestOutput] = withSQLConf(configSet: _*) { + queries.map { sql => + val (_, output) = handleExceptions(getNormalizedResult(statement, sql)) + // We might need to do some query canonicalization in the future. + ExecutionOutput( + sql = sql, + schema = Some(""), + output = output.mkString("\n").replaceAll("\\s+$", "")) + } } // Read back the golden file. @@ -298,8 +300,9 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ val rs = statement.executeQuery(sql) val cols = rs.getMetaData.getColumnCount val timeFormatters = getTimeFormatters + val binaryFormatter = getBinaryFormatter val buildStr = () => (for (i <- 1 to cols) yield { - getHiveResult(rs.getObject(i), timeFormatters) + getHiveResult(rs.getObject(i), timeFormatters, binaryFormatter) }).mkString("\t") val answer = Iterator.continually(rs.next()).takeWhile(identity).map(_ => buildStr()).toSeq @@ -321,18 +324,20 @@ class ThriftServerQueryTestSuite extends SQLQueryTestSuite with SharedThriftServ upperCase.startsWith("(") } - private def getHiveResult(obj: Object, timeFormatters: TimeFormatters): String = { + private def getHiveResult( + obj: Object, timeFormatters: TimeFormatters, binaryFormatter: BinaryFormatter): String = { obj match { case null => - toHiveString((null, StringType), false, timeFormatters) + toHiveString((null, StringType), false, timeFormatters, binaryFormatter) case d: java.sql.Date => - toHiveString((d, DateType), false, timeFormatters) + toHiveString((d, DateType), false, timeFormatters, binaryFormatter) case t: Timestamp => - toHiveString((t, TimestampType), false, timeFormatters) + toHiveString((t, TimestampType), false, timeFormatters, binaryFormatter) case d: java.math.BigDecimal => - toHiveString((d, DecimalType.fromDecimal(Decimal(d))), false, timeFormatters) + toHiveString(( + d, DecimalType.fromDecimal(Decimal(d))), false, timeFormatters, binaryFormatter) case bin: Array[Byte] => - toHiveString((bin, BinaryType), false, timeFormatters) + toHiveString((bin, BinaryType), false, timeFormatters, binaryFormatter) case other => other.toString } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index 51123b17eeec1..e757487915bbf 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -35,7 +35,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { test("SPARK-29911: Uncache cached tables when session closed") { val cacheManager = spark.sharedState.cacheManager - val globalTempDB = spark.sharedState.globalTempViewManager.database + val globalTempDB = spark.sharedState.globalTempDB withJdbcStatement() { statement => statement.execute("CACHE TABLE tempTbl AS SELECT 1") } @@ -214,7 +214,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { val sessionHandle = client.openSession(user, "") val infoValue = 
client.getInfo(sessionHandle, GetInfoType.CLI_ODBC_KEYWORDS) // scalastyle:off line.size.limit - assert(infoValue.getStringValue == "ADD,AFTER,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BETWEEN,BIGINT,BINARY,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPUTE,CONCATENATE,CONSTRAINT,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DELETE,DELIMITED,DESC,DESCRIBE,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DOUBLE,DROP,ELSE,END,ESCAPE,ESCAPED,EXCEPT,EXCHANGE,EXCLUDE,EXECUTE,EXISTS,EXPLAIN,EXPORT,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GLOBAL,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INDEX,INDEXES,INNER,INPATH,INPUTFORMAT,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,IS,ITEMS,JOIN,KEYS,LAST,LATERAL,LAZY,LEADING,LEFT,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,MACRO,MAP,MATCHED,MERGE,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NO,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PERCENTILE_CONT,PERCENTILE_DISC,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,REAL,RECORDREADER,RECORDWRITER,RECOVER,REDUCE,REFERENCES,REFRESH,RENAME,REPAIR,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,START,STATISTICS,STORED,STRATIFY,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UPDATE,USE,USER,USING,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WEEK,WEEKS,WHEN,WHERE,WINDOW,WITH,WITHIN,X,YEAR,YEARS,ZONE") + assert(infoValue.getStringValue == 
"ADD,AFTER,ALL,ALTER,ALWAYS,ANALYZE,AND,ANTI,ANY,ANY_VALUE,ARCHIVE,ARRAY,AS,ASC,AT,AUTHORIZATION,BEGIN,BETWEEN,BIGINT,BINARY,BINDING,BOOLEAN,BOTH,BUCKET,BUCKETS,BY,BYTE,CACHE,CALLED,CASCADE,CASE,CAST,CATALOG,CATALOGS,CHANGE,CHAR,CHARACTER,CHECK,CLEAR,CLUSTER,CLUSTERED,CODEGEN,COLLATE,COLLATION,COLLECTION,COLUMN,COLUMNS,COMMENT,COMMIT,COMPACT,COMPACTIONS,COMPENSATION,COMPUTE,CONCATENATE,CONSTRAINT,CONTAINS,COST,CREATE,CROSS,CUBE,CURRENT,CURRENT_DATE,CURRENT_TIME,CURRENT_TIMESTAMP,CURRENT_USER,DATA,DATABASE,DATABASES,DATE,DATEADD,DATEDIFF,DATE_ADD,DATE_DIFF,DAY,DAYOFYEAR,DAYS,DBPROPERTIES,DEC,DECIMAL,DECLARE,DEFAULT,DEFINED,DEFINER,DELETE,DELIMITED,DESC,DESCRIBE,DETERMINISTIC,DFS,DIRECTORIES,DIRECTORY,DISTINCT,DISTRIBUTE,DIV,DOUBLE,DROP,ELSE,END,ESCAPE,ESCAPED,EVOLUTION,EXCEPT,EXCHANGE,EXCLUDE,EXECUTE,EXISTS,EXPLAIN,EXPORT,EXTENDED,EXTERNAL,EXTRACT,FALSE,FETCH,FIELDS,FILEFORMAT,FILTER,FIRST,FLOAT,FOLLOWING,FOR,FOREIGN,FORMAT,FORMATTED,FROM,FULL,FUNCTION,FUNCTIONS,GENERATED,GLOBAL,GRANT,GROUP,GROUPING,HAVING,HOUR,HOURS,IDENTIFIER,IF,IGNORE,ILIKE,IMMEDIATE,IMPORT,IN,INCLUDE,INDEX,INDEXES,INNER,INPATH,INPUT,INPUTFORMAT,INSERT,INT,INTEGER,INTERSECT,INTERVAL,INTO,INVOKER,IS,ITEMS,JOIN,KEYS,LANGUAGE,LAST,LATERAL,LAZY,LEADING,LEFT,LIKE,LIMIT,LINES,LIST,LOAD,LOCAL,LOCATION,LOCK,LOCKS,LOGICAL,LONG,MACRO,MAP,MATCHED,MERGE,MICROSECOND,MICROSECONDS,MILLISECOND,MILLISECONDS,MINUS,MINUTE,MINUTES,MODIFIES,MONTH,MONTHS,MSCK,NAME,NAMESPACE,NAMESPACES,NANOSECOND,NANOSECONDS,NATURAL,NO,NOT,NULL,NULLS,NUMERIC,OF,OFFSET,ON,ONLY,OPTION,OPTIONS,OR,ORDER,OUT,OUTER,OUTPUTFORMAT,OVER,OVERLAPS,OVERLAY,OVERWRITE,PARTITION,PARTITIONED,PARTITIONS,PERCENT,PIVOT,PLACING,POSITION,PRECEDING,PRIMARY,PRINCIPALS,PROPERTIES,PURGE,QUARTER,QUERY,RANGE,READS,REAL,RECORDREADER,RECORDWRITER,RECOVER,REDUCE,REFERENCES,REFRESH,RENAME,REPAIR,REPEATABLE,REPLACE,RESET,RESPECT,RESTRICT,RETURN,RETURNS,REVOKE,RIGHT,ROLE,ROLES,ROLLBACK,ROLLUP,ROW,ROWS,SCHEMA,SCHEMAS,SECOND,SECONDS,SECURITY,SELECT,SEMI,SEPARATED,SERDE,SERDEPROPERTIES,SESSION_USER,SET,SETS,SHORT,SHOW,SINGLE,SKEWED,SMALLINT,SOME,SORT,SORTED,SOURCE,SPECIFIC,SQL,START,STATISTICS,STORED,STRATIFY,STRING,STRUCT,SUBSTR,SUBSTRING,SYNC,SYSTEM_TIME,SYSTEM_VERSION,TABLE,TABLES,TABLESAMPLE,TARGET,TBLPROPERTIES,TERMINATED,THEN,TIME,TIMEDIFF,TIMESTAMP,TIMESTAMPADD,TIMESTAMPDIFF,TIMESTAMP_LTZ,TIMESTAMP_NTZ,TINYINT,TO,TOUCH,TRAILING,TRANSACTION,TRANSACTIONS,TRANSFORM,TRIM,TRUE,TRUNCATE,TRY_CAST,TYPE,UNARCHIVE,UNBOUNDED,UNCACHE,UNION,UNIQUE,UNKNOWN,UNLOCK,UNPIVOT,UNSET,UPDATE,USE,USER,USING,VALUES,VAR,VARCHAR,VARIABLE,VARIANT,VERSION,VIEW,VIEWS,VOID,WEEK,WEEKS,WHEN,WHERE,WINDOW,WITH,WITHIN,X,YEAR,YEARS,ZONE") // scalastyle:on line.size.limit } } diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala index b552611b75d17..2b2cbec41d643 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala @@ -108,7 +108,7 @@ class UISeleniumSuite val baseURL = s"http://$localhost:$uiPort" val queries = Seq( - "CREATE TABLE test_map(key INT, value STRING)", + "CREATE TABLE test_map (key INT, value STRING) USING HIVE", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map") queries.foreach(statement.execute) diff --git 
a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt index cedff45f362d5..1e39455f294ab 100644 --- a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt +++ b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-hive2.3-results.txt @@ -1,11 +1,11 @@ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor insert hive table benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -INSERT INTO DYNAMIC 3629 3775 207 0.0 354389.9 1.0X -INSERT INTO HYBRID 500 542 30 0.0 48850.2 7.3X -INSERT INTO STATIC 150 163 10 0.1 14611.0 24.3X -INSERT OVERWRITE DYNAMIC 3071 3249 252 0.0 299902.1 1.2X -INSERT OVERWRITE HYBRID 447 454 9 0.0 43634.8 8.1X -INSERT OVERWRITE STATIC 169 177 5 0.1 16521.3 21.5X +INSERT INTO DYNAMIC 3930 4221 411 0.0 383795.5 1.0X +INSERT INTO HYBRID 588 621 32 0.0 57447.2 6.7X +INSERT INTO STATIC 188 214 13 0.1 18338.3 20.9X +INSERT OVERWRITE DYNAMIC 3723 3853 183 0.0 363603.4 1.1X +INSERT OVERWRITE HYBRID 513 527 17 0.0 50096.3 7.7X +INSERT OVERWRITE STATIC 191 221 14 0.1 18612.1 20.6X diff --git a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-jdk21-hive2.3-results.txt b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-jdk21-hive2.3-results.txt index 50b39ecf2fa87..6d925278cc405 100644 --- a/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-jdk21-hive2.3-results.txt +++ b/sql/hive/benchmarks/InsertIntoHiveTableBenchmark-jdk21-hive2.3-results.txt @@ -1,11 +1,11 @@ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor insert hive table benchmark: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -INSERT INTO DYNAMIC 3478 3720 342 0.0 339620.8 1.0X -INSERT INTO HYBRID 511 544 35 0.0 49878.9 6.8X -INSERT INTO STATIC 151 167 14 0.1 14758.4 23.0X -INSERT OVERWRITE DYNAMIC 3152 3338 263 0.0 307817.5 1.1X -INSERT OVERWRITE HYBRID 433 454 24 0.0 42330.3 8.0X -INSERT OVERWRITE STATIC 167 178 18 0.1 16325.7 20.8X +INSERT INTO DYNAMIC 3770 4154 543 0.0 368168.8 1.0X +INSERT INTO HYBRID 511 568 55 0.0 49868.7 7.4X +INSERT INTO STATIC 163 189 23 0.1 15947.9 23.1X +INSERT OVERWRITE DYNAMIC 3813 4094 397 0.0 372395.4 1.0X +INSERT OVERWRITE HYBRID 489 516 22 0.0 47714.3 7.7X +INSERT OVERWRITE STATIC 182 202 29 0.1 17768.5 20.7X diff --git a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk21-results.txt b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk21-results.txt index 0126dc82c0137..26d129b66e825 100644 --- a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk21-results.txt +++ b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-jdk21-results.txt @@ -2,44 +2,44 @@ Hive UDAF vs Spark AF ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor hive udaf vs spark af: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -hive udaf w/o group by 3122 3229 75 0.0 47645.3 1.0X -spark af w/o group by 19 24 4 3.4 297.4 160.2X -hive udaf w/ group by 1981 2031 38 0.0 30230.8 1.6X -spark af w/ group by w/o fallback 20 23 3 3.3 302.9 157.3X -spark af w/ group by w/ fallback 24 28 6 2.7 364.6 130.7X +hive udaf w/o group by 3276 3322 70 0.0 49983.2 1.0X +spark af w/o group by 20 25 4 3.3 300.3 166.5X +hive udaf w/ group by 2090 2101 7 0.0 31892.1 1.6X +spark af w/ group by w/o fallback 21 24 3 3.2 316.8 157.8X +spark af w/ group by w/ fallback 25 27 4 2.7 375.5 133.1X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - typed_count ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort agg w/ group by 22021 22273 310 4.8 210.0 1.0X -object agg w/ group by w/o fallback 6455 7304 390 16.2 61.6 3.4X -object agg w/ group by w/ fallback 14156 14380 225 7.4 135.0 1.6X -sort agg w/o group by 4084 4117 24 25.7 39.0 5.4X -object agg w/o group by w/o fallback 4054 4144 72 25.9 38.7 5.4X +sort agg w/ group by 22925 23221 419 4.6 218.6 1.0X +object agg w/ group by w/o fallback 7021 7103 64 14.9 67.0 3.3X +object agg w/ group by w/ fallback 14719 15622 1324 7.1 140.4 1.6X +sort agg w/o group by 3908 3946 27 26.8 37.3 5.9X +object agg w/o group by w/o fallback 3780 4011 331 27.7 36.0 6.1X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - percentile_approx ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor object agg v.s. 
sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort agg w/ group by 408 424 13 5.1 194.6 1.0X -object agg w/ group by w/o fallback 330 337 5 6.4 157.2 1.2X -object agg w/ group by w/ fallback 445 454 8 4.7 212.0 0.9X -sort agg w/o group by 276 282 3 7.6 131.8 1.5X -object agg w/o group by w/o fallback 266 273 4 7.9 126.8 1.5X +sort agg w/ group by 408 431 15 5.1 194.6 1.0X +object agg w/ group by w/o fallback 326 334 5 6.4 155.4 1.3X +object agg w/ group by w/ fallback 451 472 19 4.6 215.2 0.9X +sort agg w/o group by 274 281 4 7.6 130.8 1.5X +object agg w/o group by w/o fallback 273 277 5 7.7 130.4 1.5X diff --git a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt index 79409a2aba027..8fb04e97f4bc5 100644 --- a/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt +++ b/sql/hive/benchmarks/ObjectHashAggregateExecBenchmark-results.txt @@ -2,44 +2,44 @@ Hive UDAF vs Spark AF ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor hive udaf vs spark af: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -hive udaf w/o group by 3496 3530 35 0.0 53350.3 1.0X -spark af w/o group by 20 26 4 3.3 304.8 175.1X -hive udaf w/ group by 2193 2205 10 0.0 33464.2 1.6X -spark af w/ group by w/o fallback 22 26 5 3.0 328.8 162.3X -spark af w/ group by w/ fallback 25 28 3 2.7 376.0 141.9X +hive udaf w/o group by 3375 3488 67 0.0 51493.5 1.0X +spark af w/o group by 21 26 4 3.2 313.8 164.1X +hive udaf w/ group by 2174 2193 14 0.0 33173.8 1.6X +spark af w/ group by w/o fallback 22 27 4 2.9 339.0 151.9X +spark af w/ group by w/ fallback 25 28 3 2.6 383.5 134.3X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - typed_count ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor object agg v.s. 
sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort agg w/ group by 24980 25578 846 4.2 238.2 1.0X -object agg w/ group by w/o fallback 7305 7525 163 14.4 69.7 3.4X -object agg w/ group by w/ fallback 14433 14586 147 7.3 137.6 1.7X -sort agg w/o group by 4331 4359 20 24.2 41.3 5.8X -object agg w/o group by w/o fallback 3997 4029 40 26.2 38.1 6.2X +sort agg w/ group by 23621 24285 938 4.4 225.3 1.0X +object agg w/ group by w/o fallback 6890 7186 232 15.2 65.7 3.4X +object agg w/ group by w/ fallback 14883 15203 299 7.0 141.9 1.6X +sort agg w/o group by 4104 4125 17 25.5 39.1 5.8X +object agg w/o group by w/o fallback 3695 3723 26 28.4 35.2 6.4X ================================================================================================ ObjectHashAggregateExec vs SortAggregateExec - percentile_approx ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor object agg v.s. sort agg: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -sort agg w/ group by 419 427 9 5.0 199.7 1.0X -object agg w/ group by w/o fallback 340 349 8 6.2 162.0 1.2X -object agg w/ group by w/ fallback 467 476 7 4.5 222.6 0.9X -sort agg w/o group by 310 315 5 6.8 147.9 1.4X -object agg w/o group by w/o fallback 302 309 4 6.9 144.2 1.4X +sort agg w/ group by 404 412 6 5.2 192.8 1.0X +object agg w/ group by w/o fallback 332 339 6 6.3 158.5 1.2X +object agg w/ group by w/ fallback 461 467 8 4.6 219.6 0.9X +sort agg w/o group by 308 313 5 6.8 146.9 1.3X +object agg w/o group by w/o fallback 304 308 4 6.9 144.8 1.3X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-jdk21-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-jdk21-results.txt index ad32f1d3ee3ad..eedfc34b5ea13 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-jdk21-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-jdk21-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 662 691 46 23.8 42.1 1.0X -Native ORC MR 784 812 28 20.1 49.9 0.8X -Native ORC Vectorized 92 120 35 171.5 5.8 7.2X +Hive built-in ORC 627 665 40 25.1 39.9 1.0X +Native ORC MR 699 703 4 22.5 44.4 0.9X +Native ORC Vectorized 61 81 21 258.1 3.9 10.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 692 724 31 22.7 44.0 1.0X -Native ORC MR 789 803 15 19.9 
50.1 0.9X -Native ORC Vectorized 73 94 17 214.7 4.7 9.4X +Hive built-in ORC 681 699 17 23.1 43.3 1.0X +Native ORC MR 792 803 14 19.9 50.3 0.9X +Native ORC Vectorized 72 86 16 217.6 4.6 9.4X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 685 708 21 23.0 43.5 1.0X -Native ORC MR 811 854 40 19.4 51.5 0.8X -Native ORC Vectorized 79 94 18 199.8 5.0 8.7X +Hive built-in ORC 741 764 29 21.2 47.1 1.0X +Native ORC MR 907 929 29 17.4 57.6 0.8X +Native ORC Vectorized 95 105 14 164.8 6.1 7.8X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 733 771 39 21.5 46.6 1.0X -Native ORC MR 803 819 15 19.6 51.1 0.9X -Native ORC Vectorized 113 128 16 138.8 7.2 6.5X +Hive built-in ORC 860 868 11 18.3 54.7 1.0X +Native ORC MR 831 871 37 18.9 52.8 1.0X +Native ORC Vectorized 93 104 15 169.9 5.9 9.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 804 815 15 19.6 51.1 1.0X -Native ORC MR 873 904 27 18.0 55.5 0.9X -Native ORC Vectorized 138 169 29 114.2 8.8 5.8X +Hive built-in ORC 803 841 34 19.6 51.1 1.0X +Native ORC MR 839 857 24 18.7 53.3 1.0X +Native ORC Vectorized 129 168 37 122.0 8.2 6.2X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 812 825 12 19.4 51.6 1.0X -Native ORC MR 934 943 13 16.8 59.4 0.9X -Native ORC Vectorized 188 213 24 83.6 12.0 4.3X +Hive built-in ORC 959 966 8 16.4 61.0 1.0X +Native ORC MR 997 1021 35 15.8 63.4 1.0X +Native ORC Vectorized 214 264 30 73.5 13.6 4.5X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1547 1557 15 6.8 147.5 1.0X -Native ORC MR 1469 1478 13 7.1 140.1 1.1X -Native ORC Vectorized 573 601 32 18.3 54.6 2.7X +Hive built-in ORC 1565 1567 2 6.7 
149.3 1.0X +Native ORC MR 1574 1602 40 6.7 150.1 1.0X +Native ORC Vectorized 656 660 6 16.0 62.6 2.4X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Hive built-in ORC 827 859 48 19.0 52.6 1.0X -Data column - Native ORC MR 977 1025 77 16.1 62.1 0.8X -Data column - Native ORC Vectorized 109 124 15 144.3 6.9 7.6X -Partition column - Hive built-in ORC 702 715 12 22.4 44.6 1.2X -Partition column - Native ORC MR 556 562 9 28.3 35.3 1.5X -Partition column - Native ORC Vectorized 31 41 8 510.7 2.0 26.8X -Both columns - Hive built-in ORC 888 933 40 17.7 56.5 0.9X -Both columns - Native ORC MR 1076 1083 10 14.6 68.4 0.8X -Both columns - Native ORC Vectorized 126 144 17 125.3 8.0 6.6X +Data column - Hive built-in ORC 893 933 35 17.6 56.8 1.0X +Data column - Native ORC MR 1154 1159 6 13.6 73.4 0.8X +Data column - Native ORC Vectorized 97 123 30 161.6 6.2 9.2X +Partition column - Hive built-in ORC 702 719 22 22.4 44.7 1.3X +Partition column - Native ORC MR 653 670 19 24.1 41.5 1.4X +Partition column - Native ORC Vectorized 34 47 11 456.3 2.2 25.9X +Both columns - Hive built-in ORC 1006 1019 20 15.6 63.9 0.9X +Both columns - Native ORC MR 1085 1096 15 14.5 69.0 0.8X +Both columns - Native ORC Vectorized 111 140 26 142.2 7.0 8.1X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 832 886 48 12.6 79.3 1.0X -Native ORC MR 821 825 6 12.8 78.3 1.0X -Native ORC Vectorized 173 189 14 60.7 16.5 4.8X +Hive built-in ORC 808 823 15 13.0 77.1 1.0X +Native ORC MR 791 794 4 13.3 75.4 1.0X +Native ORC Vectorized 124 137 15 84.4 11.8 6.5X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1421 1433 16 7.4 135.6 1.0X -Native ORC MR 1205 1215 14 8.7 114.9 1.2X -Native ORC Vectorized 285 314 29 36.8 27.2 5.0X +Hive built-in ORC 1404 1416 17 7.5 133.9 1.0X +Native ORC MR 1275 1283 11 8.2 121.6 1.1X +Native ORC Vectorized 310 327 16 33.8 29.6 4.5X -OpenJDK 64-Bit Server VM 
21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1261 1273 17 8.3 120.3 1.0X -Native ORC MR 1121 1131 14 9.4 106.9 1.1X -Native ORC Vectorized 341 357 13 30.8 32.5 3.7X +Hive built-in ORC 1196 1198 4 8.8 114.0 1.0X +Native ORC MR 1182 1182 0 8.9 112.7 1.0X +Native ORC Vectorized 346 373 35 30.3 33.0 3.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 840 842 2 12.5 80.1 1.0X -Native ORC MR 750 751 1 14.0 71.5 1.1X -Native ORC Vectorized 144 160 9 72.7 13.8 5.8X +Hive built-in ORC 741 769 25 14.1 70.7 1.0X +Native ORC MR 834 838 5 12.6 79.5 0.9X +Native ORC Vectorized 136 175 36 77.2 13.0 5.5X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 669 688 23 1.6 638.3 1.0X -Native ORC MR 85 100 13 12.3 81.4 7.8X -Native ORC Vectorized 36 46 9 29.3 34.1 18.7X +Hive built-in ORC 570 588 23 1.8 543.8 1.0X +Native ORC MR 84 102 21 12.5 80.0 6.8X +Native ORC Vectorized 29 36 8 35.8 27.9 19.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1221 1230 13 0.9 1164.4 1.0X -Native ORC MR 100 111 12 10.5 95.7 12.2X -Native ORC Vectorized 50 68 11 20.9 47.8 24.4X +Hive built-in ORC 1062 1069 10 1.0 1012.4 1.0X +Native ORC MR 91 109 21 11.5 87.2 11.6X +Native ORC Vectorized 37 48 8 28.3 35.4 28.6X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1834 1884 70 0.6 1749.1 1.0X -Native ORC MR 112 135 23 9.3 107.2 16.3X -Native ORC Vectorized 61 70 8 17.3 58.0 30.2X +Hive built-in ORC 1593 1665 101 0.7 1519.1 1.0X +Native ORC MR 101 110 9 10.4 96.2 15.8X +Native ORC Vectorized 45 52 6 23.2 43.1 35.3X 
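A note on reading the updated tables above: the derived columns appear to be simple functions of the measured best time, with "Relative" normalized against the first row of each table. A minimal Scala sketch of that reading, using the "Single Column Scan from 300 columns" rows above and assuming roughly 2^20 values per run (the value count is inferred from the numbers, not stated in the results files; all names below are illustrative):

object BenchmarkColumnsSketch {
  def main(args: Array[String]): Unit = {
    // Assumed number of values processed per run; an inference, not taken from the file.
    val valuesPerRun = 1 << 20
    val baselineBestMs = 1593.0   // "Hive built-in ORC", 300 columns (the 1.0X row above)
    val candidateBestMs = 101.0   // "Native ORC MR", same table

    val rateMPerSec = valuesPerRun / (candidateBestMs / 1000.0) / 1e6  // ~10.4 M/s
    val perRowNs = candidateBestMs * 1e6 / valuesPerRun                // ~96 ns
    val relative = baselineBestMs / candidateBestMs                    // ~15.8X

    println(f"Rate ~ $rateMPerSec%.1f M/s, Per Row ~ $perRowNs%.1f ns, Relative ~ $relative%.1fX")
  }
}

Small discrepancies against the printed values (for example 96.3 vs 96.2 ns) are consistent with the harness rounding from unrounded millisecond timings.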
================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 279 338 41 3.8 266.0 1.0X -Native ORC MR 220 268 44 4.8 209.8 1.3X -Native ORC Vectorized 97 115 18 10.8 92.5 2.9X +Hive built-in ORC 290 350 48 3.6 276.9 1.0X +Native ORC MR 225 243 25 4.7 215.0 1.3X +Native ORC Vectorized 97 109 20 10.8 92.3 3.0X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 1869 1890 29 0.6 1782.7 1.0X -Native ORC MR 1684 1729 64 0.6 1606.4 1.1X -Native ORC Vectorized 869 913 56 1.2 828.5 2.2X +Hive built-in ORC 2077 2114 52 0.5 1981.2 1.0X +Native ORC MR 1778 1786 12 0.6 1695.4 1.2X +Native ORC Vectorized 893 941 45 1.2 851.8 2.3X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 5329 5433 148 0.2 5082.1 1.0X -Native ORC MR 5514 5545 43 0.2 5259.0 1.0X -Native ORC Vectorized 5450 5502 74 0.2 5197.3 1.0X +Hive built-in ORC 6108 6135 39 0.2 5824.6 1.0X +Native ORC MR 5695 5742 66 0.2 5431.5 1.1X +Native ORC Vectorized 5662 5701 55 0.2 5399.8 1.1X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 11352 11377 36 0.1 10826.0 1.0X -Native ORC MR 12781 12796 22 0.1 12188.7 0.9X -Native ORC Vectorized 12673 12779 151 0.1 12085.7 0.9X +Hive built-in ORC 12790 12832 60 0.1 12197.3 1.0X +Native ORC MR 12987 13006 27 0.1 12385.1 1.0X +Native ORC Vectorized 12870 12946 107 0.1 12274.1 1.0X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ 
-Hive built-in ORC 1793 1818 36 0.6 1710.3 1.0X -Native ORC MR 2053 2069 22 0.5 1958.2 0.9X -Native ORC Vectorized 539 569 37 1.9 513.7 3.3X +Hive built-in ORC 1907 1949 59 0.5 1818.9 1.0X +Native ORC MR 1645 1678 46 0.6 1569.2 1.2X +Native ORC Vectorized 549 566 26 1.9 523.8 3.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 4477 4533 78 0.2 4270.0 1.0X -Native ORC MR 4467 4484 25 0.2 4259.6 1.0X -Native ORC Vectorized 1470 1490 29 0.7 1402.0 3.0X +Hive built-in ORC 5234 5237 4 0.2 4991.9 1.0X +Native ORC MR 3998 4042 63 0.3 3812.4 1.3X +Native ORC Vectorized 1489 1494 7 0.7 1420.4 3.5X -OpenJDK 64-Bit Server VM 21.0.2+13-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 21.0.3+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 4417 4430 19 0.2 4212.1 1.0X -Native ORC MR 5039 5069 42 0.2 4805.9 0.9X -Native ORC Vectorized 1726 1737 16 0.6 1645.8 2.6X +Hive built-in ORC 5144 5174 42 0.2 4905.7 1.0X +Native ORC MR 4441 4510 99 0.2 4234.9 1.2X +Native ORC Vectorized 1793 1877 118 0.6 1710.3 2.9X diff --git a/sql/hive/benchmarks/OrcReadBenchmark-results.txt b/sql/hive/benchmarks/OrcReadBenchmark-results.txt index 0cbddae8dc9ef..7cdd02dbb8129 100644 --- a/sql/hive/benchmarks/OrcReadBenchmark-results.txt +++ b/sql/hive/benchmarks/OrcReadBenchmark-results.txt @@ -2,221 +2,221 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 650 680 33 24.2 41.3 1.0X -Native ORC MR 755 818 73 20.8 48.0 0.9X -Native ORC Vectorized 91 108 12 172.9 5.8 7.1X +Hive built-in ORC 686 697 18 22.9 43.6 1.0X +Native ORC MR 792 845 62 19.9 50.4 0.9X +Native ORC Vectorized 82 99 13 192.4 5.2 8.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 657 699 56 23.9 41.8 1.0X -Native ORC MR 735 777 49 21.4 46.8 0.9X -Native ORC Vectorized 74 88 8 212.5 4.7 8.9X +Hive built-in ORC 785 805 25 20.0 49.9 1.0X +Native ORC MR 810 833 36 19.4 51.5 1.0X +Native ORC Vectorized 92 115 18 171.8 5.8 8.6X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL 
Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 732 735 3 21.5 46.5 1.0X -Native ORC MR 787 809 22 20.0 50.1 0.9X -Native ORC Vectorized 78 96 12 200.9 5.0 9.3X +Hive built-in ORC 827 847 18 19.0 52.6 1.0X +Native ORC MR 870 871 2 18.1 55.3 1.0X +Native ORC Vectorized 115 133 15 136.3 7.3 7.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 754 755 2 20.9 47.9 1.0X -Native ORC MR 795 804 8 19.8 50.6 0.9X -Native ORC Vectorized 112 124 13 140.4 7.1 6.7X +Hive built-in ORC 933 951 28 16.9 59.3 1.0X +Native ORC MR 897 908 10 17.5 57.0 1.0X +Native ORC Vectorized 113 128 11 139.3 7.2 8.3X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 795 798 3 19.8 50.5 1.0X -Native ORC MR 875 908 29 18.0 55.6 0.9X -Native ORC Vectorized 141 159 19 111.6 9.0 5.6X +Hive built-in ORC 879 882 5 17.9 55.9 1.0X +Native ORC MR 917 935 25 17.2 58.3 1.0X +Native ORC Vectorized 151 182 25 104.3 9.6 5.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 797 813 15 19.7 50.7 1.0X -Native ORC MR 866 878 13 18.2 55.1 0.9X -Native ORC Vectorized 183 191 8 86.1 11.6 4.4X +Hive built-in ORC 939 949 12 16.7 59.7 1.0X +Native ORC MR 1016 1039 32 15.5 64.6 0.9X +Native ORC Vectorized 248 259 10 63.5 15.7 3.8X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1422 1514 131 7.4 135.6 1.0X -Native ORC MR 1415 1449 47 7.4 134.9 1.0X -Native ORC Vectorized 569 580 8 18.4 54.2 2.5X +Hive built-in ORC 1577 1591 20 6.7 150.4 1.0X +Native ORC MR 1524 1539 21 6.9 145.3 1.0X +Native ORC Vectorized 630 661 24 16.6 60.1 2.5X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 
17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Data column - Hive built-in ORC 838 847 8 18.8 53.3 1.0X -Data column - Native ORC MR 937 965 42 16.8 59.6 0.9X -Data column - Native ORC Vectorized 109 124 11 144.5 6.9 7.7X -Partition column - Hive built-in ORC 594 617 16 26.5 37.8 1.4X -Partition column - Native ORC MR 599 618 18 26.3 38.1 1.4X -Partition column - Native ORC Vectorized 32 41 5 491.9 2.0 26.2X -Both columns - Hive built-in ORC 889 922 55 17.7 56.5 0.9X -Both columns - Native ORC MR 997 1003 8 15.8 63.4 0.8X -Both columns - Native ORC Vectorized 124 141 12 127.0 7.9 6.8X +Data column - Hive built-in ORC 1040 1043 4 15.1 66.1 1.0X +Data column - Native ORC MR 1190 1201 15 13.2 75.7 0.9X +Data column - Native ORC Vectorized 120 134 8 131.2 7.6 8.7X +Partition column - Hive built-in ORC 675 682 8 23.3 42.9 1.5X +Partition column - Native ORC MR 725 771 40 21.7 46.1 1.4X +Partition column - Native ORC Vectorized 45 52 6 353.1 2.8 23.4X +Both columns - Hive built-in ORC 1049 1078 41 15.0 66.7 1.0X +Both columns - Native ORC MR 1238 1321 118 12.7 78.7 0.8X +Both columns - Native ORC Vectorized 133 153 14 117.8 8.5 7.8X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 830 847 17 12.6 79.2 1.0X -Native ORC MR 789 791 3 13.3 75.3 1.1X -Native ORC Vectorized 123 130 8 85.2 11.7 6.7X +Hive built-in ORC 859 878 22 12.2 81.9 1.0X +Native ORC MR 855 877 21 12.3 81.5 1.0X +Native ORC Vectorized 145 161 19 72.1 13.9 5.9X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1350 1378 40 7.8 128.8 1.0X -Native ORC MR 1229 1254 35 8.5 117.2 1.1X -Native ORC Vectorized 282 289 5 37.2 26.9 4.8X +Hive built-in ORC 1465 1465 0 7.2 139.7 1.0X +Native ORC MR 1412 1438 36 7.4 134.7 1.0X +Native ORC Vectorized 326 355 25 32.2 31.1 4.5X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1219 1264 64 8.6 116.3 1.0X -Native ORC MR 1132 1140 12 9.3 107.9 1.1X -Native ORC Vectorized 336 362 29 31.2 32.1 3.6X +Hive built-in ORC 1270 1275 7 8.3 121.1 1.0X +Native ORC MR 1311 1318 9 8.0 125.0 1.0X +Native ORC Vectorized 371 378 5 28.3 35.3 3.4X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 748 758 13 14.0 71.3 1.0X -Native ORC MR 784 786 2 13.4 74.8 1.0X -Native ORC Vectorized 143 158 14 73.1 13.7 5.2X +Hive built-in ORC 775 779 3 13.5 73.9 1.0X +Native ORC MR 894 907 16 11.7 85.3 0.9X +Native ORC Vectorized 161 178 15 65.3 15.3 4.8X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 403 430 28 2.6 384.0 1.0X -Native ORC MR 88 96 9 11.9 84.3 4.6X -Native ORC Vectorized 36 43 5 28.8 34.7 11.1X +Hive built-in ORC 447 494 41 2.3 426.4 1.0X +Native ORC MR 106 119 15 9.9 100.8 4.2X +Native ORC Vectorized 40 51 9 26.1 38.3 11.1X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 200 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 717 734 26 1.5 684.1 1.0X -Native ORC MR 101 118 11 10.4 96.4 7.1X -Native ORC Vectorized 51 60 7 20.5 48.8 14.0X +Hive built-in ORC 772 804 33 1.4 736.2 1.0X +Native ORC MR 114 124 10 9.2 108.7 6.8X +Native ORC Vectorized 49 59 8 21.5 46.5 15.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Column Scan from 300 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 1052 1088 50 1.0 1003.6 1.0X -Native ORC MR 118 137 13 8.9 112.4 8.9X -Native ORC Vectorized 62 75 11 16.8 59.4 16.9X +Hive built-in ORC 1197 1206 12 0.9 1141.7 1.0X +Native ORC MR 131 151 18 8.0 125.1 9.1X +Native ORC Vectorized 61 72 8 17.3 58.0 19.7X ================================================================================================ Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 
17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 319 334 29 3.3 303.9 1.0X -Native ORC MR 278 298 19 3.8 265.0 1.1X -Native ORC Vectorized 103 136 20 10.1 98.6 3.1X +Hive built-in ORC 480 491 11 2.2 457.4 1.0X +Native ORC MR 366 387 20 2.9 348.7 1.3X +Native ORC Vectorized 152 175 13 6.9 145.1 3.2X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 100 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 2138 2169 45 0.5 2038.6 1.0X -Native ORC MR 1607 1667 84 0.7 1532.8 1.3X -Native ORC Vectorized 855 902 42 1.2 815.2 2.5X +Hive built-in ORC 3607 3669 89 0.3 3439.5 1.0X +Native ORC MR 1890 1921 43 0.6 1802.9 1.9X +Native ORC Vectorized 1259 1311 74 0.8 1200.6 2.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 300 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 6442 6476 48 0.2 6143.4 1.0X -Native ORC MR 5475 5493 26 0.2 5220.9 1.2X -Native ORC Vectorized 5492 5500 11 0.2 5237.5 1.2X +Hive built-in ORC 11342 11426 119 0.1 10816.1 1.0X +Native ORC MR 6475 6524 68 0.2 6175.5 1.8X +Native ORC Vectorized 6379 6408 41 0.2 6083.8 1.8X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Single Struct Column Scan with 600 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------- -Hive built-in ORC 13992 14055 89 0.1 13344.0 1.0X -Native ORC MR 12827 12858 45 0.1 12232.5 1.1X -Native ORC Vectorized 12910 12950 58 0.1 12311.8 1.1X +Hive built-in ORC 24544 24920 532 0.0 23406.9 1.0X +Native ORC MR 15124 15472 492 0.1 14423.6 1.6X +Native ORC Vectorized 15066 15264 280 0.1 14368.4 1.6X ================================================================================================ Nested Struct scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 10 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 2584 2616 46 0.4 2464.2 1.0X -Native ORC MR 2355 2368 18 0.4 2246.2 1.1X -Native ORC Vectorized 660 662 2 1.6 629.3 3.9X +Hive built-in ORC 3951 3965 19 0.3 3768.2 1.0X +Native ORC MR 2319 2417 139 0.5 2211.5 1.7X +Native ORC Vectorized 743 769 27 1.4 708.8 5.3X 
-OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 30 Elements, 10 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 6123 6123 1 0.2 5839.1 1.0X -Native ORC MR 5065 5235 240 0.2 4830.8 1.2X -Native ORC Vectorized 1533 1547 20 0.7 1461.8 4.0X +Hive built-in ORC 10020 10058 54 0.1 9555.4 1.0X +Native ORC MR 4704 4747 61 0.2 4486.5 2.1X +Native ORC Vectorized 2038 2092 76 0.5 1944.0 4.9X -OpenJDK 64-Bit Server VM 17.0.10+7-LTS on Linux 6.5.0-1016-azure +OpenJDK 64-Bit Server VM 17.0.11+9-LTS on Linux 6.5.0-1018-azure AMD EPYC 7763 64-Core Processor Nested Struct Scan with 10 Elements, 30 Fields: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------ -Hive built-in ORC 5890 5899 13 0.2 5616.8 1.0X -Native ORC MR 5753 5836 118 0.2 5486.6 1.0X -Native ORC Vectorized 2154 2183 41 0.5 2053.8 2.7X +Hive built-in ORC 9520 9564 62 0.1 9079.2 1.0X +Native ORC MR 5648 5669 31 0.2 5386.2 1.7X +Native ORC Vectorized 3237 3253 22 0.3 3087.2 2.9X diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 3895d9dc5a634..5a325f5f56bfc 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -40,6 +40,10 @@ spark-core_${scala.binary.version} ${project.version} + + commons-lang + commons-lang + org.apache.spark spark-core_${scala.binary.version} @@ -113,12 +117,12 @@ ${hive.shims.scope} - org.apache.hive + ${hive.group} hive-llap-common ${hive.llap.scope} - org.apache.hive + ${hive.group} hive-llap-client ${hive.llap.scope} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 8c35e10b383f6..77ed81482396b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -34,7 +34,7 @@ import org.apache.thrift.TException import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{DATABASE_NAME, SCHEMA, SCHEMA2, TABLE_NAME} +import org.apache.spark.internal.LogKeys.{DATABASE_NAME, INCOMPATIBLE_TYPES, PROVIDER, SCHEMA, SCHEMA2, TABLE_NAME} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException @@ -49,6 +49,7 @@ import org.apache.spark.sql.hive.client.HiveClient import org.apache.spark.sql.internal.HiveSerDe import org.apache.spark.sql.internal.StaticSQLConf._ import org.apache.spark.sql.types._ +import org.apache.spark.sql.util.SchemaUtils /** * A persistent implementation of the system catalog using Hive. @@ -233,12 +234,39 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat tableDefinition.storage.locationUri } + val hiveCompatibleSchema = tryGetHiveCompatibleSchema(tableDefinition.schema) + if (DDLUtils.isDatasourceTable(tableDefinition)) { + // To work around some hive metastore issues, e.g. 
not case-preserving, bad decimal type + // support, no column nullability, etc., we should do some extra works before saving table + // metadata into Hive metastore: + // 1. Put table metadata like table schema, partition columns, etc. in table properties. + // 2. Check if this table is hive compatible. + // 2.1 If it's not hive compatible, set location URI, schema, partition columns and bucket + // spec to empty and save table metadata to Hive. + // 2.2 If it's hive compatible, set serde information in table metadata and try to save + // it to Hive. If it fails, treat it as not hive compatible and go back to 2.1 + val tableProperties = tableMetaToTableProps(tableDefinition) + + // put table provider and partition provider in table properties. + tableProperties.put(DATASOURCE_PROVIDER, tableDefinition.provider.get) + if (tableDefinition.tracksPartitionsInCatalog) { + tableProperties.put(TABLE_PARTITION_PROVIDER, TABLE_PARTITION_PROVIDER_CATALOG) + } + + // we have to set the table schema here so that the table schema JSON + // string in the table properties still uses the original schema + val hiveTable = tableDefinition.copy( + schema = hiveCompatibleSchema, + properties = tableDefinition.properties ++ tableProperties + ) + createDataSourceTable( - tableDefinition.withNewStorage(locationUri = tableLocation), + hiveTable.withNewStorage(locationUri = tableLocation), ignoreIfExists) } else { val tableWithDataSourceProps = tableDefinition.copy( + schema = hiveCompatibleSchema, // We can't leave `locationUri` empty and count on Hive metastore to set a default table // location, because Hive metastore uses hive.metastore.warehouse.dir to generate default // table location for tables in default database, while we expect to use the location of @@ -268,23 +296,6 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val provider = table.provider.get val options = new SourceOptions(table.storage.properties) - // To work around some hive metastore issues, e.g. not case-preserving, bad decimal type - // support, no column nullability, etc., we should do some extra works before saving table - // metadata into Hive metastore: - // 1. Put table metadata like table schema, partition columns, etc. in table properties. - // 2. Check if this table is hive compatible. - // 2.1 If it's not hive compatible, set location URI, schema, partition columns and bucket - // spec to empty and save table metadata to Hive. - // 2.2 If it's hive compatible, set serde information in table metadata and try to save - // it to Hive. If it fails, treat it as not hive compatible and go back to 2.1 - val tableProperties = tableMetaToTableProps(table) - - // put table provider and partition provider in table properties. - tableProperties.put(DATASOURCE_PROVIDER, provider) - if (table.tracksPartitionsInCatalog) { - tableProperties.put(TABLE_PARTITION_PROVIDER, TABLE_PARTITION_PROVIDER_CATALOG) - } - // Ideally we should also put `locationUri` in table properties like provider, schema, etc. // However, in older version of Spark we already store table location in storage properties // with key "path". Here we keep this behaviour for backward compatibility. 
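The relocated comment above (steps 1, 2.1 and 2.2) describes a try-then-fall-back pattern: attempt to persist a Hive-compatible copy of the table, and if the metastore rejects it, persist the Spark SQL specific variant whose real schema lives only in the table properties. A generic sketch of that shape, with hypothetical names and a simplified signature (the actual logic operates on CatalogTable inside createDataSourceTable):

import scala.util.control.NonFatal

object SaveTableSketch {
  // `save` stands in for the metastore client call; T stands in for CatalogTable.
  def saveTable[T](hiveCompatible: Option[T], sparkSpecific: T)(save: T => Unit): Unit = {
    hiveCompatible match {
      case Some(table) =>
        try {
          save(table)            // step 2.2: Hive-compatible format with serde info set
        } catch {
          case NonFatal(_) =>
            save(sparkSpecific)  // Hive rejected it: fall back to step 2.1
        }
      case None =>
        save(sparkSpecific)      // step 2.1: Spark SQL specific format only
    }
  }
}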
@@ -303,8 +314,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat locationUri = None, properties = storagePropsWithLocation), schema = StructType(EMPTY_DATA_SCHEMA ++ table.partitionSchema), - bucketSpec = None, - properties = table.properties ++ tableProperties) + bucketSpec = None) } // converts the table metadata to Hive compatible format, i.e. set the serde information. @@ -326,8 +336,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat outputFormat = serde.outputFormat, serde = serde.serde, properties = storagePropsWithLocation - ), - properties = table.properties ++ tableProperties) + ) + ) } val qualifiedTableName = table.identifier.quotedString @@ -338,35 +348,37 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val (hiveCompatibleTable, logMessage) = maybeSerde match { case _ if options.skipHiveMetadata => val message = - s"Persisting data source table $qualifiedTableName into Hive metastore in" + - "Spark SQL specific format, which is NOT compatible with Hive." + log"Persisting data source table ${MDC(TABLE_NAME, qualifiedTableName)} into Hive " + + log"metastore in Spark SQL specific format, which is NOT compatible with Hive." (None, message) case _ if incompatibleTypes.nonEmpty => + val incompatibleTypesStr = incompatibleTypes.mkString(", ") val message = - s"Hive incompatible types found: ${incompatibleTypes.mkString(", ")}. " + - s"Persisting data source table $qualifiedTableName into Hive metastore in " + - "Spark SQL specific format, which is NOT compatible with Hive." + log"Hive incompatible types found: ${MDC(INCOMPATIBLE_TYPES, incompatibleTypesStr)}. " + + log"Persisting data source table ${MDC(TABLE_NAME, qualifiedTableName)} into Hive " + + log"metastore in Spark SQL specific format, which is NOT compatible with Hive." (None, message) // our bucketing is un-compatible with hive(different hash function) case Some(serde) if table.bucketSpec.nonEmpty => val message = - s"Persisting bucketed data source table $qualifiedTableName into " + - "Hive metastore in Spark SQL specific format, which is NOT compatible with " + - "Hive bucketed table. But Hive can read this table as a non-bucketed table." + log"Persisting bucketed data source table ${MDC(TABLE_NAME, qualifiedTableName)} into " + + log"Hive metastore in Spark SQL specific format, which is NOT compatible with " + + log"Hive bucketed table. But Hive can read this table as a non-bucketed table." (Some(newHiveCompatibleMetastoreTable(serde)), message) case Some(serde) => val message = - s"Persisting file based data source table $qualifiedTableName into " + - s"Hive metastore in Hive compatible format." + log"Persisting file based data source table ${MDC(TABLE_NAME, qualifiedTableName)} " + + log"into Hive metastore in Hive compatible format." (Some(newHiveCompatibleMetastoreTable(serde)), message) case _ => val message = - s"Couldn't find corresponding Hive SerDe for data source provider $provider. " + - s"Persisting data source table $qualifiedTableName into Hive metastore in " + - s"Spark SQL specific format, which is NOT compatible with Hive." + log"Couldn't find corresponding Hive SerDe for data source provider " + + log"${MDC(PROVIDER, provider)}. Persisting data source table " + + log"${MDC(TABLE_NAME, qualifiedTableName)} into Hive metastore in " + + log"Spark SQL specific format, which is NOT compatible with Hive." 
(None, message) } @@ -667,6 +679,8 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val schemaProps = tableMetaToTableProps(oldTable, StructType(newDataSchema ++ oldTable.partitionSchema)).toMap + val hiveSchema = tryGetHiveCompatibleSchema(newDataSchema) + if (isDatasourceTable(oldTable)) { // For data source tables, first try to write it with the schema set; if that does not work, // try again with updated properties and the partition schema. This is a simplified version of // (for example, the schema does not match the data source schema, or does not match the // storage descriptor). try { - client.alterTableDataSchema(db, table, newDataSchema, schemaProps) + client.alterTableDataSchema(db, table, hiveSchema, schemaProps) } catch { case NonFatal(e) => val warningMessage = log"Could not alter schema of table " + @@ -684,10 +698,21 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat client.alterTableDataSchema(db, table, EMPTY_DATA_SCHEMA, schemaProps) } } else { - client.alterTableDataSchema(db, table, newDataSchema, schemaProps) + client.alterTableDataSchema(db, table, hiveSchema, schemaProps) } } + /** + * Tries to fix the schema so that all column data types are Hive-compatible, + * i.e. the types are converted to the types that Hive supports. + */ + private def tryGetHiveCompatibleSchema(schema: StructType): StructType = { + // Since collated strings do not exist in Hive as a type we need to replace them with + // the regular string type. However, as we save the original schema in the table + // properties we will be able to restore the original schema when reading back the table. + SchemaUtils.replaceCollatedStringWithString(schema).asInstanceOf[StructType] + } + /** Alter the statistics of a table. If `stats` is None, then remove all existing statistics.
*/ override def alterTableStats( db: String, @@ -790,7 +815,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat val partColumnNames = getPartitionColumnsFromTableProperties(table) val reorderedSchema = reorderSchema(schema = schemaFromTableProps, partColumnNames) - if (DataTypeUtils.equalsIgnoreCaseAndNullability(reorderedSchema, table.schema) || + if (DataTypeUtils.equalsIgnoreCaseNullabilityAndCollation(reorderedSchema, table.schema) || options.respectSparkSchema) { hiveTable.copy( schema = reorderedSchema, @@ -1423,6 +1448,7 @@ object HiveExternalCatalog { case a: ArrayType => isHiveCompatibleDataType(a.elementType) case m: MapType => isHiveCompatibleDataType(m.keyType) && isHiveCompatibleDataType(m.valueType) + case st: StringType => st.isUTF8BinaryCollation case _ => true } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index a05043b63d51b..9f1954cbf6868 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.errors.DataTypeErrors.toSQLType import org.apache.spark.sql.execution.datasources.DaysWritable import org.apache.spark.sql.types import org.apache.spark.sql.types._ @@ -1126,7 +1127,7 @@ private[hive] trait HiveInspectors { private def decimalTypeInfo(decimalType: DecimalType): TypeInfo = decimalType match { case DecimalType.Fixed(precision, scale) => new DecimalTypeInfo(precision, scale) case dt => throw new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_3094", messageParameters = Map("dt" -> dt.catalogString)) + errorClass = "_LEGACY_ERROR_TEMP_3094", messageParameters = Map("dt" -> toSQLType(dt))) } def toTypeInfo: TypeInfo = dt match { @@ -1155,7 +1156,7 @@ private[hive] trait HiveInspectors { case _: YearMonthIntervalType => intervalYearMonthTypeInfo case dt => throw new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_3095", messageParameters = Map("dt" -> dt.catalogString)) + errorClass = "_LEGACY_ERROR_TEMP_3095", messageParameters = Map("dt" -> toSQLType(dt))) } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 5b3160c563043..60858089875a2 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.fs.Path import org.apache.spark.SparkException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{FILE_FORMAT, FILE_FORMAT2, INFERENCE_MODE, TABLE_NAME} +import org.apache.spark.internal.LogKeys._ import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ @@ -339,8 +339,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log val shouldInfer = (inferenceMode != NEVER_INFER) && !relation.tableMeta.schemaPreservesCase val tableName = relation.tableMeta.identifier.unquotedString if (shouldInfer) { - logInfo(s"Inferring 
case-sensitive schema for table $tableName (inference mode: " + - s"$inferenceMode)") + logInfo(log"Inferring case-sensitive schema for table ${MDC(TABLE_NAME, tableName)} " + + log"(inference mode: ${MDC(INFERENCE_MODE, inferenceMode)})") val fileIndex = fileIndexOpt.getOrElse { val rootPath = new Path(relation.tableMeta.location) new InMemoryFileIndex(sparkSession, Seq(rootPath), options, None) @@ -372,7 +372,8 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log } private def updateDataSchema(identifier: TableIdentifier, newDataSchema: StructType): Unit = try { - logInfo(s"Saving case-sensitive schema for table ${identifier.unquotedString}") + logInfo( + log"Saving case-sensitive schema for table ${MDC(TABLE_NAME, identifier.unquotedString)}") sparkSession.sessionState.catalog.alterTableDataSchema(identifier, newDataSchema) } catch { case NonFatal(ex) => diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 416299b189cd5..979ff1e24ef5c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -114,6 +114,7 @@ class HiveSessionStateBuilder( TableCapabilityCheck +: CommandCheck +: CollationCheck +: + ViewSyncSchemaToMetaStore +: customCheckRules } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 5972a9df78ecc..e74cc088a1f66 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.execution.command.{CreateTableCommand, DDLUtils, Ins import org.apache.spark.sql.execution.datasources.{CreateTable, DataSourceStrategy, HadoopFsRelation, InsertIntoHadoopFsRelationCommand, LogicalRelation} import org.apache.spark.sql.hive.execution._ import org.apache.spark.sql.hive.execution.HiveScriptTransformationExec +import org.apache.spark.sql.hive.execution.InsertIntoHiveTable.BY_CTAS import org.apache.spark.sql.internal.HiveSerDe @@ -194,6 +195,8 @@ object HiveAnalysis extends Rule[LogicalPlan] { * - When writing to non-partitioned Hive-serde Parquet/Orc tables * - When writing to partitioned Hive-serde Parquet/Orc tables when * `spark.sql.hive.convertInsertingPartitionedTable` is true + * - When writing to unpartitioned Hive-serde Parquet/Orc tables when + * `spark.sql.hive.convertInsertingUnpartitionedTable` is true * - When writing to directory with Hive-serde * - When writing to non-partitioned Hive-serde Parquet/ORC tables using CTAS * - When scanning Hive-serde Parquet/ORC tables @@ -230,7 +233,8 @@ case class RelationConversions( case InsertIntoStatement( r: HiveTableRelation, partition, cols, query, overwrite, ifPartitionNotExists, byName) if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && - (!r.isPartitioned || conf.getConf(HiveUtils.CONVERT_INSERTING_PARTITIONED_TABLE)) + ((r.isPartitioned && conf.getConf(HiveUtils.CONVERT_INSERTING_PARTITIONED_TABLE)) || + (!r.isPartitioned && conf.getConf(HiveUtils.CONVERT_INSERTING_UNPARTITIONED_TABLE))) && isConvertible(r) => InsertIntoStatement(metastoreCatalog.convert(r, isWrite = true), partition, cols, query, overwrite, ifPartitionNotExists, byName) @@ -245,11 +249,11 @@ case class RelationConversions( //
that only matches table insertion inside Hive CTAS. // This pattern would not cause conflicts because this rule is always applied before // `HiveAnalysis` and both of these rules are running once. - case InsertIntoHiveTable( + case i @ InsertIntoHiveTable( tableDesc, _, query, overwrite, ifPartitionNotExists, _, _, _, _, _, _) if query.resolved && DDLUtils.isHiveTable(tableDesc) && tableDesc.partitionColumnNames.isEmpty && isConvertible(tableDesc) && - conf.getConf(HiveUtils.CONVERT_METASTORE_CTAS) => + conf.getConf(HiveUtils.CONVERT_METASTORE_CTAS) && i.getTagValue(BY_CTAS).isDefined => // validation is required to be done here before relation conversion. DDLUtils.checkTableColumns(tableDesc.copy(schema = query.schema)) val hiveTable = DDLUtils.readHiveTable(tableDesc) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala index 68f34bd2beb01..30201dcee552d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala @@ -37,7 +37,7 @@ import org.apache.hive.common.util.HiveVersionInfo import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.execution.command.DDLUtils @@ -74,7 +74,7 @@ private[spark] object HiveUtils extends Logging { val HIVE_METASTORE_VERSION = buildStaticConf("spark.sql.hive.metastore.version") .doc("Version of the Hive metastore. Available options are " + - "2.0.0 through 2.3.9 and " + + "2.0.0 through 2.3.10 and " + "3.0.0 through 3.1.3.") .version("1.4.0") .stringConf @@ -154,6 +154,16 @@ private[spark] object HiveUtils extends Logging { .booleanConf .createWithDefault(true) + val CONVERT_INSERTING_UNPARTITIONED_TABLE = + buildConf("spark.sql.hive.convertInsertingUnpartitionedTable") + .doc("When set to true, and `spark.sql.hive.convertMetastoreParquet` or " + + "`spark.sql.hive.convertMetastoreOrc` is true, the built-in ORC/Parquet writer is used " + + "to process inserting into unpartitioned ORC/Parquet tables created by using the Hive SQL " + + "syntax.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + val CONVERT_METASTORE_CTAS = buildConf("spark.sql.hive.convertMetastoreCtas") .doc("When set to true, Spark will try to use built-in data source writer " + "instead of Hive serde in CTAS.
This flag is effective only if " + @@ -287,7 +297,8 @@ private[spark] object HiveUtils extends Logging { protected[hive] def newClientForExecution( conf: SparkConf, hadoopConf: Configuration): HiveClientImpl = { - logInfo(s"Initializing execution hive, version $builtinHiveVersion") + logInfo(log"Initializing execution hive, version " + + log"${MDC(LogKeys.HIVE_METASTORE_VERSION, builtinHiveVersion)}") val loader = new IsolatedClientLoader( version = IsolatedClientLoader.hiveVersion(builtinHiveVersion), sparkConf = conf, @@ -321,7 +332,7 @@ private[spark] object HiveUtils extends Logging { if (file.getName == "*") { val files = file.getParentFile.listFiles() if (files == null) { - logWarning(log"Hive jar path '${MDC(PATH, file.getPath)}' does not exist.") + logWarning(log"Hive jar path '${MDC(LogKeys.PATH, file.getPath)}' does not exist.") Nil } else { files.filter(_.getName.toLowerCase(Locale.ROOT).endsWith(".jar")).map(_.toURI.toURL) @@ -332,6 +343,12 @@ private[spark] object HiveUtils extends Logging { } } + def logInitWithPath(jars: Seq[URL]): Unit = { + logInfo(log"Initializing HiveMetastoreConnection version " + + log"${MDC(LogKeys.HIVE_METASTORE_VERSION, hiveMetastoreVersion)} using paths: " + + log"${MDC(LogKeys.PATH, jars.mkString(", "))}") + } + val isolatedLoader = if (hiveMetastoreJars == "builtin") { if (builtinHiveVersion != hiveMetastoreVersion) { throw new IllegalArgumentException( @@ -342,7 +359,8 @@ private[spark] object HiveUtils extends Logging { } logInfo( - s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using Spark classes.") + log"Initializing HiveMetastoreConnection version " + + log"${MDC(LogKeys.HIVE_METASTORE_VERSION, hiveMetastoreVersion)} using Spark classes.") new IsolatedClientLoader( version = metaVersion, sparkConf = conf, @@ -355,7 +373,8 @@ private[spark] object HiveUtils extends Logging { } else if (hiveMetastoreJars == "maven") { // TODO: Support for loading the jars from an already downloaded location. 
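As a hedged, SQL-level sketch of the `spark.sql.hive.convertInsertingUnpartitionedTable` flag introduced above (the flag defaults to true; the table and view names below are hypothetical, assuming an unpartitioned Hive-serde Parquet table):

    // Hedged sketch: with the flag off, unpartitioned Hive-serde Parquet/ORC inserts fall back
    // to the Hive SerDe writer instead of Spark's built-in Parquet/ORC writer.
    spark.sql("SET spark.sql.hive.convertInsertingUnpartitionedTable=false")
    spark.sql("INSERT INTO TABLE plain_hive_parquet_tbl SELECT key, value FROM src") // hypothetical table/view names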
logInfo( - s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion using maven.") + log"Initializing HiveMetastoreConnection version " + + log"${MDC(LogKeys.HIVE_METASTORE_VERSION, hiveMetastoreVersion)} using maven.") IsolatedClientLoader.forVersion( hiveMetastoreVersion = hiveMetastoreVersion, hadoopVersion = VersionInfo.getVersion, @@ -381,9 +400,7 @@ private[spark] object HiveUtils extends Logging { ).map(_.toUri.toURL) } - logInfo( - s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion " + - s"using path: ${jars.mkString(";")}") + logInitWithPath(jars) new IsolatedClientLoader( version = metaVersion, sparkConf = conf, @@ -402,9 +419,7 @@ private[spark] object HiveUtils extends Logging { addLocalHiveJars(new File(path)) } - logInfo( - s"Initializing HiveMetastoreConnection version $hiveMetastoreVersion " + - s"using ${jars.mkString(":")}") + logInitWithPath(jars.toSeq) new IsolatedClientLoader( version = metaVersion, sparkConf = conf, diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala index 60970eecc2df1..a93c6bd6b4e9b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TableReader.scala @@ -37,7 +37,7 @@ import org.apache.hadoop.mapreduce.{InputFormat => newInputClass} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.rdd.{EmptyRDD, HadoopRDD, NewHadoopRDD, RDD, UnionRDD} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 502cec3be9c82..11e077e891bd7 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -47,7 +47,7 @@ import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.SparkHadoopUtil.SOURCE_SPARK import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey._ +import org.apache.spark.internal.LogKeys._ import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{DatabaseAlreadyExistsException, NoSuchDatabaseException, NoSuchPartitionException, NoSuchPartitionsException, NoSuchTableException, PartitionsAlreadyExistException} @@ -161,8 +161,9 @@ private[hive] class HiveClientImpl( // Log the default warehouse location. 
logInfo( - s"Warehouse location for Hive client (version ${version.fullVersion}) is " + - s"${conf.getVar(HiveConf.getConfVars("hive.metastore.warehouse.dir"))}") + log"Warehouse location for Hive client (version " + + log"${MDC(HIVE_CLIENT_VERSION, version.fullVersion)}) is " + + log"${MDC(PATH, conf.getVar(HiveConf.getConfVars("hive.metastore.warehouse.dir")))}") private def newState(): SessionState = { val hiveConf = newHiveConf(sparkConf, hadoopConf, extraConfig, Some(initClassLoader)) @@ -231,7 +232,7 @@ private[hive] class HiveClientImpl( caughtException = e logWarning( log"HiveClient got thrift exception, destroying client and retrying " + - log"${MDC(RETRY_COUNT, numTries)} times", e) + log"${MDC(NUM_RETRY, numTries)} times", e) clientLoader.cachedHive = null Thread.sleep(retryDelayMillis) } @@ -1339,6 +1340,15 @@ private[hive] object HiveClientImpl extends Logging { log"will be reset to 'mr' to disable useless hive logic") hiveConf.set("hive.execution.engine", "mr", SOURCE_SPARK) } + val cpType = hiveConf.get("datanucleus.connectionPoolingType") + // Bonecp might cause memory leak, it could affect some hive client versions we support + // See more details in HIVE-15551 + // Also, Bonecp is removed in Hive 4.0.0, see HIVE-23258 + // Here we use DBCP to replace bonecp instead of HikariCP as HikariCP was introduced in + // Hive 2.2.0 (see HIVE-13931) while the minium Hive we support is 2.0.0. + if ("bonecp".equalsIgnoreCase(cpType)) { + hiveConf.set("datanucleus.connectionPoolingType", "DBCP", SOURCE_SPARK) + } hiveConf } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 07daa29386282..c03fed4cc3184 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -38,7 +38,7 @@ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.serde.serdeConstants import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{CONFIG, CONFIG2, CONFIG3} +import org.apache.spark.internal.LogKeys.{CONFIG, CONFIG2, CONFIG3} import org.apache.spark.metrics.source.HiveCatalogMetrics import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow} import org.apache.spark.sql.catalyst.analysis.NoSuchPermanentFunctionException diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala index e4bab4631ab19..b0570f5d30352 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala @@ -31,7 +31,7 @@ import org.apache.hadoop.hive.shims.ShimLoader import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkSubmit import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{FALLBACK_VERSION, HADOOP_VERSION} +import org.apache.spark.internal.LogKeys.{FALLBACK_VERSION, HADOOP_VERSION, PATH} import org.apache.spark.sql.catalyst.util.quietly import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.hive.HiveUtils @@ -149,7 +149,7 @@ private[hive] object IsolatedClientLoader extends Logging { // TODO: Remove copy logic. 
val tempDir = Utils.createTempDir(namePrefix = s"hive-${version}") allFiles.foreach(f => FileUtils.copyFileToDirectory(f, tempDir)) - logInfo(s"Downloaded metastore jars to ${tempDir.getCanonicalPath}") + logInfo(log"Downloaded metastore jars to ${MDC(PATH, tempDir.getCanonicalPath)}") tempDir.listFiles().map(_.toURI.toURL).toImmutableArraySeq } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala index 564c87a0fca8e..d172af21a9170 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/package.scala @@ -59,13 +59,12 @@ package object client { "org.pentaho:pentaho-aggdesigner-algorithm")) // Since HIVE-23980, calcite-core included in Hive package jar. - case object v2_3 extends HiveVersion("2.3.9", + case object v2_3 extends HiveVersion("2.3.10", exclusions = Seq("org.apache.calcite:calcite-core", "org.apache.calcite:calcite-druid", "org.apache.calcite.avatica:avatica", - "com.fasterxml.jackson.core:*", "org.apache.curator:*", - "org.pentaho:pentaho-aggdesigner-algorithm", + "net.hydromatic:aggdesigner-algorithm", "org.apache.hive:hive-vector-code-gen")) // Since Hive 3.0, HookUtils uses org.apache.logging.log4j.util.Strings diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala index 811d186b17d26..154d07f80d898 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/CreateHiveTableAsSelectCommand.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{CTEInChildren, CTERelationDe import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.command.{DataWritingCommand, LeafRunnableCommand} +import org.apache.spark.sql.hive.execution.InsertIntoHiveTable.BY_CTAS /** * Create table and insert the query result into it. @@ -98,13 +99,15 @@ case class CreateHiveTableAsSelectCommand( tableExists: Boolean): DataWritingCommand = { // For CTAS, there is no static partition values to insert. 
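Before the command body below, a hedged sketch (not the planner code itself) of the `TreeNodeTag` round trip this patch introduces: the CTAS command marks the `InsertIntoHiveTable` node it builds, and `RelationConversions` above only converts inserts that carry the mark.

    import org.apache.spark.sql.catalyst.trees.TreeNodeTag

    object CtasTagSketch { // hypothetical container, for illustration only
      val BY_CTAS = TreeNodeTag[Unit]("by_ctas")
    }
    // At CTAS planning time:  insertHive.setTagValue(CtasTagSketch.BY_CTAS, ())
    // At conversion time:     i.getTagValue(CtasTagSketch.BY_CTAS).isDefined // true only for CTAS-created inserts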
val partition = tableDesc.partitionColumnNames.map(_ -> None).toMap - InsertIntoHiveTable( + val insertHive = InsertIntoHiveTable( tableDesc, partition, query, overwrite = false, ifPartitionNotExists = false, outputColumnNames = outputColumnNames) + insertHive.setTagValue(BY_CTAS, ()) + insertHive } override def argString(maxFields: Int): String = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala index 1b76478a5cf33..cabdddd4c475d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveFileFormat.scala @@ -32,7 +32,7 @@ import org.apache.hadoop.mapred.{JobConf, Reporter} import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CLASS_NAME +import org.apache.spark.internal.LogKeys.CLASS_NAME import org.apache.spark.internal.config.SPECULATION_ENABLED import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTempPath.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTempPath.scala index a16191b72a8d3..16edfea67e38e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTempPath.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTempPath.scala @@ -32,7 +32,7 @@ import org.apache.hadoop.hive.ql.exec.TaskRunner import org.apache.spark.SparkException import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.sql.SparkSession import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.hive.HiveExternalCatalog diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index 4a92bfd840405..cf296e8be4f14 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.SparkPlan @@ -235,6 +236,12 @@ case class InsertIntoHiveTable( } object InsertIntoHiveTable extends V1WritesHiveUtils { + + /** + * A tag to identify if this command is created by a CTAS. 
+ */ + val BY_CTAS = TreeNodeTag[Unit]("by_ctas") + def apply( table: CatalogTable, partition: Map[String, Option[String]], diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala index 3e1bdff8c007b..d588e9f5bd5c4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types.StructType @@ -59,8 +59,8 @@ private[hive] object OrcFileOperator extends Logging { reader.getObjectInspector match { case oi: StructObjectInspector if oi.getAllStructFieldRefs.size() == 0 => logInfo( - s"ORC file $path has empty schema, it probably contains no rows. " + - "Trying to read another ORC file to figure out the schema.") + log"ORC file ${MDC(PATH, path)} has empty schema, it probably contains no rows. " + + log"Trying to read another ORC file to figure out the schema.") false case _ => true } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/security/HiveDelegationTokenProvider.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/security/HiveDelegationTokenProvider.scala index 13ff721736b2c..0e357d5e39b26 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/security/HiveDelegationTokenProvider.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/security/HiveDelegationTokenProvider.scala @@ -33,7 +33,7 @@ import org.apache.hadoop.security.token.Token import org.apache.spark.SparkConf import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.CLASS_NAME +import org.apache.spark.internal.LogKeys.CLASS_NAME import org.apache.spark.internal.config.KEYTAB import org.apache.spark.security.HadoopDelegationTokenProvider import org.apache.spark.sql.hive.client.HiveClientImpl diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/Complex.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/Complex.java index 9afe53fe825ca..e94a84d6446a9 100644 --- a/sql/hive/src/test/java/org/apache/spark/sql/hive/test/Complex.java +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/test/Complex.java @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.hive.test; -import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.thrift.scheme.IScheme; import org.apache.thrift.scheme.SchemeFactory; import org.apache.thrift.scheme.StandardScheme; diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala index 89fe10d5c4bd9..d7918f8cbf4f0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/CachedTableSuite.scala @@ -335,9 +335,10 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto options = Map.empty)(sparkSession = spark) val plan = LogicalRelation(relation, tableMeta) - 
spark.sharedState.cacheManager.cacheQuery(Dataset.ofRows(spark, plan)) + val df = Dataset.ofRows(spark, plan) + spark.sharedState.cacheManager.cacheQuery(df) - assert(spark.sharedState.cacheManager.lookupCachedData(plan).isDefined) + assert(spark.sharedState.cacheManager.lookupCachedData(df).isDefined) val sameCatalog = new CatalogFileIndex(spark, tableMeta, 0) val sameRelation = HadoopFsRelation( @@ -347,9 +348,9 @@ class CachedTableSuite extends QueryTest with SQLTestUtils with TestHiveSingleto bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) - val samePlan = LogicalRelation(sameRelation, tableMeta) + val samePlanDf = Dataset.ofRows(spark, LogicalRelation(sameRelation, tableMeta)) - assert(spark.sharedState.cacheManager.lookupCachedData(samePlan).isDefined) + assert(spark.sharedState.cacheManager.lookupCachedData(samePlanDf).isDefined) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala index e413e0ee73cb9..8bb33e3383be1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogSuite.scala @@ -22,9 +22,10 @@ import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ +import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.execution.QueryExecutionException import org.apache.spark.sql.execution.command.DDLUtils -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{StringType, StructField, StructType} /** * Test suite for the [[HiveExternalCatalog]]. 
@@ -200,4 +201,44 @@ class HiveExternalCatalogSuite extends ExternalCatalogSuite { assert(alteredTable.provider === Some("foo")) }) } + + test("write collated strings as regular strings in hive - but read them back as collated") { + val catalog = newBasicCatalog() + val tableName = "collation_tbl" + val columnName = "col1" + + val collationsSchema = StructType(Seq( + StructField(columnName, StringType("UNICODE")) + )) + val noCollationsSchema = StructType(Seq( + StructField(columnName, StringType) + )) + + val tableDDL = CatalogTable( + identifier = TableIdentifier(tableName, Some("db1")), + tableType = CatalogTableType.MANAGED, + storage = storageFormat, + schema = collationsSchema, + provider = Some("hive")) + + catalog.createTable(tableDDL, ignoreIfExists = false) + + val rawTable = externalCatalog.getRawTable("db1", tableName) + assert(DataTypeUtils.sameType(rawTable.schema, noCollationsSchema)) + + val readBackTable = externalCatalog.getTable("db1", tableName) + assert(DataTypeUtils.sameType(readBackTable.schema, collationsSchema)) + + // perform alter table + val newSchema = StructType(Seq( + StructField("col1", StringType("UTF8_LCASE")) + )) + catalog.alterTableDataSchema("db1", tableName, newSchema) + + val alteredRawTable = externalCatalog.getRawTable("db1", tableName) + assert(DataTypeUtils.sameType(alteredRawTable.schema, noCollationsSchema)) + + val alteredTable = externalCatalog.getTable("db1", tableName) + assert(DataTypeUtils.sameType(alteredTable.schema, newSchema)) + } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 726341ffdf9e3..95baffdee06cb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -211,7 +211,6 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { tryDownloadSpark(version, sparkTestingDir.getCanonicalPath) } - // Extract major.minor for testing Spark 3.1.x and 3.0.x with metastore 2.3.9 and Java 11. 
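A hedged, SQL-level illustration of the behaviour the collation test above verifies: the raw Hive table stores a plain STRING column while Spark's table properties preserve the collation, so reads see the collated type again (table name hypothetical; DESCRIBE output shape not guaranteed):

    spark.sql("CREATE TABLE collated_tbl (col1 STRING COLLATE UNICODE) USING hive") // hypothetical table
    spark.sql("DESCRIBE collated_tbl").show() // expected to report the UNICODE-collated string type for col1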
val hiveMetastoreVersion = """^\d+\.\d+""".r.findFirstIn(hiveVersion).get val args = Seq( "--name", "prepare testing tables", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala index e3b649f9a9f01..d84b9f7960231 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSharedStateSuite.scala @@ -76,7 +76,7 @@ class HiveSharedStateSuite extends SparkFunSuite { assert(client.getConf("hive.metastore.warehouse.dir", "") === qualifiedWHPath, "session level conf should be passed to catalog") - assert(state.globalTempViewManager.database === tmpDb) + assert(state.globalTempDB === tmpDb) val ss2 = builder.config("spark.foo", "bar2222").config(WAREHOUSE_PATH.key, invalidPath).getOrCreate() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index c7aa412959097..e88a37f019b7d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -149,7 +149,7 @@ class HiveSparkSubmitSuite "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", - "--conf", "spark.sql.hive.metastore.version=2.3.9", + "--conf", "spark.sql.hive.metastore.version=2.3.10", "--conf", "spark.sql.hive.metastore.jars=maven", "--driver-java-options", "-Dderby.system.durability=test", unusedJar.toString) @@ -370,7 +370,7 @@ class HiveSparkSubmitSuite "--master", "local-cluster[2,1,512]", "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", s"${LEGACY_TIME_PARSER_POLICY.key}=LEGACY", - "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.9", + "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=2.3.10", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", "--conf", s"spark.hadoop.javax.jdo.option.ConnectionURL=$metastore", unusedJar.toString) @@ -387,7 +387,7 @@ object SetMetastoreURLTest extends Logging { val builder = SparkSession.builder() .config(sparkConf) .config(UI_ENABLED.key, "false") - .config(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.9") + .config(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.10") // The issue described in SPARK-16901 only appear when // spark.sql.hive.metastore.jars is not set to builtin. .config(HiveUtils.HIVE_METASTORE_JARS.key, "maven") @@ -698,7 +698,7 @@ object SparkSQLConfTest extends Logging { val filteredSettings = super.getAll.filterNot(e => isMetastoreSetting(e._1)) // Always add these two metastore settings at the beginning. 
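For reference, a hedged sketch of a session wired to the bumped metastore client version, mirroring the configuration values these suites now pass:

    import org.apache.spark.sql.SparkSession

    val session = SparkSession.builder()
      .config("spark.sql.hive.metastore.version", "2.3.10")
      .config("spark.sql.hive.metastore.jars", "maven") // jars resolved from Maven, as in the tests above
      .enableHiveSupport()
      .getOrCreate()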
- (HiveUtils.HIVE_METASTORE_VERSION.key -> "2.3.9") +: + (HiveUtils.HIVE_METASTORE_VERSION.key -> "2.3.10") +: (HiveUtils.HIVE_METASTORE_JARS.key -> "maven") +: filteredSettings } @@ -726,7 +726,7 @@ object SPARK_9757 extends QueryTest { val hiveWarehouseLocation = Utils.createTempDir() val sparkContext = new SparkContext( new SparkConf() - .set(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.9") + .set(HiveUtils.HIVE_METASTORE_VERSION.key, "2.3.10") .set(HiveUtils.HIVE_METASTORE_JARS.key, "maven") .set(UI_ENABLED, false) .set(WAREHOUSE_PATH.key, hiveWarehouseLocation.toString)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 5502414629c01..7dc7fc41dc708 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -1418,10 +1418,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto checkAnswer(df, expectedAnswer) // check correctness of output - spark.sessionState.conf.settings.synchronized { - val tmp = spark.sessionState.conf.autoBroadcastJoinThreshold - - sql(s"""SET ${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key}=-1""") + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { df = sql(query) bhj = df.queryExecution.sparkPlan.collect { case j: BroadcastHashJoinExec => j } assert(bhj.isEmpty, "BroadcastHashJoin still planned even though it is switched off") @@ -1429,10 +1426,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto val shj = df.queryExecution.sparkPlan.collect { case j: SortMergeJoinExec => j } assert(shj.size === 1, "SortMergeJoin should be planned when BroadcastHashJoin is turned off") - - sql(s"""SET ${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key}=$tmp""") } - after() } @@ -1474,10 +1468,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto checkAnswer(df, answer) // check correctness of output - spark.sessionState.conf.settings.synchronized { - val tmp = spark.sessionState.conf.autoBroadcastJoinThreshold - - sql(s"SET ${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key}=-1") + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { df = sql(leftSemiJoinQuery) bhj = df.queryExecution.sparkPlan.collect { case j: BroadcastHashJoinExec => j @@ -1489,10 +1480,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto } assert(shj.size === 1, "SortMergeJoinExec should be planned when BroadcastHashJoin is turned off") - - sql(s"SET ${SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key}=$tmp") } - } test("Deals with wrong Hive's statistics (zero rowCount)") { @@ -1615,7 +1603,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto Seq(tbl, ext_tbl).foreach { tblName => sql(s"INSERT INTO $tblName VALUES (1, 'a', '2019-12-13')") - val expectedSize = 657 + val expectedSize = 690 // analyze table sql(s"ANALYZE TABLE $tblName COMPUTE STATISTICS NOSCAN") var tableStats = getTableStats(tblName) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala index 0bc288501a01e..b60adfb6f4cf1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -686,6 +686,7 @@ class HiveClientSuite(version: String) 
extends HiveVersionSuite(version) { versionSpark.sql( s""" |CREATE TABLE tab(c1 string) + |USING HIVE |location '${tmpDir.toURI.toString}' """.stripMargin) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala index e0d5236e1e019..e43308f62a496 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuites.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.hive.client import java.net.URI -import scala.collection.immutable.IndexedSeq - import org.apache.hadoop.conf.Configuration import org.scalatest.Suite diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuites.scala index e076c01c08980..dcf14855a5883 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientUserNameSuites.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive.client -import scala.collection.immutable.IndexedSeq - import org.scalatest.Suite class HiveClientUserNameSuites extends Suite with HiveClientVersions { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala index 1dee9e6dcfc83..0bc6702079bdb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientVersions.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive.client -import scala.collection.immutable.IndexedSeq - private[client] trait HiveClientVersions { private val testVersions = sys.env.get("SPARK_TEST_HIVE_CLIENT_VERSIONS") protected val versions = if (testVersions.nonEmpty) { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuites.scala index f10e63865423d..6e526bdc6f168 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuites.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuites.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.hive.client -import scala.collection.immutable.IndexedSeq - import org.scalatest.Suite class HivePartitionFilteringSuites extends Suite with HiveClientVersions { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 241fdd4b9ec5a..65b70ad8bcaeb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -154,10 +154,6 @@ class HiveDDLSuite fs.exists(filesystemPath) } - test("alter table: set properties") { - testSetProperties(isDatasourceTable = false) - } - test("alter table: unset properties") { testUnsetProperties(isDatasourceTable = false) } @@ -216,7 +212,7 @@ class HiveDDLSuite test("SPARK-22431: alter table tests with nested types") { withTable("t1", "t2", "t3") { - spark.sql("CREATE TABLE t1 (q STRUCT, i1 INT)") + spark.sql("CREATE TABLE t1 (q STRUCT, i1 INT) 
USING HIVE") spark.sql("ALTER TABLE t1 ADD COLUMNS (newcol1 STRUCT<`col1`:STRING, col2:Int>)") val newcol = spark.sql("SELECT * FROM t1").schema.fields(2).name assert("newcol1".equals(newcol)) @@ -2614,7 +2610,7 @@ class HiveDDLSuite "msg" -> "java.lang.UnsupportedOperationException: Unknown field type: void") ) - sql("CREATE TABLE t3 AS SELECT NULL AS null_col") + sql("CREATE TABLE t3 USING HIVE AS SELECT NULL AS null_col") checkAnswer(sql("SELECT * FROM t3"), Row(null)) } @@ -2642,9 +2638,6 @@ class HiveDDLSuite sql("CREATE TABLE t3 (v VOID) USING hive") checkAnswer(sql("SELECT * FROM t3"), Seq.empty) - - sql("CREATE TABLE t4 (v VOID)") - checkAnswer(sql("SELECT * FROM t4"), Seq.empty) } // Create table with void type using spark.catalog.createTable @@ -3324,7 +3317,7 @@ class HiveDDLSuite | INTERVAL '1-1' YEAR TO MONTH AS YM, | INTERVAL '1 02:03:04.123456' DAY TO SECOND AS DT |""".stripMargin, - s"CREATE TABLE $tbl (dt INTERVAL HOUR TO MINUTE)" + s"CREATE TABLE $tbl (dt INTERVAL HOUR TO MINUTE) USING HIVE" ).foreach { sqlCmd => checkError( exception = intercept[SparkUnsupportedOperationException] { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 86e6b01cb6cae..4d23ac0639b3e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -369,7 +369,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("SPARK-7270: consider dynamic partition when comparing table output") { withTable("test_partition", "ptest") { - sql(s"CREATE TABLE test_partition (a STRING) PARTITIONED BY (b BIGINT, c STRING)") + sql(s"CREATE TABLE test_partition (a STRING) USING HIVE PARTITIONED BY (b BIGINT, c STRING)") sql(s"CREATE TABLE ptest (a STRING, b BIGINT, c STRING)") val analyzedPlan = sql( @@ -804,7 +804,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd } test("ADD JAR command") { - sql("CREATE TABLE alter1(a INT, b INT)") + sql("CREATE TABLE alter1(a INT, b INT) USING HIVE") checkError( exception = intercept[AnalysisException] { sql( @@ -1208,50 +1208,52 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd } test("SPARK-5592: get java.net.URISyntaxException when dynamic partitioning") { - sql(""" - |create table sc as select * - |from (select '2011-01-11', '2011-01-11+14:18:26' from src tablesample (1 rows) - |union all - |select '2011-01-11', '2011-01-11+15:18:26' from src tablesample (1 rows) - |union all - |select '2011-01-11', '2011-01-11+16:18:26' from src tablesample (1 rows) ) s + withSQLConf("hive.exec.dynamic.partition" -> "true", + "hive.exec.dynamic.partition.mode" -> "nonstrict") { + sql( + """ + |create table sc as select * + |from (select '2011-01-11', '2011-01-11+14:18:26' from src tablesample (1 rows) + |union all + |select '2011-01-11', '2011-01-11+15:18:26' from src tablesample (1 rows) + |union all + |select '2011-01-11', '2011-01-11+16:18:26' from src tablesample (1 rows) ) s """.stripMargin) - sql("create table sc_part (key string) partitioned by (ts string) stored as rcfile") - sql("set hive.exec.dynamic.partition=true") - sql("set hive.exec.dynamic.partition.mode=nonstrict") - sql("insert overwrite table sc_part partition(ts) select * from sc") - sql("drop table sc_part") + sql("create table sc_part (key string) partitioned by (ts 
string) stored as rcfile") + sql("insert overwrite table sc_part partition(ts) select * from sc") + sql("drop table sc_part") + } } test("Partition spec validation") { - sql("DROP TABLE IF EXISTS dp_test") - sql("CREATE TABLE dp_test(key INT, value STRING) PARTITIONED BY (dp INT, sp INT)") - sql("SET hive.exec.dynamic.partition.mode=strict") - - // Should throw when using strict dynamic partition mode without any static partition - checkError( - exception = intercept[AnalysisException] { - sql( - """INSERT INTO TABLE dp_test PARTITION(dp) - |SELECT key, value, key % 5 FROM src""".stripMargin) - }, - errorClass = "INSERT_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS", - parameters = Map( - "tableName" -> "`spark_catalog`.`default`.`dp_test`", - "tableColumns" -> "`key`, `value`, `dp`, `sp`", - "dataColumns" -> "`key`, `value`, `(key % 5)`")) - - sql("SET hive.exec.dynamic.partition.mode=nonstrict") - - // Should throw when a static partition appears after a dynamic partition - checkError( - exception = intercept[AnalysisException] { - sql( - """INSERT INTO TABLE dp_test PARTITION(dp, sp = 1) - |SELECT key, value, key % 5 FROM src""".stripMargin) - }, - errorClass = "_LEGACY_ERROR_TEMP_3079", - parameters = Map.empty) + withTable("dp_test") { + sql("CREATE TABLE dp_test(key INT, value STRING) USING HIVE PARTITIONED BY (dp INT, sp INT)") + withSQLConf("hive.exec.dynamic.partition.mode" -> "strict") { + // Should throw when using strict dynamic partition mode without any static partition + checkError( + exception = intercept[AnalysisException] { + sql( + """INSERT INTO TABLE dp_test PARTITION(dp) + |SELECT key, value, key % 5 FROM src""".stripMargin) + }, + errorClass = "INSERT_COLUMN_ARITY_MISMATCH.NOT_ENOUGH_DATA_COLUMNS", + parameters = Map( + "tableName" -> "`spark_catalog`.`default`.`dp_test`", + "tableColumns" -> "`key`, `value`, `dp`, `sp`", + "dataColumns" -> "`key`, `value`, `(key % 5)`")) + } + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + // Should throw when a static partition appears after a dynamic partition + checkError( + exception = intercept[AnalysisException] { + sql( + """INSERT INTO TABLE dp_test PARTITION(dp, sp = 1) + |SELECT key, value, key % 5 FROM src""".stripMargin) + }, + errorClass = "_LEGACY_ERROR_TEMP_3079", + parameters = Map.empty) + } + } } test("SPARK-3414 regression: should store analyzed logical plan when creating a temporary view") { @@ -1292,21 +1294,22 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd } test("SPARK-3810: PreprocessTableInsertion dynamic partitioning support") { - val analyzedPlan = { - loadTestTable("srcpart") - sql("DROP TABLE IF EXISTS withparts") - sql("CREATE TABLE withparts LIKE srcpart") - sql("SET hive.exec.dynamic.partition.mode=nonstrict") - - sql("CREATE TABLE IF NOT EXISTS withparts LIKE srcpart") - sql("INSERT INTO TABLE withparts PARTITION(ds, hr) SELECT key, value, '1', '2' FROM src") - .queryExecution.analyzed - } + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + val analyzedPlan = { + loadTestTable("srcpart") + sql("DROP TABLE IF EXISTS withparts") + sql("CREATE TABLE withparts LIKE srcpart") + + sql("CREATE TABLE IF NOT EXISTS withparts LIKE srcpart") + sql("INSERT INTO TABLE withparts PARTITION(ds, hr) SELECT key, value, '1', '2' FROM src") + .queryExecution.analyzed + } - assertResult(2, "Duplicated project detected\n" + analyzedPlan) { - analyzedPlan.collect { - case i: InsertIntoHiveTable => i.query.collect { case p: Project => () }.size - 
}.sum + assertResult(2, "Duplicated project detected\n" + analyzedPlan) { + analyzedPlan.collect { + case i: InsertIntoHiveTable => i.query.collect { case p: Project => () }.size + }.sum + } } } @@ -1627,10 +1630,8 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("SPARK-33084: Add jar support Ivy URI in SQL") { val testData = TestHive.getHiveFile("data/files/sample.json").toURI withTable("t") { - // hive-catalog-core has some transitive dependencies which dont exist on maven central - // and hence cannot be found in the test environment or are non-jar (.pom) which cause - // failures in tests. Use transitive=false as it should be good enough to test the Ivy - // support in Hive ADD JAR + // Use transitive=false as it should be good enough to test the Ivy support + // in Hive ADD JAR sql(s"ADD JAR ivy://org.apache.hive.hcatalog:hive-hcatalog-core:$hiveVersion" + "?transitive=false") sql( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala index daf99886ff6d4..0c704d845a3bf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveScriptTransformationSuite.scala @@ -391,7 +391,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T ioschema = hiveIOSchema) SparkPlanTest.executePlan(plan, hiveContext) }.getMessage - assert(e1.contains("interval cannot be converted to Hive TypeInfo")) + assert(e1.contains("\"INTERVAL\" cannot be converted to Hive TypeInfo")) val e2 = intercept[AnalysisException] { val plan = createScriptTransformationExec( @@ -403,7 +403,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T ioschema = hiveIOSchema) SparkPlanTest.executePlan(plan, hiveContext) }.getMessage - assert(e2.contains("array cannot be converted to Hive TypeInfo")) + assert(e2.contains("UDT(\"ARRAY\") cannot be converted to Hive TypeInfo")) } } @@ -416,7 +416,6 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T (1, new CalendarInterval(7, 1, 1000), new TestUDT.MyDenseVector(Array(1, 2, 3))) ).toDF("a", "b", "c") df.createTempView("v") - val e1 = intercept[AnalysisException] { sql( """ @@ -424,7 +423,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T |FROM v """.stripMargin).collect() }.getMessage - assert(e1.contains("interval cannot be converted to Hive TypeInfo")) + assert(e1.contains("\"INTERVAL\" cannot be converted to Hive TypeInfo")) val e2 = intercept[AnalysisException] { sql( @@ -433,7 +432,7 @@ class HiveScriptTransformationSuite extends BaseScriptTransformationSuite with T |FROM v """.stripMargin).collect() }.getMessage - assert(e2.contains("array cannot be converted to Hive TypeInfo")) + assert(e2.contains("UDT(\"ARRAY\") cannot be converted to Hive TypeInfo")) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala index 3cf8d5eadb5b1..8280b9624fa2f 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala @@ -262,15 +262,17 @@ class HiveTableScanSuite extends HiveComparisonTest with 
SQLTestUtils with TestH sql("INSERT INTO t VALUES(1)") val dir = new File(f.getCanonicalPath + "/data") dir.mkdir() - sql("set mapreduce.input.fileinputformat.input.dir.recursive=true") - assert(sql("select * from t").collect().head.getLong(0) == 1) - sql("set mapreduce.input.fileinputformat.input.dir.recursive=false") - val e = intercept[IOException] { - sql("SELECT * FROM t").collect() + withSQLConf("mapreduce.input.fileinputformat.input.dir.recursive" -> "true") { + assert(sql("select * from t").collect().head.getLong(0) == 1) + } + withSQLConf("mapreduce.input.fileinputformat.input.dir.recursive" -> "false") { + val e = intercept[IOException] { + sql("SELECT * FROM t").collect() + } + assert(e.getMessage.contains(s"Path: ${dir.getAbsoluteFile} is a directory, " + + s"which is not supported by the record reader " + + s"when `mapreduce.input.fileinputformat.input.dir.recursive` is false.")) } - assert(e.getMessage.contains(s"Path: ${dir.getAbsoluteFile} is a directory, " + - s"which is not supported by the record reader " + - s"when `mapreduce.input.fileinputformat.input.dir.recursive` is false.")) dir.delete() } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 0bcac639443cd..05b73e31d1156 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -178,24 +178,24 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi |PARTITIONED BY (state STRING, month INT) |STORED AS PARQUET """.stripMargin) + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + sql("INSERT INTO TABLE orders PARTITION(state, month) SELECT * FROM orders1") + sql("INSERT INTO TABLE orderupdates PARTITION(state, month) SELECT * FROM orderupdates1") - sql("set hive.exec.dynamic.partition.mode=nonstrict") - sql("INSERT INTO TABLE orders PARTITION(state, month) SELECT * FROM orders1") - sql("INSERT INTO TABLE orderupdates PARTITION(state, month) SELECT * FROM orderupdates1") - - checkAnswer( - sql( - """ - |select orders.state, orders.month - |from orders - |join ( - | select distinct orders.state,orders.month - | from orders - | join orderupdates - | on orderupdates.id = orders.id) ao - | on ao.state = orders.state and ao.month = orders.month + checkAnswer( + sql( + """ + |select orders.state, orders.month + |from orders + |join ( + | select distinct orders.state,orders.month + | from orders + | join orderupdates + | on orderupdates.id = orders.id) ao + | on ao.state = orders.state and ao.month = orders.month """.stripMargin), - (1 to 6).map(_ => Row("CA", 20151))) + (1 to 6).map(_ => Row("CA", 20151))) + } } } } @@ -715,21 +715,23 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("command substitution") { - sql("set tbl=src") - checkAnswer( - sql("SELECT key FROM ${hiveconf:tbl} ORDER BY key, value limit 1"), - sql("SELECT key FROM src ORDER BY key, value limit 1").collect().toSeq) + withSQLConf("tbl" -> "src") { + checkAnswer( + sql("SELECT key FROM ${hiveconf:tbl} ORDER BY key, value limit 1"), + sql("SELECT key FROM src ORDER BY key, value limit 1").collect().toSeq) + } - sql("set spark.sql.variable.substitute=false") // disable the substitution - sql("set tbl2=src") - intercept[Exception] { - sql("SELECT key FROM ${hiveconf:tbl2} ORDER BY key, value limit 1").collect() + 
withSQLConf("tbl2" -> "src", "spark.sql.variable.substitute" -> "false") { + intercept[Exception] { + sql("SELECT key FROM ${hiveconf:tbl2} ORDER BY key, value limit 1").collect() + } } - sql("set spark.sql.variable.substitute=true") // enable the substitution - checkAnswer( - sql("SELECT key FROM ${hiveconf:tbl2} ORDER BY key, value limit 1"), - sql("SELECT key FROM src ORDER BY key, value limit 1").collect().toSeq) + withSQLConf("tbl2" -> "src", "spark.sql.variable.substitute" -> "true") { + checkAnswer( + sql("SELECT key FROM ${hiveconf:tbl2} ORDER BY key, value limit 1"), + sql("SELECT key FROM src ORDER BY key, value limit 1").collect().toSeq) + } } test("ordering not in select") { @@ -1108,35 +1110,30 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("dynamic partition value test") { - try { - sql("set hive.exec.dynamic.partition.mode=nonstrict") - // date - sql("drop table if exists dynparttest1") - sql("create table dynparttest1 (value int) partitioned by (pdate date)") - sql( - """ - |insert into table dynparttest1 partition(pdate) - | select count(*), cast('2015-05-21' as date) as pdate from src - """.stripMargin) - checkAnswer( - sql("select * from dynparttest1"), - Seq(Row(500, java.sql.Date.valueOf("2015-05-21")))) + withTable("dynparttest1", "dynparttest2") { + withSQLConf("hive.exec.dynamic.partition.mode" -> "nonstrict") { + // date + sql("create table dynparttest1 (value int) partitioned by (pdate date)") + sql( + """ + |insert into table dynparttest1 partition(pdate) + | select count(*), cast('2015-05-21' as date) as pdate from src + """.stripMargin) + checkAnswer( + sql("select * from dynparttest1"), + Seq(Row(500, java.sql.Date.valueOf("2015-05-21")))) - // decimal - sql("drop table if exists dynparttest2") - sql("create table dynparttest2 (value int) partitioned by (pdec decimal(5, 1))") - sql( - """ - |insert into table dynparttest2 partition(pdec) - | select count(*), cast('100.12' as decimal(5, 1)) as pdec from src - """.stripMargin) - checkAnswer( - sql("select * from dynparttest2"), - Seq(Row(500, new java.math.BigDecimal("100.1")))) - } finally { - sql("drop table if exists dynparttest1") - sql("drop table if exists dynparttest2") - sql("set hive.exec.dynamic.partition.mode=strict") + // decimal + sql("create table dynparttest2 (value int) partitioned by (pdec decimal(5, 1))") + sql( + """ + |insert into table dynparttest2 partition(pdec) + | select count(*), cast('100.12' as decimal(5, 1)) as pdec from src + """.stripMargin) + checkAnswer( + sql("select * from dynparttest2"), + Seq(Row(500, new java.math.BigDecimal("100.1")))) + } } } @@ -1911,14 +1908,14 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } test("SPARK-17354: Partitioning by dates/timestamps works with Parquet vectorized reader") { - withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", + "hive.exec.dynamic.partition.mode" -> "nonstrict") { sql( """CREATE TABLE order(id INT) |PARTITIONED BY (pd DATE, pt TIMESTAMP) |STORED AS PARQUET """.stripMargin) - sql("set hive.exec.dynamic.partition.mode=nonstrict") sql( """INSERT INTO TABLE order PARTITION(pd, pt) |SELECT 1 AS id, CAST('1990-02-24' AS DATE) AS pd, CAST('1990-02-24' AS TIMESTAMP) AS pt diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterNamespaceUnsetPropertiesSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterNamespaceUnsetPropertiesSuite.scala new file mode 100644 index 0000000000000..22d833649fc6c --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterNamespaceUnsetPropertiesSuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 + +/** + * The class contains tests for the `ALTER NAMESPACE ... UNSET PROPERTIES` command to check + * V1 Hive external table catalog. + */ +class AlterNamespaceUnsetPropertiesSuite extends v1.AlterNamespaceUnsetPropertiesSuiteBase + with CommandSuiteBase { + override def commandVersion: String = super[AlterNamespaceUnsetPropertiesSuiteBase].commandVersion +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableSetTblPropertiesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableSetTblPropertiesSuite.scala new file mode 100644 index 0000000000000..3926db41b7eb8 --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableSetTblPropertiesSuite.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.execution.command + +import org.apache.spark.sql.execution.command.v1 + +/** + * The class contains tests for the `ALTER TABLE .. SET TBLPROPERTIES` command to check + * V1 Hive external table catalog. 
+ */ +class AlterTableSetTblPropertiesSuite + extends v1.AlterTableSetTblPropertiesSuiteBase with CommandSuiteBase diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala index 3098015dc7da8..3dc73e1161523 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowCreateTableSuite.scala @@ -43,6 +43,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite | c1 INT COMMENT 'bla', | c2 STRING |) + |USING HIVE |TBLPROPERTIES ( | 'prop1' = 'value1', | 'prop2' = 'value2' @@ -67,6 +68,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite | c1 INT COMMENT 'bla', | c2 STRING |) + |USING HIVE |LOCATION '${dir.toURI}' |TBLPROPERTIES ( | 'prop1' = 'value1', @@ -94,6 +96,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite | c1 INT COMMENT 'bla', | c2 STRING |) + |USING HIVE |COMMENT 'bla' |PARTITIONED BY ( | p1 BIGINT COMMENT 'bla', @@ -193,6 +196,7 @@ class ShowCreateTableSuite extends v1.ShowCreateTableSuiteBase with CommandSuite withNamespaceAndTable(ns, table) { t => sql( s"""CREATE TABLE $t (a INT, b STRING) + |STORED AS TEXTFILE |CLUSTERED BY (a) |SORTED BY (b) |INTO 2 BUCKETS diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala index 79b1eb6c0961a..9ee3a0277c9a1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/ShowTablesSuite.scala @@ -97,6 +97,7 @@ class ShowTablesSuite extends v1.ShowTablesSuiteBase with CommandSuiteBase { |Type: VIEW |View Text: SELECT id FROM $catalog.$namespace.$table |View Original Text: SELECT id FROM $catalog.$namespace.$table + |View Schema Mode: COMPENSATION |View Catalog and Namespace: $catalog.$namespace |View Query Output Columns: [id] |Table Properties: diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index e52d9b639dc4f..284717739a814 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -207,10 +207,7 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } - // SPARK-28885 String value is not allowed to be stored as numeric type with - // ANSI store assignment policy. - // TODO: re-enable the test case when SPARK-29462 is fixed. 
- ignore("SPARK-23340 Empty float/double array columns raise EOFException") { + test("SPARK-23340 Empty float/double array columns raise EOFException") { withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "false") { withTable("spark_23340") { sql("CREATE TABLE spark_23340(a array, b array) STORED AS ORC") @@ -284,6 +281,43 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { } } + test("SPARK-47850 ORC conversation could be applied for unpartitioned table insertion") { + withTempView("single") { + val singleRowDF = Seq((0, "foo")).toDF("key", "value") + singleRowDF.createOrReplaceTempView("single") + Seq("true", "false").foreach { conversion => + withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true", + HiveUtils.CONVERT_INSERTING_UNPARTITIONED_TABLE.key -> conversion) { + withTable("dummy_orc_unpartitioned") { + spark.sql( + s""" + |CREATE TABLE dummy_orc_unpartitioned(key INT, value STRING) + |STORED AS ORC + """.stripMargin) + + spark.sql( + s""" + |INSERT INTO TABLE dummy_orc_unpartitioned + |SELECT key, value FROM single + """.stripMargin) + + val orcUnpartitionedTable = TableIdentifier("dummy_orc_unpartitioned", Some("default")) + if (conversion == "true") { + // if converted, we refresh the cached relation. + assert(getCachedDataSourceTable(orcUnpartitionedTable) === null) + } else { + // otherwise, not cached. + assert(getCachedDataSourceTable(orcUnpartitionedTable) === null) + } + + val df = spark.sql("SELECT key, value FROM dummy_orc_unpartitioned WHERE key=0") + checkAnswer(df, singleRowDF) + } + } + } + } + } + test("SPARK-32234 read ORC table with column names all starting with '_col'") { Seq("native", "hive").foreach { orcImpl => Seq("false", "true").foreach { vectorized => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 870e71e17cda0..bac48f6c0c018 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -143,7 +143,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { errorClass = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", parameters = Map( "columnName" -> "`testType()`", - "columnType" -> "\"INTERVAL\"", + "columnType" -> "UDT(\"INTERVAL\")", "format" -> "ORC") ) @@ -170,7 +170,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { errorClass = "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE", parameters = Map( "columnName" -> "`a`", - "columnType" -> "\"INTERVAL\"", + "columnType" -> "UDT(\"INTERVAL\")", "format" -> "ORC") ) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala index a1095ce58a061..0334e9c441610 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcReadBenchmark.scala @@ -79,6 +79,12 @@ object OrcReadBenchmark extends SqlBasedBenchmark { spark.read.format(HIVE_ORC_FORMAT).load(dirORC).createOrReplaceTempView("hiveOrcTable") } + private def getExpr(dataType: DataType = IntegerType): String = dataType match { + case ByteType => "cast(value % 128 as byte)" + case ShortType => "cast(value % 32768 as short)" + case _ => s"cast(value % ${Int.MaxValue} as ${dataType.sql})" + } + def numericScanBenchmark(values: Int, dataType: DataType): Unit = { val 
benchmark = new Benchmark(s"SQL Single ${dataType.sql} Column Scan", values, output = output) @@ -87,7 +93,7 @@ object OrcReadBenchmark extends SqlBasedBenchmark { import spark.implicits._ spark.range(values).map(_ => Random.nextLong()).createOrReplaceTempView("t1") - prepareTable(dir, spark.sql(s"SELECT CAST(value as ${dataType.sql}) id FROM t1")) + prepareTable(dir, spark.sql(s"SELECT ${getExpr(dataType)} id FROM t1")) benchmark.addCase("Hive built-in ORC") { _ => spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() @@ -118,7 +124,7 @@ object OrcReadBenchmark extends SqlBasedBenchmark { prepareTable( dir, - spark.sql("SELECT CAST(value AS INT) AS c1, CAST(value as STRING) AS c2 FROM t1")) + spark.sql(s"SELECT ${getExpr()} AS c1, CAST(value as STRING) AS c2 FROM t1")) benchmark.addCase("Hive built-in ORC") { _ => spark.sql("SELECT sum(c1), sum(length(c2)) FROM hiveOrcTable").noop() @@ -147,7 +153,8 @@ object OrcReadBenchmark extends SqlBasedBenchmark { import spark.implicits._ spark.range(values).map(_ => Random.nextLong()).createOrReplaceTempView("t1") - prepareTable(dir, spark.sql("SELECT value % 2 AS p, value AS id FROM t1"), Some("p")) + prepareTable(dir, + spark.sql(s"SELECT value % 2 AS p, ${getExpr()} AS id FROM t1"), Some("p")) benchmark.addCase("Data column - Hive built-in ORC") { _ => spark.sql("SELECT sum(id) FROM hiveOrcTable").noop() @@ -268,7 +275,7 @@ object OrcReadBenchmark extends SqlBasedBenchmark { withTempTable("t1", "nativeOrcTable", "hiveOrcTable") { import spark.implicits._ val middle = width / 2 - val selectExpr = (1 to width).map(i => s"value as c$i") + val selectExpr = (1 to width).map(i => s"${getExpr()} as c$i") spark.range(values).map(_ => Random.nextLong()).toDF() .selectExpr(selectExpr: _*).createOrReplaceTempView("t1") diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index 53d897af5beff..f09f9caf129bd 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -28,7 +28,8 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{BACKUP_FILE, CHECKPOINT_FILE, CHECKPOINT_TIME, NUM_RETRY, PATH, TEMP_FILE} import org.apache.spark.internal.config.UI._ import org.apache.spark.io.CompressionCodec import org.apache.spark.streaming.scheduler.JobGenerator @@ -85,7 +86,7 @@ class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) } // Add Yarn proxy filter specific configurations to the recovered SparkConf - val filter = "org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter" + val filter = "org.apache.spark.deploy.yarn.AmIpFilter" val filterPrefix = s"spark.$filter.param." 
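Looking back at the OrcReadBenchmark change a few hunks up: getExpr applies a modulo before casting, so that ByteType and ShortType benchmark columns receive in-range values instead of overflowing casts of Random.nextLong(). A self-contained illustration of the same idea; the local session below is assumed for the sketch and is not part of the patch:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").appName("bounded-cast-sketch").getOrCreate()
spark.range(0, 5).createOrReplaceTempView("t1")

// Casting a large BIGINT straight to TINYINT overflows (an error under ANSI mode, a wrap
// otherwise); taking the value modulo 128 first keeps every row inside the byte range,
// which is what getExpr does for ByteType (and modulo 32768 for ShortType).
spark.sql("SELECT CAST(id * 1000000 % 128 AS byte) AS b FROM t1").show()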
newReloadConf.getAll.foreach { case (k, v) => if (k.startsWith(filterPrefix) && k.length > filterPrefix.length) { @@ -101,7 +102,7 @@ class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) assert(framework != null, "Checkpoint.framework is null") assert(graph != null, "Checkpoint.graph is null") assert(checkpointTime != null, "Checkpoint.checkpointTime is null") - logInfo(s"Checkpoint for time $checkpointTime validated") + logInfo(log"Checkpoint for time ${MDC(LogKeys.CHECKPOINT_TIME, checkpointTime)} validated") } } @@ -141,12 +142,12 @@ object Checkpoint extends Logging { val filtered = paths.filter(p => REGEX.findFirstIn(p.getName).nonEmpty) filtered.sortWith(sortFunc).toImmutableArraySeq } else { - logWarning(s"Listing $path returned null") + logWarning(log"Listing ${MDC(PATH, path)} returned null") Seq.empty } } catch { case _: FileNotFoundException => - logWarning(s"Checkpoint directory $path does not exist") + logWarning(log"Checkpoint directory ${MDC(PATH, path)} does not exist") Seq.empty } } @@ -241,7 +242,8 @@ class CheckpointWriter( while (attempts < MAX_ATTEMPTS && !stopped) { attempts += 1 try { - logInfo(s"Saving checkpoint for time $checkpointTime to file '$checkpointFile'") + logInfo(log"Saving checkpoint for time ${MDC(LogKeys.CHECKPOINT_TIME, checkpointTime)} " + + log"to file '${MDC(LogKeys.CHECKPOINT_FILE, checkpointFile)}'") if (fs == null) { fs = new Path(checkpointDir).getFileSystem(hadoopConf) } @@ -259,38 +261,46 @@ class CheckpointWriter( if (fs.exists(checkpointFile)) { fs.delete(backupFile, true) // just in case it exists if (!fs.rename(checkpointFile, backupFile)) { - logWarning(s"Could not rename $checkpointFile to $backupFile") + logWarning(log"Could not rename ${MDC(CHECKPOINT_FILE, checkpointFile)} to " + + log"${MDC(BACKUP_FILE, backupFile)}") } } // Rename temp file to the final checkpoint file if (!fs.rename(tempFile, checkpointFile)) { - logWarning(s"Could not rename $tempFile to $checkpointFile") + logWarning(log"Could not rename ${MDC(TEMP_FILE, tempFile)} to " + + log"${MDC(CHECKPOINT_FILE, checkpointFile)}") } // Delete old checkpoint files val allCheckpointFiles = Checkpoint.getCheckpointFiles(checkpointDir, Some(fs)) if (allCheckpointFiles.size > 10) { allCheckpointFiles.take(allCheckpointFiles.size - 10).foreach { file => - logInfo(s"Deleting $file") + logInfo(log"Deleting ${MDC(LogKeys.FILE_NAME, file)}") fs.delete(file, true) } } // All done, print success - logInfo(s"Checkpoint for time $checkpointTime saved to file '$checkpointFile'" + - s", took ${bytes.length} bytes and " + - s"${TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs)} ms") + logInfo( + log"Checkpoint for time ${MDC(LogKeys.CHECKPOINT_TIME, checkpointTime)} " + + log"saved to file " + + log"'${MDC(LogKeys.CHECKPOINT_FILE, checkpointFile)}', took " + + log"${MDC(LogKeys.BYTE_SIZE, bytes.length)} bytes and " + + log"${MDC(LogKeys.TIME, TimeUnit.NANOSECONDS.toMillis(System.nanoTime() + - startTimeNs))} ms") jobGenerator.onCheckpointCompletion(checkpointTime, clearCheckpointDataLater) return } catch { case ioe: IOException => - val msg = s"Error in attempt $attempts of writing checkpoint to '$checkpointFile'" + val msg = log"Error in attempt ${MDC(NUM_RETRY, attempts)} of writing checkpoint " + + log"to '${MDC(CHECKPOINT_FILE, checkpointFile)}'" logWarning(msg, ioe) fs = null } } - logWarning(s"Could not write checkpoint for time $checkpointTime to file '$checkpointFile'") + logWarning(log"Could not write checkpoint for time ${MDC(CHECKPOINT_TIME, 
checkpointTime)} " + + log"to file '${MDC(CHECKPOINT_FILE, checkpointFile)}'") } } @@ -299,7 +309,8 @@ class CheckpointWriter( val bytes = Checkpoint.serialize(checkpoint, conf) executor.execute(new CheckpointWriteHandler( checkpoint.checkpointTime, bytes, clearCheckpointDataLater)) - logInfo(s"Submitted checkpoint of time ${checkpoint.checkpointTime} to writer queue") + logInfo(log"Submitted checkpoint of time ${MDC(LogKeys.CHECKPOINT_TIME, + checkpoint.checkpointTime)} to writer queue") } catch { case rej: RejectedExecutionException => logError("Could not submit checkpoint task to the thread pool executor", rej) @@ -311,8 +322,10 @@ class CheckpointWriter( val startTimeNs = System.nanoTime() ThreadUtils.shutdown(executor, FiniteDuration(10, TimeUnit.SECONDS)) - logInfo(s"CheckpointWriter executor terminated? ${executor.isTerminated}," + - s" waited for ${TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startTimeNs)} ms.") + logInfo(log"CheckpointWriter executor terminated? " + + log"${MDC(LogKeys.EXECUTOR_STATE, executor.isTerminated)}, waited for " + + log"${MDC(LogKeys.DURATION, TimeUnit.NANOSECONDS.toMillis( + System.nanoTime() - startTimeNs))} ms.") stopped = true } } @@ -352,20 +365,22 @@ object CheckpointReader extends Logging { } // Try to read the checkpoint files in the order - logInfo(s"Checkpoint files found: ${checkpointFiles.mkString(",")}") + logInfo(log"Checkpoint files found: " + + log"${MDC(LogKeys.CHECKPOINT_FILE, checkpointFiles.mkString(","))}") var readError: Exception = null checkpointFiles.foreach { file => - logInfo(s"Attempting to load checkpoint from file $file") + logInfo(log"Attempting to load checkpoint from file ${MDC(LogKeys.FILE_NAME, file)}") try { val fis = fs.open(file) val cp = Checkpoint.deserialize(fis, conf) - logInfo(s"Checkpoint successfully loaded from file $file") - logInfo(s"Checkpoint was generated at time ${cp.checkpointTime}") + logInfo(log"Checkpoint successfully loaded from file ${MDC(LogKeys.FILE_NAME, file)}") + logInfo(log"Checkpoint was generated at time " + + log"${MDC(LogKeys.CHECKPOINT_TIME, cp.checkpointTime)}") return Some(cp) } catch { case e: Exception => readError = e - logWarning(s"Error reading checkpoint from file $file", e) + logWarning(log"Error reading checkpoint from file ${MDC(PATH, file)}", e) } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 30bd30329283b..94b695e6452e5 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -36,7 +36,7 @@ import org.apache.spark._ import org.apache.spark.annotation.DeveloperApi import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.input.FixedLengthBinaryInputFormat -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.rdd.{RDD, RDDOperationScope} import org.apache.spark.scheduler.LiveListenerBus import org.apache.spark.serializer.SerializationDebugger @@ -725,7 +725,8 @@ class StreamingContext private[streaming] ( private def stopOnShutdown(): Unit = { val stopGracefully = conf.get(STOP_GRACEFULLY_ON_SHUTDOWN) - logInfo(s"Invoking stop(stopGracefully=$stopGracefully) from shutdown hook") + logInfo(log"Invoking stop(stopGracefully=" + + log"${MDC(LogKeys.VALUE, stopGracefully)}) from shutdown hook") // Do not stop SparkContext, let its own shutdown hook stop it 
stop(stopSparkContext = false, stopGracefully = stopGracefully) } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala index 03fffd9cd6f20..87d6a4909fdd4 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStream.scala @@ -26,7 +26,8 @@ import scala.reflect.ClassTag import scala.util.matching.Regex import org.apache.spark.{SparkContext, SparkException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{FROM_TIME, SLIDE_DURATION, TO_TIME} import org.apache.spark.internal.io.SparkHadoopWriterUtils import org.apache.spark.rdd.{BlockRDD, RDD, RDDOperationScope} import org.apache.spark.storage.StorageLevel @@ -200,7 +201,8 @@ abstract class DStream[T: ClassTag] ( // Set the checkpoint interval to be slideDuration or 10 seconds, which ever is larger if (mustCheckpoint && checkpointDuration == null) { checkpointDuration = slideDuration * math.ceil(Seconds(10) / slideDuration).toInt - logInfo(s"Checkpoint interval automatically set to $checkpointDuration") + logInfo(log"Checkpoint interval automatically set to " + + log"${MDC(LogKeys.CHECKPOINT_INTERVAL, checkpointDuration)}") } // Set the minimum value of the rememberDuration if not already set @@ -276,11 +278,11 @@ abstract class DStream[T: ClassTag] ( dependencies.foreach(_.validateAtStart()) - logInfo(s"Slide time = $slideDuration") - logInfo(s"Storage level = ${storageLevel.description}") - logInfo(s"Checkpoint interval = $checkpointDuration") - logInfo(s"Remember interval = $rememberDuration") - logInfo(s"Initialized and validated $this") + logInfo(log"Slide time = ${MDC(LogKeys.SLIDE_DURATION, slideDuration)}") + logInfo(log"Storage level = ${MDC(LogKeys.STORAGE_LEVEL, storageLevel.description)}") + logInfo(log"Checkpoint interval = ${MDC(LogKeys.CHECKPOINT_INTERVAL, checkpointDuration)}") + logInfo(log"Remember interval = ${MDC(LogKeys.INTERVAL, rememberDuration)}") + logInfo(log"Initialized and validated ${MDC(LogKeys.DSTREAM, this)}") } private[streaming] def setContext(s: StreamingContext): Unit = { @@ -288,7 +290,7 @@ abstract class DStream[T: ClassTag] ( throw new SparkException(s"Context must not be set again for $this") } ssc = s - logInfo(s"Set context for $this") + logInfo(log"Set context for ${MDC(LogKeys.STREAMING_CONTEXT, this)}") dependencies.foreach(_.setContext(ssc)) } @@ -303,7 +305,9 @@ abstract class DStream[T: ClassTag] ( private[streaming] def remember(duration: Duration): Unit = { if (duration != null && (rememberDuration == null || duration > rememberDuration)) { rememberDuration = duration - logInfo(s"Duration for remembering RDDs set to $rememberDuration for $this") + logInfo(log"Duration for remembering RDDs set to " + + log"${MDC(LogKeys.DURATION, rememberDuration)} for " + + log"${MDC(LogKeys.DSTREAM, this.toString)}") } dependencies.foreach(_.remember(parentRememberDuration)) } @@ -313,8 +317,10 @@ abstract class DStream[T: ClassTag] ( if (!isInitialized) { throw new SparkException (this.toString + " has not been initialized") } else if (time <= zeroTime || ! 
(time - zeroTime).isMultipleOf(slideDuration)) { - logInfo(s"Time $time is invalid as zeroTime is $zeroTime" + - s" , slideDuration is $slideDuration and difference is ${time - zeroTime}") + logInfo(log"Time ${MDC(LogKeys.TIME, time)} is invalid as zeroTime is " + + log"${MDC(LogKeys.ZERO_TIME, zeroTime)}, slideDuration is " + + log"${MDC(LogKeys.SLIDE_DURATION, slideDuration)} and difference is " + + log"${MDC(LogKeys.DURATION, time - zeroTime)}") false } else { logDebug(s"Time $time is valid") @@ -352,7 +358,8 @@ abstract class DStream[T: ClassTag] ( } if (checkpointDuration != null && (time - zeroTime).isMultipleOf(checkpointDuration)) { newRDD.checkpoint() - logInfo(s"Marking RDD ${newRDD.id} for time $time for checkpointing") + logInfo(log"Marking RDD ${MDC(LogKeys.RDD_ID, newRDD.id)} for time " + + log"${MDC(LogKeys.TIME, time)} for checkpointing") } generatedRDDs.put(time, newRDD) } @@ -460,7 +467,8 @@ abstract class DStream[T: ClassTag] ( // Explicitly remove blocks of BlockRDD rdd match { case b: BlockRDD[_] => - logInfo(s"Removing blocks of RDD $b of time $time") + logInfo(log"Removing blocks of RDD ${MDC(LogKeys.RDD_ID, b)} " + + log"of time ${MDC(LogKeys.TIME, time)}") b.removeBlocks() case _ => } @@ -884,19 +892,23 @@ abstract class DStream[T: ClassTag] ( val alignedToTime = if ((toTime - zeroTime).isMultipleOf(slideDuration)) { toTime } else { - logWarning(s"toTime ($toTime) is not a multiple of slideDuration ($slideDuration)") + logWarning(log"toTime (${MDC(TO_TIME, toTime)}) is not a multiple of slideDuration " + + log"(${MDC(SLIDE_DURATION, slideDuration)})") toTime.floor(slideDuration, zeroTime) } val alignedFromTime = if ((fromTime - zeroTime).isMultipleOf(slideDuration)) { fromTime } else { - logWarning(s"fromTime ($fromTime) is not a multiple of slideDuration ($slideDuration)") + logWarning(log"fromTime (${MDC(FROM_TIME, fromTime)}) is not a multiple of slideDuration " + + log"(${MDC(SLIDE_DURATION, slideDuration)})") fromTime.floor(slideDuration, zeroTime) } - logInfo(s"Slicing from $fromTime to $toTime" + - s" (aligned to $alignedFromTime and $alignedToTime)") + logInfo(log"Slicing from ${MDC(LogKeys.FROM_TIME, fromTime)} to " + + log"${MDC(LogKeys.TO_TIME, toTime)}" + + log" (aligned to ${MDC(LogKeys.ALIGNED_FROM_TIME, alignedFromTime)} and " + + log"${MDC(LogKeys.ALIGNED_TO_TIME, alignedToTime)})") alignedFromTime.to(alignedToTime, slideDuration).flatMap { time => if (time >= zeroTime) getOrCompute(time) else None diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala index 667edf3713d43..8894b3cdc2396 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala @@ -24,7 +24,8 @@ import scala.reflect.ClassTag import org.apache.hadoop.fs.{FileSystem, Path} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{PATH, TIME} import org.apache.spark.streaming.Time import org.apache.spark.util.Utils @@ -90,12 +91,14 @@ class DStreamCheckpointData[T: ClassTag](dstream: DStream[T]) if (fileSystem.delete(path, true)) { logInfo("Deleted checkpoint file '" + file + "' for time " + time) } else { - logWarning(s"Error deleting old checkpoint file '$file' for time $time") + logWarning(log"Error deleting old checkpoint file '${MDC(PATH, 
file)}' for time " + + log"${MDC(TIME, time)}") } timeToCheckpointFile -= time } catch { case e: Exception => - logWarning("Error deleting old checkpoint file '" + file + "' for time " + time, e) + logWarning(log"Error deleting old checkpoint file '${MDC(PATH, file)}' for time " + + log"${MDC(TIME, time)}", e) fileSystem = null } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala index e301311c922a2..d133454b832fc 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FileInputDStream.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys.{ELAPSED_TIME, PATH} import org.apache.spark.internal.MDC import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming._ @@ -205,19 +205,18 @@ class FileInputDStream[K, V, F <: NewInputFormat[K, V]]( val timeTaken = clock.getTimeMillis() - lastNewFileFindingTime logDebug(s"Finding new files took $timeTaken ms") if (timeTaken > slideDuration.milliseconds) { - logWarning( - s"Time taken to find new files $timeTaken exceeds the batch size. " + - "Consider increasing the batch size or reducing the number of " + - "files in the monitored directories." + logWarning(log"Time taken to find new files ${MDC(ELAPSED_TIME, timeTaken)} exceeds the " + + log"batch size. Consider increasing the batch size or reducing the number of files in " + + log"the monitored directories." 
) } newFiles } catch { case e: FileNotFoundException => - logWarning(s"No directory to scan: $directoryPath: $e") + logWarning(log"No directory to scan: ${MDC(PATH, directoryPath)}:", e) Array.empty case e: Exception => - logWarning(s"Error finding new files under $directoryPath", e) + logWarning(log"Error finding new files under ${MDC(PATH, directoryPath)}", e) reset() Array.empty } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala index 5a75b77659960..2deb388eb4b82 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/InputDStream.scala @@ -22,6 +22,8 @@ import java.util.Locale import scala.reflect.ClassTag import org.apache.spark.SparkContext +import org.apache.spark.internal.LogKeys.{LAST_VALID_TIME, TIME} +import org.apache.spark.internal.MDC import org.apache.spark.rdd.RDDOperationScope import org.apache.spark.streaming.{Duration, StreamingContext, Time} import org.apache.spark.streaming.scheduler.RateController @@ -91,8 +93,8 @@ abstract class InputDStream[T: ClassTag](_ssc: StreamingContext) } else { // Time is valid, but check it is more than lastValidTime if (lastValidTime != null && time < lastValidTime) { - logWarning(s"isTimeValid called with $time whereas the last valid time " + - s"is $lastValidTime") + logWarning(log"isTimeValid called with ${MDC(TIME, time)} whereas the last valid time " + + log"is ${MDC(LAST_VALID_TIME, lastValidTime)}") } lastValidTime = time true diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala index 883d56c012f61..34b079219c993 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala @@ -24,7 +24,7 @@ import java.nio.charset.StandardCharsets import scala.reflect.ClassTag import scala.util.control.NonFatal -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.receiver.Receiver @@ -56,7 +56,7 @@ class SocketReceiver[T: ClassTag]( def onStart(): Unit = { - logInfo(s"Connecting to $host:$port") + logInfo(log"Connecting to ${MDC(LogKeys.HOST, host)}:${MDC(LogKeys.PORT, port)}") try { socket = new Socket(host, port) } catch { @@ -64,7 +64,7 @@ class SocketReceiver[T: ClassTag]( restart(s"Error connecting to $host:$port", e) return } - logInfo(s"Connected to $host:$port") + logInfo(log"Connected to ${MDC(LogKeys.HOST, host)}:${MDC(LogKeys.PORT, port)}") // Start the thread that receives data over a connection new Thread("Socket Receiver") { @@ -79,7 +79,7 @@ class SocketReceiver[T: ClassTag]( if (socket != null) { socket.close() socket = null - logInfo(s"Closed socket to $host:$port") + logInfo(log"Closed socket to ${MDC(LogKeys.HOST, host)}:${MDC(LogKeys.PORT, port)}") } } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala index 9c461f0d4270e..12c6c95f7d8d3 100644 --- 
a/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/WriteAheadLogBackedBlockRDD.scala @@ -24,6 +24,7 @@ import scala.reflect.ClassTag import scala.util.control.NonFatal import org.apache.spark._ +import org.apache.spark.internal.{LogKeys, MDC} import org.apache.spark.rdd.BlockRDD import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.streaming.util._ @@ -156,8 +157,8 @@ class WriteAheadLogBackedBlockRDD[T: ClassTag]( s"Could not read data from write ahead log record ${partition.walRecordHandle}, " + s"read returned null") } - logInfo(s"Read partition data of $this from write ahead log, record handle " + - partition.walRecordHandle) + logInfo(log"Read partition data of ${MDC(LogKeys.RDD, this)} from write ahead log, " + + log"record handle ${MDC(LogKeys.WRITE_AHEAD_LOG_RECORD_HANDLE, partition.walRecordHandle)}") if (storeInBlockManager) { blockManager.putBytes(blockId, new ChunkedByteBuffer(dataRead.duplicate()), storageLevel) logDebug(s"Stored partition data of $this into block manager with level $storageLevel") diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala index d641f55fa7f6f..33995e6ad0786 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala @@ -22,7 +22,8 @@ import java.util.concurrent.{ArrayBlockingQueue, TimeUnit} import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.STATUS import org.apache.spark.storage.StreamBlockId import org.apache.spark.streaming.StreamingConf.BLOCK_INTERVAL import org.apache.spark.streaming.util.RecurringTimer @@ -140,7 +141,8 @@ private[streaming] class BlockGenerator( if (state == Active) { state = StoppedAddingData } else { - logWarning(s"Cannot stop BlockGenerator as its not in the Active state [state = $state]") + logWarning(log"Cannot stop BlockGenerator as its not in the Active state " + + log"[state = ${MDC(STATUS, state)}]") return } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala index e5b98dd714b3d..e513a75b69903 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala @@ -24,7 +24,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.{EFFECTIVE_STORAGE_LEVEL, STORAGE_LEVEL, STORAGE_LEVEL_DESERIALIZED, STORAGE_LEVEL_REPLICATION} import org.apache.spark.serializer.SerializerManager import org.apache.spark.storage._ import org.apache.spark.streaming.receiver.WriteAheadLogBasedBlockHandler._ @@ -137,20 +138,23 @@ private[streaming] class WriteAheadLogBasedBlockHandler( private val effectiveStorageLevel = { if (storageLevel.deserialized) { - logWarning(s"Storage level serialization ${storageLevel.deserialized} is 
not supported when" + - s" write ahead log is enabled, change to serialization false") + logWarning(log"Storage level serialization " + + log"${MDC(STORAGE_LEVEL_DESERIALIZED, storageLevel.deserialized)} is not " + + log"supported when write ahead log is enabled, change to serialization false") } if (storageLevel.replication > 1) { - logWarning(s"Storage level replication ${storageLevel.replication} is unnecessary when " + - s"write ahead log is enabled, change to replication 1") + logWarning(log"Storage level replication " + + log"${MDC(STORAGE_LEVEL_REPLICATION, storageLevel.replication)} is unnecessary when " + + log"write ahead log is enabled, change to replication 1") } StorageLevel(storageLevel.useDisk, storageLevel.useMemory, storageLevel.useOffHeap, false, 1) } if (storageLevel != effectiveStorageLevel) { - logWarning(s"User defined storage level $storageLevel is changed to effective storage level " + - s"$effectiveStorageLevel when write ahead log is enabled") + logWarning(log"User defined storage level ${MDC(STORAGE_LEVEL, storageLevel)} is changed to " + + log"effective storage level ${MDC(EFFECTIVE_STORAGE_LEVEL, effectiveStorageLevel)} when " + + log"write ahead log is enabled") } // Write ahead log manages diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala index 15f3464848646..7cc08b421f780 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala @@ -25,8 +25,8 @@ import scala.concurrent._ import scala.util.control.NonFatal import org.apache.spark.SparkConf -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ERROR, STREAM_ID} +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{DELAY, ERROR, MESSAGE, STREAM_ID} import org.apache.spark.storage.StreamBlockId import org.apache.spark.util.{ThreadUtils, Utils} @@ -145,10 +145,10 @@ private[streaming] abstract class ReceiverSupervisor( def startReceiver(): Unit = synchronized { try { if (onReceiverStart()) { - logInfo(s"Starting receiver $streamId") + logInfo(log"Starting receiver ${MDC(LogKeys.STREAM_ID, streamId)}") receiverState = Started receiver.onStart() - logInfo(s"Called receiver $streamId onStart") + logInfo(log"Called receiver ${MDC(LogKeys.STREAM_ID, streamId)} onStart") } else { // The driver refused us stop("Registered unsuccessfully because Driver refused to start receiver " + streamId, None) @@ -162,7 +162,8 @@ private[streaming] abstract class ReceiverSupervisor( /** Stop receiver */ def stopReceiver(message: String, error: Option[Throwable]): Unit = synchronized { try { - logInfo("Stopping receiver with message: " + message + ": " + error.getOrElse("")) + logInfo(log"Stopping receiver with message: ${MDC(LogKeys.MESSAGE, message)}: " + + log"${MDC(LogKeys.ERROR, error.getOrElse(""))}") receiverState match { case Initialized => logWarning("Skip stopping receiver because it has not yet stared") @@ -191,8 +192,8 @@ private[streaming] abstract class ReceiverSupervisor( Future { // This is a blocking action so we should use "futureExecutionContext" which is a cached // thread pool. 
- logWarning("Restarting receiver with delay " + delay + " ms: " + message, - error.orNull) + logWarning(log"Restarting receiver with delay ${MDC(DELAY, delay)} ms: " + + log"${MDC(MESSAGE, message)}", error.orNull) stopReceiver("Restarting receiver with delay " + delay + "ms: " + message, error) logDebug("Sleeping for " + delay) Thread.sleep(delay) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala index daaf7ed7eb2b6..aafa99bd5285d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala @@ -28,7 +28,8 @@ import com.google.common.base.Throwables import org.apache.hadoop.conf.Configuration import org.apache.spark.{SparkEnv, SparkException} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{ERROR, MESSAGE} import org.apache.spark.rpc.{RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.storage.StreamBlockId import org.apache.spark.streaming.Time @@ -84,7 +85,7 @@ private[streaming] class ReceiverSupervisorImpl( logDebug("Received delete old batch signal") cleanupOldBlocks(threshTime) case UpdateRateLimit(eps) => - logInfo(s"Received a new rate limit: $eps.") + logInfo(log"Received a new rate limit: ${MDC(LogKeys.RATE_LIMIT, eps)}.") registeredBlockGenerators.asScala.foreach { bg => bg.updateRate(eps) } @@ -169,7 +170,7 @@ private[streaming] class ReceiverSupervisorImpl( def reportError(message: String, error: Throwable): Unit = { val errorString = Option(error).map(Throwables.getStackTraceAsString).getOrElse("") trackerEndpoint.send(ReportError(streamId, message, errorString)) - logWarning("Reported error " + message + " - " + error) + logWarning(log"Reported error ${MDC(MESSAGE, message)} - ${MDC(ERROR, error)}") } override protected def onStart(): Unit = { @@ -194,10 +195,10 @@ private[streaming] class ReceiverSupervisorImpl( } override protected def onReceiverStop(message: String, error: Option[Throwable]): Unit = { - logInfo("Deregistering receiver " + streamId) + logInfo(log"Deregistering receiver ${MDC(LogKeys.STREAM_ID, streamId)}") val errorString = error.map(Throwables.getStackTraceAsString).getOrElse("") trackerEndpoint.askSync[Boolean](DeregisterReceiver(streamId, message, errorString)) - logInfo("Stopped receiver " + streamId) + logInfo(log"Stopped receiver ${MDC(LogKeys.STREAM_ID, streamId)}") } override def createBlockGenerator( diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala index 5aa2a9df3ba87..903cde8082db7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala @@ -21,7 +21,7 @@ package org.apache.spark.streaming.scheduler import scala.util.Random import org.apache.spark.{ExecutorAllocationClient, SparkConf} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.internal.config.DECOMMISSION_ENABLED import org.apache.spark.internal.config.Streaming._ import org.apache.spark.resource.ResourceProfile @@ -75,8 +75,10 @@ 
private[streaming] class ExecutorAllocationManager( def start(): Unit = { timer.start() - logInfo(s"ExecutorAllocationManager started with " + - s"ratios = [$scalingUpRatio, $scalingDownRatio] and interval = $scalingIntervalSecs sec") + logInfo(log"ExecutorAllocationManager started with ratios = " + + log"[${MDC(LogKeys.SCALING_UP_RATIO, scalingUpRatio)}, " + + log"${MDC(LogKeys.SCALING_DOWN_RATIO, scalingDownRatio)}] and interval = " + + log"${MDC(LogKeys.INTERVAL, scalingIntervalSecs)} sec") } def stop(): Unit = { @@ -89,11 +91,14 @@ private[streaming] class ExecutorAllocationManager( * batch statistics. */ private def manageAllocation(): Unit = synchronized { - logInfo(s"Managing executor allocation with ratios = [$scalingUpRatio, $scalingDownRatio]") + logInfo(log"Managing executor allocation with ratios = [" + + log"${MDC(LogKeys.SCALING_UP_RATIO, scalingUpRatio)}, " + + log"${MDC(LogKeys.SCALING_DOWN_RATIO, scalingDownRatio)}]") if (batchProcTimeCount > 0) { val averageBatchProcTime = batchProcTimeSum / batchProcTimeCount val ratio = averageBatchProcTime.toDouble / batchDurationMs - logInfo(s"Average: $averageBatchProcTime, ratio = $ratio" ) + logInfo(log"Average: ${MDC(LogKeys.AVG_BATCH_PROC_TIME, averageBatchProcTime)}, " + + log"ratio = ${MDC(LogKeys.RATIO, ratio)}") if (ratio >= scalingUpRatio) { logDebug("Requesting executors") val numNewExecutors = math.max(math.round(ratio).toInt, 1) @@ -119,7 +124,8 @@ private[streaming] class ExecutorAllocationManager( Map(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -> targetTotalExecutors), Map(ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID -> 0), Map.empty) - logInfo(s"Requested total $targetTotalExecutors executors") + logInfo(log"Requested total ${MDC(LogKeys.NUM_EXECUTORS, + targetTotalExecutors)} executors") } /** Kill an executor that is not running any receiver, if possible */ @@ -129,7 +135,9 @@ private[streaming] class ExecutorAllocationManager( if (allExecIds.nonEmpty && allExecIds.size > minNumExecutors) { val execIdsWithReceivers = receiverTracker.allocatedExecutors().values.flatten.toSeq - logInfo(s"Executors with receivers (${execIdsWithReceivers.size}): ${execIdsWithReceivers}") + logInfo(log"Executors with receivers (${MDC(LogKeys.NUM_EXECUTORS, + execIdsWithReceivers.size)}): " + + log"${MDC(LogKeys.EXECUTOR_IDS, execIdsWithReceivers)}") val removableExecIds = allExecIds.diff(execIdsWithReceivers) logDebug(s"Removable executors (${removableExecIds.size}): ${removableExecIds}") @@ -142,7 +150,7 @@ private[streaming] class ExecutorAllocationManager( } else { client.killExecutor(execIdToRemove) } - logInfo(s"Requested to kill executor $execIdToRemove") + logInfo(log"Requested to kill executor ${MDC(LogKeys.EXECUTOR_ID, execIdToRemove)}") } else { logInfo(s"No non-receiver executors to kill") } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala index 639ac6de4f5d3..bd9ea7b5a2688 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala @@ -20,7 +20,7 @@ package org.apache.spark.streaming.scheduler import scala.collection.mutable import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} import org.apache.spark.streaming.{StreamingContext, Time} /** @@ -82,7 +82,8 @@ 
private[streaming] class InputInfoTracker(ssc: StreamingContext) extends Logging /** Cleanup the tracked input information older than threshold batch time */ def cleanup(batchThreshTime: Time): Unit = synchronized { val timesToCleanup = batchTimeToInputInfos.keys.filter(_ < batchThreshTime) - logInfo(s"remove old batch metadata: ${timesToCleanup.mkString(" ")}") + logInfo(log"remove old batch metadata: " + + log"${MDC(LogKeys.DURATION, timesToCleanup.mkString(" "))}") batchTimeToInputInfos --= timesToCleanup } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala index 282946dd8ef4b..c0636af690382 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala @@ -21,7 +21,8 @@ import java.util.concurrent.TimeUnit import scala.util.{Failure, Success, Try} -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, MDC} +import org.apache.spark.internal.LogKeys.TIMEOUT import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Checkpoint, CheckpointWriter, StreamingConf, Time} import org.apache.spark.streaming.api.python.PythonDStream @@ -123,7 +124,8 @@ class JobGenerator(jobScheduler: JobScheduler) extends Logging { val diff = TimeUnit.NANOSECONDS.toMillis((System.nanoTime() - timeWhenStopStarted)) val timedOut = diff > stopTimeoutMs if (timedOut) { - logWarning("Timed out while stopping the job generator (timeout = " + stopTimeoutMs + ")") + logWarning(log"Timed out while stopping the job generator " + + log"(timeout = ${MDC(TIMEOUT, stopTimeoutMs)})") } timedOut } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala index bdb9103372410..7fb35a04be6da 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala @@ -27,8 +27,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.SparkConf -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.RECEIVED_BLOCK_INFO +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{RECEIVED_BLOCK_INFO, RECEIVED_BLOCK_TRACKER_LOG_EVENT} import org.apache.spark.network.util.JavaUtils import org.apache.spark.streaming.Time import org.apache.spark.streaming.util.{WriteAheadLog, WriteAheadLogUtils} @@ -127,7 +127,9 @@ private[streaming] class ReceivedBlockTracker( timeToAllocatedBlocks.put(batchTime, allocatedBlocks) lastAllocatedBatchTime = batchTime } else { - logInfo(s"Possibly processed batch $batchTime needs to be processed again in WAL recovery") + logInfo(log"Possibly processed batch ${MDC(LogKeys.BATCH_TIMESTAMP, + batchTime)} needs to be " + + log"processed again in WAL recovery") } } else { // This situation occurs when: @@ -137,7 +139,9 @@ private[streaming] class ReceivedBlockTracker( // 2. Slow checkpointing makes recovered batch time older than WAL recovered // lastAllocatedBatchTime. // This situation will only occurs in recovery time. 
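The ReceivedBlockTracker changes above sit in the WAL-recovery path, which rebuilds the tracker's in-memory state by replaying the events it previously wrote to the write-ahead log. The general shape of that replay, reduced to a self-contained sketch; the field types and the println bodies are stand-ins, only the three event names mirror the real ReceivedBlockTrackerLogEvent hierarchy:

sealed trait TrackerLogEvent
case class BlockAdditionEvent(blockId: String) extends TrackerLogEvent
case class BatchAllocationEvent(batchTime: Long, blockIds: Seq[String]) extends TrackerLogEvent
case class BatchCleanupEvent(batchTimes: Seq[Long]) extends TrackerLogEvent

// On driver restart, every recovered record is deserialized and folded back into memory.
def replay(events: Iterator[TrackerLogEvent]): Unit = events.foreach {
  case BlockAdditionEvent(id)       => println(s"re-queue unallocated block $id")
  case BatchAllocationEvent(t, ids) => println(s"re-allocate ${ids.size} blocks to batch $t")
  case BatchCleanupEvent(ts)        => println(s"drop metadata for batches ${ts.mkString(", ")}")
}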
- logInfo(s"Possibly processed batch $batchTime needs to be processed again in WAL recovery") + logInfo(log"Possibly processed batch ${MDC(LogKeys.BATCH_TIMESTAMP, + batchTime)} needs to be processed " + + log"again in WAL recovery") } } @@ -175,7 +179,7 @@ private[streaming] class ReceivedBlockTracker( def cleanupOldBatches(cleanupThreshTime: Time, waitForCompletion: Boolean): Unit = synchronized { require(cleanupThreshTime.milliseconds < clock.getTimeMillis()) val timesToCleanup = timeToAllocatedBlocks.keys.filter { _ < cleanupThreshTime }.toSeq - logInfo(s"Deleting batches: ${timesToCleanup.mkString(" ")}") + logInfo(log"Deleting batches: ${MDC(LogKeys.DURATION, timesToCleanup.mkString(" "))}") if (writeToLog(BatchCleanupEvent(timesToCleanup))) { timeToAllocatedBlocks --= timesToCleanup writeAheadLogOption.foreach(_.clean(cleanupThreshTime.milliseconds, waitForCompletion)) @@ -221,9 +225,10 @@ private[streaming] class ReceivedBlockTracker( } writeAheadLogOption.foreach { writeAheadLog => - logInfo(s"Recovering from write ahead logs in ${checkpointDirOption.get}") + logInfo(log"Recovering from write ahead logs in " + + log"${MDC(LogKeys.PATH, checkpointDirOption.get)}") writeAheadLog.readAll().asScala.foreach { byteBuffer => - logInfo("Recovering record " + byteBuffer) + logInfo(log"Recovering record ${MDC(LogKeys.BYTE_BUFFER, byteBuffer)}") Utils.deserialize[ReceivedBlockTrackerLogEvent]( JavaUtils.bufferToArray(byteBuffer), Thread.currentThread().getContextClassLoader) match { case BlockAdditionEvent(receivedBlockInfo) => @@ -247,7 +252,8 @@ private[streaming] class ReceivedBlockTracker( true } catch { case NonFatal(e) => - logWarning(s"Exception thrown while writing record: $record to the WriteAheadLog.", e) + logWarning(log"Exception thrown while writing record: " + + log"${MDC(RECEIVED_BLOCK_TRACKER_LOG_EVENT, record)} to the WriteAheadLog.", e) false } } else { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala index 81c6264234f44..a37ba04c10123 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala @@ -24,8 +24,8 @@ import scala.concurrent.ExecutionContext import scala.util.{Failure, Success} import org.apache.spark._ -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.{ERROR, STREAM_ID} +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{ERROR, MESSAGE, RECEIVER_ID, RECEIVER_IDS, STREAM_ID} import org.apache.spark.rdd.RDD import org.apache.spark.rpc._ import org.apache.spark.scheduler.{ExecutorCacheTaskLocation, TaskLocation} @@ -186,7 +186,8 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false // Check if all the receivers have been deregistered or not val receivers = endpoint.askSync[Seq[Int]](AllReceiverIds) if (receivers.nonEmpty) { - logWarning("Not all of the receivers have deregistered, " + receivers) + logWarning(log"Not all of the receivers have deregistered, " + + log"${MDC(RECEIVER_IDS, receivers)}") } else { logInfo("All of the receivers have deregistered successfully") } @@ -231,7 +232,8 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false // Signal the receivers to delete old block data if (WriteAheadLogUtils.enableReceiverLog(ssc.conf)) { - logInfo(s"Cleanup 
old received batch data: $cleanupThreshTime") + logInfo(log"Cleanup old received batch data: " + + log"${MDC(LogKeys.CLEANUP_LOCAL_DIRS, cleanupThreshTime)}") synchronized { if (isTrackerStarted) { endpoint.send(CleanupOldBlocks(cleanupThreshTime)) @@ -305,7 +307,8 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false endpoint = Some(receiverEndpoint)) receiverTrackingInfos.put(streamId, receiverTrackingInfo) listenerBus.post(StreamingListenerReceiverStarted(receiverTrackingInfo.toReceiverInfo)) - logInfo("Registered receiver for stream " + streamId + " from " + senderAddress) + logInfo(log"Registered receiver for stream ${MDC(LogKeys.STREAM_ID, streamId)} " + + log"from ${MDC(LogKeys.RPC_ADDRESS, senderAddress)}") true } } @@ -365,11 +368,12 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false receiverTrackingInfos(streamId) = newReceiverTrackingInfo listenerBus.post(StreamingListenerReceiverError(newReceiverTrackingInfo.toReceiverInfo)) val messageWithError = if (error != null && !error.isEmpty) { - s"$message - $error" + log"${MDC(MESSAGE, message)} - ${MDC(ERROR, error)}" } else { - s"$message" + log"${MDC(MESSAGE, message)}" } - logWarning(s"Error reported by receiver for stream $streamId: $messageWithError") + logWarning(log"Error reported by receiver for stream ${MDC(STREAM_ID, streamId)}: " + + messageWithError) } private def scheduleReceiver(receiverId: Int): Seq[TaskLocation] = { @@ -445,7 +449,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false runDummySparkJob() - logInfo("Starting " + receivers.length + " receivers") + logInfo(log"Starting ${MDC(LogKeys.NUM_RECEIVERS, receivers.length)} receivers") endpoint.send(StartAllReceivers(receivers.toImmutableArraySeq)) } @@ -623,7 +627,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false if (!shouldStartReceiver) { onReceiverJobFinish(receiverId) } else { - logInfo(s"Restarting Receiver $receiverId") + logInfo(log"Restarting Receiver ${MDC(LogKeys.STREAM_ID, receiverId)}") self.send(RestartReceiver(receiver)) } case Failure(e) => @@ -631,11 +635,11 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false onReceiverJobFinish(receiverId) } else { logError("Receiver has been stopped. Try to restart it.", e) - logInfo(s"Restarting Receiver $receiverId") + logInfo(log"Restarting Receiver ${MDC(LogKeys.STREAM_ID, receiverId)}") self.send(RestartReceiver(receiver)) } }(ThreadUtils.sameThread) - logInfo(s"Receiver ${receiver.streamId} started") + logInfo(log"Receiver ${MDC(LogKeys.STREAM_ID, receiver.streamId)} started") } override def onStop(): Unit = { @@ -650,7 +654,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false receiverJobExitLatch.countDown() receiverTrackingInfos.remove(receiverId).foreach { receiverTrackingInfo => if (receiverTrackingInfo.state == ReceiverState.ACTIVE) { - logWarning(s"Receiver $receiverId exited but didn't deregister") + logWarning(log"Receiver ${MDC(RECEIVER_ID, receiverId)} exited but didn't deregister") } } } @@ -658,7 +662,8 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false /** Send stop signal to the receivers. 
*/ private def stopReceivers(): Unit = { receiverTrackingInfos.values.flatMap(_.endpoint).foreach { _.send(StopReceiver) } - logInfo("Sent stop signal to all " + receiverTrackingInfos.size + " receivers") + logInfo(log"Sent stop signal to all " + + log"${MDC(LogKeys.NUM_RECEIVERS, receiverTrackingInfos.size)} receivers") } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimator.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimator.scala index dc02062b9eb44..1b05a6ac30cc4 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/rate/PIDRateEstimator.scala @@ -17,7 +17,7 @@ package org.apache.spark.streaming.scheduler.rate -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} /** * Implements a proportional-integral-derivative (PID) controller which acts on @@ -74,8 +74,11 @@ private[streaming] class PIDRateEstimator( minRate > 0, s"Minimum rate in PIDRateEstimator should be > 0") - logInfo(s"Created PIDRateEstimator with proportional = $proportional, integral = $integral, " + - s"derivative = $derivative, min rate = $minRate") + logInfo(log"Created PIDRateEstimator with proportional = " + + log"${MDC(LogKeys.PROPORTIONAL, proportional)}, integral = " + + log"${MDC(LogKeys.INTEGRAL, integral)}, derivative = " + + log"${MDC(LogKeys.DERIVATIVE, derivative)}, min rate = " + + log"${MDC(LogKeys.MIN_RATE, minRate)}") def compute( time: Long, // in milliseconds diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/BatchedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/BatchedWriteAheadLog.scala index a73cde1f99aa9..8befe53efffa7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/BatchedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/BatchedWriteAheadLog.scala @@ -29,7 +29,8 @@ import scala.jdk.CollectionConverters._ import scala.util.control.NonFatal import org.apache.spark.SparkConf -import org.apache.spark.internal.Logging +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.RECORDS import org.apache.spark.network.util.JavaUtils import org.apache.spark.util.{ThreadUtils, Utils} @@ -121,7 +122,8 @@ private[util] class BatchedWriteAheadLog(val wrappedLog: WriteAheadLog, conf: Sp * Stop the batched writer thread, fulfill promises with failures and close the wrapped WAL. 
*/ override def close(): Unit = { - logInfo(s"BatchedWriteAheadLog shutting down at time: ${System.currentTimeMillis()}.") + logInfo(log"BatchedWriteAheadLog shutting down at time: " + + log"${MDC(LogKeys.TIME, System.currentTimeMillis())}.") if (!active.getAndSet(false)) return batchedWriterThread.interrupt() batchedWriterThread.join() @@ -178,7 +180,7 @@ private[util] class BatchedWriteAheadLog(val wrappedLog: WriteAheadLog, conf: Sp logWarning("BatchedWriteAheadLog Writer queue interrupted.", e) buffer.foreach(_.promise.failure(e)) case NonFatal(e) => - logWarning(s"BatchedWriteAheadLog Writer failed to write $buffer", e) + logWarning(log"BatchedWriteAheadLog Writer failed to write ${MDC(RECORDS, buffer)}", e) buffer.foreach(_.promise.failure(e)) } finally { buffer.clear() diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index 3d93f045a5ec4..d90095c73785a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -31,8 +31,8 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.SparkConf -import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.RETRY_COUNT +import org.apache.spark.internal.{Logging, LogKeys, MDC} +import org.apache.spark.internal.LogKeys.{NUM_RETRY, WRITE_AHEAD_LOG_INFO} import org.apache.spark.util.{CompletionIterator, ThreadUtils} import org.apache.spark.util.ArrayImplicits._ @@ -107,7 +107,7 @@ private[streaming] class FileBasedWriteAheadLog( } } if (fileSegment == null) { - logError(log"Failed to write to write ahead log after ${MDC(RETRY_COUNT, failures)} failures") + logError(log"Failed to write to write ahead log after ${MDC(NUM_RETRY, failures)} failures") throw lastException } fileSegment @@ -137,7 +137,8 @@ private[streaming] class FileBasedWriteAheadLog( */ def readAll(): JIterator[ByteBuffer] = synchronized { val logFilesToRead = pastLogs.map{ _.path} ++ currentLogPath - logInfo("Reading from the logs:\n" + logFilesToRead.mkString("\n")) + logInfo(log"Reading from the logs:\n" + + log"${MDC(LogKeys.PATHS, logFilesToRead.mkString("\n"))}") def readFile(file: String): Iterator[ByteBuffer] = { logDebug(s"Creating log reader with $file") val reader = new FileBasedWriteAheadLogReader(file, hadoopConf) @@ -170,8 +171,11 @@ private[streaming] class FileBasedWriteAheadLog( pastLogs --= expiredLogs expiredLogs } - logInfo(s"Attempting to clear ${oldLogFiles.size} old log files in $logDirectory " + - s"older than $threshTime: ${oldLogFiles.map { _.path }.mkString("\n")}") + logInfo(log"Attempting to clear ${MDC(LogKeys.NUM_RECORDS_READ, oldLogFiles.size)} " + + log"old log files in " + + log"${MDC(LogKeys.PATH, logDirectory)} older than " + + log"${MDC(LogKeys.THRESHOLD, threshTime)}: " + + log"${MDC(LogKeys.FILES, oldLogFiles.map(_.path).mkString("\n"))}") def deleteFile(walInfo: LogInfo): Unit = { try { @@ -181,9 +185,11 @@ private[streaming] class FileBasedWriteAheadLog( logDebug(s"Cleared log file $walInfo") } catch { case ex: Exception => - logWarning(s"Error clearing write ahead log file $walInfo", ex) + logWarning(log"Error clearing write ahead log file " + + log"${MDC(WRITE_AHEAD_LOG_INFO, walInfo)}", ex) } - logInfo(s"Cleared log files in $logDirectory older than $threshTime") + logInfo(log"Cleared log files in 
${MDC(LogKeys.PATH, logDirectory)} older than " + + log"${MDC(LogKeys.THRESH_TIME, threshTime)}") } oldLogFiles.foreach { logInfo => if (!executionContext.isShutdown) { @@ -251,7 +257,9 @@ private[streaming] class FileBasedWriteAheadLog( fileSystem.listStatus(logDirectoryPath).map { _.getPath }.toImmutableArraySeq) pastLogs.clear() pastLogs ++= logFileInfo - logInfo(s"Recovered ${logFileInfo.size} write ahead log files from $logDirectory") + logInfo(log"Recovered ${MDC(LogKeys.NUM_FILES, logFileInfo.size)} " + + log"write ahead log files from " + + log"${MDC(LogKeys.PATH, logDirectory)}") logDebug(s"Recovered files are:\n${logFileInfo.map(_.path).mkString("\n")}") } } catch { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala b/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala index afe17936043a2..771e65ed40b51 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/MasterFailureTest.scala @@ -33,7 +33,7 @@ import org.apache.hadoop.fs.Path import org.scalatest.Assertions._ import org.apache.spark.internal.{Logging, MDC} -import org.apache.spark.internal.LogKey.PATH +import org.apache.spark.internal.LogKeys.PATH import org.apache.spark.streaming.dstream.DStream import org.apache.spark.util.Utils diff --git a/ui-test/package-lock.json b/ui-test/package-lock.json index 23ff8ede65159..ec870dfa4801c 100644 --- a/ui-test/package-lock.json +++ b/ui-test/package-lock.json @@ -1392,12 +1392,12 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -1911,9 +1911,9 @@ } }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "dependencies": { "to-regex-range": "^5.0.1"